diff --git a/README.md b/README.md
index 1cf9ddd..6d39aa4 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 Squid helper handling squidguard blacklists written in python
 
 * Only supports domains blacklists actually (ie : google.com, www.google.com, api.google.com, etc.)
-* All specified blacklists are loaded in RAM
+* Only the blacklists specified in the config are loaded into RAM
 * Usable as an external acl plugin of squid
 * Written because of poor developpement on squidguard and bad support of blacklists files using squid3
 * Tested on Debian 8 / python 2.7.9
@@ -35,7 +35,13 @@ blacklists = ["adult","malware"]
 * Compatibility with python3 only
 * Filters for regex urls
 * Reduce memory footprint
-* Code optimisation
+* Code optimisation (WIP)
 * Use of constant database (CDB) for on-disk store : https://github.com/acg/python-cdb
 * Tests
 * ...
+
+## DB backend ideas
+
+* dict(): high performance but heavy RAM usage
+* sqlite3: tested; light memory footprint but very slow
+* CDB: still to be tested; speed is the requirement
diff --git a/py-squid-blacklists.py b/py-squid-blacklists.py
index 96667f9..c748d5d 100755
--- a/py-squid-blacklists.py
+++ b/py-squid-blacklists.py
@@ -3,16 +3,18 @@
 import sys
 import os
 import re
-import logging
 import urllib
 from urlparse import urlparse
 try:
-    from config import *
+    import config
 except ImportError:
     print("Please create config.py using config.py.sample")
     exit()
-
-domain_files = [os.path.join(dp, f) for dp, dn, fn in os.walk(os.path.expanduser(blacklists_dir)) for f in fn if re.match(r"domains*", f)]
+try:
+    import cdb
+except ImportError:
+    print("Please install python-cdb from PyPI or via your package manager")
+    exit()
+
 
 def make_list(files):
     blacklists = []
@@ -22,20 +24,21 @@
         blacklists.append([list_type,l])
     return blacklists
 
-def make_db(blacklist_files):
+def make_db(blacklist_files,blacklists):
     lib = dict()
-    for blacklist in blacklist_files:
-        cache = dict()
-        f = open(blacklist[1], "r")
-        for line in f:
-            cache[line.strip("\n")] = True
-        lib[blacklist[0]] = cache
-        del cache
+    for bl in blacklist_files:
+        if(bl[0] in blacklists):
+            cache = dict()
+            f = open(bl[1], "r")
+            for line in f:
+                cache[line.strip("\n")] = True
+            lib[bl[0]] = cache
+            del cache
     return lib
 
-def compare(outline,blacklist_cache,blacklists):
+def compare(outline,blacklist_cache):
     result = False
-    for blacklist in blacklists:
+    for blacklist in blacklist_cache:
         tmpline = outline
         while not result and tmpline != "":
             try:
@@ -50,16 +53,16 @@
     sys.stdout.write("%s\n" % response)
     sys.stdout.flush()
 
+domain_files = [os.path.join(dp, f) for dp, dn, fn in os.walk(os.path.expanduser(config.blacklists_dir)) for f in fn if re.match(r"domains*", f)]
 
-blacklist_cache = []
 blacklist_files = make_list(domain_files)
-blacklist_cache = make_db(blacklist_files)
+blacklist_cache = make_db(blacklist_files,config.blacklists)
 
 while True:
     line = sys.stdin.readline().strip()
     outline = urlparse(line).netloc
     if line:
-        if compare(outline,blacklist_cache,blacklists):
+        if compare(outline,blacklist_cache):
             squid_response("OK")
         else:
             squid_response("ERR")
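
Note on `compare()`: the hunk context cuts off before the body of the `try` block, so the lookup logic itself is not visible in the patch. The function walks up the domain suffix chain until it finds a hit in one of the in-RAM caches. Below is a minimal standalone sketch of that walk, assuming `blacklist_cache` maps category names to `{domain: True}` dicts as built by `make_db()`; the name `lookup` is illustrative, not part of the patched script:

```python
# Sketch of the suffix walk performed by compare(); lookup() is a
# hypothetical helper name, not part of the patched script.
def lookup(netloc, blacklist_cache):
    for category, cache in blacklist_cache.items():
        domain = netloc
        while domain:
            if domain in cache:
                return True  # "google.com" being listed also blocks "api.google.com"
            # drop the leftmost label: api.google.com -> google.com -> com -> ""
            _, _, domain = domain.partition(".")
    return False
```

Each membership test is an O(1) dict lookup, which is exactly the trade-off the new "DB backend ideas" section describes: fast, but every domain of every loaded list stays resident in RAM.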
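
On the CDB idea: this patch only imports `cdb` and exits if it is missing; nothing uses it yet. A sketch of how a squidguard `domains` file could be compiled once and then queried from disk, assuming the `cdbmake`/`init` API of the linked python-cdb package (file names are illustrative):

```python
import cdb

# One-off build step: compile a domains file (one domain per line) into a
# constant database. "adult.cdb" and its .tmp sibling are illustrative paths.
maker = cdb.cdbmake("adult.cdb", "adult.cdb.tmp")
with open("domains", "r") as f:
    for line in f:
        maker.add(line.strip(), "1")  # value is unused; key presence is the signal
maker.finish()

# Query side: the same suffix walk as compare(), but each probe hits the
# on-disk database instead of a Python dict.
db = cdb.init("adult.cdb")
blocked = db.get("www.google.com") is not None
```

Because lookups are served from the file rather than the heap, the resident footprint stays small regardless of list size; whether the per-request latency is acceptable is the open question the README flags.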
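
For the "Usable as an external acl plugin of squid" bullet, the helper speaks squid's external ACL helper protocol: one URL per line on stdin, `OK` or `ERR` per line on stdout. A hypothetical squid.conf wiring follows, with the ACL name and install path as placeholders; note the helper answers `OK` when the domain *is* blacklisted, so the matching ACL must be denied:

```
# Hypothetical wiring; helper path and ACL names are illustrative.
external_acl_type bl_lookup ttl=60 %URI /usr/local/bin/py-squid-blacklists.py
acl blacklisted external bl_lookup
http_access deny blacklisted
```

The same protocol makes the helper easy to smoke-test by hand:

```
$ echo "http://www.example.com/" | ./py-squid-blacklists.py
ERR
```

(`ERR` here assumes example.com is not present in any loaded blacklist.)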