diff --git a/README.md b/README.md index 6c455c1..38df944 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # py-squid-blacklists Squid helper handling squidguard blacklists written in python -* Only supports domains blacklists actually (ie : google.com, www.google.com, api.google.com, etc.) +* Only supports domains blacklists actually (ie : google.com, www.google.com, mail.google.com, etc.) * In config specified blacklists are loaded in RAM or CDB backend using https://github.com/acg/python-cdb (testing flavour is available) * Usable as an external acl plugin of squid * Written because of poor developpement on squidguard and bad support of blacklists files using squid3 @@ -20,23 +20,25 @@ http_access deny urlblacklist config.py file must be include following statements ``` -blacklists_fetch = "http://dsi.ut-capitole.fr/blacklists/download/blacklists.tar.gz" -blacklists_dir = "/usr/local/py-squid-blacklists/blacklists/" -blacklists = ["adult","malware"] +url = "http://dsi.ut-capitole.fr/blacklists/download/blacklists.tar.gz" +base_dir = "/usr/local/py-squid-blacklists/blacklists/" +categories = ["adult","malware"] +db_backend = "ram" ``` -* blacklists_fetch : squidguard-like blacklists files, this variable is not already usable -* blacklists_dir : path containing blacklists files -* blacklists : blacklists to use for filtering +* url : squidguard-like blacklists files, this variable is not already usable +* categories : blacklists to use for filtering +* base_dir : path containing blacklists files +* db_backend : database flavour (ram|cdb) ## TODO -* Auto-fetcher using blacklists_fetch if blacklists are not already downloaded or stored on the squid machine +* Auto-fetcher using url if blacklists are not already downloaded or stored on the squid machine * Compatibility with python3 only * Filters for regex urls * Reduce memory footprint (wip with CDB backend alternative) * Code optimisation (wip) -* Object oriented programming +* Object oriented programming (wip) * Tests (wip) * ... diff --git a/config.py.sample b/config.py.sample index 6bb9d6d..984ca7e 100644 --- a/config.py.sample +++ b/config.py.sample @@ -1,3 +1,11 @@ -blacklists_fetch = "http://dsi.ut-capitole.fr/blacklists/download/blacklists.tar.gz" -blacklists_dir = "/usr/local/py-squid-blacklists/blacklists/" -blacklists = ["adult","malware"] +# url to retrieve blacklists +url = "http://dsi.ut-capitole.fr/blacklists/download/blacklists.tar.gz" + +# categories +categories = ["adult","malware"] + +# base directory for blacklists +base_dir = "/usr/local/py-squid-blacklists/blacklists/" + +# ram | cdb +db_backend = "cdb" \ No newline at end of file diff --git a/py-squid-blacklists-cdb.py b/py-squid-blacklists-cdb.py index 05c7350..2c4f94d 100755 --- a/py-squid-blacklists-cdb.py +++ b/py-squid-blacklists-cdb.py @@ -3,7 +3,6 @@ import sys import os import re -import urllib from urlparse import urlparse try: diff --git a/py-squid-blacklists.py b/py-squid-blacklists.py index e2826fb..3213220 100755 --- a/py-squid-blacklists.py +++ b/py-squid-blacklists.py @@ -3,7 +3,6 @@ import sys import os import re -import urllib from urlparse import urlparse try: @@ -11,62 +10,81 @@ try: except ImportError: print("Please create config.py using config.py.sample") exit() +try: + import cdb +except ImportError: + print("Please install python-cdb from pypi or via package manager") + exit() -def make_list(files): - blacklists = [] - for l in files: - splitlist = l.split("/") - list_type = splitlist[len(splitlist) - 2] - blacklists.append([list_type, l]) - return blacklists +class PySquidBlacklists: + def __init__(self, config): + self.db_backend = config.db_backend + self.blacklist_categories = config.categories + self.domain_files = [os.path.join(dp, f) for dp, dn, fn in os.walk(os.path.expanduser(config.base_dir)) for f in + fn if re.match(r"domains*", f)] + self.blacklist_files = self.make_list() + self.blacklist_cache = self.make_db() + + def make_list(self): + blacklists = [] + for l in self.domain_files: + splitlist = l.split("/") + list_type = splitlist[len(splitlist) - 2] + blacklists.append([list_type, l]) + return blacklists + + def make_db(self): + lib = dict() + for bls in self.blacklist_files: + if bls[0] in self.blacklist_categories: + cache = dict() + f = open(bls[1], "r") + for l in f: + cache[l.strip("\n")] = True + lib[bls[0]] = cache + del cache + return lib + + @property + def initialize(): + return True + + def compare(self, outline): + result = False + for blacklist in self.blacklist_cache: + tmpline = outline + while not result and tmpline != "": + try: + result = self.blacklist_cache[blacklist][tmpline] + pass + except KeyError: + pass + tmpline = tmpline.partition('.')[2] + return result + + @staticmethod + def response(r): + sys.stdout.write("%s\n" % r) + sys.stdout.flush() -def make_db(blacklist_files, config): - lib = dict() - for bl in blacklist_files: - if (bl[0] in config.blacklists): - cache = dict() - f = open(bl[1], "r") - for line in f: - cache[line.strip("\n")] = True - lib[bl[0]] = cache - del cache - return lib +class PySquidBlacklistsImporter: + def __init__(self, conf): + self.test = True + self.db = conf.db_backend -def compare(outline, blacklist_cache): - result = False - for blacklist in blacklist_cache: - tmpline = outline - while not result and tmpline != "": - try: - result = blacklist_cache[blacklist][tmpline] - pass - except KeyError: - pass - tmpline = tmpline.partition('.')[2] - return result - - -def squid_response(response): - sys.stdout.write("%s\n" % response) - sys.stdout.flush() - - -domain_files = [os.path.join(dp, f) for dp, dn, fn in os.walk(os.path.expanduser(config.blacklists_dir)) for f in fn if re.match(r"domains*", f)] - -blacklist_files = make_list(domain_files) -blacklist_cache = make_db(blacklist_files, config) - +bli = PySquidBlacklistsImporter(config) +bl = PySquidBlacklists(config) while True: try: - line = sys.stdin.readline().strip() - outline = urlparse(line).netloc - if line: - if compare(outline, blacklist_cache): - squid_response("OK") - else: - squid_response("ERR") + line = sys.stdin.readline().strip() + outline = urlparse(line).netloc + if line: + if bl.compare(outline): + bl.response("OK") + else: + bl.response("ERR") except KeyboardInterrupt: break