From c7a97fb81f7459b03ea74b4167b2c102515cd0c9 Mon Sep 17 00:00:00 2001 From: Paul Lecuq Date: Sat, 20 Feb 2016 16:29:59 +0100 Subject: [PATCH] merge cdb mode in main script py-squid-blacklists-cdb is useless --- README.md | 6 ++-- py-squid-blacklists.py | 64 ++++++++++++++++++++++++++++++------------ 2 files changed, 49 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 38df944..3d8e256 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ Squid helper handling squidguard blacklists written in python Add this configuration to squid.conf : ``` -external_acl_type urlblacklist_lookup ttl=5 %URI /usr/bin/python /usr/local/blacklists/py-squid-blacklists.py +external_acl_type urlblacklist_lookup ttl=5 %URI /usr/bin/python /usr/local/py-squid-blacklists/py-squid-blacklists.py ... acl urlblacklist external urlblacklist_lookup ... @@ -45,5 +45,5 @@ db_backend = "ram" ## DBs support ideas * High performance but heavy RAM usage when using dict() -* Sqlite3 tested, light memory footprint, but very slow -* CDB backend testing +* Sqlite3 tested, small memory footprint, but very slow +* CDB backend seems to be as fast as expected, with a very small footprint diff --git a/py-squid-blacklists.py b/py-squid-blacklists.py index 2f2221c..68f5447 100755 --- a/py-squid-blacklists.py +++ b/py-squid-blacklists.py @@ -25,7 +25,11 @@ class PySquidBlacklists: self.domain_files = [os.path.join(dp, f) for dp, dn, fn in os.walk(os.path.expanduser(self.base_dir)) for f in fn if re.match(r"domains*", f)] self.blacklist_files = self.make_list() - self.cache = self.make_db() + if self.db_backend == "ram": + self.cache = self.make_ram_db() + elif self.db_backend == "cdb": + self.cache = self.make_cdb_db() + self.loop() def make_list(self): blacklists = [] @@ -35,7 +39,7 @@ class PySquidBlacklists: blacklists.append([list_type, l]) return blacklists - def make_db(self): + def make_ram_db(self): lib = dict() for bls in self.blacklist_files: if self.db_backend == "ram": @@ 
-48,19 +52,57 @@ class PySquidBlacklists: del cache return lib - def compare(self, outline): + def make_cdb_db(self): + lib = [] + for bl in self.blacklist_files: + bl_cdb_file = ("%s/%s.cdb" % (self.base_dir, bl[0])) + bl_cdb_file_tmp = ("%s/%s.tmp" % (self.base_dir, bl[0])) + if (bl[0] in self.categories): + if not os.path.isfile(bl_cdb_file): + cdb_file = cdb.cdbmake(bl_cdb_file, bl_cdb_file_tmp) + f = open(bl[1], "r") + for line in f: + cdb_file.add(line.strip("\n"), "True") + cdb_file.finish() + lib.append(bl_cdb_file) + return lib + + def domain_compare(self, outline): + global cdb_file result = False for blacklist in self.cache: tmpline = outline + if self.db_backend == "ram": + pass + elif self.db_backend == "cdb": + cdb_file = cdb.init(blacklist) while not result and tmpline != "": try: - result = self.cache[blacklist][tmpline] + if self.db_backend == "ram": + result = self.cache[blacklist][tmpline] + elif self.db_backend == "cdb": + result = cdb_file[tmpline] pass except KeyError: pass tmpline = tmpline.partition('.')[2] return result + def loop(self): + while True: + try: + line = sys.stdin.readline().strip() + if line == "": + exit() + outline = urlparse(line).netloc + if line: + if self.domain_compare(outline): + self.response("OK") + else: + self.response("ERR") + except IOError: + pass + @staticmethod def response(r): sys.stdout.write("%s\n" % r) @@ -73,18 +115,4 @@ class PySquidBlacklistsImporter: self.db = conf.db_backend -bli = PySquidBlacklistsImporter(config) bl = PySquidBlacklists(config) -while True: - try: - line = sys.stdin.readline().strip() - if line == "": - exit() - outline = urlparse(line).netloc - if line: - if bl.compare(outline): - bl.response("OK") - else: - bl.response("ERR") - except IOError: - pass \ No newline at end of file