add CDB based script (testing)

This commit is contained in:
Paul 2016-02-14 14:52:25 +01:00
parent 93d78e7b41
commit 080505a032
2 changed files with 23 additions and 18 deletions

View File

@ -2,10 +2,10 @@
Squid helper, written in Python, that handles squidguard blacklists Squid helper, written in Python, that handles squidguard blacklists
* Currently only supports domain blacklists (i.e. google.com, www.google.com, api.google.com, etc.) * Currently only supports domain blacklists (i.e. google.com, www.google.com, api.google.com, etc.)
* Blacklists specified in the config are loaded in RAM * Blacklists specified in the config are loaded in RAM or in a CDB backend using https://github.com/acg/python-cdb (a testing flavour is available)
* Usable as an external acl plugin of squid * Usable as an external acl plugin of squid
* Written because of poor development on squidguard and bad support of blacklist files using squid3 * Written because of poor development on squidguard and bad support of blacklist files using squid3
* Tested on Debian 8 / python 2.7.9 * Tested on Debian 8 / python 2.7.9 / squid 3.4.8
## Usage ## Usage
@ -34,14 +34,14 @@ blacklists = ["adult","malware"]
* Auto-fetcher using blacklists_fetch if blacklists are not already downloaded or stored on the squid machine * Auto-fetcher using blacklists_fetch if blacklists are not already downloaded or stored on the squid machine
* Compatibility with python3 only * Compatibility with python3 only
* Filters for regex urls * Filters for regex urls
* Reduce memory footprint * Reduce memory footprint (wip with CDB backend alternative)
* Code optimisation (wip) * Code optimisation (wip)
* Use of constant database (CDB) for on-disk store : https://github.com/acg/python-cdb * Object oriented programming
* Tests * Tests (wip)
* ... * ...
## DBs support ideas ## DBs support ideas
* High performances but heavy RAM usage when using dict() * High performance but heavy RAM usage when using dict()
* Sqlite3 tested, light memory footprint, but very slow * Sqlite3 tested, light memory footprint, but very slow
* CDB to be tested, but need for speed * CDB backend testing

View File

@ -24,25 +24,30 @@ def make_list(files):
blacklists.append([list_type,l]) blacklists.append([list_type,l])
return blacklists return blacklists
def make_db(blacklist_files,blacklists): def make_db(blacklist_files,config):
lib = dict() lib = []
for bl in blacklist_files: for bl in blacklist_files:
if(bl[0] in blacklists): bl_cdb_file = ("%s/%s.cdb" % (config.blacklists_dir,bl[0]))
cache = dict() bl_cdb_file_tmp = ("%s/%s.tmp" % (config.blacklists_dir,bl[0]))
f = open(bl[1], "r") if(bl[0] in config.blacklists):
for line in f: if not os.path.isfile(bl_cdb_file):
cache[line.strip("\n")] = True cdb_file = cdb.cdbmake(bl_cdb_file,bl_cdb_file_tmp)
lib[bl[0]] = cache cache = dict()
del cache f = open(bl[1], "r")
for line in f:
cdb_file.add(line.strip("\n"),"True")
cdb_file.finish()
lib.append(bl_cdb_file)
return lib return lib
def compare(outline,blacklist_cache): def compare(outline,blacklist_cache):
result = False result = False
for blacklist in blacklist_cache: for blacklist in blacklist_cache:
cdb_file = cdb.init(blacklist)
tmpline = outline tmpline = outline
while not result and tmpline != "": while not result and tmpline != "":
try: try:
result = blacklist_cache[blacklist][tmpline] result = cdb_file[tmpline]
pass pass
except KeyError: except KeyError:
pass pass
@ -56,7 +61,7 @@ def squid_response(response):
domain_files = [os.path.join(dp, f) for dp, dn, fn in os.walk(os.path.expanduser(config.blacklists_dir)) for f in fn if re.match(r"domains*", f)] domain_files = [os.path.join(dp, f) for dp, dn, fn in os.walk(os.path.expanduser(config.blacklists_dir)) for f in fn if re.match(r"domains*", f)]
blacklist_files = make_list(domain_files) blacklist_files = make_list(domain_files)
blacklist_cache = make_db(blacklist_files,config.blacklists) blacklist_cache = make_db(blacklist_files,config)
while True: while True:
line = sys.stdin.readline().strip() line = sys.stdin.readline().strip()