add CDB based script (testing)
This commit is contained in:
parent
93d78e7b41
commit
080505a032
14
README.md
14
README.md
@ -2,10 +2,10 @@
|
||||
Squid helper handling squidguard blacklists written in python
|
||||
|
||||
* Currently only supports domain blacklists (i.e. google.com, www.google.com, api.google.com, etc.)
|
||||
* In config specified blacklists are loaded in RAM
|
||||
* Blacklists specified in the config are loaded into RAM or into a CDB backend using https://github.com/acg/python-cdb (a testing flavour is available)
|
||||
* Usable as an external acl plugin of squid
|
||||
* Written because of the poor development of squidguard and its bad support for blacklist files when using squid3
|
||||
* Tested on Debian 8 / python 2.7.9
|
||||
* Tested on Debian 8 / python 2.7.9 / squid 3.4.8
|
||||
|
||||
## Usage
|
||||
|
||||
@ -34,14 +34,14 @@ blacklists = ["adult","malware"]
|
||||
* Auto-fetcher using blacklists_fetch if blacklists are not already downloaded or stored on the squid machine
|
||||
* Compatibility with python3 only
|
||||
* Filters for regex urls
|
||||
* Reduce memory footprint
|
||||
* Reduce memory footprint (wip with CDB backend alternative)
|
||||
* Code optimisation (wip)
|
||||
* Use of constant database (CDB) for on-disk store : https://github.com/acg/python-cdb
|
||||
* Tests
|
||||
* Object oriented programming
|
||||
* Tests (wip)
|
||||
* ...
|
||||
|
||||
## DBs support ideas
|
||||
|
||||
* High performances but heavy RAM usage when using dict()
|
||||
* High performance but heavy RAM usage when using dict()
|
||||
* Sqlite3 tested, light memory footprint, but very slow
|
||||
* CDB to be tested, but need for speed
|
||||
* CDB backend testing
|
||||
|
@ -24,25 +24,30 @@ def make_list(files):
|
||||
blacklists.append([list_type,l])
|
||||
return blacklists
|
||||
|
||||
def make_db(blacklist_files,blacklists):
|
||||
lib = dict()
|
||||
def make_db(blacklist_files,config):
|
||||
lib = []
|
||||
for bl in blacklist_files:
|
||||
if(bl[0] in blacklists):
|
||||
bl_cdb_file = ("%s/%s.cdb" % (config.blacklists_dir,bl[0]))
|
||||
bl_cdb_file_tmp = ("%s/%s.tmp" % (config.blacklists_dir,bl[0]))
|
||||
if(bl[0] in config.blacklists):
|
||||
if not os.path.isfile(bl_cdb_file):
|
||||
cdb_file = cdb.cdbmake(bl_cdb_file,bl_cdb_file_tmp)
|
||||
cache = dict()
|
||||
f = open(bl[1], "r")
|
||||
for line in f:
|
||||
cache[line.strip("\n")] = True
|
||||
lib[bl[0]] = cache
|
||||
del cache
|
||||
cdb_file.add(line.strip("\n"),"True")
|
||||
cdb_file.finish()
|
||||
lib.append(bl_cdb_file)
|
||||
return lib
|
||||
|
||||
def compare(outline,blacklist_cache):
|
||||
result = False
|
||||
for blacklist in blacklist_cache:
|
||||
cdb_file = cdb.init(blacklist)
|
||||
tmpline = outline
|
||||
while not result and tmpline != "":
|
||||
try:
|
||||
result = blacklist_cache[blacklist][tmpline]
|
||||
result = cdb_file[tmpline]
|
||||
pass
|
||||
except KeyError:
|
||||
pass
|
||||
@ -56,7 +61,7 @@ def squid_response(response):
|
||||
domain_files = [os.path.join(dp, f) for dp, dn, fn in os.walk(os.path.expanduser(config.blacklists_dir)) for f in fn if re.match(r"domains*", f)]
|
||||
|
||||
blacklist_files = make_list(domain_files)
|
||||
blacklist_cache = make_db(blacklist_files,config.blacklists)
|
||||
blacklist_cache = make_db(blacklist_files,config)
|
||||
|
||||
while True:
|
||||
line = sys.stdin.readline().strip()
|
||||
|
Loading…
Reference in New Issue
Block a user