Only loads blacklists specified in config, updates on README

Paul 2016-02-14 12:23:43 +01:00
parent 7dbf74f814
commit 0ead24747b
2 changed files with 28 additions and 19 deletions

View File

@@ -2,7 +2,7 @@
 Squid helper handling squidguard blacklists written in python
 * Only supports domains blacklists actually (ie : google.com, www.google.com, api.google.com, etc.)
-* All specified blacklists are loaded in RAM
+* In config specified blacklists are loaded in RAM
 * Usable as an external acl plugin of squid
 * Written because of poor developpement on squidguard and bad support of blacklists files using squid3
 * Tested on Debian 8 / python 2.7.9
@@ -35,7 +35,13 @@ blacklists = ["adult","malware"]
 * Compatibility with python3 only
 * Filters for regex urls
 * Reduce memory footprint
-* Code optimisation
+* Code optimisation (wip)
 * Use of constant database (CDB) for on-disk store : https://github.com/acg/python-cdb
 * Tests
 * ...
+
+## DBs support ideas
+* High performances but heavy RAM usage when using dict()
+* Sqlite3 tested, light memory footprint, but very slow
+* CDB to be tested, but need for speed
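The new "DBs support ideas" list weighs the on-disk options against the current in-RAM dict. As a rough illustration of the CDB route mentioned in the roadmap, a minimal sketch using python-cdb could look like the following; the helper names, the one-`.cdb`-file-per-category layout and the output directory are assumptions for illustration, not code from this repository:

```python
# Sketch only: build one constant database per blacklist category, then
# query it from disk instead of holding every domain in a Python dict.
import cdb  # python-cdb, https://github.com/acg/python-cdb

def build_cdb(category, domains_file, out_dir="/var/lib/blacklists-cdb"):
    path = "%s/%s.cdb" % (out_dir, category)
    maker = cdb.cdbmake(path, path + ".tmp")
    for line in open(domains_file):
        maker.add(line.strip("\n"), "1")  # only key presence matters
    maker.finish()
    return path

def is_listed(cdb_path, domain):
    return cdb.init(cdb_path).get(domain) is not None
```

Lookups would then cost a disk/page-cache read per query instead of RAM proportional to the blacklist size, which is exactly the trade-off the list above describes.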

View File

@@ -3,16 +3,18 @@
 import sys
 import os
 import re
-import logging
 import urllib
 from urlparse import urlparse
 try:
-    from config import *
+    import config
 except ImportError:
     print("Please create config.py using config.py.sample")
     exit()
+try:
+    import cdb
+except ImportError:
+    print("Please install python-cdb from pypi or via package manager")
+    exit()
 
-domain_files = [os.path.join(dp, f) for dp, dn, fn in os.walk(os.path.expanduser(blacklists_dir)) for f in fn if re.match(r"domains*", f)]
-
 def make_list(files):
     blacklists = []
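With `from config import *` replaced by `import config`, the helper now reads exactly two names from the config module: `config.blacklists_dir` (walked for `domains` files) and `config.blacklists` (the categories to load). A `config.py` created from `config.py.sample` therefore only needs something like the sketch below; the directory value is a placeholder, while the category list mirrors the example visible in the README hunk header above:

```python
# Hypothetical config.py (create it from config.py.sample)
blacklists_dir = "~/blacklists"      # placeholder: tree with one sub-directory per category
blacklists = ["adult", "malware"]    # only these categories are loaded into RAM
```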
@@ -22,20 +24,21 @@ def make_list(files):
         blacklists.append([list_type,l])
     return blacklists
 
-def make_db(blacklist_files):
+def make_db(blacklist_files,blacklists):
     lib = dict()
-    for blacklist in blacklist_files:
-        cache = dict()
-        f = open(blacklist[1], "r")
-        for line in f:
-            cache[line.strip("\n")] = True
-        lib[blacklist[0]] = cache
-        del cache
+    for bl in blacklist_files:
+        if(bl[0] in blacklists):
+            cache = dict()
+            f = open(bl[1], "r")
+            for line in f:
+                cache[line.strip("\n")] = True
+            lib[bl[0]] = cache
+            del cache
     return lib
 
-def compare(outline,blacklist_cache,blacklists):
+def compare(outline,blacklist_cache):
     result = False
-    for blacklist in blacklists:
+    for blacklist in blacklist_cache:
         tmpline = outline
         while not result and tmpline != "":
             try:
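`make_db` now takes the configured category list and skips any blacklist file whose category is not in it, so the in-RAM store ends up as a dict of dicts keyed by category (e.g. `{"adult": {"badsite.example": True, ...}}`), and `compare` drops its third argument to iterate over the keys of that cache directly. The hunk is cut off inside the `while` loop, but the surrounding lines suggest the helper tests the requested host and then each parent domain until a list matches or nothing is left. A standalone sketch of that kind of suffix walk follows; the body of the `try` block is not shown in this diff, so the label-stripping step is an assumption:

```python
def compare(outline, blacklist_cache):
    # Walk www.api.example.com -> api.example.com -> example.com -> com
    # and report a hit if any form appears in any loaded blacklist.
    for blacklist in blacklist_cache:
        tmpline = outline
        while tmpline != "":
            if tmpline in blacklist_cache[blacklist]:
                return True
            try:
                tmpline = tmpline.split(".", 1)[1]  # drop the leftmost label
            except IndexError:
                tmpline = ""  # no dot left: give up on this list
    return False
```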
@@ -50,16 +53,16 @@ def squid_response(response):
     sys.stdout.write("%s\n" % response)
     sys.stdout.flush()
 
-blacklist_cache = []
+domain_files = [os.path.join(dp, f) for dp, dn, fn in os.walk(os.path.expanduser(config.blacklists_dir)) for f in fn if re.match(r"domains*", f)]
 blacklist_files = make_list(domain_files)
-blacklist_cache = make_db(blacklist_files)
+blacklist_cache = make_db(blacklist_files,config.blacklists)
 
 while True:
     line = sys.stdin.readline().strip()
     outline = urlparse(line).netloc
     if line:
-        if compare(outline,blacklist_cache,blacklists):
+        if compare(outline,blacklist_cache):
             squid_response("OK")
         else:
             squid_response("ERR")