Code refactoring

Importer class provisioned
Improved speed using CDB
This commit is contained in:
Paul 2016-02-26 15:25:17 +01:00
parent 76442061b8
commit b7299fdb17
2 changed files with 66 additions and 63 deletions

View File

@ -2,7 +2,7 @@
url = "http://dsi.ut-capitole.fr/blacklists/download/blacklists.tar.gz" url = "http://dsi.ut-capitole.fr/blacklists/download/blacklists.tar.gz"
# categories # categories
categories = ["adult","malware"] categories = ["adult", "malware"]
# base directory for blacklists # base directory for blacklists
base_dir = "/usr/local/py-squid-blacklists/blacklists/" base_dir = "/usr/local/py-squid-blacklists/blacklists/"

View File

@ -18,70 +18,28 @@ except ImportError:
class PySquidBlacklists: class PySquidBlacklists:
def __init__(self, config): def __init__(self, config, bli):
self.db_backend = config.db_backend
self.categories = config.categories
self.base_dir = config.base_dir self.base_dir = config.base_dir
self.domain_files = [os.path.join(dp, f) for dp, dn, fn in os.walk(os.path.expanduser(self.base_dir)) for f in self.db_backend = config.db_backend
fn if re.match(r"domains*", f)] self.cache = bli.cache
self.blacklist_files = self.make_list() self.cdb_cache = dict()
if self.db_backend == "ram":
self.cache = self.make_ram_db()
elif self.db_backend == "cdb":
self.cache = self.make_cdb_db()
self.loop()
def make_list(self):
blacklists = []
for l in self.domain_files:
splitlist = l.split("/")
list_type = splitlist[len(splitlist) - 2]
blacklists.append([list_type, l])
return blacklists
def make_ram_db(self):
lib = dict()
for bls in self.blacklist_files:
if self.db_backend == "ram":
if bls[0] in self.categories:
cache = dict()
f = open(bls[1], "r")
for l in f:
cache[l.strip("\n")] = True
lib[bls[0]] = cache
del cache
return lib
def make_cdb_db(self):
lib = []
for bl in self.blacklist_files:
bl_cdb_file = ("%s/%s.cdb" % (self.base_dir, bl[0]))
bl_cdb_file_tmp = ("%s/%s.tmp" % (self.base_dir, bl[0]))
if (bl[0] in self.categories):
if not os.path.isfile(bl_cdb_file):
cdb_file = cdb.cdbmake(bl_cdb_file, bl_cdb_file_tmp)
f = open(bl[1], "r")
for line in f:
cdb_file.add(line.strip("\n"), "True")
cdb_file.finish()
lib.append(bl_cdb_file)
return lib
def domain_compare(self, outline):
global cdb_file
result = False
for blacklist in self.cache: for blacklist in self.cache:
tmpline = outline
if self.db_backend == "ram": if self.db_backend == "ram":
pass pass
elif self.db_backend == "cdb": elif self.db_backend == "cdb":
cdb_file = cdb.init(blacklist) self.cdb_cache[blacklist] = cdb.init(blacklist)
self.loop()
def domain_compare(self):
result = False
for blacklist in self.cache:
tmpline = self.outline
while not result and tmpline != "": while not result and tmpline != "":
try: try:
if self.db_backend == "ram": if self.db_backend == "ram":
result = self.cache[blacklist][tmpline] result = self.cache[blacklist][tmpline]
elif self.db_backend == "cdb": elif self.db_backend == "cdb":
result = cdb_file[tmpline] result = self.cdb_cache[blacklist][tmpline]
pass pass
except KeyError: except KeyError:
pass pass
@ -94,9 +52,9 @@ class PySquidBlacklists:
line = sys.stdin.readline().strip() line = sys.stdin.readline().strip()
if line == "": if line == "":
exit() exit()
outline = urlparse(line).netloc self.outline = urlparse(line).netloc
if line: if line:
if self.domain_compare(outline): if self.domain_compare():
self.response("OK") self.response("OK")
else: else:
self.response("ERR") self.response("ERR")
@ -111,8 +69,53 @@ class PySquidBlacklists:
class PySquidBlacklistsImporter: class PySquidBlacklistsImporter:
def __init__(self, conf): def __init__(self, conf):
self.test = True self.db_backend = config.db_backend
self.db = conf.db_backend self.categories = config.categories
self.base_dir = config.base_dir
self.domain_files = [os.path.join(dp, f) for dp, dn, fn in os.walk(os.path.expanduser(self.base_dir)) for f in
fn if re.match(r"domains*", f)]
self.blacklist_files = self.make_list()
self.cache = None
if self.db_backend == "ram":
self.make_ram_db()
elif self.db_backend == "cdb":
self.make_cdb_db()
def make_list(self):
blacklists = []
for l in self.domain_files:
splitlist = l.split("/")
list_type = splitlist[len(splitlist) - 2]
blacklists.append([list_type, l])
return blacklists
def make_ram_db(self):
lib = dict()
for bls in self.blacklist_files:
if bls[0] in self.categories:
blcache = dict()
f = open(bls[1], "r")
for l in f:
blcache[l.strip("\n")] = True
lib[bls[0]] = blcache
del blcache
self.cache = lib
def make_cdb_db(self):
lib = []
for bl in self.blacklist_files:
bl_cdb_file = ("%s/%s.cdb" % (self.base_dir, bl[0]))
bl_cdb_file_tmp = ("%s/%s.tmp" % (self.base_dir, bl[0]))
if bl[0] in self.categories:
if not os.path.isfile(bl_cdb_file):
cdb_file = cdb.cdbmake(bl_cdb_file, bl_cdb_file_tmp)
f = open(bl[1], "r")
for line in f:
cdb_file.add(line.strip("\n"), "True")
cdb_file.finish()
lib.append(bl_cdb_file)
self.cache = lib
bl = PySquidBlacklists(config) bli = PySquidBlacklistsImporter(config)
bl = PySquidBlacklists(config, bli)