Code refactoring
Importer class provisioned. Improved speed using CDB.
This commit is contained in:
parent
76442061b8
commit
b7299fdb17
@ -2,7 +2,7 @@
|
|||||||
url = "http://dsi.ut-capitole.fr/blacklists/download/blacklists.tar.gz"
|
url = "http://dsi.ut-capitole.fr/blacklists/download/blacklists.tar.gz"
|
||||||
|
|
||||||
# categories
|
# categories
|
||||||
categories = ["adult","malware"]
|
categories = ["adult", "malware"]
|
||||||
|
|
||||||
# base directory for blacklists
|
# base directory for blacklists
|
||||||
base_dir = "/usr/local/py-squid-blacklists/blacklists/"
|
base_dir = "/usr/local/py-squid-blacklists/blacklists/"
|
||||||
|
@ -18,70 +18,28 @@ except ImportError:
|
|||||||
|
|
||||||
|
|
||||||
class PySquidBlacklists:
|
class PySquidBlacklists:
|
||||||
def __init__(self, config):
|
def __init__(self, config, bli):
|
||||||
self.db_backend = config.db_backend
|
|
||||||
self.categories = config.categories
|
|
||||||
self.base_dir = config.base_dir
|
self.base_dir = config.base_dir
|
||||||
self.domain_files = [os.path.join(dp, f) for dp, dn, fn in os.walk(os.path.expanduser(self.base_dir)) for f in
|
self.db_backend = config.db_backend
|
||||||
fn if re.match(r"domains*", f)]
|
self.cache = bli.cache
|
||||||
self.blacklist_files = self.make_list()
|
self.cdb_cache = dict()
|
||||||
if self.db_backend == "ram":
|
|
||||||
self.cache = self.make_ram_db()
|
|
||||||
elif self.db_backend == "cdb":
|
|
||||||
self.cache = self.make_cdb_db()
|
|
||||||
self.loop()
|
|
||||||
|
|
||||||
def make_list(self):
|
|
||||||
blacklists = []
|
|
||||||
for l in self.domain_files:
|
|
||||||
splitlist = l.split("/")
|
|
||||||
list_type = splitlist[len(splitlist) - 2]
|
|
||||||
blacklists.append([list_type, l])
|
|
||||||
return blacklists
|
|
||||||
|
|
||||||
def make_ram_db(self):
|
|
||||||
lib = dict()
|
|
||||||
for bls in self.blacklist_files:
|
|
||||||
if self.db_backend == "ram":
|
|
||||||
if bls[0] in self.categories:
|
|
||||||
cache = dict()
|
|
||||||
f = open(bls[1], "r")
|
|
||||||
for l in f:
|
|
||||||
cache[l.strip("\n")] = True
|
|
||||||
lib[bls[0]] = cache
|
|
||||||
del cache
|
|
||||||
return lib
|
|
||||||
|
|
||||||
def make_cdb_db(self):
|
|
||||||
lib = []
|
|
||||||
for bl in self.blacklist_files:
|
|
||||||
bl_cdb_file = ("%s/%s.cdb" % (self.base_dir, bl[0]))
|
|
||||||
bl_cdb_file_tmp = ("%s/%s.tmp" % (self.base_dir, bl[0]))
|
|
||||||
if (bl[0] in self.categories):
|
|
||||||
if not os.path.isfile(bl_cdb_file):
|
|
||||||
cdb_file = cdb.cdbmake(bl_cdb_file, bl_cdb_file_tmp)
|
|
||||||
f = open(bl[1], "r")
|
|
||||||
for line in f:
|
|
||||||
cdb_file.add(line.strip("\n"), "True")
|
|
||||||
cdb_file.finish()
|
|
||||||
lib.append(bl_cdb_file)
|
|
||||||
return lib
|
|
||||||
|
|
||||||
def domain_compare(self, outline):
|
|
||||||
global cdb_file
|
|
||||||
result = False
|
|
||||||
for blacklist in self.cache:
|
for blacklist in self.cache:
|
||||||
tmpline = outline
|
|
||||||
if self.db_backend == "ram":
|
if self.db_backend == "ram":
|
||||||
pass
|
pass
|
||||||
elif self.db_backend == "cdb":
|
elif self.db_backend == "cdb":
|
||||||
cdb_file = cdb.init(blacklist)
|
self.cdb_cache[blacklist] = cdb.init(blacklist)
|
||||||
|
self.loop()
|
||||||
|
|
||||||
|
def domain_compare(self):
|
||||||
|
result = False
|
||||||
|
for blacklist in self.cache:
|
||||||
|
tmpline = self.outline
|
||||||
while not result and tmpline != "":
|
while not result and tmpline != "":
|
||||||
try:
|
try:
|
||||||
if self.db_backend == "ram":
|
if self.db_backend == "ram":
|
||||||
result = self.cache[blacklist][tmpline]
|
result = self.cache[blacklist][tmpline]
|
||||||
elif self.db_backend == "cdb":
|
elif self.db_backend == "cdb":
|
||||||
result = cdb_file[tmpline]
|
result = self.cdb_cache[blacklist][tmpline]
|
||||||
pass
|
pass
|
||||||
except KeyError:
|
except KeyError:
|
||||||
pass
|
pass
|
||||||
@ -94,9 +52,9 @@ class PySquidBlacklists:
|
|||||||
line = sys.stdin.readline().strip()
|
line = sys.stdin.readline().strip()
|
||||||
if line == "":
|
if line == "":
|
||||||
exit()
|
exit()
|
||||||
outline = urlparse(line).netloc
|
self.outline = urlparse(line).netloc
|
||||||
if line:
|
if line:
|
||||||
if self.domain_compare(outline):
|
if self.domain_compare():
|
||||||
self.response("OK")
|
self.response("OK")
|
||||||
else:
|
else:
|
||||||
self.response("ERR")
|
self.response("ERR")
|
||||||
@ -111,8 +69,53 @@ class PySquidBlacklists:
|
|||||||
|
|
||||||
class PySquidBlacklistsImporter:
|
class PySquidBlacklistsImporter:
|
||||||
def __init__(self, conf):
|
def __init__(self, conf):
|
||||||
self.test = True
|
self.db_backend = config.db_backend
|
||||||
self.db = conf.db_backend
|
self.categories = config.categories
|
||||||
|
self.base_dir = config.base_dir
|
||||||
|
self.domain_files = [os.path.join(dp, f) for dp, dn, fn in os.walk(os.path.expanduser(self.base_dir)) for f in
|
||||||
|
fn if re.match(r"domains*", f)]
|
||||||
|
self.blacklist_files = self.make_list()
|
||||||
|
self.cache = None
|
||||||
|
if self.db_backend == "ram":
|
||||||
|
self.make_ram_db()
|
||||||
|
elif self.db_backend == "cdb":
|
||||||
|
self.make_cdb_db()
|
||||||
|
|
||||||
|
def make_list(self):
|
||||||
|
blacklists = []
|
||||||
|
for l in self.domain_files:
|
||||||
|
splitlist = l.split("/")
|
||||||
|
list_type = splitlist[len(splitlist) - 2]
|
||||||
|
blacklists.append([list_type, l])
|
||||||
|
return blacklists
|
||||||
|
|
||||||
|
def make_ram_db(self):
|
||||||
|
lib = dict()
|
||||||
|
for bls in self.blacklist_files:
|
||||||
|
if bls[0] in self.categories:
|
||||||
|
blcache = dict()
|
||||||
|
f = open(bls[1], "r")
|
||||||
|
for l in f:
|
||||||
|
blcache[l.strip("\n")] = True
|
||||||
|
lib[bls[0]] = blcache
|
||||||
|
del blcache
|
||||||
|
self.cache = lib
|
||||||
|
|
||||||
|
def make_cdb_db(self):
|
||||||
|
lib = []
|
||||||
|
for bl in self.blacklist_files:
|
||||||
|
bl_cdb_file = ("%s/%s.cdb" % (self.base_dir, bl[0]))
|
||||||
|
bl_cdb_file_tmp = ("%s/%s.tmp" % (self.base_dir, bl[0]))
|
||||||
|
if bl[0] in self.categories:
|
||||||
|
if not os.path.isfile(bl_cdb_file):
|
||||||
|
cdb_file = cdb.cdbmake(bl_cdb_file, bl_cdb_file_tmp)
|
||||||
|
f = open(bl[1], "r")
|
||||||
|
for line in f:
|
||||||
|
cdb_file.add(line.strip("\n"), "True")
|
||||||
|
cdb_file.finish()
|
||||||
|
lib.append(bl_cdb_file)
|
||||||
|
self.cache = lib
|
||||||
|
|
||||||
|
|
||||||
bl = PySquidBlacklists(config)
|
bli = PySquidBlacklistsImporter(config)
|
||||||
|
bl = PySquidBlacklists(config, bli)
|
||||||
|
Loading…
Reference in New Issue
Block a user