Code refactoring

3 classes : Runner, Importer, Config
Improved speed using CDB
Several files renamed
ConfigParser object used for configuration
This commit is contained in:
Paul 2016-02-29 10:02:01 +01:00
parent b7299fdb17
commit ffa099a059
6 changed files with 65 additions and 140 deletions

3
.gitignore vendored
View File

@ -61,7 +61,6 @@ target/
#Ipython Notebook
.ipynb_checkpoints
config.py
blacklists
blacklists.tar.gz
pysquidblacklists/
py-squid-blacklists.conf

View File

@ -4,7 +4,7 @@ Squid helper handling squidguard blacklists written in python
* Currently only supports domain blacklists (e.g. google.com, www.google.com, mail.google.com, etc.)
* Blacklists specified in the config are loaded into RAM or into a CDB backend using https://github.com/acg/python-cdb
* Usable as an external acl plugin of squid
* Written because of poor developpement on squidguard and some issues using blacklists on squid3
* Written because of poor development on squidguard and some issues using blacklists on squid3
## Usage
@ -17,27 +17,25 @@ acl urlblacklist external urlblacklist_lookup
http_access deny urlblacklist
```
config.py file must be include following statements
The config file must include the following statements
```
url = "http://dsi.ut-capitole.fr/blacklists/download/blacklists.tar.gz"
base_dir = "/usr/local/py-squid-blacklists/blacklists/"
categories = ["adult","malware"]
db_backend = "ram"
url = http://dsi.ut-capitole.fr/blacklists/download/blacklists.tar.gz
base_dir = /usr/local/py-squid-blacklists/
categories = adult,malware
db_backend = cdb
```
* url : squidguard-like blacklists files; this variable is not usable yet
* base_dir : root path containing blacklists files, metadata (update datetime)
* categories : blacklists to use for filtering
* base_dir : path containing blacklists files
* db_backend : database flavour (ram|cdb)
## TODO
* Auto-fetcher using url if blacklists are not already downloaded or stored on the squid machine
* Auto-fetcher using url if blacklists are not already downloaded or stored on the squid machine (wip)
* Compatibility with python3 only
* Filters for regex urls
* Reduce memory footprint (wip with CDB backend alternative)
* Code optimisation (profiling) and cleaning (wip)
* Object oriented programming (wip)
* Tests (wip)
* ...

View File

@ -1,11 +0,0 @@
# url to retrieve blacklists
url = "http://dsi.ut-capitole.fr/blacklists/download/blacklists.tar.gz"
# categories
categories = ["adult", "malware"]
# base directory for blacklists
base_dir = "/usr/local/py-squid-blacklists/blacklists/"
# ram | cdb
db_backend = "cdb"

39
py-sb-tool.py Executable file
View File

@ -0,0 +1,39 @@
#!/usr/bin/env python2.7
import os
import sys
import tarfile
import urllib
from pysquidblacklists import PySquidBlacklistsConfig
# Load the tool's settings once, at start-up, before any command is handled.
# The resulting `config` object is read further down for its `url`, `archive`
# and `base_dir` attributes.
print("Parsing configuration file ...")
config = PySquidBlacklistsConfig()
# NOTE(review): get_config() presumably reads the configuration file and
# populates the attributes on `config` -- confirm in pysquidblacklists.
config.get_config()
def download(url, path):
    """Fetch ``url`` and store the retrieved content in the file ``path``.

    :param url: location of the blacklist archive to retrieve.
    :param path: local file name the retrieved data is written to.
    """
    urllib.URLopener().retrieve(url, path)
def extract(base_dir, archive):
    """Unpack the tar archive ``archive`` into ``base_dir``.

    Extraction only happens when ``base_dir`` does not exist yet; an existing
    directory is left untouched and the call is a no-op.

    :param base_dir: destination directory for the archive members.
    :param archive: path of the tar archive to unpack.
    """
    if os.path.isdir(base_dir):
        return
    # NOTE(review): extractall() follows the member paths stored in the
    # archive; the archive is downloaded from a remote URL, so consider
    # validating member names before extraction.
    # The context manager guarantees the archive handle is closed, even when
    # extraction fails half-way.
    with tarfile.open(archive) as bl_file:
        bl_file.extractall(base_dir)
def usage():
    """Print the command-line help of this script.

    The text is written directly to standard output; nothing is returned.
    """
    # The script is shipped as py-sb-tool.py, so the help names that file.
    print("py-sb-tool.py import : import blacklists using config file")
# Command dispatch: the first command-line argument selects the action.
# "import" downloads the blacklist archive and unpacks it; a missing or
# unknown command shows the help text instead of silently doing nothing.
if len(sys.argv) > 1 and sys.argv[1] == "import":
    print("Retrieving %s, storing it to %s ..." % (config.url, config.archive))
    download(config.url, config.archive)
    print("Extracting blacklists to %s" % config.base_dir)
    extract(config.base_dir, config.archive)
else:
    # usage() prints the help itself and returns None; wrapping it in print()
    # would emit a stray "None" line.
    usage()

View File

@ -0,0 +1,12 @@
[main]
# url to retrieve blacklists
url = http://dsi.ut-capitole.fr/blacklists/download/blacklists.tar.gz
# categories
categories = adult,malware
# base directory for blacklist extraction
base_dir = /usr/local/py-squid-blacklists/
# ram | cdb
db_backend = cdb

View File

@ -1,121 +1,9 @@
#!/usr/bin/env python2.7
from pysquidblacklists import *
import sys
import os
import re
from urlparse import urlparse
try:
import config
except ImportError:
print("Please create config.py using config.py.sample")
exit()
try:
import cdb
except ImportError:
print("Please install python-cdb from pypi or via package manager")
exit()
class PySquidBlacklists:
    """Line-oriented helper answering blacklist lookups on stdin/stdout.

    Each input line is parsed as a URL; the helper writes ``OK`` when the
    URL's host (or one of its parent domains) is found in a loaded blacklist
    and ``ERR`` otherwise.
    """

    def __init__(self, config, bli):
        """Bind the blacklist databases and start serving requests.

        :param config: object exposing ``base_dir`` and ``db_backend``
            (``"ram"`` or ``"cdb"``).
        :param bli: object exposing ``cache``: for the ``ram`` backend a
            mapping of blacklist name to a dict of domains, for the ``cdb``
            backend a list of CDB file paths.

        The constructor enters :meth:`loop` and therefore only returns when
        the request loop terminates.
        """
        self.base_dir = config.base_dir
        self.db_backend = config.db_backend
        self.cache = bli.cache
        self.cdb_cache = dict()
        # Only the cdb backend needs handles opened; the ram backend is
        # queried straight from ``self.cache``.
        if self.db_backend == "cdb":
            for blacklist in self.cache:
                self.cdb_cache[blacklist] = cdb.init(blacklist)
        self.loop()

    def domain_compare(self):
        """Look up ``self.outline`` and its parent domains in every blacklist.

        The host is tried as-is, then with its left-most label removed, and
        so on (``a.b.c`` -> ``b.c`` -> ``c``) until a match is found or
        nothing is left.

        :return: the stored (truthy) value of the first match, else ``False``.
        """
        if self.db_backend == "ram":
            databases = self.cache
        elif self.db_backend == "cdb":
            databases = self.cdb_cache
        else:
            # Unknown backend: nothing can be looked up.
            return False
        for blacklist in self.cache:
            candidate = self.outline
            while candidate != "":
                try:
                    return databases[blacklist][candidate]
                except KeyError:
                    # Not listed at this level: retry with the parent domain.
                    candidate = candidate.partition('.')[2]
        return False

    def loop(self):
        """Serve lookups until standard input reaches end-of-file.

        One response line (``OK`` or ``ERR``) is written per input line.
        """
        while True:
            try:
                raw = sys.stdin.readline()
                if raw == "":
                    # readline() only returns an empty string at EOF; a blank
                    # input line still carries its newline and is answered
                    # with ERR below instead of terminating the helper.
                    sys.exit()
                line = raw.strip()
                # Use the bare, lower-cased host name: netloc would keep the
                # port and credentials ("host:8080"), which never match the
                # domain entries of a blacklist.
                self.outline = urlparse(line).hostname or ""
                if self.domain_compare():
                    self.response("OK")
                else:
                    self.response("ERR")
            except IOError:
                # Best effort: an I/O error on one request must not kill the
                # helper; carry on with the next line.
                pass

    @staticmethod
    def response(r):
        """Write the answer ``r`` on its own line and flush immediately."""
        sys.stdout.write("%s\n" % r)
        sys.stdout.flush()
class PySquidBlacklistsImporter:
    """Locate blacklist domain files and load them into the chosen backend.

    After construction ``cache`` holds, for the ``ram`` backend, a dict
    mapping each selected category to a dict of its domains, and for the
    ``cdb`` backend a list of CDB file paths (one per selected category
    file). It stays ``None`` for any other backend value.
    """

    def __init__(self, conf):
        """Scan ``conf.base_dir`` and build the blacklist database.

        :param conf: object exposing ``db_backend`` (``"ram"`` or ``"cdb"``),
            ``categories`` (container of category names to load) and
            ``base_dir`` (directory tree holding the blacklist files).
        """
        # Read the settings from the argument rather than from a module-level
        # ``config`` global, so the class works with whatever it is given.
        self.db_backend = conf.db_backend
        self.categories = conf.categories
        self.base_dir = conf.base_dir
        # NOTE(review): re.match() anchors only at the start, so this pattern
        # accepts every file name beginning with "domain" -- confirm that is
        # intended rather than an exact "domains" match.
        self.domain_files = [
            os.path.join(dp, f)
            for dp, dn, fn in os.walk(os.path.expanduser(self.base_dir))
            for f in fn
            if re.match(r"domains*", f)
        ]
        self.blacklist_files = self.make_list()
        self.cache = None
        if self.db_backend == "ram":
            self.make_ram_db()
        elif self.db_backend == "cdb":
            self.make_cdb_db()

    def make_list(self):
        """Pair every domain file with its category.

        The category is the name of the directory containing the file.

        :return: list of ``[category, file_path]`` entries.
        """
        return [
            [os.path.basename(os.path.dirname(path)), path]
            for path in self.domain_files
        ]

    def make_ram_db(self):
        """Load the selected categories into in-memory dictionaries.

        Stores ``{category: {domain: True}}`` in ``self.cache``.
        """
        lib = dict()
        for category, path in self.blacklist_files:
            if category not in self.categories:
                continue
            # ``with`` closes the source file once it has been read.
            with open(path, "r") as source:
                lib[category] = dict(
                    (line.strip("\n"), True) for line in source
                )
        self.cache = lib

    def make_cdb_db(self):
        """Build (when missing) one CDB file per selected category.

        An existing ``<base_dir>/<category>.cdb`` file is reused as-is.
        Stores the list of CDB file paths in ``self.cache``.
        """
        lib = []
        for category, path in self.blacklist_files:
            if category not in self.categories:
                continue
            bl_cdb_file = ("%s/%s.cdb" % (self.base_dir, category))
            bl_cdb_file_tmp = ("%s/%s.tmp" % (self.base_dir, category))
            if not os.path.isfile(bl_cdb_file):
                cdb_file = cdb.cdbmake(bl_cdb_file, bl_cdb_file_tmp)
                with open(path, "r") as source:
                    for line in source:
                        cdb_file.add(line.strip("\n"), "True")
                cdb_file.finish()
            lib.append(bl_cdb_file)
        self.cache = lib
# Script entry point: load the configuration, build the blacklist database,
# then serve lookups.
config = PySquidBlacklistsConfig()
# NOTE(review): get_config() presumably populates the settings on `config`
# from the configuration file -- confirm in pysquidblacklists.
config.get_config()
# The importer receives the configuration object as its only argument.
bli = PySquidBlacklistsImporter(config)
# NOTE(review): `bl` is bound twice in a row, so the second assignment
# replaces the first. This looks like the pre-refactoring line next to its
# replacement -- confirm which of the two is meant to remain.
bl = PySquidBlacklists(config, bli)
bl = PySquidBlacklistsRunner(config, bli)
# Start the request loop explicitly on the runner object.
bl.loop()