Code refactoring
3 classes: Runner, Importer, Config. Improved speed using CDB. Several files renamed. ConfigParser object used for configuration.
This commit is contained in:
parent
b7299fdb17
commit
ffa099a059
3
.gitignore
vendored
3
.gitignore
vendored
@ -61,7 +61,6 @@ target/
|
|||||||
#Ipython Notebook
|
#Ipython Notebook
|
||||||
.ipynb_checkpoints
|
.ipynb_checkpoints
|
||||||
|
|
||||||
config.py
|
|
||||||
blacklists
|
blacklists
|
||||||
blacklists.tar.gz
|
blacklists.tar.gz
|
||||||
pysquidblacklists/
|
py-squid-blacklists.conf
|
18
README.md
18
README.md
@ -4,7 +4,7 @@ Squid helper handling squidguard blacklists written in python
|
|||||||
* Currently only supports domain blacklists (e.g. google.com, www.google.com, mail.google.com, etc.)
|
* Currently only supports domain blacklists (e.g. google.com, www.google.com, mail.google.com, etc.)
|
||||||
* Blacklists specified in the config are loaded into RAM or into a CDB backend using https://github.com/acg/python-cdb
|
* Blacklists specified in the config are loaded into RAM or into a CDB backend using https://github.com/acg/python-cdb
|
||||||
* Usable as an external acl plugin of squid
|
* Usable as an external acl plugin of squid
|
||||||
* Written because of poor developpement on squidguard and some issues using blacklists on squid3
|
* Written because of poor development on squidguard and some issues using blacklists on squid3
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
@ -17,27 +17,25 @@ acl urlblacklist external urlblacklist_lookup
|
|||||||
http_access deny urlblacklist
|
http_access deny urlblacklist
|
||||||
```
|
```
|
||||||
|
|
||||||
config.py file must be include following statements
|
The config file must include the following statements
|
||||||
```
|
```
|
||||||
url = "http://dsi.ut-capitole.fr/blacklists/download/blacklists.tar.gz"
|
url = http://dsi.ut-capitole.fr/blacklists/download/blacklists.tar.gz
|
||||||
base_dir = "/usr/local/py-squid-blacklists/blacklists/"
|
base_dir = /usr/local/py-squid-blacklists/
|
||||||
categories = ["adult","malware"]
|
categories = adult,malware
|
||||||
db_backend = "ram"
|
db_backend = cdb
|
||||||
```
|
```
|
||||||
|
|
||||||
* url : squidguard-like blacklists files, this variable is not usable yet
|
* url : squidguard-like blacklists files, this variable is not usable yet
|
||||||
|
* base_dir : root path containing blacklists files, metadata (update datetime)
|
||||||
* categories : blacklists to use for filtering
|
* categories : blacklists to use for filtering
|
||||||
* base_dir : path containing blacklists files
|
|
||||||
* db_backend : database flavour (ram|cdb)
|
* db_backend : database flavour (ram|cdb)
|
||||||
|
|
||||||
## TODO
|
## TODO
|
||||||
|
|
||||||
* Auto-fetcher using url if blacklists are not already downloaded or stored on the squid machine
|
* Auto-fetcher using url if blacklists are not already downloaded or stored on the squid machine (wip)
|
||||||
* Compatibility with python3 only
|
* Compatibility with python3 only
|
||||||
* Filters for regex urls
|
* Filters for regex urls
|
||||||
* Reduce memory footprint (wip with CDB backend alternative)
|
|
||||||
* Code optimisation (profiling) and cleaning (wip)
|
* Code optimisation (profiling) and cleaning (wip)
|
||||||
* Object oriented programming (wip)
|
|
||||||
* Tests (wip)
|
* Tests (wip)
|
||||||
* ...
|
* ...
|
||||||
|
|
||||||
|
@ -1,11 +0,0 @@
|
|||||||
# url to retrieve blacklists
|
|
||||||
url = "http://dsi.ut-capitole.fr/blacklists/download/blacklists.tar.gz"
|
|
||||||
|
|
||||||
# categories
|
|
||||||
categories = ["adult", "malware"]
|
|
||||||
|
|
||||||
# base directory for blacklists
|
|
||||||
base_dir = "/usr/local/py-squid-blacklists/blacklists/"
|
|
||||||
|
|
||||||
# ram | cdb
|
|
||||||
db_backend = "cdb"
|
|
39
py-sb-tool.py
Executable file
39
py-sb-tool.py
Executable file
@ -0,0 +1,39 @@
|
|||||||
|
#!/usr/bin/env python2.7
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import tarfile
|
||||||
|
import urllib
|
||||||
|
|
||||||
|
from pysquidblacklists import PySquidBlacklistsConfig
|
||||||
|
|
||||||
|
print("Parsing configuration file ...")
|
||||||
|
config = PySquidBlacklistsConfig()
|
||||||
|
config.get_config()
|
||||||
|
|
||||||
|
|
||||||
|
def download(url, path):
    """Retrieve the resource at *url* and store it in the local file *path*.

    :param url: location of the blacklist archive to fetch
    :param path: filename the downloaded data is written to
    """
    # urllib.URLopener only exists on Python 2.  On Python 3 the equivalent
    # one-shot helper is urllib.request.urlretrieve; on Python 2 the original
    # URLopener is kept so HTTP errors still raise instead of being saved.
    try:
        from urllib.request import urlretrieve as fetch
    except ImportError:  # Python 2
        import urllib
        fetch = urllib.URLopener().retrieve
    fetch(url, path)
|
||||||
|
|
||||||
|
|
||||||
|
def extract(base_dir, archive):
    """Unpack the tar *archive* into *base_dir*.

    Nothing is extracted when *base_dir* already exists as a directory, so
    an earlier extraction is never overwritten.

    :param base_dir: directory the archive members are extracted into
    :param archive: path of the tar archive to unpack
    """
    if os.path.isdir(base_dir):
        return
    # NOTE(review): extractall() trusts the member paths stored in the
    # archive; a hostile archive could write outside base_dir.  Confirm the
    # download source is trusted or filter members before extracting.
    # The context manager closes the archive even if extraction fails.
    with tarfile.open(archive) as bl_file:
        bl_file.extractall(base_dir)
|
||||||
|
|
||||||
|
|
||||||
|
def usage():
    """Print the command-line help of this tool."""
    print("tool.py import : import blacklists using config file")


# Command dispatch: "import" is the only supported command.
if len(sys.argv) > 1 and sys.argv[1] == "import":
    print("Retrieving %s, storing it to %s ..." % (config.url, config.archive))
    download(config.url, config.archive)
    print("Extracting blacklists to %s" % config.base_dir)
    extract(config.base_dir, config.archive)
else:
    # Shown both when no command is given and when the command is unknown.
    # usage() prints the help itself and returns None, so it must not be
    # wrapped in print() (that emitted an extra "None" line).
    usage()
|
12
py-squid-blacklists.conf.sample
Normal file
12
py-squid-blacklists.conf.sample
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
[main]
|
||||||
|
# url to retrieve blacklists
|
||||||
|
url = http://dsi.ut-capitole.fr/blacklists/download/blacklists.tar.gz
|
||||||
|
|
||||||
|
# categories
|
||||||
|
categories = adult,malware
|
||||||
|
|
||||||
|
# base directory for blacklist extraction
|
||||||
|
base_dir = /usr/local/py-squid-blacklists/
|
||||||
|
|
||||||
|
# ram | cdb
|
||||||
|
db_backend = cdb
|
@ -1,121 +1,9 @@
|
|||||||
#!/usr/bin/env python2.7
|
#!/usr/bin/env python2.7
|
||||||
|
from pysquidblacklists import *
|
||||||
|
|
||||||
import sys
|
config = PySquidBlacklistsConfig()
|
||||||
import os
|
config.get_config()
|
||||||
import re
|
|
||||||
from urlparse import urlparse
|
|
||||||
|
|
||||||
try:
|
|
||||||
import config
|
|
||||||
except ImportError:
|
|
||||||
print("Please create config.py using config.py.sample")
|
|
||||||
exit()
|
|
||||||
try:
|
|
||||||
import cdb
|
|
||||||
except ImportError:
|
|
||||||
print("Please install python-cdb from pypi or via package manager")
|
|
||||||
exit()
|
|
||||||
|
|
||||||
|
|
||||||
class PySquidBlacklists:
    """Answer blacklist lookups read line by line from standard input.

    Each input line is parsed as a URL; "OK" is written to standard output
    when the host or one of its parent domains is found in a loaded
    blacklist, "ERR" otherwise.
    """

    def __init__(self, config, bli):
        """Open the blacklist databases and start serving requests.

        :param config: object exposing ``base_dir`` and ``db_backend``
        :param bli: importer object whose ``cache`` holds the blacklists
            (a dict of per-category dicts for the "ram" backend, a list of
            CDB file paths for the "cdb" backend)

        The constructor ends by calling :meth:`loop`, which only stops by
        exiting the process.
        """
        self.base_dir = config.base_dir
        self.db_backend = config.db_backend
        self.cache = bli.cache
        self.cdb_cache = dict()
        if self.db_backend == "cdb":
            # One opened CDB handle per database file, keyed by its path.
            for blacklist in self.cache:
                self.cdb_cache[blacklist] = cdb.init(blacklist)
        self.loop()

    def domain_compare(self):
        """Look up ``self.outline`` and its parent domains in every blacklist.

        The leading label is removed after each miss, so
        ``mail.google.com`` is tried as ``mail.google.com``, ``google.com``
        and ``com``.

        :return: the stored (truthy) value of the first match, else False
        """
        if self.db_backend == "ram":
            tables = self.cache
        elif self.db_backend == "cdb":
            tables = self.cdb_cache
        else:
            return False
        for blacklist in self.cache:
            candidate = self.outline
            while candidate != "":
                try:
                    found = tables[blacklist][candidate]
                except KeyError:
                    found = False
                if found:
                    return found
                candidate = candidate.partition('.')[2]
        return False

    @staticmethod
    def _hostname(line):
        """Return the lower-cased host contained in *line*, or "".

        The port and any user information are dropped because blacklist
        entries are bare domain names.
        """
        try:
            parsed = urlparse(line)
            if not parsed.netloc:
                # NOTE(review): presumably the helper can also receive a bare
                # "host:port" (no scheme) -- confirm against the squid
                # external_acl_type format in use.
                parsed = urlparse("//" + line)
            return parsed.hostname or ""
        except ValueError:
            # Malformed authority (e.g. unbalanced IPv6 brackets).
            return ""

    def loop(self):
        """Serve requests until standard input reaches end of file.

        Exactly one response line is written per input line.  The process
        exits on end of file and on unrecoverable I/O errors.
        """
        import errno

        while True:
            try:
                raw = sys.stdin.readline()
                if raw == "":
                    # readline() returns "" only at end of file; a blank
                    # request line is "\n" and must not stop the helper.
                    sys.exit()
                self.outline = self._hostname(raw.strip())
                if self.outline and self.domain_compare():
                    self.response("OK")
                else:
                    self.response("ERR")
            except IOError as err:
                # Retry interrupted calls; any other I/O error (closed or
                # broken pipe) would repeat forever, so stop instead.
                if err.errno != errno.EINTR:
                    sys.exit()

    @staticmethod
    def response(r):
        """Write *r* followed by a newline to standard output and flush."""
        sys.stdout.write("%s\n" % r)
        sys.stdout.flush()
|
|
||||||
|
|
||||||
|
|
||||||
class PySquidBlacklistsImporter:
    """Load the configured blacklist categories into the chosen backend.

    After construction ``cache`` holds, for the "ram" backend, a dict
    mapping each selected category to a dict of its domains, and for the
    "cdb" backend a list of CDB file paths.  It stays None for any other
    backend value.
    """

    def __init__(self, conf):
        """Collect the domain files under ``conf.base_dir`` and import them.

        :param conf: object exposing ``db_backend``, ``categories`` and
            ``base_dir``
        """
        # Read the settings from the argument, not from a module global.
        self.db_backend = conf.db_backend
        self.categories = conf.categories
        self.base_dir = conf.base_dir
        # Every file below base_dir whose name starts with "domain"
        # (same set the original pattern r"domains*" matched).
        self.domain_files = [
            os.path.join(dirpath, filename)
            for dirpath, _dirnames, filenames
            in os.walk(os.path.expanduser(self.base_dir))
            for filename in filenames
            if filename.startswith("domain")
        ]
        self.blacklist_files = self.make_list()
        self.cache = None
        if self.db_backend == "ram":
            self.make_ram_db()
        elif self.db_backend == "cdb":
            self.make_cdb_db()

    def make_list(self):
        """Pair every domain file with the name of its parent directory.

        :return: list of ``[category, path]`` entries
        """
        return [
            [os.path.basename(os.path.dirname(path)), path]
            for path in self.domain_files
        ]

    def make_ram_db(self):
        """Read the selected categories into ``self.cache`` as dicts."""
        lib = dict()
        for category, path in self.blacklist_files:
            if category not in self.categories:
                continue
            # The context manager closes the file once it has been read.
            with open(path, "r") as f:
                lib[category] = dict((line.strip("\n"), True) for line in f)
        self.cache = lib

    def make_cdb_db(self):
        """Build missing CDB files and store their paths in ``self.cache``.

        An existing ``<category>.cdb`` file is reused as is; otherwise it is
        created from the category's domain file.
        """
        lib = []
        for category, path in self.blacklist_files:
            if category not in self.categories:
                continue
            bl_cdb_file = ("%s/%s.cdb" % (self.base_dir, category))
            bl_cdb_file_tmp = ("%s/%s.tmp" % (self.base_dir, category))
            if not os.path.isfile(bl_cdb_file):
                cdb_file = cdb.cdbmake(bl_cdb_file, bl_cdb_file_tmp)
                with open(path, "r") as f:
                    for line in f:
                        cdb_file.add(line.strip("\n"), "True")
                cdb_file.finish()
            # Register the database whether it was just built or already
            # present, and only once per category.
            if bl_cdb_file not in lib:
                lib.append(bl_cdb_file)
        self.cache = lib
|
|
||||||
|
|
||||||
|
|
||||||
bli = PySquidBlacklistsImporter(config)
|
bli = PySquidBlacklistsImporter(config)
|
||||||
bl = PySquidBlacklists(config, bli)
|
bl = PySquidBlacklistsRunner(config, bli)
|
||||||
|
bl.loop()
|
Loading…
Reference in New Issue
Block a user