Classes added (wip)

Paul 2016-02-20 14:46:12 +01:00
parent 43c6164e10
commit 58945f0fa4
4 changed files with 90 additions and 63 deletions

View File

@@ -1,7 +1,7 @@
 # py-squid-blacklists
 Squid helper handling squidguard blacklists, written in Python
-* Currently only supports domain blacklists (i.e. google.com, www.google.com, api.google.com, etc.)
+* Currently only supports domain blacklists (i.e. google.com, www.google.com, mail.google.com, etc.)
 * Blacklists specified in the config are loaded into a RAM or CDB backend using https://github.com/acg/python-cdb (a testing flavour is available)
 * Usable as an external ACL plugin for squid
 * Written because of poor development on squidguard and bad support of blacklist files in squid3
@@ -20,23 +20,25 @@ http_access deny urlblacklist
 The config.py file must include the following statements:
 ```
-blacklists_fetch = "http://dsi.ut-capitole.fr/blacklists/download/blacklists.tar.gz"
-blacklists_dir = "/usr/local/py-squid-blacklists/blacklists/"
-blacklists = ["adult","malware"]
+url = "http://dsi.ut-capitole.fr/blacklists/download/blacklists.tar.gz"
+base_dir = "/usr/local/py-squid-blacklists/blacklists/"
+categories = ["adult","malware"]
+db_backend = "ram"
 ```
-* blacklists_fetch : squidguard-like blacklist files; this variable is not usable yet
-* blacklists_dir : path containing the blacklist files
-* blacklists : blacklists to use for filtering
+* url : squidguard-like blacklist files; this variable is not usable yet
+* categories : blacklists to use for filtering
+* base_dir : path containing the blacklist files
+* db_backend : database flavour (ram|cdb)
 ## TODO
-* Auto-fetcher using blacklists_fetch if blacklists are not already downloaded or stored on the squid machine
+* Auto-fetcher using url if blacklists are not already downloaded or stored on the squid machine
 * Compatibility with python3 only
 * Filters for regex URLs
 * Reduce memory footprint (wip with CDB backend alternative)
 * Code optimisation (wip)
-* Object oriented programming
+* Object oriented programming (wip)
 * Tests (wip)
 * ...
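
The subdomain matching mentioned in the README above is a suffix walk: a hostname is checked as-is, then with its leftmost label stripped, until it either hits a blacklisted domain or runs out of labels. A minimal standalone sketch of that idea, using a plain dict as the RAM backend (names here are illustrative, not the helper's API):

```
# Illustration only: suffix-walk lookup against a dict-based blacklist.
blacklist = {"google.com": True, "doubleclick.net": True}

def is_blacklisted(host, cache):
    while host:
        if cache.get(host):                # exact entry, e.g. "google.com"
            return True
        host = host.partition('.')[2]      # mail.google.com -> google.com -> com
    return False

print(is_blacklisted("mail.google.com", blacklist))  # True
print(is_blacklisted("example.org", blacklist))      # False
```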

View File

@@ -1,3 +1,11 @@
-blacklists_fetch = "http://dsi.ut-capitole.fr/blacklists/download/blacklists.tar.gz"
-blacklists_dir = "/usr/local/py-squid-blacklists/blacklists/"
-blacklists = ["adult","malware"]
+# url to retrieve blacklists
+url = "http://dsi.ut-capitole.fr/blacklists/download/blacklists.tar.gz"
+# categories
+categories = ["adult","malware"]
+# base directory for blacklists
+base_dir = "/usr/local/py-squid-blacklists/blacklists/"
+# ram | cdb
+db_backend = "cdb"
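
The new db_backend = "cdb" value selects the constant-database backend mentioned in the README; its wiring is still wip in this commit. Assuming the python-cdb module linked there, a cdb-backed cache could look roughly like this hypothetical sketch (paths and function names are placeholders):

```
# Hypothetical sketch of a cdb-backed cache; not the helper's actual code.
import cdb

def build_cdb(domains_file, cdb_path):
    # compile one squidguard "domains" file into a constant database
    maker = cdb.cdbmake(cdb_path, cdb_path + ".tmp")
    for line in open(domains_file):
        maker.add(line.strip("\n"), "1")
    maker.finish()

def is_listed(cdb_path, host):
    # lookups hit the on-disk .cdb instead of a RAM dict
    return cdb.init(cdb_path).get(host) is not None

# build_cdb("blacklists/adult/domains", "blacklists/adult/domains.cdb")
# is_listed("blacklists/adult/domains.cdb", "example.com")
```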

View File

@@ -3,7 +3,6 @@
 import sys
 import os
 import re
-import urllib
 from urlparse import urlparse
 try:

View File

@@ -3,7 +3,6 @@
 import sys
 import os
 import re
-import urllib
 from urlparse import urlparse
 try:
@@ -11,62 +10,81 @@ try:
 except ImportError:
     print("Please create config.py using config.py.sample")
     exit()
+try:
+    import cdb
+except ImportError:
+    print("Please install python-cdb from pypi or via package manager")
+    exit()
 
-def make_list(files):
-    blacklists = []
-    for l in files:
-        splitlist = l.split("/")
-        list_type = splitlist[len(splitlist) - 2]
-        blacklists.append([list_type, l])
-    return blacklists
-
-def make_db(blacklist_files, config):
-    lib = dict()
-    for bl in blacklist_files:
-        if bl[0] in config.blacklists:
-            cache = dict()
-            f = open(bl[1], "r")
-            for line in f:
-                cache[line.strip("\n")] = True
-            lib[bl[0]] = cache
-            del cache
-    return lib
-
-def compare(outline, blacklist_cache):
-    result = False
-    for blacklist in blacklist_cache:
-        tmpline = outline
-        while not result and tmpline != "":
-            try:
-                result = blacklist_cache[blacklist][tmpline]
-            except KeyError:
-                pass
-            tmpline = tmpline.partition('.')[2]
-    return result
-
-def squid_response(response):
-    sys.stdout.write("%s\n" % response)
-    sys.stdout.flush()
-
-domain_files = [os.path.join(dp, f) for dp, dn, fn in os.walk(os.path.expanduser(config.blacklists_dir))
-                for f in fn if re.match(r"domains*", f)]
-blacklist_files = make_list(domain_files)
-blacklist_cache = make_db(blacklist_files, config)
+class PySquidBlacklists:
+    def __init__(self, config):
+        self.db_backend = config.db_backend
+        self.blacklist_categories = config.categories
+        # collect every squidguard "domains" file below base_dir
+        self.domain_files = [os.path.join(dp, f)
+                             for dp, dn, fn in os.walk(os.path.expanduser(config.base_dir))
+                             for f in fn if re.match(r"domains*", f)]
+        self.blacklist_files = self.make_list()
+        self.blacklist_cache = self.make_db()
+
+    def make_list(self):
+        # pair each domains file with its category (parent directory name)
+        blacklists = []
+        for l in self.domain_files:
+            splitlist = l.split("/")
+            list_type = splitlist[len(splitlist) - 2]
+            blacklists.append([list_type, l])
+        return blacklists
+
+    def make_db(self):
+        # load the selected categories into in-memory dicts keyed by domain
+        lib = dict()
+        for bls in self.blacklist_files:
+            if bls[0] in self.blacklist_categories:
+                cache = dict()
+                f = open(bls[1], "r")
+                for l in f:
+                    cache[l.strip("\n")] = True
+                lib[bls[0]] = cache
+                del cache
+        return lib
+
+    @property
+    def initialize(self):
+        return True
+
+    def compare(self, outline):
+        # walk the hostname suffixes (a.b.c -> b.c -> c) until a hit or exhaustion
+        result = False
+        for blacklist in self.blacklist_cache:
+            tmpline = outline
+            while not result and tmpline != "":
+                try:
+                    result = self.blacklist_cache[blacklist][tmpline]
+                except KeyError:
+                    pass
+                tmpline = tmpline.partition('.')[2]
+        return result
+
+    @staticmethod
+    def response(r):
+        sys.stdout.write("%s\n" % r)
+        sys.stdout.flush()
+
+class PySquidBlacklistsImporter:
+    # placeholder for the future blacklist fetcher/importer (wip)
+    def __init__(self, conf):
+        self.test = True
+        self.db = conf.db_backend
+
+bli = PySquidBlacklistsImporter(config)
+bl = PySquidBlacklists(config)
 
 while True:
     try:
         line = sys.stdin.readline().strip()
         outline = urlparse(line).netloc
         if line:
-            if compare(outline, blacklist_cache):
-                squid_response("OK")
+            if bl.compare(outline):
+                bl.response("OK")
             else:
-                squid_response("ERR")
+                bl.response("ERR")
     except KeyboardInterrupt:
         break
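
Outside squid, the refactored helper can be smoke-tested by driving its line-based OK/ERR protocol on stdin/stdout directly; in this sketch the helper path is a placeholder for wherever the script is actually installed:

```
# Drive the helper's OK/ERR protocol without squid (Python 2, like the helper).
# HELPER is a placeholder path; point it at the actual helper script.
import subprocess

HELPER = "/usr/local/py-squid-blacklists/py-squid-blacklists.py"

proc = subprocess.Popen(["python", HELPER], stdin=subprocess.PIPE,
                        stdout=subprocess.PIPE, universal_newlines=True)
for url in ["http://mail.google.com/", "http://example.org/"]:
    proc.stdin.write(url + "\n")
    proc.stdin.flush()
    print("%s -> %s" % (url, proc.stdout.readline().strip()))
proc.stdin.close()
proc.terminate()
```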