reworked py-squid-blacklists for python3, renamed pybl
This commit is contained in:
parent
09390850c4
commit
b08aac8b01
4
.gitignore
vendored
4
.gitignore
vendored
@ -65,6 +65,4 @@ target/
|
|||||||
blacklists
|
blacklists
|
||||||
blacklists.tar.gz
|
blacklists.tar.gz
|
||||||
py-squid-blacklists.conf
|
py-squid-blacklists.conf
|
||||||
|
pybl.conf
|
||||||
# Others
|
|
||||||
.idea/
|
|
23
LICENSE
23
LICENSE
@ -1,23 +0,0 @@
|
|||||||
Copyright (c) 2016, Paul Lecuq
|
|
||||||
All rights reserved.
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright notice, this
|
|
||||||
list of conditions and the following disclaimer.
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce the above copyright notice,
|
|
||||||
this list of conditions and the following disclaimer in the documentation
|
|
||||||
and/or other materials provided with the distribution.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
||||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
||||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
||||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
||||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
||||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
59
README.md
59
README.md
@ -1,16 +1,19 @@
|
|||||||
# py-squid-blacklists
|
# pybl
|
||||||
|
|
||||||
Squid helper handling squidguard blacklists written in python
|
Squid helper handling squidguard blacklists written in python
|
||||||
|
|
||||||
* Only supports domains blacklists actually (ie : google.com, www.google.com, mail.google.com, etc.)
|
* Currently only supports domain blacklists (i.e. google.com, www.google.com, mail.google.com, etc.)
|
||||||
* In config specified blacklists are loaded in RAM or CDB backend using https://github.com/acg/python-cdb
|
* In config specified blacklists are loaded in memory or CDB backend using https://github.com/bbayles/python-pure-cdb
|
||||||
* Usable as an external acl plugin of squid
|
* Usable as an external acl plugin for squid 3
|
||||||
* Written because of poor development on squidguard and some issues using blacklists on squid3
|
* Written because of poor development on squidguard and some issues using blacklists on squid
|
||||||
|
* Python 3 supported as of 2020
|
||||||
|
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
Add this configuration to squid.conf :
|
Add this configuration to squid.conf :
|
||||||
```
|
```
|
||||||
external_acl_type urlblacklist_lookup ttl=5 %URI /usr/bin/python /usr/local/py-squid-blacklists/py-squid-blacklists.py
|
external_acl_type urlblacklist_lookup ttl=5 %DST /usr/bin/python3 /usr/local/pybl/pybl-cmd.py
|
||||||
...
|
...
|
||||||
acl urlblacklist external urlblacklist_lookup
|
acl urlblacklist external urlblacklist_lookup
|
||||||
...
|
...
|
||||||
@ -20,15 +23,16 @@ http_access deny urlblacklist
|
|||||||
Config file must be include following statements
|
Config file must include the following statements
|
||||||
```
|
```
|
||||||
url = http://dsi.ut-capitole.fr/blacklists/download/blacklists.tar.gz
|
url = http://dsi.ut-capitole.fr/blacklists/download/blacklists.tar.gz
|
||||||
base_dir = /usr/local/py-squid-blacklists/
|
basedir = /usr/local/pybl/
|
||||||
categories = adult,malware
|
# categories are comma-separated values
categories = adult,malware
|
||||||
db_backend = cdb
|
backend = cdb
|
||||||
```
|
```
|
||||||
|
|
||||||
* url : squidguard-like blacklists files, this variable is not already usable
|
* url : squidguard-like blacklists files, this variable is not usable yet
|
||||||
* base_dir : root path containing blacklists files, metadata (update datetime)
|
* basedir : root path containing blacklists files, metadata (update datetime)
|
||||||
* categories : blacklists to use for filtering
|
* categories : blacklists to use for filtering
|
||||||
* db_backend : database flavour (ram|cdb)
|
* backend : database flavour (ram|cdb)
|
||||||
|
|
||||||
|
|
||||||
## TODO
|
## TODO
|
||||||
|
|
||||||
@ -39,6 +43,7 @@ db_backend = cdb
|
|||||||
* Tests (wip)
|
* Tests (wip)
|
||||||
* ...
|
* ...
|
||||||
|
|
||||||
|
|
||||||
## DBs support ideas
|
## DBs support ideas
|
||||||
|
|
||||||
* High performance but heavy RAM usage when using dict()
|
* High performance but heavy RAM usage when using dict()
|
||||||
@ -46,7 +51,7 @@ db_backend = cdb
|
|||||||
* CDB backend seems to be as fast as attended, with a very small footprint
|
* CDB backend seems to be as fast as expected, with a very small footprint
|
||||||
|
|
||||||
|
|
||||||
## DBs Benchmarks
|
## DBs Benchmarks (2016)
|
||||||
|
|
||||||
RAM usage for one thread with categories ["adult","malware"]
|
RAM usage for one thread with categories ["adult","malware"]
|
||||||
|
|
||||||
@ -54,3 +59,35 @@ Debian 8 / python 2.7.9 / squid 3.4.8
|
|||||||
|
|
||||||
* ram : 90Mo
|
* ram : 90Mo
|
||||||
* cdb : 6Mo
|
* cdb : 6Mo
|
||||||
|
|
||||||
|
Ubuntu 20.04 / python 3.8.2 / squid 4.9
|
||||||
|
|
||||||
|
* ram : 249Mo
|
||||||
|
* cdb : 12Mo
|
||||||
|
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
Copyright (c) 2016, 2020 PaulBSD
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice, this
|
||||||
|
list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer in the documentation
|
||||||
|
and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||||
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
@ -1,39 +0,0 @@
|
|||||||
#!/usr/bin/env python2.7
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import tarfile
|
|
||||||
import urllib
|
|
||||||
|
|
||||||
from pysquidblacklists import PySquidBlacklistsConfig
|
|
||||||
|
|
||||||
print("Parsing configuration file ...")
|
|
||||||
config = PySquidBlacklistsConfig()
|
|
||||||
config.get_config()
|
|
||||||
|
|
||||||
|
|
||||||
def download(url, path):
|
|
||||||
bl_file = urllib.URLopener()
|
|
||||||
bl_file.retrieve(url, path)
|
|
||||||
|
|
||||||
|
|
||||||
def extract(base_dir, archive):
|
|
||||||
if not os.path.isdir(base_dir):
|
|
||||||
bl_file = tarfile.open(archive)
|
|
||||||
bl_file.extractall(base_dir)
|
|
||||||
else:
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
def usage():
|
|
||||||
print("tool.py import : import blacklists using config file")
|
|
||||||
|
|
||||||
|
|
||||||
if len(sys.argv) > 1:
|
|
||||||
if sys.argv[1] == "import":
|
|
||||||
print("Retrieving %s, storing it to %s ..." % (config.url, config.archive))
|
|
||||||
download(config.url, config.archive)
|
|
||||||
print("Extracting blacklists to %s" % config.base_dir)
|
|
||||||
extract(config.base_dir, config.archive)
|
|
||||||
else:
|
|
||||||
print(usage())
|
|
@ -1,9 +0,0 @@
|
|||||||
#!/usr/bin/env python2.7
|
|
||||||
from pysquidblacklists import *
|
|
||||||
|
|
||||||
config = PySquidBlacklistsConfig()
|
|
||||||
config.get_config(__file__)
|
|
||||||
|
|
||||||
bli = PySquidBlacklistsImporter(config)
|
|
||||||
bl = PySquidBlacklistsRunner(config, bli)
|
|
||||||
bl.loop()
|
|
12
pybl-cmd.py
Executable file
12
pybl-cmd.py
Executable file
@ -0,0 +1,12 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
from pybl import *
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Read the settings from the pybl.conf located next to this script,
    # falling back to /etc/pybl.conf (see PyBLConfig.getpath).
    settings = PyBLConfig()
    settings.getconfig(__file__)

    # Build the blacklist databases first, then hand them to the runner
    # which answers squid's lookups on stdin/stdout.
    importer = PyBLImporter(settings)
    runner = PyBLRunner(settings, importer)
    runner.loop()
|
48
pybl-tool.py
Executable file
48
pybl-tool.py
Executable file
@ -0,0 +1,48 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
from pybl import PyBLConfig
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import tarfile
|
||||||
|
import urllib.request
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
|
||||||
|
def download(url, path):
    """Download the archive found at ``url`` and store it at ``path``.

    Progress is reported on stdout. If the retrieval fails for any reason
    the error is printed and the process exits with status 1.
    """
    try:
        # Report the arguments actually used rather than the module-level
        # ``config`` object, which only exists when run as a script.
        print("Retrieving {}, storing it to {} ...".format(url, path))
        filename, _ = urllib.request.urlretrieve(url, filename=path)
        print("{} Successfully downloaded".format(filename))
    except Exception as exp:
        # Top-level boundary of a command line tool: report and stop.
        print(exp)
        sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
def folder_exists(basedir):
    """Return True when ``basedir`` is an existing directory.

    The "already exists" notice is printed only when the directory is
    really there; nothing is printed otherwise.
    """
    exists = os.path.isdir(basedir)
    if exists:
        print("Destination folder {} already exists".format(basedir))
    return exists
|
||||||
|
|
||||||
|
|
||||||
|
def extract(basedir, archive):
    """Extract the tar ``archive`` into the ``basedir`` directory."""
    print("Extracting blacklists to {} ...".format(basedir))
    # NOTE(review): extractall() trusts the member paths of a downloaded
    # archive; consider validating members or using an extraction filter.
    with tarfile.open(archive) as bl_file:
        bl_file.extractall(basedir)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Action')
    # Plain optional argument (no nargs=1): both the default and a value
    # given on the command line arrive as a string. With nargs=1 the default
    # stayed a string while user input became a list, so indexing [0]
    # returned only the first character of the default.
    parser.add_argument('--config', metavar='config', type=str, default='pybl.conf', help='path to config file')
    parser.add_argument('--force', action="store_true", help='force download and extract archive')
    parser.add_argument('action', metavar='action', type=str, help='action blacklists using config file', choices=["import"])
    args = parser.parse_args()

    if args.action == "import":
        config = PyBLConfig()
        print("Parsing configuration file ...")
        config.getconfig(args.config)
        exists = folder_exists(config.basedir)
        # Skip the download when the destination is already populated,
        # unless --force was requested.
        if not exists or args.force:
            download(config.url, config.archive)
            extract(config.basedir, config.archive)
|
@ -6,7 +6,7 @@ url = http://dsi.ut-capitole.fr/blacklists/download/blacklists.tar.gz
|
|||||||
categories = adult,malware
|
categories = adult,malware
|
||||||
|
|
||||||
# base directory for blacklist extraction
|
# base directory for blacklist extraction
|
||||||
base_dir = /usr/local/py-squid-blacklists/
|
basedir = /usr/local/py-squid-blacklists/
|
||||||
|
|
||||||
# ram | cdb
|
# ram | cdb
|
||||||
db_backend = cdb
|
backend = cdb
|
47
pybl/PyBLConfig.py
Normal file
47
pybl/PyBLConfig.py
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
import configparser
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
class PyBLConfig:
    """Locate, read and expose the settings stored in ``pybl.conf``."""

    def __init__(self):
        """Initialise every setting to an empty placeholder."""
        self.default_filename = "pybl.conf"
        self.default_config_path = "/etc/{}".format(self.default_filename)
        self.url = None
        self.filename = None
        self.basedir = ""
        self.blacklistsdir = ""
        self.archive = ""
        self.backend = ""
        self.categories = None
        self.config = configparser.RawConfigParser()
        self.config_path = None

    def getconfig(self, pwd):
        """Read the ``[main]`` section of the configuration file.

        ``pwd`` is a path whose directory is searched for the configuration
        file (see ``getpath``). After the call ``url``, ``filename``,
        ``basedir``, ``blacklistsdir``, ``archive``, ``backend`` and
        ``categories`` are populated.
        """
        self.getpath(pwd)
        self.config.read(self.config_path)
        self.url = str(self.config.get("main", "url"))
        # The archive keeps the name of the last URL path component.
        self.filename = self.url.split("/").pop()
        self.basedir = str(self.config.get("main", "basedir"))
        # os.path.join avoids a doubled slash when basedir ends with "/".
        self.blacklistsdir = os.path.join(self.basedir, "blacklists")
        self.archive = "{}/{}".format("/tmp", self.filename)
        self.backend = str(self.config.get("main", "backend"))
        # Tolerate "adult, malware" and trailing commas: surrounding
        # whitespace is stripped and empty entries are dropped.
        raw_categories = self.config.get("main", "categories")
        self.categories = [
            cat.strip() for cat in raw_categories.split(",") if cat.strip()
        ]

    def setconfig(self, section, attr, value=None):
        """Set option ``attr`` of ``section`` to ``value`` in the parser.

        ``value`` defaults to ``None``, which is what the parser received
        before this parameter existed.
        """
        self.config.set(section, attr, value)

    def getpath(self, pwd):
        """Select the configuration file to use and store it in ``config_path``.

        The file named ``pybl.conf`` in the directory of ``pwd`` is preferred,
        then ``/etc/pybl.conf``. The process exits with an error message when
        neither exists.
        """
        config_path = "{}/{}".format(os.path.dirname(os.path.abspath(pwd)), self.default_filename)

        if os.path.exists(config_path):
            self.config_path = config_path
        elif os.path.exists(self.default_config_path):
            self.config_path = self.default_config_path
        else:
            sys.exit("No config file available at common paths (current dir or /etc). Must initialize it")
|
71
pybl/PyBLImporter.py
Normal file
71
pybl/PyBLImporter.py
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
import os
|
||||||
|
import re
|
||||||
|
import cdblib.compat as cdb
|
||||||
|
|
||||||
|
|
||||||
|
class PyBLImporter:
    """Build the lookup databases from the extracted blacklist files."""

    def __init__(self, config):
        """Collect the ``domains`` files and build the configured backend.

        ``config`` must expose ``backend``, ``categories``, ``basedir`` and
        ``blacklistsdir``. The process exits with an error message when the
        blacklists directory does not exist.
        """
        self.backend = config.backend
        self.categories = config.categories
        self.basedir = config.basedir
        self.blacklistsdir = config.blacklistsdir

        # Check the directory that is actually walked: testing basedir alone
        # let a missing blacklists folder produce an empty database silently.
        root = os.path.expanduser(self.blacklistsdir)
        if not os.path.isdir(root):
            raise SystemExit("blacklistsdir doesn't exists. Please update using pybl-tool.py")
        self.domain_files = [
            os.path.join(dirpath, name)
            for dirpath, _dirnames, names in os.walk(root)
            for name in names
            if name == "domains"
        ]

        self.blacklist_files = self.makelists()
        self.cache = None
        if self.backend == "ram":
            self.makeram()
        elif self.backend == "cdb":
            self.makecdb()

    def makelists(self):
        """Return ``[category, path]`` pairs, one per ``domains`` file.

        The category is the name of the directory holding the file.
        """
        blacklists = []
        for path in self.domain_files:
            parts = path.split("/")
            blacklists.append([parts[len(parts) - 2], path])
        return blacklists

    def makeram(self):
        """Make dict based in-memory database.

        ``cache`` becomes ``{category: {domain: True}}`` for every selected
        category.
        """
        lib = dict()
        for cat, path in self.blacklist_files:
            if cat in self.categories:
                # The context manager closes the file once it is loaded.
                with open(path, 'r') as domains:
                    lib[cat] = {line.strip("\n"): True for line in domains}
        self.cache = lib

    def makecdb(self):
        """Make CDB database.

        ``cache`` becomes the list of ``.cdb`` file paths, one per selected
        category. An existing ``.cdb`` file is reused as is.
        """
        lib = []
        for cat, path in self.blacklist_files:
            if cat not in self.categories:
                continue
            bl_cdb_file = "{}/{}.cdb".format(self.basedir, cat)
            bl_cdb_file_tmp = "{}/{}.tmp".format(self.basedir, cat)
            if not os.path.isfile(bl_cdb_file):
                cdb_file = cdb.cdbmake(bl_cdb_file, bl_cdb_file_tmp)
                with open(path, "r") as domains:
                    for line in domains:
                        cdb_file.add(line.strip("\n"), "True")
                cdb_file.finish()
            lib.append(bl_cdb_file)
        self.cache = lib

    def makepickle(self):
        """Support for key value based PickleDB, not yet implemented"""
        lib = []
        return lib
|
72
pybl/PyBLRunner.py
Normal file
72
pybl/PyBLRunner.py
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import logging
|
||||||
|
import logging.handlers
|
||||||
|
import cdblib.compat as cdb
|
||||||
|
|
||||||
|
|
||||||
|
class PyBLRunner:
    """Answer squid external ACL lookups against the loaded blacklists."""

    def __init__(self, config, bli):
        """Open the databases built by the importer ``bli`` and start serving.

        ``config`` provides ``basedir`` and ``backend``; ``bli.cache`` holds
        either the in-memory dicts (``ram``) or the list of ``.cdb`` paths
        (``cdb``).
        """
        self.basedir = config.basedir
        self.backend = config.backend
        self.cache = bli.cache
        self.cdb_cache = dict()
        if self.backend == "cdb":
            for blacklist in self.cache:
                self.cdb_cache[blacklist] = cdb.init(blacklist)
        # NOTE(review): the constructor enters the serving loop itself, so it
        # only returns by process exit; kept for backward compatibility.
        self.loop()

    def domaincompare(self, inputstring):
        """Return a truthy value when ``inputstring`` or a parent domain is listed.

        The name is looked up in every blacklist, then its leftmost label is
        removed and the lookup repeated until a match is found or nothing is
        left.
        """
        result = False
        for blacklist in self.cache:
            tmpline = inputstring
            while not result and tmpline != "":
                try:
                    if self.backend == "ram":
                        result = self.cache[blacklist][tmpline]
                    elif self.backend == "cdb":
                        result = self.cdb_cache[blacklist][tmpline]
                except KeyError:
                    # Not listed at this level: try the parent domain.
                    pass
                tmpline = tmpline.partition('.')[2]
        return result

    def getfqdn(self, url):
        """Return the part of the input line found before the first ``" -"``."""
        return url.rsplit(" -")[0]

    @staticmethod
    def _getlogger():
        """Return the syslog logger, attaching its handler only once."""
        logger = logging.getLogger('MyLogger')
        logger.setLevel(logging.DEBUG)
        # Adding a handler on every request leaked sockets and multiplied
        # each log record; attach it only when none is present yet.
        if not logger.handlers:
            logger.addHandler(logging.handlers.SysLogHandler(address='/dev/log'))
        return logger

    def loop(self):
        """Serve lookups read from stdin until an empty line or end of input.

        Each request is logged to syslog, then answered with ``OK`` when the
        name is blacklisted and ``ERR`` otherwise.
        """
        my_logger = self._getlogger()
        while True:
            try:
                line = sys.stdin.readline().strip()
                fqdn = self.getfqdn(line)
                my_logger.critical("|{}|".format(fqdn))
                if line == "":
                    # readline() returns "" at end of input.
                    sys.exit()
                if self.domaincompare(fqdn):
                    self.response("OK log='{} OK'".format(fqdn))
                else:
                    self.response("ERR log='{} ERR'".format(fqdn))
            except IOError:
                # Best effort: an I/O error on one request must not stop
                # the helper.
                pass

    @staticmethod
    def response(r):
        """Write the answer ``r`` on stdout and flush it immediately."""
        sys.stdout.write("{}\n".format(r))
        sys.stdout.flush()
|
3
pybl/__init__.py
Normal file
3
pybl/__init__.py
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
from .PyBLRunner import PyBLRunner
|
||||||
|
from .PyBLImporter import PyBLImporter
|
||||||
|
from .PyBLConfig import PyBLConfig
|
@ -1,3 +0,0 @@
|
|||||||
from pysquidblacklists import PySquidBlacklistsRunner
|
|
||||||
from pysquidblacklists import PySquidBlacklistsImporter
|
|
||||||
from pysquidblacklists import PySquidBlacklistsConfig
|
|
@ -1,151 +0,0 @@
|
|||||||
import os
|
|
||||||
import re
|
|
||||||
import sys
|
|
||||||
import configparser
|
|
||||||
import cdb
|
|
||||||
from urlparse import urlparse
|
|
||||||
|
|
||||||
|
|
||||||
class PySquidBlacklistsRunner:
|
|
||||||
def __init__(self, config, bli):
|
|
||||||
self.base_dir = config.base_dir
|
|
||||||
self.db_backend = config.db_backend
|
|
||||||
self.cache = bli.cache
|
|
||||||
self.cdb_cache = dict()
|
|
||||||
if self.db_backend == "ram":
|
|
||||||
pass
|
|
||||||
elif self.db_backend == "cdb":
|
|
||||||
for blacklist in self.cache:
|
|
||||||
self.cdb_cache[blacklist] = cdb.init(blacklist)
|
|
||||||
self.loop()
|
|
||||||
|
|
||||||
def domain_compare(self):
|
|
||||||
result = False
|
|
||||||
for blacklist in self.cache:
|
|
||||||
tmpline = self.outline
|
|
||||||
while not result and tmpline != "":
|
|
||||||
try:
|
|
||||||
if self.db_backend == "ram":
|
|
||||||
result = self.cache[blacklist][tmpline]
|
|
||||||
elif self.db_backend == "cdb":
|
|
||||||
result = self.cdb_cache[blacklist][tmpline]
|
|
||||||
pass
|
|
||||||
except KeyError:
|
|
||||||
pass
|
|
||||||
tmpline = tmpline.partition('.')[2]
|
|
||||||
return result
|
|
||||||
|
|
||||||
def loop(self):
|
|
||||||
while True:
|
|
||||||
try:
|
|
||||||
line = sys.stdin.readline().strip()
|
|
||||||
if line == "":
|
|
||||||
exit()
|
|
||||||
self.outline = urlparse(line).netloc
|
|
||||||
if line:
|
|
||||||
if self.domain_compare():
|
|
||||||
self.response("OK")
|
|
||||||
else:
|
|
||||||
self.response("ERR")
|
|
||||||
except IOError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def response(r):
|
|
||||||
sys.stdout.write("%s\n" % r)
|
|
||||||
sys.stdout.flush()
|
|
||||||
|
|
||||||
|
|
||||||
class PySquidBlacklistsImporter:
|
|
||||||
def __init__(self, config):
|
|
||||||
self.db_backend = config.db_backend
|
|
||||||
self.categories = config.categories
|
|
||||||
self.base_dir = config.base_dir
|
|
||||||
self.blacklists_dir = config.blacklists_dir
|
|
||||||
if os.path.isdir(self.base_dir):
|
|
||||||
self.domain_files = [os.path.join(dp, f) for dp, dn, fn in os.walk(os.path.expanduser(self.blacklists_dir)) for f in
|
|
||||||
fn if re.match(r"domains*", f)]
|
|
||||||
else:
|
|
||||||
exit("blacklists_dir doesn't exists. Please update using py-sb-tool.py")
|
|
||||||
self.blacklist_files = self.make_list()
|
|
||||||
self.cache = None
|
|
||||||
if self.db_backend == "ram":
|
|
||||||
self.make_ram_db()
|
|
||||||
elif self.db_backend == "cdb":
|
|
||||||
self.make_cdb_db()
|
|
||||||
|
|
||||||
def make_list(self):
|
|
||||||
blacklists = []
|
|
||||||
for l in self.domain_files:
|
|
||||||
splitlist = l.split("/")
|
|
||||||
list_type = splitlist[len(splitlist) - 2]
|
|
||||||
blacklists.append([list_type, l])
|
|
||||||
return blacklists
|
|
||||||
|
|
||||||
def make_ram_db(self):
|
|
||||||
lib = dict()
|
|
||||||
for bls in self.blacklist_files:
|
|
||||||
if bls[0] in self.categories:
|
|
||||||
blcache = dict()
|
|
||||||
f = open(bls[1], "r")
|
|
||||||
for l in f:
|
|
||||||
blcache[l.strip("\n")] = True
|
|
||||||
lib[bls[0]] = blcache
|
|
||||||
del blcache
|
|
||||||
self.cache = lib
|
|
||||||
|
|
||||||
def make_cdb_db(self):
|
|
||||||
lib = []
|
|
||||||
for bl in self.blacklist_files:
|
|
||||||
bl_cdb_file = ("%s/%s.cdb" % (self.base_dir, bl[0]))
|
|
||||||
bl_cdb_file_tmp = ("%s/%s.tmp" % (self.base_dir, bl[0]))
|
|
||||||
if bl[0] in self.categories:
|
|
||||||
if not os.path.isfile(bl_cdb_file):
|
|
||||||
cdb_file = cdb.cdbmake(bl_cdb_file, bl_cdb_file_tmp)
|
|
||||||
f = open(bl[1], "r")
|
|
||||||
for line in f:
|
|
||||||
cdb_file.add(line.strip("\n"), "True")
|
|
||||||
cdb_file.finish()
|
|
||||||
lib.append(bl_cdb_file)
|
|
||||||
self.cache = lib
|
|
||||||
|
|
||||||
|
|
||||||
class PySquidBlacklistsConfig:
|
|
||||||
def __init__(self):
|
|
||||||
self.url = None
|
|
||||||
self.filename = None
|
|
||||||
self.base_dir = None
|
|
||||||
self.blacklists_dir = None
|
|
||||||
self.archive = None
|
|
||||||
self.db_backend = None
|
|
||||||
self.categories = None
|
|
||||||
self.config = configparser.RawConfigParser()
|
|
||||||
self.config_path = None
|
|
||||||
|
|
||||||
def get_config(self, pwd):
|
|
||||||
self.get_path(pwd)
|
|
||||||
self.config.read(self.config_path)
|
|
||||||
self.url = str(self.config.get("main", "url"))
|
|
||||||
self.filename = self.url.split("/").pop()
|
|
||||||
self.base_dir = str(self.config.get("main", "base_dir"))
|
|
||||||
self.blacklists_dir = "%sblacklists" % self.base_dir
|
|
||||||
self.archive = "%s%s" % ("/tmp/", self.filename)
|
|
||||||
self.db_backend = str(self.config.get("main", "db_backend"))
|
|
||||||
self.categories = []
|
|
||||||
for cat in self.config.get("main", "categories").split(","):
|
|
||||||
self.categories.append(str(cat))
|
|
||||||
|
|
||||||
def set_config(self, section, attr):
|
|
||||||
self.config.set(section, attr)
|
|
||||||
|
|
||||||
def get_path(self, pwd):
|
|
||||||
filename = "py-squid-blacklists.conf"
|
|
||||||
config_path = "%s/%s" % (os.path.dirname(os.path.abspath(pwd)), filename)
|
|
||||||
default_config_path = "/etc/%s" % filename
|
|
||||||
|
|
||||||
if os.path.exists(config_path):
|
|
||||||
self.config_path = config_path
|
|
||||||
elif os.path.exists(default_config_path):
|
|
||||||
self.config_path = default_config_path
|
|
||||||
else:
|
|
||||||
exit("No config file available at common paths (current dir or /etc). Must initialize it")
|
|
Loading…
Reference in New Issue
Block a user