From 6e6b43eb795652f8c0f489ac3e2abef6364481cf Mon Sep 17 00:00:00 2001 From: Vangelis Banos Date: Wed, 20 Mar 2019 11:53:32 +0000 Subject: [PATCH 1/5] Add option to load logging conf from JSON file New option `--logging-conf-file` to load `logging` conf from a JSON file. Prefer JSON over the `configparser` format supported by `logging.config.fileConfig` because JSON format is much better (nesting is supported) and it's easier to detect errors. --- warcprox/main.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/warcprox/main.py b/warcprox/main.py index 7fe5011..98867b6 100644 --- a/warcprox/main.py +++ b/warcprox/main.py @@ -29,7 +29,9 @@ try: except ImportError: import Queue as queue +import json import logging +import logging.config import sys import hashlib import argparse @@ -239,6 +241,9 @@ def _build_arg_parser(prog='warcprox', show_hidden=False): arg_parser.add_argument( '--trace', dest='trace', action='store_true', help='very verbose logging') + arg_parser.add_argument( + '--logging-conf-file', dest='logging_conf_file', default=None, + help=('reads logging configuration from a JSON file')) arg_parser.add_argument( '--version', action='version', version="warcprox {}".format(warcprox.__version__)) @@ -302,6 +307,11 @@ def main(argv=None): '%(asctime)s %(process)d %(levelname)s %(threadName)s ' '%(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s')) + if args.logging_conf_file: + with open(args.logging_conf_file, 'r') as fd: + conf = json.load(fd) + logging.config.dictConfig(conf) + # see https://github.com/pyca/cryptography/issues/2911 cryptography.hazmat.backends.openssl.backend.activate_builtin_random() From 878ab0977f17f46dc981699cd6ca617b576b5994 Mon Sep 17 00:00:00 2001 From: Vangelis Banos Date: Thu, 21 Mar 2019 19:18:55 +0000 Subject: [PATCH 2/5] Use YAML instead of JSON Add PyYAML<=3.13 dependency. 
--- setup.py | 1 + warcprox/main.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 4fa0885..6274eeb 100755 --- a/setup.py +++ b/setup.py @@ -33,6 +33,7 @@ deps = [ 'PySocks>=1.6.8', 'cryptography>=2.3', 'idna>=2.5', + 'PyYAML<=3.13', ] try: import concurrent.futures diff --git a/warcprox/main.py b/warcprox/main.py index 98867b6..06d8bfc 100644 --- a/warcprox/main.py +++ b/warcprox/main.py @@ -29,7 +29,6 @@ try: except ImportError: import Queue as queue -import json import logging import logging.config import sys @@ -41,6 +40,7 @@ import traceback import signal import threading import certauth.certauth +import yaml import warcprox import doublethink import cryptography.hazmat.backends.openssl @@ -243,7 +243,7 @@ def _build_arg_parser(prog='warcprox', show_hidden=False): help='very verbose logging') arg_parser.add_argument( '--logging-conf-file', dest='logging_conf_file', default=None, - help=('reads logging configuration from a JSON file')) + help=('reads logging configuration from a YAML file')) arg_parser.add_argument( '--version', action='version', version="warcprox {}".format(warcprox.__version__)) @@ -309,7 +309,7 @@ def main(argv=None): if args.logging_conf_file: with open(args.logging_conf_file, 'r') as fd: - conf = json.load(fd) + conf = yaml.load(fd) logging.config.dictConfig(conf) # see https://github.com/pyca/cryptography/issues/2911 From 436a27b19e12ac831e3d511fd4541fc56902c7d5 Mon Sep 17 00:00:00 2001 From: Vangelis Banos Date: Thu, 21 Mar 2019 19:34:52 +0000 Subject: [PATCH 3/5] Upgrade PyYAML to >=5.1 --- setup.py | 2 +- warcprox/main.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 6274eeb..56e8390 100755 --- a/setup.py +++ b/setup.py @@ -33,7 +33,7 @@ deps = [ 'PySocks>=1.6.8', 'cryptography>=2.3', 'idna>=2.5', - 'PyYAML<=3.13', + 'PyYAML>=5.1', ] try: import concurrent.futures diff --git a/warcprox/main.py b/warcprox/main.py index 06d8bfc..0ef5c58 
100644 --- a/warcprox/main.py +++ b/warcprox/main.py @@ -309,7 +309,7 @@ def main(argv=None): if args.logging_conf_file: with open(args.logging_conf_file, 'r') as fd: - conf = yaml.load(fd) + conf = yaml.safe_load(fd) logging.config.dictConfig(conf) # see https://github.com/pyca/cryptography/issues/2911 From 1e0a0ca63ac924e4a62d72000796b57d8dac85aa Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Thu, 21 Mar 2019 12:38:29 -0700 Subject: [PATCH 4/5] every change is a point release now --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 56e8390..f9916ae 100755 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ except: setuptools.setup( name='warcprox', - version='2.4b7.dev197', + version='2.4.0', description='WARC writing MITM HTTP/S proxy', url='https://github.com/internetarchive/warcprox', author='Noah Levitt', From cb2a07bff2e6df60171274de93d0760f0d8f8e58 Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Thu, 21 Mar 2019 12:59:32 -0700 Subject: [PATCH 5/5] account for surt fix in urlcanon 0.3.0 --- setup.py | 2 +- tests/test_warcprox.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index f9916ae..d3e73cd 100755 --- a/setup.py +++ b/setup.py @@ -26,7 +26,7 @@ import setuptools deps = [ 'certauth==1.1.6', 'warctools>=4.10.0', - 'urlcanon>=0.1.dev16', + 'urlcanon>=0.3.0', 'doublethink>=0.2.0.dev87', 'urllib3>=1.14', 'requests>=2.0.1', diff --git a/tests/test_warcprox.py b/tests/test_warcprox.py index 7c6d21a..3c38d4e 100755 --- a/tests/test_warcprox.py +++ b/tests/test_warcprox.py @@ -965,12 +965,12 @@ def test_block_rules(http_daemon, https_daemon, warcprox_, archiving_proxies): }, { "url_match": "SURT_MATCH", - "value": "http://(localhost:%s,)/fuh/" % (http_daemon.server_port), + "value": "http://(localhost,:%s)/fuh/" % (http_daemon.server_port), }, { "url_match": "SURT_MATCH", # this rule won't match because of http scheme, https port - "value": 
"http://(localhost:%s,)/fuh/" % (https_daemon.server_port), + "value": "http://(localhost,:%s)/fuh/" % (https_daemon.server_port), }, { "domain": "bad.domain.com",