From ca0197330de282f762c819675d327348b99a19b0 Mon Sep 17 00:00:00 2001 From: Vangelis Banos Date: Wed, 8 Jan 2020 21:19:48 +0000 Subject: [PATCH 1/3] Add port to custom WARC filename vars --- warcprox/main.py | 2 +- warcprox/writer.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/warcprox/main.py b/warcprox/main.py index 3afc1bb..2ba41b3 100644 --- a/warcprox/main.py +++ b/warcprox/main.py @@ -93,7 +93,7 @@ def _build_arg_parser(prog='warcprox', show_hidden=False): default='./warcs', help='where to write warcs') arg_parser.add_argument('--warc-filename', dest='warc_filename', default='{prefix}-{timestamp17}-{serialno}-{randomtoken}', - help='define custom WARC filename with variables {prefix}, {timestamp14}, {timestamp17}, {serialno}, {randomtoken}, {hostname}, {shorthostname}') + help='define custom WARC filename with variables {prefix}, {timestamp14}, {timestamp17}, {serialno}, {randomtoken}, {hostname}, {shorthostname}, {port}') arg_parser.add_argument('-z', '--gzip', dest='gzip', action='store_true', help='write gzip-compressed warc records') hidden.add_argument( diff --git a/warcprox/writer.py b/warcprox/writer.py index cc44be2..6926939 100644 --- a/warcprox/writer.py +++ b/warcprox/writer.py @@ -51,6 +51,7 @@ class WarcWriter: self.finalname = None self.gzip = options.gzip or False self.prefix = options.prefix or 'warcprox' + self.port = options.port or 8000 self.open_suffix = '' if options.no_warc_open_suffix else '.open' self.rollover_size = options.rollover_size or 1000000000 self.rollover_idle_time = options.rollover_idle_time or None @@ -67,7 +68,7 @@ class WarcWriter: """WARC filename is configurable with CLI parameter --warc-filename. Default: '{prefix}-{timestamp17}-{randomtoken}-{serialno}' Available variables are: prefix, timestamp14, timestamp17, serialno, - randomtoken, hostname, shorthostname. + randomtoken, hostname, shorthostname, port. Extension ``.warc`` or ``.warc.gz`` is appended automatically. """ hostname = socket.getfqdn() @@ -77,7 +78,7 @@ class WarcWriter: timestamp17=warcprox.timestamp17(), serialno='{:05d}'.format(serial), randomtoken=self.randomtoken, hostname=hostname, - shorthostname=shorthostname) + shorthostname=shorthostname, port=self.port) if self.gzip: fname = fname + '.warc.gz' else: From a8cd53bfe4d0e072d9fef61932600becbbd529a5 Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Wed, 8 Jan 2020 13:24:00 -0800 Subject: [PATCH 2/3] bump version, trough dep version --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 9881520..ae29848 100755 --- a/setup.py +++ b/setup.py @@ -35,7 +35,7 @@ deps = [ 'idna>=2.5', 'PyYAML>=5.1', 'cachetools', - 'trough>=0.1.2', + 'trough>=0.1.4', ] try: import concurrent.futures @@ -44,7 +44,7 @@ except: setuptools.setup( name='warcprox', - version='2.4.24', + version='2.4.25', description='WARC writing MITM HTTP/S proxy', url='https://github.com/internetarchive/warcprox', author='Noah Levitt', From 90fba0151498bea404529900a529df7db8e3f35a Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Wed, 8 Jan 2020 13:37:01 -0800 Subject: [PATCH 3/3] make trough dependency optional --- setup.py | 4 ++-- warcprox/dedup.py | 9 ++++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index ae29848..96089bb 100755 --- a/setup.py +++ b/setup.py @@ -35,7 +35,6 @@ deps = [ 'idna>=2.5', 'PyYAML>=5.1', 'cachetools', - 'trough>=0.1.4', ] try: import concurrent.futures @@ -44,7 +43,7 @@ except: setuptools.setup( name='warcprox', - version='2.4.25', + version='2.4.26', description='WARC writing MITM HTTP/S proxy', url='https://github.com/internetarchive/warcprox', author='Noah Levitt', @@ -53,6 +52,7 @@ setuptools.setup( license='GPL', packages=['warcprox'], install_requires=deps, + extras_require={'trough': ['trough>=0.1.4',],}, setup_requires=['pytest-runner'], tests_require=['mock', 'pytest', 'warcio'], entry_points={ diff --git a/warcprox/dedup.py b/warcprox/dedup.py index 8227aa7..0181019 100644 --- a/warcprox/dedup.py +++ b/warcprox/dedup.py @@ -506,7 +506,14 @@ class TroughDedupDb(DedupDb, DedupableMixin): 'values (%s, %s, %s, %s);') def __init__(self, options=warcprox.Options()): - import trough.client + try: + import trough.client + except ImportError as e: + logging.critical( + '%s: %s\n\nYou might need to run "pip install ' + 'warcprox[trough]".', type(e).__name__, e) + sys.exit(1) + DedupableMixin.__init__(self, options) self.options = options self._trough_cli = trough.client.TroughClient(