mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
Merge branch 'master' into qa
* master:
  make trough dependency optional
  bump version, trough dep version
  Add port to custom WARC filename vars
This commit is contained in:
commit
f12960cf4d
4
setup.py
4
setup.py
@ -35,7 +35,6 @@ deps = [
|
||||
'idna>=2.5',
|
||||
'PyYAML>=5.1',
|
||||
'cachetools',
|
||||
'trough>=0.1.2',
|
||||
]
|
||||
try:
|
||||
import concurrent.futures
|
||||
@ -44,7 +43,7 @@ except:
|
||||
|
||||
setuptools.setup(
|
||||
name='warcprox',
|
||||
version='2.4.24',
|
||||
version='2.4.26',
|
||||
description='WARC writing MITM HTTP/S proxy',
|
||||
url='https://github.com/internetarchive/warcprox',
|
||||
author='Noah Levitt',
|
||||
@ -53,6 +52,7 @@ setuptools.setup(
|
||||
license='GPL',
|
||||
packages=['warcprox'],
|
||||
install_requires=deps,
|
||||
extras_require={'trough': ['trough>=0.1.4',],},
|
||||
setup_requires=['pytest-runner'],
|
||||
tests_require=['mock', 'pytest', 'warcio'],
|
||||
entry_points={
|
||||
|
@ -506,7 +506,14 @@ class TroughDedupDb(DedupDb, DedupableMixin):
|
||||
'values (%s, %s, %s, %s);')
|
||||
|
||||
def __init__(self, options=warcprox.Options()):
|
||||
import trough.client
|
||||
try:
|
||||
import trough.client
|
||||
except ImportError as e:
|
||||
logging.critical(
|
||||
'%s: %s\n\nYou might need to run "pip install '
|
||||
'warcprox[trough]".', type(e).__name__, e)
|
||||
sys.exit(1)
|
||||
|
||||
DedupableMixin.__init__(self, options)
|
||||
self.options = options
|
||||
self._trough_cli = trough.client.TroughClient(
|
||||
|
@ -93,7 +93,7 @@ def _build_arg_parser(prog='warcprox', show_hidden=False):
|
||||
default='./warcs', help='where to write warcs')
|
||||
arg_parser.add_argument('--warc-filename', dest='warc_filename',
|
||||
default='{prefix}-{timestamp17}-{serialno}-{randomtoken}',
|
||||
help='define custom WARC filename with variables {prefix}, {timestamp14}, {timestamp17}, {serialno}, {randomtoken}, {hostname}, {shorthostname}')
|
||||
help='define custom WARC filename with variables {prefix}, {timestamp14}, {timestamp17}, {serialno}, {randomtoken}, {hostname}, {shorthostname}, {port}')
|
||||
arg_parser.add_argument('-z', '--gzip', dest='gzip', action='store_true',
|
||||
help='write gzip-compressed warc records')
|
||||
hidden.add_argument(
|
||||
|
@ -51,6 +51,7 @@ class WarcWriter:
|
||||
self.finalname = None
|
||||
self.gzip = options.gzip or False
|
||||
self.prefix = options.prefix or 'warcprox'
|
||||
self.port = options.port or 8000
|
||||
self.open_suffix = '' if options.no_warc_open_suffix else '.open'
|
||||
self.rollover_size = options.rollover_size or 1000000000
|
||||
self.rollover_idle_time = options.rollover_idle_time or None
|
||||
@ -67,7 +68,7 @@ class WarcWriter:
|
||||
"""WARC filename is configurable with CLI parameter --warc-filename.
|
||||
Default: '{prefix}-{timestamp17}-{serialno}-{randomtoken}'
|
||||
Available variables are: prefix, timestamp14, timestamp17, serialno,
|
||||
randomtoken, hostname, shorthostname.
|
||||
randomtoken, hostname, shorthostname, port.
|
||||
Extension ``.warc`` or ``.warc.gz`` is appended automatically.
|
||||
"""
|
||||
hostname = socket.getfqdn()
|
||||
@ -77,7 +78,7 @@ class WarcWriter:
|
||||
timestamp17=warcprox.timestamp17(),
|
||||
serialno='{:05d}'.format(serial),
|
||||
randomtoken=self.randomtoken, hostname=hostname,
|
||||
shorthostname=shorthostname)
|
||||
shorthostname=shorthostname, port=self.port)
|
||||
if self.gzip:
|
||||
fname = fname + '.warc.gz'
|
||||
else:
|
||||
|
Loading…
x
Reference in New Issue
Block a user