mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
Merge branch 'master' into qa
* master:
  make trough dependency optional
  bump version, trough dep version
  Add port to custom WARC filename vars
This commit is contained in:
commit
f12960cf4d
4
setup.py
4
setup.py
@@ -35,7 +35,6 @@ deps = [
|
|||||||
'idna>=2.5',
|
'idna>=2.5',
|
||||||
'PyYAML>=5.1',
|
'PyYAML>=5.1',
|
||||||
'cachetools',
|
'cachetools',
|
||||||
'trough>=0.1.2',
|
|
||||||
]
|
]
|
||||||
try:
|
try:
|
||||||
import concurrent.futures
|
import concurrent.futures
|
||||||
@@ -44,7 +43,7 @@ except:
|
|||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='warcprox',
|
name='warcprox',
|
||||||
version='2.4.24',
|
version='2.4.26',
|
||||||
description='WARC writing MITM HTTP/S proxy',
|
description='WARC writing MITM HTTP/S proxy',
|
||||||
url='https://github.com/internetarchive/warcprox',
|
url='https://github.com/internetarchive/warcprox',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
@@ -53,6 +52,7 @@ setuptools.setup(
|
|||||||
license='GPL',
|
license='GPL',
|
||||||
packages=['warcprox'],
|
packages=['warcprox'],
|
||||||
install_requires=deps,
|
install_requires=deps,
|
||||||
|
extras_require={'trough': ['trough>=0.1.4',],},
|
||||||
setup_requires=['pytest-runner'],
|
setup_requires=['pytest-runner'],
|
||||||
tests_require=['mock', 'pytest', 'warcio'],
|
tests_require=['mock', 'pytest', 'warcio'],
|
||||||
entry_points={
|
entry_points={
|
||||||
|
@@ -506,7 +506,14 @@ class TroughDedupDb(DedupDb, DedupableMixin):
|
|||||||
'values (%s, %s, %s, %s);')
|
'values (%s, %s, %s, %s);')
|
||||||
|
|
||||||
def __init__(self, options=warcprox.Options()):
|
def __init__(self, options=warcprox.Options()):
|
||||||
import trough.client
|
try:
|
||||||
|
import trough.client
|
||||||
|
except ImportError as e:
|
||||||
|
logging.critical(
|
||||||
|
'%s: %s\n\nYou might need to run "pip install '
|
||||||
|
'warcprox[trough]".', type(e).__name__, e)
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
DedupableMixin.__init__(self, options)
|
DedupableMixin.__init__(self, options)
|
||||||
self.options = options
|
self.options = options
|
||||||
self._trough_cli = trough.client.TroughClient(
|
self._trough_cli = trough.client.TroughClient(
|
||||||
|
@@ -93,7 +93,7 @@ def _build_arg_parser(prog='warcprox', show_hidden=False):
|
|||||||
default='./warcs', help='where to write warcs')
|
default='./warcs', help='where to write warcs')
|
||||||
arg_parser.add_argument('--warc-filename', dest='warc_filename',
|
arg_parser.add_argument('--warc-filename', dest='warc_filename',
|
||||||
default='{prefix}-{timestamp17}-{serialno}-{randomtoken}',
|
default='{prefix}-{timestamp17}-{serialno}-{randomtoken}',
|
||||||
help='define custom WARC filename with variables {prefix}, {timestamp14}, {timestamp17}, {serialno}, {randomtoken}, {hostname}, {shorthostname}')
|
help='define custom WARC filename with variables {prefix}, {timestamp14}, {timestamp17}, {serialno}, {randomtoken}, {hostname}, {shorthostname}, {port}')
|
||||||
arg_parser.add_argument('-z', '--gzip', dest='gzip', action='store_true',
|
arg_parser.add_argument('-z', '--gzip', dest='gzip', action='store_true',
|
||||||
help='write gzip-compressed warc records')
|
help='write gzip-compressed warc records')
|
||||||
hidden.add_argument(
|
hidden.add_argument(
|
||||||
|
@@ -51,6 +51,7 @@ class WarcWriter:
|
|||||||
self.finalname = None
|
self.finalname = None
|
||||||
self.gzip = options.gzip or False
|
self.gzip = options.gzip or False
|
||||||
self.prefix = options.prefix or 'warcprox'
|
self.prefix = options.prefix or 'warcprox'
|
||||||
|
self.port = options.port or 8000
|
||||||
self.open_suffix = '' if options.no_warc_open_suffix else '.open'
|
self.open_suffix = '' if options.no_warc_open_suffix else '.open'
|
||||||
self.rollover_size = options.rollover_size or 1000000000
|
self.rollover_size = options.rollover_size or 1000000000
|
||||||
self.rollover_idle_time = options.rollover_idle_time or None
|
self.rollover_idle_time = options.rollover_idle_time or None
|
||||||
@@ -67,7 +68,7 @@ class WarcWriter:
|
|||||||
"""WARC filename is configurable with CLI parameter --warc-filename.
|
"""WARC filename is configurable with CLI parameter --warc-filename.
|
||||||
Default: '{prefix}-{timestamp17}-{randomtoken}-{serialno}'
|
Default: '{prefix}-{timestamp17}-{randomtoken}-{serialno}'
|
||||||
Available variables are: prefix, timestamp14, timestamp17, serialno,
|
Available variables are: prefix, timestamp14, timestamp17, serialno,
|
||||||
randomtoken, hostname, shorthostname.
|
randomtoken, hostname, shorthostname, port.
|
||||||
Extension ``.warc`` or ``.warc.gz`` is appended automatically.
|
Extension ``.warc`` or ``.warc.gz`` is appended automatically.
|
||||||
"""
|
"""
|
||||||
hostname = socket.getfqdn()
|
hostname = socket.getfqdn()
|
||||||
@@ -77,7 +78,7 @@ class WarcWriter:
|
|||||||
timestamp17=warcprox.timestamp17(),
|
timestamp17=warcprox.timestamp17(),
|
||||||
serialno='{:05d}'.format(serial),
|
serialno='{:05d}'.format(serial),
|
||||||
randomtoken=self.randomtoken, hostname=hostname,
|
randomtoken=self.randomtoken, hostname=hostname,
|
||||||
shorthostname=shorthostname)
|
shorthostname=shorthostname, port=self.port)
|
||||||
if self.gzip:
|
if self.gzip:
|
||||||
fname = fname + '.warc.gz'
|
fname = fname + '.warc.gz'
|
||||||
else:
|
else:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user