Merge pull request #146 from vbanos/warc-filename-port

Add port to custom WARC filename vars
This commit is contained in:
Noah Levitt 2020-01-08 13:22:50 -08:00 committed by GitHub
commit ee6bc151e1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 4 additions and 3 deletions

View File

@ -93,7 +93,7 @@ def _build_arg_parser(prog='warcprox', show_hidden=False):
default='./warcs', help='where to write warcs')
arg_parser.add_argument('--warc-filename', dest='warc_filename',
default='{prefix}-{timestamp17}-{serialno}-{randomtoken}',
help='define custom WARC filename with variables {prefix}, {timestamp14}, {timestamp17}, {serialno}, {randomtoken}, {hostname}, {shorthostname}')
help='define custom WARC filename with variables {prefix}, {timestamp14}, {timestamp17}, {serialno}, {randomtoken}, {hostname}, {shorthostname}, {port}')
arg_parser.add_argument('-z', '--gzip', dest='gzip', action='store_true',
help='write gzip-compressed warc records')
hidden.add_argument(

View File

@ -51,6 +51,7 @@ class WarcWriter:
self.finalname = None
self.gzip = options.gzip or False
self.prefix = options.prefix or 'warcprox'
self.port = options.port or 8000
self.open_suffix = '' if options.no_warc_open_suffix else '.open'
self.rollover_size = options.rollover_size or 1000000000
self.rollover_idle_time = options.rollover_idle_time or None
@ -67,7 +68,7 @@ class WarcWriter:
"""WARC filename is configurable with CLI parameter --warc-filename.
Default: '{prefix}-{timestamp17}-{serialno}-{randomtoken}'
Available variables are: prefix, timestamp14, timestamp17, serialno,
randomtoken, hostname, shorthostname.
randomtoken, hostname, shorthostname, port.
Extension ``.warc`` or ``.warc.gz`` is appended automatically.
"""
hostname = socket.getfqdn()
@ -77,7 +78,7 @@ class WarcWriter:
timestamp17=warcprox.timestamp17(),
serialno='{:05d}'.format(serial),
randomtoken=self.randomtoken, hostname=hostname,
shorthostname=shorthostname)
shorthostname=shorthostname, port=self.port)
if self.gzip:
fname = fname + '.warc.gz'
else: