Add port to custom WARC filename vars

This commit is contained in:
Vangelis Banos 2020-01-08 21:19:48 +00:00
parent 469b41773a
commit ca0197330d
2 changed files with 4 additions and 3 deletions

View File

@ -93,7 +93,7 @@ def _build_arg_parser(prog='warcprox', show_hidden=False):
default='./warcs', help='where to write warcs') default='./warcs', help='where to write warcs')
arg_parser.add_argument('--warc-filename', dest='warc_filename', arg_parser.add_argument('--warc-filename', dest='warc_filename',
default='{prefix}-{timestamp17}-{serialno}-{randomtoken}', default='{prefix}-{timestamp17}-{serialno}-{randomtoken}',
help='define custom WARC filename with variables {prefix}, {timestamp14}, {timestamp17}, {serialno}, {randomtoken}, {hostname}, {shorthostname}') help='define custom WARC filename with variables {prefix}, {timestamp14}, {timestamp17}, {serialno}, {randomtoken}, {hostname}, {shorthostname}, {port}')
arg_parser.add_argument('-z', '--gzip', dest='gzip', action='store_true', arg_parser.add_argument('-z', '--gzip', dest='gzip', action='store_true',
help='write gzip-compressed warc records') help='write gzip-compressed warc records')
hidden.add_argument( hidden.add_argument(

View File

@ -51,6 +51,7 @@ class WarcWriter:
self.finalname = None self.finalname = None
self.gzip = options.gzip or False self.gzip = options.gzip or False
self.prefix = options.prefix or 'warcprox' self.prefix = options.prefix or 'warcprox'
self.port = options.port or 8000
self.open_suffix = '' if options.no_warc_open_suffix else '.open' self.open_suffix = '' if options.no_warc_open_suffix else '.open'
self.rollover_size = options.rollover_size or 1000000000 self.rollover_size = options.rollover_size or 1000000000
self.rollover_idle_time = options.rollover_idle_time or None self.rollover_idle_time = options.rollover_idle_time or None
@ -67,7 +68,7 @@ class WarcWriter:
"""WARC filename is configurable with CLI parameter --warc-filename. """WARC filename is configurable with CLI parameter --warc-filename.
Default: '{prefix}-{timestamp17}-{serialno}-{randomtoken}' Default: '{prefix}-{timestamp17}-{serialno}-{randomtoken}'
Available variables are: prefix, timestamp14, timestamp17, serialno, Available variables are: prefix, timestamp14, timestamp17, serialno,
randomtoken, hostname, shorthostname. randomtoken, hostname, shorthostname, port.
Extension ``.warc`` or ``.warc.gz`` is appended automatically. Extension ``.warc`` or ``.warc.gz`` is appended automatically.
""" """
hostname = socket.getfqdn() hostname = socket.getfqdn()
@ -77,7 +78,7 @@ class WarcWriter:
timestamp17=warcprox.timestamp17(), timestamp17=warcprox.timestamp17(),
serialno='{:05d}'.format(serial), serialno='{:05d}'.format(serial),
randomtoken=self.randomtoken, hostname=hostname, randomtoken=self.randomtoken, hostname=hostname,
shorthostname=shorthostname) shorthostname=shorthostname, port=self.port)
if self.gzip: if self.gzip:
fname = fname + '.warc.gz' fname = fname + '.warc.gz'
else: else: