From b57ec9c589f75bf872e083f18d4c26bb54f5b635 Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Tue, 31 Aug 2021 17:09:06 +0000 Subject: [PATCH 1/2] Check warcprox meta headers for hop information necessary to record a hop path if provided --- warcprox/crawl_log.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/warcprox/crawl_log.py b/warcprox/crawl_log.py index 4e67723..73d6457 100644 --- a/warcprox/crawl_log.py +++ b/warcprox/crawl_log.py @@ -64,12 +64,17 @@ class CrawlLogger(object): else: content_length = 0 payload_digest = '-' + logging.info('metadata %s', recorded_url.warcprox_meta) + hop_path = recorded_url.warcprox_meta["hop_path"] if "hop_path" in recorded_url.warcprox_meta and recorded_url.warcprox_meta["hop_path"] and len(recorded_url.warcprox_meta["hop_path"].strip()) > 0 else '-' + hop_path_parent = recorded_url.warcprox_meta["hop_path_parent"] if "hop_path_parent" in recorded_url.warcprox_meta else None + if hop_path_parent and hop_path_parent != recorded_url.url: + hop_path = str(hop_path if hop_path and hop_path != "-" else "") + "B" fields = [ '{:%Y-%m-%dT%H:%M:%S}.{:03d}Z'.format(now, now.microsecond//1000), '% 5s' % status, '% 10s' % content_length, recorded_url.url, - '-', # hop path + hop_path, recorded_url.referer or '-', recorded_url.mimetype if recorded_url.mimetype is not None and recorded_url.mimetype.strip() else '-', '-', From aeecb6515f04aa3ba64a8a51dd4cd3d9e474ce2e Mon Sep 17 00:00:00 2001 From: Barbara Miller Date: Tue, 28 Dec 2021 11:58:30 -0800 Subject: [PATCH 2/2] bump version --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 190ac54..60da37c 100755 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ ''' setup.py - setuptools installation configuration for warcprox -Copyright (C) 2013-2020 Internet Archive +Copyright (C) 2013-2021 Internet Archive This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License @@ -43,7 +43,7 @@ except: setuptools.setup( name='warcprox', - version='2.4.27', + version='2.4.28', description='WARC writing MITM HTTP/S proxy', url='https://github.com/internetarchive/warcprox', author='Noah Levitt',