diff --git a/setup.py b/setup.py index 6233983..221ff7b 100755 --- a/setup.py +++ b/setup.py @@ -40,7 +40,7 @@ except: setuptools.setup( name='warcprox', - version='2.4b3.dev178', + version='2.4b3.dev179', description='WARC writing MITM HTTP/S proxy', url='https://github.com/internetarchive/warcprox', author='Noah Levitt', diff --git a/tests/test_warcprox.py b/tests/test_warcprox.py index 13b6bad..0375ca1 100755 --- a/tests/test_warcprox.py +++ b/tests/test_warcprox.py @@ -1799,7 +1799,7 @@ def test_crawl_log(warcprox_, http_daemon, archiving_proxies): assert fields[10] == b'-' assert fields[11] == b'-' extra_info = json.loads(fields[12].decode('utf-8')) - assert extra_info == {'contentSize': 91} + assert extra_info == {'contentSize': 91, 'method': 'HEAD'} # WARCPROX_WRITE_RECORD url = 'http://fakeurl/' @@ -1838,8 +1838,9 @@ def test_crawl_log(warcprox_, http_daemon, archiving_proxies): assert fields[11] == b'-' extra_info = json.loads(fields[12].decode('utf-8')) assert set(extra_info.keys()) == { - 'contentSize', 'warcFilename', 'warcFileOffset'} + 'contentSize', 'warcFilename', 'warcFileOffset', 'method'} assert extra_info['contentSize'] == 38 + assert extra_info['method'] == 'WARCPROX_WRITE_RECORD' def test_long_warcprox_meta( warcprox_, http_daemon, archiving_proxies, playback_proxies): diff --git a/warcprox/crawl_log.py b/warcprox/crawl_log.py index f28683a..a953402 100644 --- a/warcprox/crawl_log.py +++ b/warcprox/crawl_log.py @@ -40,6 +40,8 @@ class CrawlLogger(object): if records: extra_info['warcFilename'] = records[0].warc_filename extra_info['warcFileOffset'] = records[0].offset + if recorded_url.method != 'GET': + extra_info['method'] = recorded_url.method if recorded_url.response_recorder: content_length = recorded_url.response_recorder.len - recorded_url.response_recorder.payload_offset payload_digest = warcprox.digest_str(