record request method in crawl log if not GET

This commit is contained in:
Noah Levitt 2018-07-17 13:47:52 -05:00
parent 8c22c55955
commit 2df82bd403
3 changed files with 6 additions and 3 deletions

View File

@@ -40,7 +40,7 @@ except:
setuptools.setup(
name='warcprox',
-version='2.4b3.dev178',
+version='2.4b3.dev179',
description='WARC writing MITM HTTP/S proxy',
url='https://github.com/internetarchive/warcprox',
author='Noah Levitt',

View File

@@ -1799,7 +1799,7 @@ def test_crawl_log(warcprox_, http_daemon, archiving_proxies):
assert fields[10] == b'-'
assert fields[11] == b'-'
extra_info = json.loads(fields[12].decode('utf-8'))
-assert extra_info == {'contentSize': 91}
+assert extra_info == {'contentSize': 91, 'method': 'HEAD'}
# WARCPROX_WRITE_RECORD
url = 'http://fakeurl/'
@@ -1838,8 +1838,9 @@ def test_crawl_log(warcprox_, http_daemon, archiving_proxies):
assert fields[11] == b'-'
extra_info = json.loads(fields[12].decode('utf-8'))
assert set(extra_info.keys()) == {
-'contentSize', 'warcFilename', 'warcFileOffset'}
+'contentSize', 'warcFilename', 'warcFileOffset', 'method'}
assert extra_info['contentSize'] == 38
+assert extra_info['method'] == 'WARCPROX_WRITE_RECORD'
def test_long_warcprox_meta(
warcprox_, http_daemon, archiving_proxies, playback_proxies):

View File

@@ -40,6 +40,8 @@ class CrawlLogger(object):
if records:
extra_info['warcFilename'] = records[0].warc_filename
extra_info['warcFileOffset'] = records[0].offset
+if recorded_url.method != 'GET':
+extra_info['method'] = recorded_url.method
if recorded_url.response_recorder:
content_length = recorded_url.response_recorder.len - recorded_url.response_recorder.payload_offset
payload_digest = warcprox.digest_str(