mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
record request method in crawl log if not GET
This commit is contained in:
parent
8c22c55955
commit
2df82bd403
2
setup.py
2
setup.py
@@ -40,7 +40,7 @@ except:
|
||||
|
||||
setuptools.setup(
|
||||
name='warcprox',
|
||||
version='2.4b3.dev178',
|
||||
version='2.4b3.dev179',
|
||||
description='WARC writing MITM HTTP/S proxy',
|
||||
url='https://github.com/internetarchive/warcprox',
|
||||
author='Noah Levitt',
|
||||
|
@@ -1799,7 +1799,7 @@ def test_crawl_log(warcprox_, http_daemon, archiving_proxies):
|
||||
assert fields[10] == b'-'
|
||||
assert fields[11] == b'-'
|
||||
extra_info = json.loads(fields[12].decode('utf-8'))
|
||||
assert extra_info == {'contentSize': 91}
|
||||
assert extra_info == {'contentSize': 91, 'method': 'HEAD'}
|
||||
|
||||
# WARCPROX_WRITE_RECORD
|
||||
url = 'http://fakeurl/'
|
||||
@@ -1838,8 +1838,9 @@ def test_crawl_log(warcprox_, http_daemon, archiving_proxies):
|
||||
assert fields[11] == b'-'
|
||||
extra_info = json.loads(fields[12].decode('utf-8'))
|
||||
assert set(extra_info.keys()) == {
|
||||
'contentSize', 'warcFilename', 'warcFileOffset'}
|
||||
'contentSize', 'warcFilename', 'warcFileOffset', 'method'}
|
||||
assert extra_info['contentSize'] == 38
|
||||
assert extra_info['method'] == 'WARCPROX_WRITE_RECORD'
|
||||
|
||||
def test_long_warcprox_meta(
|
||||
warcprox_, http_daemon, archiving_proxies, playback_proxies):
|
||||
|
@@ -40,6 +40,8 @@ class CrawlLogger(object):
|
||||
if records:
|
||||
extra_info['warcFilename'] = records[0].warc_filename
|
||||
extra_info['warcFileOffset'] = records[0].offset
|
||||
if recorded_url.method != 'GET':
|
||||
extra_info['method'] = recorded_url.method
|
||||
if recorded_url.response_recorder:
|
||||
content_length = recorded_url.response_recorder.len - recorded_url.response_recorder.payload_offset
|
||||
payload_digest = warcprox.digest_str(
|
||||
|
Loading…
x
Reference in New Issue
Block a user