mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
record request method in crawl log if not GET
This commit is contained in:
parent
8c22c55955
commit
2df82bd403
2
setup.py
2
setup.py
@ -40,7 +40,7 @@ except:
|
|||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='warcprox',
|
name='warcprox',
|
||||||
version='2.4b3.dev178',
|
version='2.4b3.dev179',
|
||||||
description='WARC writing MITM HTTP/S proxy',
|
description='WARC writing MITM HTTP/S proxy',
|
||||||
url='https://github.com/internetarchive/warcprox',
|
url='https://github.com/internetarchive/warcprox',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
|
@ -1799,7 +1799,7 @@ def test_crawl_log(warcprox_, http_daemon, archiving_proxies):
|
|||||||
assert fields[10] == b'-'
|
assert fields[10] == b'-'
|
||||||
assert fields[11] == b'-'
|
assert fields[11] == b'-'
|
||||||
extra_info = json.loads(fields[12].decode('utf-8'))
|
extra_info = json.loads(fields[12].decode('utf-8'))
|
||||||
assert extra_info == {'contentSize': 91}
|
assert extra_info == {'contentSize': 91, 'method': 'HEAD'}
|
||||||
|
|
||||||
# WARCPROX_WRITE_RECORD
|
# WARCPROX_WRITE_RECORD
|
||||||
url = 'http://fakeurl/'
|
url = 'http://fakeurl/'
|
||||||
@ -1838,8 +1838,9 @@ def test_crawl_log(warcprox_, http_daemon, archiving_proxies):
|
|||||||
assert fields[11] == b'-'
|
assert fields[11] == b'-'
|
||||||
extra_info = json.loads(fields[12].decode('utf-8'))
|
extra_info = json.loads(fields[12].decode('utf-8'))
|
||||||
assert set(extra_info.keys()) == {
|
assert set(extra_info.keys()) == {
|
||||||
'contentSize', 'warcFilename', 'warcFileOffset'}
|
'contentSize', 'warcFilename', 'warcFileOffset', 'method'}
|
||||||
assert extra_info['contentSize'] == 38
|
assert extra_info['contentSize'] == 38
|
||||||
|
assert extra_info['method'] == 'WARCPROX_WRITE_RECORD'
|
||||||
|
|
||||||
def test_long_warcprox_meta(
|
def test_long_warcprox_meta(
|
||||||
warcprox_, http_daemon, archiving_proxies, playback_proxies):
|
warcprox_, http_daemon, archiving_proxies, playback_proxies):
|
||||||
|
@ -40,6 +40,8 @@ class CrawlLogger(object):
|
|||||||
if records:
|
if records:
|
||||||
extra_info['warcFilename'] = records[0].warc_filename
|
extra_info['warcFilename'] = records[0].warc_filename
|
||||||
extra_info['warcFileOffset'] = records[0].offset
|
extra_info['warcFileOffset'] = records[0].offset
|
||||||
|
if recorded_url.method != 'GET':
|
||||||
|
extra_info['method'] = recorded_url.method
|
||||||
if recorded_url.response_recorder:
|
if recorded_url.response_recorder:
|
||||||
content_length = recorded_url.response_recorder.len - recorded_url.response_recorder.payload_offset
|
content_length = recorded_url.response_recorder.len - recorded_url.response_recorder.payload_offset
|
||||||
payload_digest = warcprox.digest_str(
|
payload_digest = warcprox.digest_str(
|
||||||
|
Loading…
x
Reference in New Issue
Block a user