mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
WARCPROX_WRITE_RECORD is exempt from method filter
This commit is contained in:
parent
f5498e1822
commit
ddb60876a3
2
setup.py
2
setup.py
@ -51,7 +51,7 @@ except:
|
||||
|
||||
setuptools.setup(
|
||||
name='warcprox',
|
||||
version='2.1b1.dev46',
|
||||
version='2.1b1.dev47',
|
||||
description='WARC writing MITM HTTP/S proxy',
|
||||
url='https://github.com/internetarchive/warcprox',
|
||||
author='Noah Levitt',
|
||||
|
@ -44,6 +44,7 @@ import traceback
|
||||
import signal
|
||||
from collections import Counter
|
||||
import socket
|
||||
import urllib
|
||||
|
||||
try:
|
||||
import http.server as http_server
|
||||
@ -59,6 +60,23 @@ import certauth.certauth
|
||||
|
||||
import warcprox
|
||||
|
||||
import http.client
|
||||
orig_send = http.client.HTTPConnection.send
|
||||
def _send(self, data):
    """Log outgoing request data, then hand it to the original ``send``.

    Debugging stand-in for ``http.client.HTTPConnection.send``: the payload
    is dumped to the stream of the first root logging handler before being
    forwarded unchanged to ``orig_send``.

    ``bytes`` are written raw to the stream's underlying binary buffer and
    ``str`` is written to the text stream itself; each is followed by a
    newline. Anything else is only described via ``repr``.

    NOTE(review): assumes the first root handler wraps a text stream that
    exposes ``.buffer`` (e.g. ``sys.stdout``) -- confirm before enabling.
    """
    if not isinstance(data, (bytes, str)):
        logging.info('sending data from %s', repr(data))
    elif isinstance(data, bytes):
        # Log first: the handler is only looked up after logging.info() ran.
        logging.info('sending data (bytes): ')
        sink = logging.root.handlers[0].stream.buffer
        sink.write(data)
        sink.write(b'\n')
    else:
        logging.info('sending data (str): ')
        sink = logging.root.handlers[0].stream
        sink.write(data)
        sink.write('\n')
    orig_send(self, data)
|
||||
### uncomment this to see raw requests going over the wire
|
||||
# http.client.HTTPConnection.send = _send
|
||||
|
||||
logging.basicConfig(
|
||||
stream=sys.stdout, level=logging.INFO, # level=warcprox.TRACE,
|
||||
format='%(asctime)s %(process)d %(levelname)s %(threadName)s '
|
||||
@ -1153,7 +1171,8 @@ def test_missing_content_length(archiving_proxies, http_daemon, https_daemon):
|
||||
assert not 'content-length' in response.headers
|
||||
|
||||
def test_method_filter(
|
||||
https_daemon, http_daemon, archiving_proxies, playback_proxies):
|
||||
https_daemon, http_daemon, archiving_proxies, playback_proxies,
|
||||
warcprox_):
|
||||
# we've configured warcprox with method_filters=['GET','POST'] so HEAD
|
||||
# requests should not be archived
|
||||
|
||||
@ -1168,6 +1187,30 @@ def test_method_filter(
|
||||
assert response.status_code == 404
|
||||
assert response.content == b'404 Not in Archive\n'
|
||||
|
||||
# WARCPROX_WRITE_RECORD is exempt from method filter
|
||||
headers = {
|
||||
'Content-Type': 'text/plain',
|
||||
'WARC-Type': 'metadata',
|
||||
'Host': 'N/A'
|
||||
}
|
||||
url = 'http://fakeurl/'
|
||||
payload = b'I am the WARCPROX_WRITE_RECORD payload'
|
||||
request = urllib.request.Request(
|
||||
url, method="WARCPROX_WRITE_RECORD", headers=headers, data=payload)
|
||||
|
||||
# XXX setting request.type="http" is a hack to stop urllib from trying
|
||||
# to tunnel if url is https
|
||||
request.type = 'http'
|
||||
request.set_proxy('localhost:%s' % warcprox_.proxy.server_port, 'http')
|
||||
|
||||
with urllib.request.urlopen(request) as response:
|
||||
assert response.getcode() == 204
|
||||
|
||||
response = _poll_playback_until(
|
||||
playback_proxies, url, status=200, timeout_sec=10)
|
||||
assert response.status_code == 200
|
||||
assert response.content == payload
|
||||
|
||||
def test_dedup_ok_flag(
|
||||
https_daemon, http_daemon, warcprox_, archiving_proxies,
|
||||
rethinkdb_big_table):
|
||||
|
@ -182,7 +182,13 @@ class PlaybackProxyHandler(MitmProxyHandler):
|
||||
return self._send_headers_and_refd_payload(record.content[1], refers_to, refers_to_target_uri, refers_to_date)
|
||||
|
||||
else:
|
||||
raise Exception('unknown warc record type {}'.format(warc_type))
|
||||
# send it back raw, whatever it is
|
||||
headers_buf = bytearray()
|
||||
headers_buf.extend(b'HTTP/1.0 200 OK\r\n')
|
||||
headers_buf.extend(b'content-length: ' + record.get_header(b'content-length') + b'\r\n')
|
||||
headers_buf.extend(b'content-type: ' + record.get_header(b'content-type') + b'\r\n')
|
||||
headers_buf.extend(b'\r\n')
|
||||
return self._send_response(headers_buf, record.content_file)
|
||||
|
||||
finally:
|
||||
fh.close()
|
||||
|
@ -64,8 +64,12 @@ class WarcWriterThread(threading.Thread):
|
||||
else:
|
||||
self._run()
|
||||
|
||||
_ALWAYS_ACCEPT = {'WARCPROX_WRITE_RECORD'}
|
||||
def _filter_accepts(self, recorded_url):
    """Return True if ``recorded_url`` passes the configured method filter.

    With no (or an empty) ``self.method_filter`` every url is accepted.
    Otherwise the upper-cased request method must be listed either in
    ``self._ALWAYS_ACCEPT`` (methods exempt from filtering, such as
    ``WARCPROX_WRITE_RECORD``) or in ``self.method_filter``.

    The earlier single-expression ``return`` is intentionally gone: left in
    front of this logic it returned before the exemption check could run.
    """
    if not self.method_filter:
        return True
    method = recorded_url.method.upper()
    return method in self._ALWAYS_ACCEPT or method in self.method_filter
|
||||
|
||||
def _run(self):
|
||||
while not self.stop.is_set():
|
||||
|
Loading…
x
Reference in New Issue
Block a user