mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
a test for alex's method filter
This commit is contained in:
parent
f948850692
commit
de7a23325b
2
setup.py
2
setup.py
@ -51,7 +51,7 @@ except:
|
|||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='warcprox',
|
name='warcprox',
|
||||||
version='2.0b2.dev37',
|
version='2.0b2.dev38',
|
||||||
description='WARC writing MITM HTTP/S proxy',
|
description='WARC writing MITM HTTP/S proxy',
|
||||||
url='https://github.com/internetarchive/warcprox',
|
url='https://github.com/internetarchive/warcprox',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
|
@ -111,9 +111,7 @@ def dump_state(signum=None, frame=None):
|
|||||||
signal.signal(signal.SIGQUIT, dump_state)
|
signal.signal(signal.SIGQUIT, dump_state)
|
||||||
|
|
||||||
class _TestHttpRequestHandler(http_server.BaseHTTPRequestHandler):
|
class _TestHttpRequestHandler(http_server.BaseHTTPRequestHandler):
|
||||||
def do_GET(self):
|
def build_response(self):
|
||||||
logging.info('GET {}'.format(self.path))
|
|
||||||
|
|
||||||
m = re.match(r'^/([^/]+)/([^/]+)$', self.path)
|
m = re.match(r'^/([^/]+)/([^/]+)$', self.path)
|
||||||
if m is not None:
|
if m is not None:
|
||||||
special_header = 'warcprox-test-header: {}!'.format(m.group(1)).encode('utf-8')
|
special_header = 'warcprox-test-header: {}!'.format(m.group(1)).encode('utf-8')
|
||||||
@ -134,10 +132,19 @@ class _TestHttpRequestHandler(http_server.BaseHTTPRequestHandler):
|
|||||||
+ b'Content-Type: text/plain\r\n'
|
+ b'Content-Type: text/plain\r\n'
|
||||||
+ b'Content-Length: ' + str(len(payload)).encode('ascii') + b'\r\n'
|
+ b'Content-Length: ' + str(len(payload)).encode('ascii') + b'\r\n'
|
||||||
+ b'\r\n')
|
+ b'\r\n')
|
||||||
|
return headers, payload
|
||||||
|
|
||||||
|
def do_GET(self):
|
||||||
|
logging.info('GET {}'.format(self.path))
|
||||||
|
headers, payload = self.build_response()
|
||||||
self.connection.sendall(headers)
|
self.connection.sendall(headers)
|
||||||
self.connection.sendall(payload)
|
self.connection.sendall(payload)
|
||||||
|
|
||||||
|
def do_HEAD(self):
|
||||||
|
logging.info('HEAD {}'.format(self.path))
|
||||||
|
headers, payload = self.build_response()
|
||||||
|
self.connection.sendall(headers)
|
||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
def cert(request):
|
def cert(request):
|
||||||
f = tempfile.NamedTemporaryFile(prefix='warcprox-test-https-', suffix='.pem', delete=False)
|
f = tempfile.NamedTemporaryFile(prefix='warcprox-test-https-', suffix='.pem', delete=False)
|
||||||
@ -346,10 +353,14 @@ def warcprox_(request, captures_db, dedup_db, stats_db, service_registry):
|
|||||||
playback_index_db=playback_index_db, options=options)
|
playback_index_db=playback_index_db, options=options)
|
||||||
options.playback_proxy = playback_proxy.server_port
|
options.playback_proxy = playback_proxy.server_port
|
||||||
|
|
||||||
|
options.method_filter = ['GET','POST']
|
||||||
|
|
||||||
writer_pool = warcprox.writer.WarcWriterPool(options)
|
writer_pool = warcprox.writer.WarcWriterPool(options)
|
||||||
warc_writer_thread = warcprox.writerthread.WarcWriterThread(
|
warc_writer_thread = warcprox.writerthread.WarcWriterThread(
|
||||||
recorded_url_q=recorded_url_q, writer_pool=writer_pool,
|
recorded_url_q=recorded_url_q, writer_pool=writer_pool,
|
||||||
dedup_db=dedup_db, listeners=[captures_db or dedup_db, playback_index_db, stats_db])
|
dedup_db=dedup_db, listeners=[
|
||||||
|
captures_db or dedup_db, playback_index_db, stats_db],
|
||||||
|
options=options)
|
||||||
|
|
||||||
warcprox_ = warcprox.controller.WarcproxController(proxy=proxy,
|
warcprox_ = warcprox.controller.WarcproxController(proxy=proxy,
|
||||||
warc_writer_thread=warc_writer_thread, playback_proxy=playback_proxy,
|
warc_writer_thread=warc_writer_thread, playback_proxy=playback_proxy,
|
||||||
@ -1145,6 +1156,22 @@ def test_missing_content_length(archiving_proxies, http_daemon, https_daemon):
|
|||||||
b'This response is missing a Content-Length http header.')
|
b'This response is missing a Content-Length http header.')
|
||||||
assert not 'content-length' in response.headers
|
assert not 'content-length' in response.headers
|
||||||
|
|
||||||
|
def test_method_filter(
|
||||||
|
https_daemon, http_daemon, archiving_proxies, playback_proxies):
|
||||||
|
# we've configured warcprox with method_filter=['GET','POST'] so HEAD
|
||||||
|
# requests should not be archived
|
||||||
|
|
||||||
|
url = 'http://localhost:{}/z/a'.format(http_daemon.server_port)
|
||||||
|
|
||||||
|
response = requests.head(url, proxies=archiving_proxies)
|
||||||
|
assert response.status_code == 200
|
||||||
|
assert response.headers['warcprox-test-header'] == 'z!'
|
||||||
|
assert response.content == b''
|
||||||
|
|
||||||
|
response = _poll_playback_until(playback_proxies, url, status=200, timeout_sec=10)
|
||||||
|
assert response.status_code == 404
|
||||||
|
assert response.content == b'404 Not in Archive\n'
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
pytest.main()
|
pytest.main()
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user