mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
Set the Via header on both the proxied request and the response returned to the client. Record the request's Via header in the WARC (because it is sent to the remote site), but do not record the response's Via header in the WARC (because it is not sent by the remote site).
This commit is contained in:
parent
47680cc17d
commit
ca7625b18d
@ -30,12 +30,12 @@ before_install:
|
||||
- docker run -d --publish=28015:28015 rethinkdb
|
||||
|
||||
before_script:
|
||||
- pip install . pytest requests
|
||||
- pip install . pytest requests warcio
|
||||
|
||||
script:
|
||||
- py.test -vv tests
|
||||
- py.test -vv --rethinkdb-servers=localhost tests
|
||||
- py.test -vv --rethinkdb-servers=localhost --rethinkdb-big-table tests
|
||||
- py.test -v tests
|
||||
- py.test -v --rethinkdb-servers=localhost tests
|
||||
- py.test -v --rethinkdb-servers=localhost --rethinkdb-big-table tests
|
||||
|
||||
notifications:
|
||||
slack:
|
||||
|
2
setup.py
2
setup.py
@ -51,7 +51,7 @@ except:
|
||||
|
||||
setuptools.setup(
|
||||
name='warcprox',
|
||||
version='2.1b1.dev71',
|
||||
version='2.1b1.dev72',
|
||||
description='WARC writing MITM HTTP/S proxy',
|
||||
url='https://github.com/internetarchive/warcprox',
|
||||
author='Noah Levitt',
|
||||
|
@ -40,9 +40,9 @@ do
|
||||
&& (cd /warcprox && git diff HEAD) | patch -p1 \
|
||||
&& virtualenv -p $python /tmp/venv \
|
||||
&& source /tmp/venv/bin/activate \
|
||||
&& pip --log-file /tmp/pip.log install . pytest requests \
|
||||
&& py.test -vv tests \
|
||||
&& py.test -vv --rethinkdb-servers=localhost tests \
|
||||
&& py.test -vv --rethinkdb-servers=localhost --rethinkdb-big-table tests"
|
||||
&& pip --log-file /tmp/pip.log install . pytest requests warcio \
|
||||
&& py.test -v tests \
|
||||
&& py.test -v --rethinkdb-servers=localhost tests \
|
||||
&& py.test -v --rethinkdb-servers=localhost --rethinkdb-big-table tests"
|
||||
done
|
||||
|
||||
|
@ -45,6 +45,7 @@ import signal
|
||||
from collections import Counter
|
||||
import socket
|
||||
import datetime
|
||||
import warcio.archiveiterator
|
||||
|
||||
try:
|
||||
import http.server as http_server
|
||||
@ -79,7 +80,7 @@ def _send(self, data):
|
||||
logging.info('sending data from %s', repr(data))
|
||||
orig_send(self, data)
|
||||
### uncomment this to see raw requests going over the wire
|
||||
# http_client.HTTPConnection.send = _send
|
||||
http_client.HTTPConnection.send = _send
|
||||
|
||||
logging.basicConfig(
|
||||
stream=sys.stdout, level=logging.INFO, # level=warcprox.TRACE,
|
||||
@ -1391,6 +1392,25 @@ def test_choose_a_port_for_me(service_registry):
|
||||
controller.stop.set()
|
||||
th.join()
|
||||
|
||||
def test_via_response_header(warcprox_, http_daemon, archiving_proxies, playback_proxies):
    '''
    Checks `Via` header handling for a url fetched through the archiving
    proxy:

    - the response handed back to the client carries `Via: 1.1 warcprox`
    - the played-back response carries no `Via` header
    - in the warc, the request record has `Via: 1.1 warcprox` and the
      response record has no `Via` header
    '''
    url = 'http://localhost:%s/a/z' % http_daemon.server_port
    response = requests.get(url, proxies=archiving_proxies)
    assert response.headers['via'] == '1.1 warcprox'

    # NOTE(review): assumes _poll_playback_until returns a requests response
    # object (with .status_code and .headers) -- confirm against the helper
    playback_response = _poll_playback_until(
            playback_proxies, url, status=200, timeout_sec=10)
    # check the playback response here, not the archiving response fetched
    # above
    assert playback_response.status_code == 200
    # look in the headers; membership on the response object itself does not
    # inspect headers, so it could never catch a stray Via header
    assert 'via' not in playback_response.headers

    warc = warcprox_.warc_writer_thread.writer_pool.default_warc_writer._fpath
    with open(warc, 'rb') as f:
        for record in warcio.archiveiterator.ArchiveIterator(f):
            # only records for the url fetched above are checked
            if record.rec_headers.get_header('warc-target-uri') == url:
                if record.rec_type == 'response':
                    assert not record.http_headers.get_header('via')
                elif record.rec_type == 'request':
                    assert record.http_headers.get_header('via') == '1.1 warcprox'
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main()
|
||||
|
||||
|
@ -163,7 +163,9 @@ class ProxyingRecordingHTTPResponse(http_client.HTTPResponse):
|
||||
|
||||
status_and_headers = 'HTTP/1.1 {} {}\r\n'.format(
|
||||
self.status, self.reason)
|
||||
for k,v in self.msg.items():
|
||||
self.headers['Via'] = via_header_value(
|
||||
self.headers.get('Via'), '%0.1f' % (self.version / 10))
|
||||
for k,v in self.headers.items():
|
||||
if k.lower() not in (
|
||||
'connection', 'proxy-connection', 'keep-alive',
|
||||
'proxy-authenticate', 'proxy-authorization', 'upgrade',
|
||||
@ -174,6 +176,15 @@ class ProxyingRecordingHTTPResponse(http_client.HTTPResponse):
|
||||
|
||||
self.recorder.payload_starts_now()
|
||||
|
||||
def via_header_value(orig, request_version):
    '''
    Builds the `Via` header value for a message passing through this proxy.

    Args:
        orig: the existing `Via` header value, or a falsy value (None or
            empty string) if the message has no `Via` header yet
        request_version: protocol version to advertise, e.g. '1.1'

    Returns:
        '<request_version> warcprox' when `orig` is falsy, otherwise `orig`
        followed by ', ' and '<request_version> warcprox'
    '''
    this_hop = '%s %s' % (request_version, 'warcprox')
    if not orig:
        return this_hop
    # keep the entries already present and add this proxy at the end
    return orig + ', ' + this_hop
|
||||
|
||||
class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
||||
'''
|
||||
An http proxy implementation of BaseHTTPRequestHandler, that acts as a
|
||||
@ -356,6 +367,10 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
||||
'Proxy-Authenticate', 'Proxy-Authorization', 'Upgrade'):
|
||||
del self.headers[key]
|
||||
|
||||
self.headers['Via'] = via_header_value(
|
||||
self.headers.get('Via'),
|
||||
self.request_version.replace('HTTP/', ''))
|
||||
|
||||
# Add headers to the request
|
||||
# XXX in at least python3.3 str(self.headers) uses \n not \r\n :(
|
||||
req_str += '\r\n'.join(
|
||||
|
Loading…
x
Reference in New Issue
Block a user