diff --git a/.travis.yml b/.travis.yml index 0f7a315..3d02ebf 100644 --- a/.travis.yml +++ b/.travis.yml @@ -30,12 +30,12 @@ before_install: - docker run -d --publish=28015:28015 rethinkdb before_script: -- pip install . pytest requests +- pip install . pytest requests warcio script: -- py.test -vv tests -- py.test -vv --rethinkdb-servers=localhost tests -- py.test -vv --rethinkdb-servers=localhost --rethinkdb-big-table tests +- py.test -v tests +- py.test -v --rethinkdb-servers=localhost tests +- py.test -v --rethinkdb-servers=localhost --rethinkdb-big-table tests notifications: slack: diff --git a/setup.py b/setup.py index 4483f14..547e1fa 100755 --- a/setup.py +++ b/setup.py @@ -51,7 +51,7 @@ except: setuptools.setup( name='warcprox', - version='2.1b1.dev71', + version='2.1b1.dev72', description='WARC writing MITM HTTP/S proxy', url='https://github.com/internetarchive/warcprox', author='Noah Levitt', diff --git a/tests/run-tests.sh b/tests/run-tests.sh index 0c5b254..334cfc2 100755 --- a/tests/run-tests.sh +++ b/tests/run-tests.sh @@ -40,9 +40,9 @@ do && (cd /warcprox && git diff HEAD) | patch -p1 \ && virtualenv -p $python /tmp/venv \ && source /tmp/venv/bin/activate \ - && pip --log-file /tmp/pip.log install . pytest requests \ - && py.test -vv tests \ - && py.test -vv --rethinkdb-servers=localhost tests \ - && py.test -vv --rethinkdb-servers=localhost --rethinkdb-big-table tests" + && pip --log-file /tmp/pip.log install . pytest requests warcio \ + && py.test -v tests \ + && py.test -v --rethinkdb-servers=localhost tests \ + && py.test -v --rethinkdb-servers=localhost --rethinkdb-big-table tests" done diff --git a/tests/test_warcprox.py b/tests/test_warcprox.py index c61f6d8..e4692cf 100755 --- a/tests/test_warcprox.py +++ b/tests/test_warcprox.py @@ -45,6 +45,7 @@ import signal from collections import Counter import socket import datetime +import warcio.archiveiterator try: import http.server as http_server @@ -79,7 +80,7 @@ def _send(self, data): logging.info('sending data from %s', repr(data)) orig_send(self, data) ### uncomment this to block see raw requests going over the wire -# http_client.HTTPConnection.send = _send +http_client.HTTPConnection.send = _send logging.basicConfig( stream=sys.stdout, level=logging.INFO, # level=warcprox.TRACE, @@ -1391,6 +1392,25 @@ def test_choose_a_port_for_me(service_registry): controller.stop.set() th.join() +def test_via_response_header(warcprox_, http_daemon, archiving_proxies, playback_proxies): + url = 'http://localhost:%s/a/z' % http_daemon.server_port + response = requests.get(url, proxies=archiving_proxies) + assert response.headers['via'] == '1.1 warcprox' + + playback_response = _poll_playback_until( + playback_proxies, url, status=200, timeout_sec=10) + assert response.status_code == 200 + assert not 'via' in playback_response + + warc = warcprox_.warc_writer_thread.writer_pool.default_warc_writer._fpath + with open(warc, 'rb') as f: + for record in warcio.archiveiterator.ArchiveIterator(f): + if record.rec_headers.get_header('warc-target-uri') == url: + if record.rec_type == 'response': + assert not record.http_headers.get_header('via') + elif record.rec_type == 'request': + assert record.http_headers.get_header('via') == '1.1 warcprox' + if __name__ == '__main__': pytest.main() diff --git a/warcprox/mitmproxy.py b/warcprox/mitmproxy.py index 5f592e4..d69f26e 100644 --- a/warcprox/mitmproxy.py +++ b/warcprox/mitmproxy.py @@ -163,7 +163,9 @@ class ProxyingRecordingHTTPResponse(http_client.HTTPResponse): status_and_headers = 'HTTP/1.1 {} {}\r\n'.format( self.status, self.reason) - for k,v in self.msg.items(): + self.headers['Via'] = via_header_value( + self.headers.get('Via'), '%0.1f' % (self.version / 10)) + for k,v in self.headers.items(): if k.lower() not in ( 'connection', 'proxy-connection', 'keep-alive', 'proxy-authenticate', 'proxy-authorization', 'upgrade', @@ -174,6 +176,15 @@ class ProxyingRecordingHTTPResponse(http_client.HTTPResponse): self.recorder.payload_starts_now() +def via_header_value(orig, request_version): + via = orig + if via: + via += ', ' + else: + via = '' + via = via + '%s %s' % (request_version, 'warcprox') + return via + class MitmProxyHandler(http_server.BaseHTTPRequestHandler): ''' An http proxy implementation of BaseHTTPRequestHandler, that acts as a @@ -356,6 +367,10 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler): 'Proxy-Authenticate', 'Proxy-Authorization', 'Upgrade'): del self.headers[key] + self.headers['Via'] = via_header_value( + self.headers.get('Via'), + self.request_version.replace('HTTP/', '')) + # Add headers to the request # XXX in at least python3.3 str(self.headers) uses \n not \r\n :( req_str += '\r\n'.join(