diff --git a/pywb/recorder/test/test_recorder.py b/pywb/recorder/test/test_recorder.py index 8b218c69..5e7f960f 100644 --- a/pywb/recorder/test/test_recorder.py +++ b/pywb/recorder/test/test_recorder.py @@ -571,7 +571,7 @@ class TestRecorder(LiveServerTests, FakeRedisTests, TempDirTests, BaseTestClass) with open(warcs[b'meta/meta.warc.gz'], 'rb') as fh: decomp = DecompressingBufferedReader(fh) - record = ArcWarcRecordLoader().parse_record_stream(decomp) + record = ArcWarcRecordLoader().parse_record_stream(decomp, ensure_http_headers=True) status_headers = record.rec_headers assert len(record.rec_headers.headers) == 9 diff --git a/pywb/rewrite/test/test_header_rewriter.py b/pywb/rewrite/test/test_header_rewriter.py index 2d918f44..6e00b0c1 100644 --- a/pywb/rewrite/test/test_header_rewriter.py +++ b/pywb/rewrite/test/test_header_rewriter.py @@ -56,12 +56,20 @@ from warcio.timeutils import datetime_to_http_date from datetime import datetime import pprint +from mock import patch urlrewriter = UrlRewriter('20131010/http://example.com/', '/web/') headerrewriter = HeaderRewriter() +def _repr_format(sh): + headers_str = pprint.pformat(sh.headers, indent=2, width=80) + return "StatusAndHeaders(protocol = '{0}', statusline = '{1}', \ +headers = {2})".format(sh.protocol, sh.statusline, headers_str) + + +@patch('warcio.statusandheaders.StatusAndHeaders.__repr__', _repr_format) def _test_headers(headers, status='200 OK', rewriter=urlrewriter): rewritten = headerrewriter.rewrite(StatusAndHeaders(status, headers), rewriter, rewriter.get_cookie_rewriter()) return pprint.pprint(vars(rewritten)) diff --git a/pywb/urlrewrite/rewriterapp.py b/pywb/urlrewrite/rewriterapp.py index 2dc2f0aa..f9ef36eb 100644 --- a/pywb/urlrewrite/rewriterapp.py +++ b/pywb/urlrewrite/rewriterapp.py @@ -198,7 +198,8 @@ class RewriterApp(object): False) stream = BufferedReader(r.raw, block_size=BUFF_SIZE) - record = self.loader.parse_record_stream(stream) + record = self.loader.parse_record_stream(stream, + ensure_http_headers=True) memento_dt = r.headers.get('Memento-Datetime') target_uri = r.headers.get('WARC-Target-URI') diff --git a/pywb/warc/archiveindexer.py b/pywb/warc/archiveindexer.py index d8059d45..afcb1564 100644 --- a/pywb/warc/archiveindexer.py +++ b/pywb/warc/archiveindexer.py @@ -323,7 +323,8 @@ class DefaultRecordParser(object): def __call__(self, fh): aiter = ArchiveIterator(fh, self.options.get('minimal', False), self.options.get('verify_http', False), - self.options.get('arc2warc', False)) + self.options.get('arc2warc', False), + ensure_http_headers=True) entry_iter = self.create_record_iter(aiter) diff --git a/pywb/warc/test/test_indexing.py b/pywb/warc/test/test_indexing.py index afc148ff..fbe1413b 100644 --- a/pywb/warc/test/test_indexing.py +++ b/pywb/warc/test/test_indexing.py @@ -84,9 +84,9 @@ urn:X-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX # bad arcs -- test error edge cases >>> print_cdx_index('bad.arc', include_all=True) CDX N b a m s k r M S V g -com,example)/ 20140401000000 http://example.com/ text/html - 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ - - 67 134 bad.arc -com,example)/ 20140102000000 http://example.com/ text/plain - 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ - - 59 202 bad.arc -com,example)/ 20140401000000 http://example.com/ text/html - 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ - - 68 262 bad.arc +com,example)/ 20140401000000 http://example.com/ text/html 200 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ - - 67 134 bad.arc +com,example)/ 20140102000000 http://example.com/ text/plain 200 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ - - 59 202 bad.arc +com,example)/ 20140401000000 http://example.com/ text/html 200 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ - - 68 262 bad.arc # POST request tests diff --git a/requirements.txt b/requirements.txt index b5304592..5e6367b9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ six -warcio==1.1 +warcio==1.2 chardet requests redis diff --git a/setup.py b/setup.py index 3dbf6608..50920dd4 100755 --- a/setup.py +++ b/setup.py @@ -25,10 +25,10 @@ class PyTest(TestCommand): import pytest import os os.environ.pop('PYWB_CONFIG_FILE', None) - cmdline = ' --cov-config .coveragerc --cov pywb' + cmdline = '--cov-config .coveragerc --cov pywb' cmdline += ' -v --doctest-module ./pywb/ tests/' - errcode = pytest.main(cmdline) + errcode = pytest.main(cmdline.split(' ')) sys.exit(errcode) diff --git a/tests/base_config_test.py b/tests/base_config_test.py index e2d45ad8..18a66baa 100644 --- a/tests/base_config_test.py +++ b/tests/base_config_test.py @@ -1,6 +1,6 @@ from gevent import monkey; monkey.patch_all(thread=False) -from webtest import TestApp +import webtest from pywb.webagg.test.testutils import BaseTestClass @@ -14,6 +14,6 @@ class BaseConfigTest(BaseTestClass): def setup_class(cls, config_file): super(BaseConfigTest, cls).setup_class() config_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), config_file) - cls.testapp = TestApp(FrontEndApp(config_file=config_file)) + cls.testapp = webtest.TestApp(FrontEndApp(config_file=config_file)) diff --git a/tests/test_auto_colls.py b/tests/test_auto_colls.py index 4859d83f..47dbc182 100644 --- a/tests/test_auto_colls.py +++ b/tests/test_auto_colls.py @@ -12,7 +12,7 @@ import gevent from six import StringIO -from webtest import TestApp +import webtest from pytest import raises from mock import patch @@ -61,7 +61,7 @@ class TestManagedColls(TempDirTests, BaseTestClass): def _create_app(self): config_file = 'config_test.yaml' config_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), config_file) - self.testapp = TestApp(FrontEndApp(config_file=config_file)) + self.testapp = webtest.TestApp(FrontEndApp(config_file=config_file)) @patch('pywb.apps.cli.BaseCli.run_gevent', lambda *args, **kwargs: None) def test_run_cli(self): diff --git a/tests/test_cdx_server_app.py b/tests/test_cdx_server_app.py index 3594fc8e..2e9bb126 100644 --- a/tests/test_cdx_server_app.py +++ b/tests/test_cdx_server_app.py @@ -4,7 +4,7 @@ import re import json import os -from webtest import TestApp +import webtest from six.moves.urllib.parse import urlencode @@ -20,7 +20,7 @@ class TestCDXApp(BaseTestClass): def setup_class(cls): super(TestCDXApp, cls).setup_class() config_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'config_test.yaml') - cls.testapp = TestApp(AutoConfigApp(config_file=config_file)) + cls.testapp = webtest.TestApp(AutoConfigApp(config_file=config_file)) def query(self, url, is_error=False, **params): params['url'] = url