1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

setup: update to warcio==1.2

add ensure_http_headers=True when reading WARC records
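tests: fix pytest warnings: pass pytest.main() an argument list instead of a single string, and import the webtest module and use webtest.TestApp instead of importing TestApp directly into test modules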
This commit is contained in:
Ilya Kreymer 2017-04-29 13:47:54 -07:00
parent 14af9287dc
commit 58f39f0558
10 changed files with 25 additions and 15 deletions

View File

@ -571,7 +571,7 @@ class TestRecorder(LiveServerTests, FakeRedisTests, TempDirTests, BaseTestClass)
with open(warcs[b'meta/meta.warc.gz'], 'rb') as fh:
decomp = DecompressingBufferedReader(fh)
record = ArcWarcRecordLoader().parse_record_stream(decomp)
record = ArcWarcRecordLoader().parse_record_stream(decomp, ensure_http_headers=True)
status_headers = record.rec_headers
assert len(record.rec_headers.headers) == 9

View File

@ -56,12 +56,20 @@ from warcio.timeutils import datetime_to_http_date
from datetime import datetime
import pprint
from mock import patch
# Module-level fixtures used by the header-rewriting test helper in this module.
# NOTE(review): the first argument looks like a "<timestamp>/<url>" archival URL
# and the second like a replay path prefix -- confirm against UrlRewriter.
urlrewriter = UrlRewriter('20131010/http://example.com/', '/web/')
headerrewriter = HeaderRewriter()
def _repr_format(sh):
    """Return a stable, pretty-printed representation of a status/headers object.

    Intended as a drop-in ``__repr__`` replacement so doctest output does not
    depend on the library's own ``__repr__`` format.

    :param sh: object exposing ``protocol``, ``statusline`` and ``headers``
    :return: ``"StatusAndHeaders(protocol = '...', statusline = '...', headers = ...)"``
             with ``headers`` rendered by ``pprint.pformat(indent=2, width=80)``
    """
    headers_str = pprint.pformat(sh.headers, indent=2, width=80)
    # Adjacent string literals are joined at compile time; unlike a backslash
    # continuation inside the quotes, the resulting text cannot pick up stray
    # leading whitespace if the second line is re-indented.
    template = ("StatusAndHeaders(protocol = '{0}', statusline = '{1}', "
                "headers = {2})")
    return template.format(sh.protocol, sh.statusline, headers_str)
@patch('warcio.statusandheaders.StatusAndHeaders.__repr__', _repr_format)
def _test_headers(headers, status='200 OK', rewriter=urlrewriter):
    """Rewrite ``headers`` and pretty-print the attributes of the result.

    ``StatusAndHeaders.__repr__`` is patched to ``_repr_format`` by the
    decorator while this helper runs.

    :param headers: headers passed to ``StatusAndHeaders``
    :param status: status line passed to ``StatusAndHeaders``
    :param rewriter: URL rewriter; also supplies the cookie rewriter
    :return: ``None`` (the return value of ``pprint.pprint``); output is printed
    """
    original = StatusAndHeaders(status, headers)
    cookie_rewriter = rewriter.get_cookie_rewriter()
    result = headerrewriter.rewrite(original, rewriter, cookie_rewriter)
    return pprint.pprint(vars(result))

View File

@ -198,7 +198,8 @@ class RewriterApp(object):
False)
stream = BufferedReader(r.raw, block_size=BUFF_SIZE)
record = self.loader.parse_record_stream(stream)
record = self.loader.parse_record_stream(stream,
ensure_http_headers=True)
memento_dt = r.headers.get('Memento-Datetime')
target_uri = r.headers.get('WARC-Target-URI')

View File

@ -323,7 +323,8 @@ class DefaultRecordParser(object):
def __call__(self, fh):
aiter = ArchiveIterator(fh, self.options.get('minimal', False),
self.options.get('verify_http', False),
self.options.get('arc2warc', False))
self.options.get('arc2warc', False),
ensure_http_headers=True)
entry_iter = self.create_record_iter(aiter)

View File

@ -84,9 +84,9 @@ urn:X-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX
# bad arcs -- test error edge cases
>>> print_cdx_index('bad.arc', include_all=True)
CDX N b a m s k r M S V g
com,example)/ 20140401000000 http://example.com/ text/html - 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ - - 67 134 bad.arc
com,example)/ 20140102000000 http://example.com/ text/plain - 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ - - 59 202 bad.arc
com,example)/ 20140401000000 http://example.com/ text/html - 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ - - 68 262 bad.arc
com,example)/ 20140401000000 http://example.com/ text/html 200 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ - - 67 134 bad.arc
com,example)/ 20140102000000 http://example.com/ text/plain 200 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ - - 59 202 bad.arc
com,example)/ 20140401000000 http://example.com/ text/html 200 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ - - 68 262 bad.arc
# POST request tests

View File

@ -1,5 +1,5 @@
six
warcio==1.1
warcio==1.2
chardet
requests
redis

View File

@ -28,7 +28,7 @@ class PyTest(TestCommand):
cmdline = '--cov-config .coveragerc --cov pywb'
cmdline += ' -v --doctest-module ./pywb/ tests/'
errcode = pytest.main(cmdline)
errcode = pytest.main(cmdline.split(' '))
sys.exit(errcode)

View File

@ -1,6 +1,6 @@
from gevent import monkey; monkey.patch_all(thread=False)
from webtest import TestApp
import webtest
from pywb.webagg.test.testutils import BaseTestClass
@ -14,6 +14,6 @@ class BaseConfigTest(BaseTestClass):
def setup_class(cls, config_file):
super(BaseConfigTest, cls).setup_class()
config_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), config_file)
cls.testapp = TestApp(FrontEndApp(config_file=config_file))
cls.testapp = webtest.TestApp(FrontEndApp(config_file=config_file))

View File

@ -12,7 +12,7 @@ import gevent
from six import StringIO
from webtest import TestApp
import webtest
from pytest import raises
from mock import patch
@ -61,7 +61,7 @@ class TestManagedColls(TempDirTests, BaseTestClass):
def _create_app(self):
config_file = 'config_test.yaml'
config_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), config_file)
self.testapp = TestApp(FrontEndApp(config_file=config_file))
self.testapp = webtest.TestApp(FrontEndApp(config_file=config_file))
@patch('pywb.apps.cli.BaseCli.run_gevent', lambda *args, **kwargs: None)
def test_run_cli(self):

View File

@ -4,7 +4,7 @@ import re
import json
import os
from webtest import TestApp
import webtest
from six.moves.urllib.parse import urlencode
@ -20,7 +20,7 @@ class TestCDXApp(BaseTestClass):
def setup_class(cls):
super(TestCDXApp, cls).setup_class()
config_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'config_test.yaml')
cls.testapp = TestApp(AutoConfigApp(config_file=config_file))
cls.testapp = webtest.TestApp(AutoConfigApp(config_file=config_file))
def query(self, url, is_error=False, **params):
params['url'] = url