1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

setup: update to warcio==1.2

add ensure_http_headers=True when reading WARC records
tests: fix pytest warnings, use `import webtest` and webtest.TestApp instead of a directly imported TestApp
This commit is contained in:
Ilya Kreymer 2017-04-29 13:47:54 -07:00
parent 14af9287dc
commit 58f39f0558
10 changed files with 25 additions and 15 deletions

View File

@ -571,7 +571,7 @@ class TestRecorder(LiveServerTests, FakeRedisTests, TempDirTests, BaseTestClass)
with open(warcs[b'meta/meta.warc.gz'], 'rb') as fh: with open(warcs[b'meta/meta.warc.gz'], 'rb') as fh:
decomp = DecompressingBufferedReader(fh) decomp = DecompressingBufferedReader(fh)
record = ArcWarcRecordLoader().parse_record_stream(decomp) record = ArcWarcRecordLoader().parse_record_stream(decomp, ensure_http_headers=True)
status_headers = record.rec_headers status_headers = record.rec_headers
assert len(record.rec_headers.headers) == 9 assert len(record.rec_headers.headers) == 9

View File

@ -56,12 +56,20 @@ from warcio.timeutils import datetime_to_http_date
from datetime import datetime from datetime import datetime
import pprint import pprint
from mock import patch
urlrewriter = UrlRewriter('20131010/http://example.com/', '/web/') urlrewriter = UrlRewriter('20131010/http://example.com/', '/web/')
headerrewriter = HeaderRewriter() headerrewriter = HeaderRewriter()
def _repr_format(sh):
headers_str = pprint.pformat(sh.headers, indent=2, width=80)
return "StatusAndHeaders(protocol = '{0}', statusline = '{1}', \
headers = {2})".format(sh.protocol, sh.statusline, headers_str)
@patch('warcio.statusandheaders.StatusAndHeaders.__repr__', _repr_format)
def _test_headers(headers, status='200 OK', rewriter=urlrewriter): def _test_headers(headers, status='200 OK', rewriter=urlrewriter):
rewritten = headerrewriter.rewrite(StatusAndHeaders(status, headers), rewriter, rewriter.get_cookie_rewriter()) rewritten = headerrewriter.rewrite(StatusAndHeaders(status, headers), rewriter, rewriter.get_cookie_rewriter())
return pprint.pprint(vars(rewritten)) return pprint.pprint(vars(rewritten))

View File

@ -198,7 +198,8 @@ class RewriterApp(object):
False) False)
stream = BufferedReader(r.raw, block_size=BUFF_SIZE) stream = BufferedReader(r.raw, block_size=BUFF_SIZE)
record = self.loader.parse_record_stream(stream) record = self.loader.parse_record_stream(stream,
ensure_http_headers=True)
memento_dt = r.headers.get('Memento-Datetime') memento_dt = r.headers.get('Memento-Datetime')
target_uri = r.headers.get('WARC-Target-URI') target_uri = r.headers.get('WARC-Target-URI')

View File

@ -323,7 +323,8 @@ class DefaultRecordParser(object):
def __call__(self, fh): def __call__(self, fh):
aiter = ArchiveIterator(fh, self.options.get('minimal', False), aiter = ArchiveIterator(fh, self.options.get('minimal', False),
self.options.get('verify_http', False), self.options.get('verify_http', False),
self.options.get('arc2warc', False)) self.options.get('arc2warc', False),
ensure_http_headers=True)
entry_iter = self.create_record_iter(aiter) entry_iter = self.create_record_iter(aiter)

View File

@ -84,9 +84,9 @@ urn:X-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX
# bad arcs -- test error edge cases # bad arcs -- test error edge cases
>>> print_cdx_index('bad.arc', include_all=True) >>> print_cdx_index('bad.arc', include_all=True)
CDX N b a m s k r M S V g CDX N b a m s k r M S V g
com,example)/ 20140401000000 http://example.com/ text/html - 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ - - 67 134 bad.arc com,example)/ 20140401000000 http://example.com/ text/html 200 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ - - 67 134 bad.arc
com,example)/ 20140102000000 http://example.com/ text/plain - 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ - - 59 202 bad.arc com,example)/ 20140102000000 http://example.com/ text/plain 200 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ - - 59 202 bad.arc
com,example)/ 20140401000000 http://example.com/ text/html - 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ - - 68 262 bad.arc com,example)/ 20140401000000 http://example.com/ text/html 200 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ - - 68 262 bad.arc
# POST request tests # POST request tests

View File

@ -1,5 +1,5 @@
six six
warcio==1.1 warcio==1.2
chardet chardet
requests requests
redis redis

View File

@ -25,10 +25,10 @@ class PyTest(TestCommand):
import pytest import pytest
import os import os
os.environ.pop('PYWB_CONFIG_FILE', None) os.environ.pop('PYWB_CONFIG_FILE', None)
cmdline = ' --cov-config .coveragerc --cov pywb' cmdline = '--cov-config .coveragerc --cov pywb'
cmdline += ' -v --doctest-module ./pywb/ tests/' cmdline += ' -v --doctest-module ./pywb/ tests/'
errcode = pytest.main(cmdline) errcode = pytest.main(cmdline.split(' '))
sys.exit(errcode) sys.exit(errcode)

View File

@ -1,6 +1,6 @@
from gevent import monkey; monkey.patch_all(thread=False) from gevent import monkey; monkey.patch_all(thread=False)
from webtest import TestApp import webtest
from pywb.webagg.test.testutils import BaseTestClass from pywb.webagg.test.testutils import BaseTestClass
@ -14,6 +14,6 @@ class BaseConfigTest(BaseTestClass):
def setup_class(cls, config_file): def setup_class(cls, config_file):
super(BaseConfigTest, cls).setup_class() super(BaseConfigTest, cls).setup_class()
config_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), config_file) config_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), config_file)
cls.testapp = TestApp(FrontEndApp(config_file=config_file)) cls.testapp = webtest.TestApp(FrontEndApp(config_file=config_file))

View File

@ -12,7 +12,7 @@ import gevent
from six import StringIO from six import StringIO
from webtest import TestApp import webtest
from pytest import raises from pytest import raises
from mock import patch from mock import patch
@ -61,7 +61,7 @@ class TestManagedColls(TempDirTests, BaseTestClass):
def _create_app(self): def _create_app(self):
config_file = 'config_test.yaml' config_file = 'config_test.yaml'
config_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), config_file) config_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), config_file)
self.testapp = TestApp(FrontEndApp(config_file=config_file)) self.testapp = webtest.TestApp(FrontEndApp(config_file=config_file))
@patch('pywb.apps.cli.BaseCli.run_gevent', lambda *args, **kwargs: None) @patch('pywb.apps.cli.BaseCli.run_gevent', lambda *args, **kwargs: None)
def test_run_cli(self): def test_run_cli(self):

View File

@ -4,7 +4,7 @@ import re
import json import json
import os import os
from webtest import TestApp import webtest
from six.moves.urllib.parse import urlencode from six.moves.urllib.parse import urlencode
@ -20,7 +20,7 @@ class TestCDXApp(BaseTestClass):
def setup_class(cls): def setup_class(cls):
super(TestCDXApp, cls).setup_class() super(TestCDXApp, cls).setup_class()
config_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'config_test.yaml') config_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'config_test.yaml')
cls.testapp = TestApp(AutoConfigApp(config_file=config_file)) cls.testapp = webtest.TestApp(AutoConfigApp(config_file=config_file))
def query(self, url, is_error=False, **params): def query(self, url, is_error=False, **params):
params['url'] = url params['url'] = url