-{% for key, val in wbrequest.user_metadata.iteritems() %}
+{% for key, val in wbrequest.user_metadata.items() %}
{{ key }}: | {{ val }} |
{% endfor %}
---|
diff --git a/pywb/utils/canonicalize.py b/pywb/utils/canonicalize.py
index c64dfc04..2eab5f32 100644
--- a/pywb/utils/canonicalize.py
+++ b/pywb/utils/canonicalize.py
@@ -39,7 +39,8 @@ def canonicalize(url, surt_ordered=True):
"""
try:
key = surt.surt(url)
- except Exception as e:
+ except Exception as e: #pragma: no cover
+ # doesn't happen with surt from 0.3b
# urn is already canonical, so just use as-is
if url.startswith('urn:'):
return url
diff --git a/pywb/utils/loaders.py b/pywb/utils/loaders.py
index ea901aef..8c47e99e 100644
--- a/pywb/utils/loaders.py
+++ b/pywb/utils/loaders.py
@@ -46,14 +46,14 @@ def load_yaml_config(config_file):
#=================================================================
-def to_native_str(value, encoding='iso-8859-1'):
+def to_native_str(value, encoding='iso-8859-1', func=lambda x: x):
if isinstance(value, str):
return value
- if six.PY3 and isinstance(value, six.binary_type):
- return value.decode(encoding)
- elif six.PY2 and isinstance(value, six.text_type):
- return value.encode(encoding)
+ if six.PY3 and isinstance(value, six.binary_type): #pragma: no cover
+ return func(value.decode(encoding))
+ elif six.PY2 and isinstance(value, six.text_type): #pragma: no cover
+ return func(value.encode(encoding))
#=================================================================
diff --git a/pywb/utils/statusandheaders.py b/pywb/utils/statusandheaders.py
index b7be3c88..d8bd3f60 100644
--- a/pywb/utils/statusandheaders.py
+++ b/pywb/utils/statusandheaders.py
@@ -64,7 +64,7 @@ class StatusAndHeaders(object):
self.headers[index] = (curr_name, header_dict[name_lower])
del header_dict[name_lower]
- for name, value in header_dict.iteritems():
+ for name, value in six.iteritems(header_dict):
self.headers.append((name, value))
def remove_header(self, name):
diff --git a/pywb/warc/cdxindexer.py b/pywb/warc/cdxindexer.py
index ab981804..13e7ba26 100644
--- a/pywb/warc/cdxindexer.py
+++ b/pywb/warc/cdxindexer.py
@@ -266,7 +266,10 @@ def write_multi_cdx_index(output, inputs, **options):
# write to one cdx file
else:
if output == '-':
- outfile = sys.stdout
+ if hasattr(sys.stdout, 'buffer'):
+ outfile = sys.stdout.buffer
+ else:
+ outfile = sys.stdout
else:
outfile = open(output, 'wb')
diff --git a/pywb/warc/resolvingloader.py b/pywb/warc/resolvingloader.py
index 954861a1..b6398177 100644
--- a/pywb/warc/resolvingloader.py
+++ b/pywb/warc/resolvingloader.py
@@ -15,6 +15,33 @@ class ResolvingLoader(object):
self.no_record_parse = no_record_parse
def __call__(self, cdx, failed_files, cdx_loader, *args, **kwargs):
+ headers_record, payload_record = self.load_headers_and_payload(cdx, failed_files, cdx_loader)
+
+ # Default handling logic when loading http status/headers
+
+ # special case: set header to payload if old-style revisit
+ # with missing header
+ if not headers_record:
+ headers_record = payload_record
+ elif headers_record != payload_record:
+ # close remainder of stream as this record only used for
+ # (already parsed) headers
+ headers_record.stream.close()
+
+ # special case: check if headers record is actually empty
+ # (eg empty revisit), then use headers from revisit
+ if not headers_record.status_headers.headers:
+ headers_record = payload_record
+
+ if not headers_record or not payload_record:
+ raise ArchiveLoadFailed('Could not load ' + str(cdx))
+
+ # ensure status line is valid from here
+ headers_record.status_headers.validate_statusline('204 No Content')
+
+ return (headers_record.status_headers, payload_record.stream)
+
+ def load_headers_and_payload(self, cdx, failed_files, cdx_loader):
"""
Resolve headers and payload for a given capture
In the simple case, headers and payload are in the same record.
@@ -53,27 +80,8 @@ class ResolvingLoader(object):
elif (has_orig):
payload_record = self._resolve_path_load(cdx, True, failed_files)
- # special case: set header to payload if old-style revisit
- # with missing header
- if not headers_record:
- headers_record = payload_record
- elif headers_record != payload_record:
- # close remainder of stream as this record only used for
- # (already parsed) headers
- headers_record.stream.close()
+ return headers_record, payload_record
- # special case: check if headers record is actually empty
- # (eg empty revisit), then use headers from revisit
- if not headers_record.status_headers.headers:
- headers_record = payload_record
-
- if not headers_record or not payload_record:
- raise ArchiveLoadFailed('Could not load ' + str(cdx))
-
- # ensure status line is valid from here
- headers_record.status_headers.validate_statusline('204 No Content')
-
- return (headers_record.status_headers, payload_record.stream)
def _resolve_path_load(self, cdx, is_original, failed_files):
"""
@@ -109,6 +117,9 @@ class ResolvingLoader(object):
if not possible_paths:
continue
+ if isinstance(possible_paths, str):
+ possible_paths = [possible_paths]
+
for path in possible_paths:
any_found = True
try:
diff --git a/pywb/warc/test/test_indexing.py b/pywb/warc/test/test_indexing.py
index 556a5c3a..42dd9e65 100644
--- a/pywb/warc/test/test_indexing.py
+++ b/pywb/warc/test/test_indexing.py
@@ -235,10 +235,10 @@ def test_sorted_warc_gz():
def cli_lines(cmds):
buff = BytesIO()
- orig = sys.stdout
- sys.stdout = buff
+ orig = sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else None
+ sys.stdout.buffer = buff
main(cmds)
- sys.stdout = orig
+ sys.stdout.buffer = orig
lines = buff.getvalue().rstrip().split(b'\n')
# print first, last, num lines
diff --git a/pywb/webapp/cdx_api_handler.py b/pywb/webapp/cdx_api_handler.py
index 980c16d3..1835647a 100644
--- a/pywb/webapp/cdx_api_handler.py
+++ b/pywb/webapp/cdx_api_handler.py
@@ -23,11 +23,8 @@ class CDXAPIHandler(BaseHandler):
cdx_iter = self.index_handler.load_cdx(wbrequest, params)
- def to_utf8():
- for cdx in cdx_iter:
- yield cdx.encode('utf-8')
-
- return WbResponse.text_stream(to_utf8())
+ return WbResponse.text_stream(cdx_iter,
+ content_type='text/plain')
@staticmethod
def extract_params_from_wsgi_env(env):
diff --git a/pywb/webapp/handlers.py b/pywb/webapp/handlers.py
index 90ae7eb5..1191f2ec 100644
--- a/pywb/webapp/handlers.py
+++ b/pywb/webapp/handlers.py
@@ -210,7 +210,7 @@ class StaticHandler(BaseHandler):
if 'wsgi.file_wrapper' in wbrequest.env:
reader = wbrequest.env['wsgi.file_wrapper'](data)
else:
- reader = iter(lambda: data.read(), '')
+ reader = iter(lambda: data.read(), b'')
content_type = 'application/octet-stream'
@@ -218,9 +218,9 @@ class StaticHandler(BaseHandler):
if guessed[0]:
content_type = guessed[0]
- return WbResponse.text_stream(reader,
- content_type=content_type,
- headers=headers)
+ return WbResponse.bin_stream(reader,
+ content_type=content_type,
+ headers=headers)
except IOError:
raise NotFoundException('Static File Not Found: ' +
diff --git a/pywb/webapp/live_rewrite_handler.py b/pywb/webapp/live_rewrite_handler.py
index 88564eef..9afdbf3e 100644
--- a/pywb/webapp/live_rewrite_handler.py
+++ b/pywb/webapp/live_rewrite_handler.py
@@ -59,7 +59,7 @@ class RewriteHandler(SearchPageWbUrlHandler):
except Exception as exc:
import traceback
- err_details = traceback.format_exc(exc)
+ err_details = traceback.format_exc()
print(err_details)
url = wbrequest.wb_url.url
@@ -174,7 +174,7 @@ class RewriteHandler(SearchPageWbUrlHandler):
@staticmethod
def create_cache_key(prefix, url):
hash_ = hashlib.md5()
- hash_.update(url)
+ hash_.update(url.encode('utf-8'))
key = hash_.hexdigest()
key = prefix + key
return key
diff --git a/pywb/webapp/views.py b/pywb/webapp/views.py
index c52a49ab..26a8bd51 100644
--- a/pywb/webapp/views.py
+++ b/pywb/webapp/views.py
@@ -136,7 +136,7 @@ class J2TemplateView(object):
template_result = self.render_to_string(**kwargs)
status = kwargs.get('status', '200 OK')
content_type = kwargs.get('content_type', 'text/html; charset=utf-8')
- return WbResponse.text_response(template_result.encode('utf-8'),
+ return WbResponse.text_response(template_result,
status=status,
content_type=content_type)
@@ -217,5 +217,6 @@ class J2HtmlCapturesView(J2TemplateView):
class MementoTimemapView(object):
def render_response(self, wbrequest, cdx_lines, **kwargs):
memento_lines = make_timemap(wbrequest, cdx_lines)
+
return WbResponse.text_stream(memento_lines,
content_type=LINK_FORMAT)
diff --git a/tests/fixture.py b/tests/fixture.py
index 16120790..cce1e457 100644
--- a/tests/fixture.py
+++ b/tests/fixture.py
@@ -20,6 +20,6 @@ class PrintReporter:
"""Reporter callback for replay view.
"""
def __call__(self, wbrequest, cdx, response):
- print wbrequest
- print cdx
+ print(wbrequest)
+ print(cdx)
pass
diff --git a/tests/memento_fixture.py b/tests/memento_fixture.py
index 1b650d48..150cbcf2 100644
--- a/tests/memento_fixture.py
+++ b/tests/memento_fixture.py
@@ -8,7 +8,7 @@ LINK_FORMAT = 'application/link-format'
class MementoMixin(object):
def get_links(self, resp):
- return map(lambda x: x.strip(), re.split(', (?![0-9])', resp.headers[LINK]))
+ return list(map(lambda x: x.strip(), re.split(', (?![0-9])', resp.headers[LINK])))
def make_timemap_link(self, url, coll='pywb'):
format_ = '
; rel="timemap"; type="{1}"'
diff --git a/tests/perms_fixture.py b/tests/perms_fixture.py
index 67fd74cd..739cf360 100644
--- a/tests/perms_fixture.py
+++ b/tests/perms_fixture.py
@@ -15,13 +15,14 @@ class TestExclusionPerms(Perms):
Perm Checker fixture to block a single url for testing
"""
# sample_archive has captures for this URLKEY
- URLKEY_EXCLUDED = 'org,iana)/_img/bookmark_icon.ico'
+ URLKEY_EXCLUDED = b'org,iana)/_img/bookmark_icon.ico'
def allow_url_lookup(self, urlkey):
"""
Return true/false if url (canonicalized url)
should be allowed
"""
+ print(urlkey)
if urlkey == self.URLKEY_EXCLUDED:
return False
diff --git a/tests/server_mock.py b/tests/server_mock.py
index 0ea7fd01..f15a9a6f 100644
--- a/tests/server_mock.py
+++ b/tests/server_mock.py
@@ -1,6 +1,6 @@
from pywb.webapp.pywb_init import create_wb_router
from pywb.framework.wsgi_wrappers import init_app
-from webtest import TestApp
+from webtest import TestApp, TestResponse
app = None
testapp = None
@@ -12,6 +12,14 @@ def make_app(config_file, pywb_router=create_wb_router):
testapp = TestApp(app)
+ class Resp(TestResponse):
+ def __init__(self, *args, **kwargs):
+ super(Resp, self).__init__(*args, **kwargs)
+ if self.headers.get('Content-Type'):
+ self.charset = 'utf-8'
+
+ TestApp.RequestClass.ResponseClass = Resp
+
return app, testapp
def make_setup_module(config, pywb_router=create_wb_router):
diff --git a/tests/test_auto_colls.py b/tests/test_auto_colls.py
index 95538ed3..81f3aa15 100644
--- a/tests/test_auto_colls.py
+++ b/tests/test_auto_colls.py
@@ -8,7 +8,7 @@ import webtest
import time
import threading
-from io import BytesIO
+from six import StringIO
from pywb.webapp.pywb_init import create_wb_router
from pywb.manager.manager import main
@@ -78,7 +78,7 @@ class TestManagedColls(object):
J2TemplateView.shared_jinja_env = None
#@patch('waitress.serve', lambda *args, **kwargs: None)
- @patch('BaseHTTPServer.HTTPServer.serve_forever', lambda *args, **kwargs: None)
+ @patch('six.moves.BaseHTTPServer.HTTPServer.serve_forever', lambda *args, **kwargs: None)
def test_run_cli(self):
""" test new wayback cli interface
test autoindex error before collections inited
@@ -144,7 +144,7 @@ class TestManagedColls(object):
# Spurrious file in collections
with open(os.path.join(self.root_dir, 'collections', 'blah'), 'w+b') as fh:
- fh.write('foo\n')
+ fh.write(b'foo\n')
with raises(IOError):
main(['add', 'test', 'non-existent-file.warc.gz'])
@@ -228,13 +228,14 @@ class TestManagedColls(object):
a_static = os.path.join(self.root_dir, 'collections', 'test', 'static', 'abc.js')
with open(a_static, 'w+b') as fh:
- fh.write('/* Some JS File */')
+ fh.write(b'/* Some JS File */')
self._create_app()
resp = self.testapp.get('/static/test/abc.js')
assert resp.status_int == 200
assert resp.content_type == 'application/javascript'
- assert '/* Some JS File */' in resp.body
+ resp.charset = 'utf-8'
+ assert '/* Some JS File */' in resp.text
def test_add_shared_static(self):
""" Test adding shared static file to root static/ dir, check access
@@ -242,13 +243,14 @@ class TestManagedColls(object):
a_static = os.path.join(self.root_dir, 'static', 'foo.css')
with open(a_static, 'w+b') as fh:
- fh.write('/* Some CSS File */')
+ fh.write(b'/* Some CSS File */')
self._create_app()
resp = self.testapp.get('/static/__shared/foo.css')
assert resp.status_int == 200
assert resp.content_type == 'text/css'
- assert '/* Some CSS File */' in resp.body
+ resp.charset = 'utf-8'
+ assert '/* Some CSS File */' in resp.text
def test_add_title_metadata_index_page(self):
""" Test adding title metadata to a collection, test
@@ -260,7 +262,8 @@ class TestManagedColls(object):
resp = self.testapp.get('/')
assert resp.status_int == 200
assert resp.content_type == 'text/html'
- assert '(Collection Title)' in resp.body
+ resp.charset = 'utf-8'
+ assert '(Collection Title)' in resp.text
def test_other_metadata_search_page(self):
main(['metadata', 'foo', '--set',
@@ -272,16 +275,17 @@ class TestManagedColls(object):
self._create_app()
resp = self.testapp.get('/foo/')
+ resp.charset = 'utf-8'
assert resp.status_int == 200
assert resp.content_type == 'text/html'
- assert 'Collection Title' in resp.body
+ assert 'Collection Title' in resp.text
- assert 'desc' in resp.body
- assert 'Some Description Text' in resp.body
+ assert 'desc' in resp.text
+ assert 'Some Description Text' in resp.text
- assert 'other' in resp.body
- assert 'custom value' in resp.body
+ assert 'other' in resp.text
+ assert 'custom value' in resp.text
def test_custom_template_search(self):
""" Test manually added custom search template search.html
@@ -289,13 +293,14 @@ class TestManagedColls(object):
a_static = os.path.join(self.root_dir, 'collections', 'test', 'templates', 'search.html')
with open(a_static, 'w+b') as fh:
- fh.write('pywb custom search page')
+ fh.write(b'pywb custom search page')
self._create_app()
resp = self.testapp.get('/test/')
+ resp.charset = 'utf-8'
assert resp.status_int == 200
assert resp.content_type == 'text/html'
- assert 'pywb custom search page' in resp.body
+ assert 'pywb custom search page' in resp.text
def test_custom_config(self):
""" Test custom created config.yaml which overrides auto settings
@@ -304,8 +309,8 @@ class TestManagedColls(object):
"""
config_path = os.path.join(self.root_dir, 'collections', 'test', 'config.yaml')
with open(config_path, 'w+b') as fh:
- fh.write('search_html: ./templates/custom_search.html\n')
- fh.write('index_paths: ./cdx2/\n')
+ fh.write(b'search_html: ./templates/custom_search.html\n')
+ fh.write(b'index_paths: ./cdx2/\n')
custom_search = os.path.join(self.root_dir, 'collections', 'test',
'templates', 'custom_search.html')
@@ -314,17 +319,18 @@ class TestManagedColls(object):
main(['metadata', 'test', '--set', 'some=value'])
with open(custom_search, 'w+b') as fh:
- fh.write('config.yaml overriden search page: ')
- fh.write('{{ wbrequest.user_metadata | tojson }}\n')
+ fh.write(b'config.yaml overriden search page: ')
+ fh.write(b'{{ wbrequest.user_metadata | tojson }}\n')
os.rename(os.path.join(self.root_dir, 'collections', 'test', INDEX_DIR),
os.path.join(self.root_dir, 'collections', 'test', 'cdx2'))
self._create_app()
resp = self.testapp.get('/test/')
+ resp.charset = 'utf-8'
assert resp.status_int == 200
assert resp.content_type == 'text/html'
- assert 'config.yaml overriden search page: {"some": "value"}' in resp.body
+ assert 'config.yaml overriden search page: {"some": "value"}' in resp.text
resp = self.testapp.get('/test/20140103030321/http://example.com?example=1')
assert resp.status_int == 200
@@ -352,14 +358,15 @@ class TestManagedColls(object):
with open(filename, 'r+b') as fh:
buf = fh.read()
- buf = buf.replace('