diff --git a/pywb/framework/wbrequestresponse.py b/pywb/framework/wbrequestresponse.py index 43680545..4a638382 100644 --- a/pywb/framework/wbrequestresponse.py +++ b/pywb/framework/wbrequestresponse.py @@ -109,13 +109,9 @@ class WbRequest(object): if not self.wb_url: return None - if not url: - url = self.wb_url.url - - if self.urlrewriter.rewrite_opts.get('rewrite_ascii_urls_only'): - return self.wb_url.url - else: - return self.wb_url.to_iri(url) + # pencode urls to force actual urls to appear, unless ascii_links_only set to true + pencode = self.urlrewriter.rewrite_opts.get('punycode_link_only', False) + return self.wb_url.get_url(url, pencode) def _is_ajax(self): value = self.env.get('HTTP_X_REQUESTED_WITH') diff --git a/pywb/rewrite/test/test_html_rewriter.py b/pywb/rewrite/test/test_html_rewriter.py index a59a7b13..7137b6fa 100644 --- a/pywb/rewrite/test/test_html_rewriter.py +++ b/pywb/rewrite/test/test_html_rewriter.py @@ -51,10 +51,14 @@ ur""" >>> parse('X') X -# Unicode ->>> parse('испытание') +# Unicode -- default with %-encoding +>>> parse(u'испытание') +испытание + +>>> parse(u'испытание', urlrewriter=urlrewriter_pencode) испытание + # Meta tag >>> parse('') @@ -168,7 +172,14 @@ from pywb.rewrite.html_rewriter import HTMLRewriter import pprint -urlrewriter = UrlRewriter('20131226101010/http://example.com/some/path/index.html', '/web/') +urlrewriter = UrlRewriter('20131226101010/http://example.com/some/path/index.html', + '/web/', + rewrite_opts=dict(punycode_links_only=False)) + +urlrewriter_pencode = UrlRewriter('20131226101010/http://example.com/some/path/index.html', + '/web/', + rewrite_opts=dict(punycode_links_only=True)) + no_base_canon_rewriter = UrlRewriter('20131226101010/http://example.com/some/path/index.html', '/web/', @@ -176,6 +187,7 @@ no_base_canon_rewriter = UrlRewriter('20131226101010/http://example.com/some/pat rewrite_base=False)) def parse(data, head_insert=None, urlrewriter=urlrewriter): + data = data.encode('utf-8') parser = HTMLRewriter(urlrewriter, head_insert = head_insert) #data = data.decode('utf-8') result = parser.rewrite(data) + parser.close() diff --git a/pywb/rewrite/test/test_regex_rewriters.py b/pywb/rewrite/test/test_regex_rewriters.py index e598390c..8ec88fbb 100644 --- a/pywb/rewrite/test/test_regex_rewriters.py +++ b/pywb/rewrite/test/test_regex_rewriters.py @@ -33,6 +33,9 @@ r""" >>> _test_js(r'location = /http:\/\/example.com/abc.html/') 'WB_wombat_location = /http:\\/\\/example.com/abc.html/' +>>> _test_js(r'location = \/http:\/\/example.com\/abc.html\/') +'WB_wombat_location = \\/http:\\/\\/example.com\\/abc.html\\/' + >>> _test_js('"/location" == some_location_val; locations = location;') '"/location" == some_location_val; locations = WB_wombat_location;' diff --git a/pywb/rewrite/test/test_wburl.py b/pywb/rewrite/test/test_wburl.py index 78bf9764..0665d6d4 100644 --- a/pywb/rewrite/test/test_wburl.py +++ b/pywb/rewrite/test/test_wburl.py @@ -4,59 +4,39 @@ ur""" # Replay Urls # ====================== ->>> repr_unicode(WbUrl('20131010000506/example.com')) -('replay', '20131010000506', '', 'http://example.com', '20131010000506/http://example.com') +>>> repr(WbUrl('20131010000506/example.com')) +"('replay', '20131010000506', '', 'http://example.com', '20131010000506/http://example.com')" ->>> repr_unicode(WbUrl('20130102im_/https://example.com')) -('replay', '20130102', 'im_', 'https://example.com', '20130102im_/https://example.com') +>>> repr(WbUrl('20130102im_/https://example.com')) +"('replay', '20130102', 'im_', 'https://example.com', '20130102im_/https://example.com')" ->>> repr_unicode(WbUrl('20130102im_/https:/example.com')) -('replay', '20130102', 'im_', 'https://example.com', '20130102im_/https://example.com') +>>> repr(WbUrl('20130102im_/https:/example.com')) +"('replay', '20130102', 'im_', 'https://example.com', '20130102im_/https://example.com')" # Protocol agnostic convert to http ->>> repr_unicode(WbUrl('20130102im_///example.com')) -('replay', '20130102', 'im_', 'http://example.com', '20130102im_/http://example.com') +>>> repr(WbUrl('20130102im_///example.com')) +"('replay', '20130102', 'im_', 'http://example.com', '20130102im_/http://example.com')" ->>> repr_unicode(WbUrl('cs_/example.com')) -('latest_replay', '', 'cs_', 'http://example.com', 'cs_/http://example.com') +>>> repr(WbUrl('cs_/example.com')) +"('latest_replay', '', 'cs_', 'http://example.com', 'cs_/http://example.com')" ->>> repr_unicode(WbUrl('https://example.com/xyz')) -('latest_replay', '', '', 'https://example.com/xyz', 'https://example.com/xyz') +>>> repr(WbUrl('https://example.com/xyz')) +"('latest_replay', '', '', 'https://example.com/xyz', 'https://example.com/xyz')" ->>> repr_unicode(WbUrl('https:/example.com/xyz')) -('latest_replay', '', '', 'https://example.com/xyz', 'https://example.com/xyz') +>>> repr(WbUrl('https:/example.com/xyz')) +"('latest_replay', '', '', 'https://example.com/xyz', 'https://example.com/xyz')" ->>> repr_unicode(WbUrl('https://example.com/xyz?a=%2f&b=%2E')) -('latest_replay', '', '', 'https://example.com/xyz?a=/&b=.', 'https://example.com/xyz?a=/&b=.') +>>> repr(WbUrl('https://example.com/xyz?a=%2f&b=%2E')) +"('latest_replay', '', '', 'https://example.com/xyz?a=%2f&b=%2E', 'https://example.com/xyz?a=%2f&b=%2E')" # Test scheme partially encoded urls ->>> repr_unicode(WbUrl('https%3A//example.com/')) -('latest_replay', '', '', 'https://example.com/', 'https://example.com/') +>>> repr(WbUrl('https%3A//example.com/')) +"('latest_replay', '', '', 'https://example.com/', 'https://example.com/')" ->>> repr_unicode(WbUrl('2014/http%3A%2F%2Fexample.com/')) -('replay', '2014', '', 'http://example.com/', '2014/http://example.com/') - -# Test IDNs - -To IRI ->>> print(WbUrl.to_iri(u'https://пример.испытание')) -https://пример.испытание - ->>> print(WbUrl.to_iri(u'пример.испытание')) -пример.испытание - ->>> print(WbUrl.to_iri('http://' + quote_plus(u'пример.испытание'.encode('utf-8')))) -http://пример.испытание - ->>> print(WbUrl.to_iri(u'//пример.испытание/abc/испытание')) -//пример.испытание/abc/испытание - ->>> print(WbUrl.to_iri(quote_plus(u'пример.испытание'.encode('utf-8')) + '/abc/' + quote_plus(u'пример'.encode('utf-8')))) -пример.испытание/abc/пример - ->>> print(WbUrl.to_iri('https://xn--e1afmkfd.xn--80akhbyknj4f')) -https://пример.испытание +>>> repr(WbUrl('2014/http%3A%2F%2Fexample.com/')) +"('replay', '2014', '', 'http://example.com/', '2014/http://example.com/')" +# ===== Test IDNs To URI >>> print(WbUrl.to_uri(u'https://пример.испытание')) @@ -69,73 +49,99 @@ xn--e1afmkfd.xn--80akhbyknj4f http://xn--e1afmkfd.xn--80akhbyknj4f >>> print(WbUrl.to_uri(u'//пример.испытание/abc/испытание')) -//xn--e1afmkfd.xn--80akhbyknj4f/abc%2F%D0%B8%D1%81%D0%BF%D1%8B%D1%82%D0%B0%D0%BD%D0%B8%D0%B5 +//xn--e1afmkfd.xn--80akhbyknj4f/abc/%D0%B8%D1%81%D0%BF%D1%8B%D1%82%D0%B0%D0%BD%D0%B8%D0%B5 >>> print(WbUrl.to_uri('//' + quote_plus(u'пример.испытание'.encode('utf-8')) + '/abc/' + quote_plus(u'пример'.encode('utf-8')))) //xn--e1afmkfd.xn--80akhbyknj4f/abc/%D0%BF%D1%80%D0%B8%D0%BC%D0%B5%D1%80 ->>> print(WbUrl.to_uri('https://xn--e1afmkfd.xn--80akhbyknj4f/abc/')) -https://xn--e1afmkfd.xn--80akhbyknj4f/abc/ +>>> print(WbUrl.to_uri('https://xn--e1afmkfd.xn--80akhbyknj4f/foo/bar?abc=def')) +https://xn--e1afmkfd.xn--80akhbyknj4f/foo/bar?abc=def +# truncated >>> print(WbUrl.to_uri('http://' + quote_plus(u'пример.испытание'.encode('utf-8'))[1:])) http://xn--d0-olcluwd.xn--80akhbyknj4f + +# To %-encoded host uri -- instead of punycode, %-encode host + +>>> print(to_uri_pencode(u'https://пример.испытание')) +https://%D0%BF%D1%80%D0%B8%D0%BC%D0%B5%D1%80.%D0%B8%D1%81%D0%BF%D1%8B%D1%82%D0%B0%D0%BD%D0%B8%D0%B5 + +>>> print(to_uri_pencode(u'пример.испытание')) +%D0%BF%D1%80%D0%B8%D0%BC%D0%B5%D1%80.%D0%B8%D1%81%D0%BF%D1%8B%D1%82%D0%B0%D0%BD%D0%B8%D0%B5 + +>>> print(to_uri_pencode('http://' + quote_plus(u'пример.испытание'.encode('utf-8')))) +http://%D0%BF%D1%80%D0%B8%D0%BC%D0%B5%D1%80.%D0%B8%D1%81%D0%BF%D1%8B%D1%82%D0%B0%D0%BD%D0%B8%D0%B5 + +>>> print(to_uri_pencode(u'//пример.испытание/abc/испытание')) +//%D0%BF%D1%80%D0%B8%D0%BC%D0%B5%D1%80.%D0%B8%D1%81%D0%BF%D1%8B%D1%82%D0%B0%D0%BD%D0%B8%D0%B5/abc/%D0%B8%D1%81%D0%BF%D1%8B%D1%82%D0%B0%D0%BD%D0%B8%D0%B5 + +>>> print(to_uri_pencode(quote_plus(u'пример.испытание'.encode('utf-8')) + '/abc/' + quote_plus(u'пример'.encode('utf-8')))) +%D0%BF%D1%80%D0%B8%D0%BC%D0%B5%D1%80.%D0%B8%D1%81%D0%BF%D1%8B%D1%82%D0%B0%D0%BD%D0%B8%D0%B5/abc/%D0%BF%D1%80%D0%B8%D0%BC%D0%B5%D1%80 + +>>> print(to_uri_pencode('https://xn--e1afmkfd.xn--80akhbyknj4f/foo/bar?abc=def')) +https://%D0%BF%D1%80%D0%B8%D0%BC%D0%B5%D1%80.%D0%B8%D1%81%D0%BF%D1%8B%D1%82%D0%B0%D0%BD%D0%B8%D0%B5/foo/bar?abc=def + +>>> print(to_uri_pencode('http://' + quote_plus(u'пример.испытание'.encode('utf-8'))[1:])) +http://d0%D1%80%D0%B8%D0%BC%D0%B5%D1%80.%D0%B8%D1%81%D0%BF%D1%8B%D1%82%D0%B0%D0%BD%D0%B8%D0%B5 + + # IRI representation ->>> repr_unicode(WbUrl(u'http://пример.испытание')) -('latest_replay', '', '', 'http://пример.испытание', 'http://пример.испытание') +>>> repr(WbUrl(u'http://пример.испытание')) +"('latest_replay', '', '', 'http://xn--e1afmkfd.xn--80akhbyknj4f', 'http://xn--e1afmkfd.xn--80akhbyknj4f')" ->>> repr_unicode(WbUrl(u'https://пример.испытание/abc/')) -('latest_replay', '', '', 'https://пример.испытание/abc/', 'https://пример.испытание/abc/') +>>> repr(WbUrl(u'https://пример.испытание/abc/def_ghi/')) +"('latest_replay', '', '', 'https://xn--e1afmkfd.xn--80akhbyknj4f/abc/def_ghi/', 'https://xn--e1afmkfd.xn--80akhbyknj4f/abc/def_ghi/')" ->>> repr_unicode(WbUrl(u'//пример.испытание/abc/')) -('latest_replay', '', '', 'http://пример.испытание/abc/', 'http://пример.испытание/abc/') +>>> repr(WbUrl(u'//пример.испытание/abc/')) +"('latest_replay', '', '', 'http://xn--e1afmkfd.xn--80akhbyknj4f/abc/', 'http://xn--e1afmkfd.xn--80akhbyknj4f/abc/')" ->>> repr_unicode(WbUrl(u'2014id_/https://пример.испытание/abc')) -('replay', '2014', 'id_', 'https://пример.испытание/abc', '2014id_/https://пример.испытание/abc') +>>> repr(WbUrl(u'2014id_/https://пример.испытание/abc')) +"('replay', '2014', 'id_', 'https://xn--e1afmkfd.xn--80akhbyknj4f/abc', '2014id_/https://xn--e1afmkfd.xn--80akhbyknj4f/abc')" # percent-encoded form (as sent by browser usually) ->>> repr_unicode(WbUrl('2014id_/http://' + quote_plus(u'пример.испытание'.encode('utf-8')) + '/abc')) -('replay', '2014', 'id_', 'http://пример.испытание/abc', '2014id_/http://пример.испытание/abc') +>>> repr(WbUrl('2014id_/http://' + quote_plus(u'пример.испытание'.encode('utf-8')) + '/abc')) +"('replay', '2014', 'id_', 'http://xn--e1afmkfd.xn--80akhbyknj4f/abc', '2014id_/http://xn--e1afmkfd.xn--80akhbyknj4f/abc')" # percent-encoded form -- scheme relative ->>> repr_unicode(WbUrl('2014id_///' + quote_plus(u'пример.испытание'.encode('utf-8')) + '/abc')) -('replay', '2014', 'id_', 'http://пример.испытание/abc', '2014id_/http://пример.испытание/abc') +>>> repr(WbUrl('2014id_///' + quote_plus(u'пример.испытание'.encode('utf-8')) + '/abc')) +"('replay', '2014', 'id_', 'http://xn--e1afmkfd.xn--80akhbyknj4f/abc', '2014id_/http://xn--e1afmkfd.xn--80akhbyknj4f/abc')" # invalid: truncated and superfluous '%', ignore invalid (no exception) ->>> repr_unicode(WbUrl('2014id_/http://' + quote_plus(u'пример.испытание'.encode('utf-8'))[1:] + '%' + '/abc')) -('replay', '2014', 'id_', 'http://d0ример.испытание%/abc', '2014id_/http://d0ример.испытание%/abc') +>>> repr(WbUrl('2014id_/http://' + quote_plus(u'пример.испытание'.encode('utf-8'))[1:] + '%' + '/abc')) +"('replay', '2014', 'id_', 'http://xn--d0-olcluwd.xn--%-7sbpkb3ampk3g/abc', '2014id_/http://xn--d0-olcluwd.xn--%-7sbpkb3ampk3g/abc')" # Query Urls # ====================== ->>> repr_unicode(WbUrl('*/http://example.com/abc?def=a')) -('query', '', '', 'http://example.com/abc?def=a', '*/http://example.com/abc?def=a') +>>> repr(WbUrl('*/http://example.com/abc?def=a')) +"('query', '', '', 'http://example.com/abc?def=a', '*/http://example.com/abc?def=a')" ->>> repr_unicode(WbUrl('*/http://example.com/abc?def=a*')) -('url_query', '', '', 'http://example.com/abc?def=a', '*/http://example.com/abc?def=a*') +>>> repr(WbUrl('*/http://example.com/abc?def=a*')) +"('url_query', '', '', 'http://example.com/abc?def=a', '*/http://example.com/abc?def=a*')" ->>> repr_unicode(WbUrl('2010*/http://example.com/abc?def=a')) -('query', '2010', '', 'http://example.com/abc?def=a', '2010*/http://example.com/abc?def=a') +>>> repr(WbUrl('2010*/http://example.com/abc?def=a')) +"('query', '2010', '', 'http://example.com/abc?def=a', '2010*/http://example.com/abc?def=a')" # timestamp range query ->>> repr_unicode(WbUrl('2009-2015*/http://example.com/abc?def=a')) -('query', '2009', '', 'http://example.com/abc?def=a', '2009-2015*/http://example.com/abc?def=a') +>>> repr(WbUrl('2009-2015*/http://example.com/abc?def=a')) +"('query', '2009', '', 'http://example.com/abc?def=a', '2009-2015*/http://example.com/abc?def=a')" ->>> repr_unicode(WbUrl('json/*/http://example.com/abc?def=a')) -('query', '', 'json', 'http://example.com/abc?def=a', 'json/*/http://example.com/abc?def=a') +>>> repr(WbUrl('json/*/http://example.com/abc?def=a')) +"('query', '', 'json', 'http://example.com/abc?def=a', 'json/*/http://example.com/abc?def=a')" ->>> repr_unicode(WbUrl('timemap-link/2011*/http://example.com/abc?def=a')) -('query', '2011', 'timemap-link', 'http://example.com/abc?def=a', 'timemap-link/2011*/http://example.com/abc?def=a') +>>> repr(WbUrl('timemap-link/2011*/http://example.com/abc?def=a')) +"('query', '2011', 'timemap-link', 'http://example.com/abc?def=a', 'timemap-link/2011*/http://example.com/abc?def=a')" # strip off repeated, likely scheme-agnostic, slashes altogether ->>> repr_unicode(WbUrl('///example.com')) -('latest_replay', '', '', 'http://example.com', 'http://example.com') +>>> repr(WbUrl('///example.com')) +"('latest_replay', '', '', 'http://example.com', 'http://example.com')" ->>> repr_unicode(WbUrl('//example.com/')) -('latest_replay', '', '', 'http://example.com/', 'http://example.com/') +>>> repr(WbUrl('//example.com/')) +"('latest_replay', '', '', 'http://example.com/', 'http://example.com/')" ->>> repr_unicode(WbUrl('/example.com/')) -('latest_replay', '', '', 'http://example.com/', 'http://example.com/') +>>> repr(WbUrl('/example.com/')) +"('latest_replay', '', '', 'http://example.com/', 'http://example.com/')" # Is_ Tests >>> u = WbUrl('*/http://example.com/abc?def=a*') @@ -156,7 +162,7 @@ True # Error Urls # ====================== # no longer rejecting this here -#>>> x = WbUrl('/#$%#/') +#>>> x = WbUrl('/#$%#/')" Traceback (most recent call last): Exception: Bad Request Url: http://#$%#/ @@ -180,14 +186,8 @@ from urllib import quote_plus, unquote_plus from StringIO import StringIO -def repr_unicode(wburl): - buff = StringIO() - buff.write("('{0}', '{1}', '{2}', '".format(wburl.type, wburl.timestamp, wburl.mod)) - buff.write(WbUrl.to_iri(wburl.url)) - buff.write("', '") - buff.write(wburl.to_str(iri=True)) - buff.write("')") - print(buff.getvalue()) +def to_uri_pencode(url): + return WbUrl.percent_encode_host(WbUrl.to_uri(url)) if __name__ == "__main__": diff --git a/pywb/rewrite/url_rewriter.py b/pywb/rewrite/url_rewriter.py index 84d035ea..37a6e095 100644 --- a/pywb/rewrite/url_rewriter.py +++ b/pywb/rewrite/url_rewriter.py @@ -52,8 +52,8 @@ class UrlRewriter(object): is_abs = True url = 'http:' + url - # always convert any unicode urls to punycode - ascii_urls_only = self.rewrite_opts.get('rewrite_ascii_urls_only', False) + # convert host to %-encoding instead of default punycode + peh = not self.rewrite_opts.get('punycode_links_only', False) # Optimized rewriter for # -rel urls that don't start with / and @@ -73,13 +73,7 @@ class UrlRewriter(object): final_url = self.prefix + wburl.to_str(mod=mod, url=new_url, - iri=not ascii_urls_only) - if not ascii_urls_only: - try: - final_url = final_url.encode('utf-8') - except UnicodeDecodeError: - pass - + percent_encode=peh) return final_url def get_new_url(self, **kwargs): diff --git a/pywb/rewrite/wburl.py b/pywb/rewrite/wburl.py index 69c81ebb..50907791 100644 --- a/pywb/rewrite/wburl.py +++ b/pywb/rewrite/wburl.py @@ -87,56 +87,52 @@ class WbUrl(BaseWbUrl): DEFAULT_SCHEME = 'http://' - #PARTIAL_ENC_RX = re.compile('(https?%3A)?(%2F%2F)?', re.I) FIRST_PATH = re.compile('(? 1: - url = scheme_dom[0] + u'/' + dom + url = scheme_dom[0] + '/' + dom else: url = dom if len(parts) > 1: - url += u'/' + parts[1] + url += '/' + parts[1] return url - @staticmethod - def to_uri(url, was_uni=False): - #if not was_uni: - # if isinstance(url, unicode): - # was_uni = True - - #if not was_uni and not '%' in url: - # return url - + def to_uri(url): + """ Converts a url to an ascii %-encoded form + where: + - scheme is ascii, + - host is punycode, + - and remainder is %-encoded + Not using urlsplit to also decode partially encoded + scheme urls + """ parts = WbUrl.FIRST_PATH.split(url, 1) - #if not was_uni and not '%' in parts[0]: - # return url - scheme_dom = urllib.unquote_plus(parts[0]) if isinstance(scheme_dom, str): @@ -146,18 +142,18 @@ class WbUrl(BaseWbUrl): scheme_dom = scheme_dom.decode('utf-8', 'ignore') scheme_dom = scheme_dom.rsplit('/', 1) - dom = scheme_dom[-1] + domain = scheme_dom[-1] - dom = dom.encode('idna') + domain = domain.encode('idna') if len(scheme_dom) > 1: - url = scheme_dom[0] + '/' + dom + url = scheme_dom[0].encode('utf-8') + '/' + domain else: - url = dom + url = domain if len(parts) > 1: if isinstance(parts[1], unicode): - url += '/' + urllib.quote_plus(parts[1].encode('utf-8')) + url += '/' + urllib.quote(parts[1].encode('utf-8')) else: url += '/' + parts[1] @@ -168,10 +164,9 @@ class WbUrl(BaseWbUrl): def __init__(self, orig_url): super(WbUrl, self).__init__() - was_uni = False if isinstance(orig_url, unicode): orig_url = orig_url.encode('utf-8') - was_uni = True + orig_url = urllib.quote(orig_url) self.original_url = orig_url @@ -179,7 +174,7 @@ class WbUrl(BaseWbUrl): if not self._init_replay(orig_url): raise Exception('Invalid WbUrl: ', orig_url) - self.url = WbUrl.to_uri(self.url, was_uni) + self.url = WbUrl.to_uri(self.url) # protocol agnostic url -> http:// # no protocol -> http:// @@ -249,6 +244,18 @@ class WbUrl(BaseWbUrl): self.url = new_url return self.url + def get_url(self, url=None, percent_encode=False): + if url is not None: + url = WbUrl.to_uri(url) + else: + url = self.url + + if percent_encode: + url = WbUrl.percent_encode_host(url) + + return url + + # Str Representation # ==================== def to_str(self, **overrides): @@ -256,9 +263,9 @@ class WbUrl(BaseWbUrl): mod = overrides.get('mod', self.mod) timestamp = overrides.get('timestamp', self.timestamp) end_timestamp = overrides.get('end_timestamp', self.end_timestamp) - url = overrides.get('url', self.url) - if overrides.get('iri'): - url = WbUrl.to_iri(url) + + url = self.get_url(overrides.get('url'), + overrides.get('percent_encode', False)) return self.to_wburl_str(url=url, type=type_, diff --git a/pywb/static/wombat.js b/pywb/static/wombat.js index 6a2827b9..3e93b112 100644 --- a/pywb/static/wombat.js +++ b/pywb/static/wombat.js @@ -323,7 +323,7 @@ _WBWombat = (function() { return url + this._orig_loc.hash; } - href = parser.href; + href = parser.getAttribute("href"); var hash = parser.hash; if (hash) { diff --git a/pywb/webapp/replay_views.py b/pywb/webapp/replay_views.py index 5fe22d26..2bdcf730 100644 --- a/pywb/webapp/replay_views.py +++ b/pywb/webapp/replay_views.py @@ -241,6 +241,7 @@ class ReplayView(object): else: statusline = '302 Internal Redirect' + new_url = new_url.encode('utf-8') status_headers = StatusAndHeaders(statusline, [('Location', new_url)])