diff --git a/pywb/framework/wbrequestresponse.py b/pywb/framework/wbrequestresponse.py
index 43680545..4a638382 100644
--- a/pywb/framework/wbrequestresponse.py
+++ b/pywb/framework/wbrequestresponse.py
@@ -109,13 +109,9 @@ class WbRequest(object):
if not self.wb_url:
return None
- if not url:
- url = self.wb_url.url
-
- if self.urlrewriter.rewrite_opts.get('rewrite_ascii_urls_only'):
- return self.wb_url.url
- else:
- return self.wb_url.to_iri(url)
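+ # %-encode the url host only when 'punycode_link_only' is set (default False); NOTE(review): UrlRewriter reads 'punycode_links_only' and negates it -- confirm intended key and polarity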
+ pencode = self.urlrewriter.rewrite_opts.get('punycode_link_only', False)
+ return self.wb_url.get_url(url, pencode)
def _is_ajax(self):
value = self.env.get('HTTP_X_REQUESTED_WITH')
diff --git a/pywb/rewrite/test/test_html_rewriter.py b/pywb/rewrite/test/test_html_rewriter.py
index a59a7b13..7137b6fa 100644
--- a/pywb/rewrite/test/test_html_rewriter.py
+++ b/pywb/rewrite/test/test_html_rewriter.py
@@ -51,10 +51,14 @@ ur"""
>>> parse('X')
X
-# Unicode
->>> parse('испытание')
+# Unicode -- default with %-encoding
+>>> parse(u'испытание')
+испытание
+
+>>> parse(u'испытание', urlrewriter=urlrewriter_pencode)
испытание
+
# Meta tag
>>> parse('')
@@ -168,7 +172,14 @@ from pywb.rewrite.html_rewriter import HTMLRewriter
import pprint
-urlrewriter = UrlRewriter('20131226101010/http://example.com/some/path/index.html', '/web/')
+urlrewriter = UrlRewriter('20131226101010/http://example.com/some/path/index.html',
+ '/web/',
+ rewrite_opts=dict(punycode_links_only=False))
+
+urlrewriter_pencode = UrlRewriter('20131226101010/http://example.com/some/path/index.html',
+ '/web/',
+ rewrite_opts=dict(punycode_links_only=True))
+
no_base_canon_rewriter = UrlRewriter('20131226101010/http://example.com/some/path/index.html',
'/web/',
@@ -176,6 +187,7 @@ no_base_canon_rewriter = UrlRewriter('20131226101010/http://example.com/some/pat
rewrite_base=False))
def parse(data, head_insert=None, urlrewriter=urlrewriter):
+ data = data.encode('utf-8')
parser = HTMLRewriter(urlrewriter, head_insert = head_insert)
#data = data.decode('utf-8')
result = parser.rewrite(data) + parser.close()
diff --git a/pywb/rewrite/test/test_regex_rewriters.py b/pywb/rewrite/test/test_regex_rewriters.py
index e598390c..8ec88fbb 100644
--- a/pywb/rewrite/test/test_regex_rewriters.py
+++ b/pywb/rewrite/test/test_regex_rewriters.py
@@ -33,6 +33,9 @@ r"""
>>> _test_js(r'location = /http:\/\/example.com/abc.html/')
'WB_wombat_location = /http:\\/\\/example.com/abc.html/'
+>>> _test_js(r'location = \/http:\/\/example.com\/abc.html\/')
+'WB_wombat_location = \\/http:\\/\\/example.com\\/abc.html\\/'
+
>>> _test_js('"/location" == some_location_val; locations = location;')
'"/location" == some_location_val; locations = WB_wombat_location;'
diff --git a/pywb/rewrite/test/test_wburl.py b/pywb/rewrite/test/test_wburl.py
index 78bf9764..0665d6d4 100644
--- a/pywb/rewrite/test/test_wburl.py
+++ b/pywb/rewrite/test/test_wburl.py
@@ -4,59 +4,39 @@
ur"""
# Replay Urls
# ======================
->>> repr_unicode(WbUrl('20131010000506/example.com'))
-('replay', '20131010000506', '', 'http://example.com', '20131010000506/http://example.com')
+>>> repr(WbUrl('20131010000506/example.com'))
+"('replay', '20131010000506', '', 'http://example.com', '20131010000506/http://example.com')"
->>> repr_unicode(WbUrl('20130102im_/https://example.com'))
-('replay', '20130102', 'im_', 'https://example.com', '20130102im_/https://example.com')
+>>> repr(WbUrl('20130102im_/https://example.com'))
+"('replay', '20130102', 'im_', 'https://example.com', '20130102im_/https://example.com')"
->>> repr_unicode(WbUrl('20130102im_/https:/example.com'))
-('replay', '20130102', 'im_', 'https://example.com', '20130102im_/https://example.com')
+>>> repr(WbUrl('20130102im_/https:/example.com'))
+"('replay', '20130102', 'im_', 'https://example.com', '20130102im_/https://example.com')"
# Protocol agnostic convert to http
->>> repr_unicode(WbUrl('20130102im_///example.com'))
-('replay', '20130102', 'im_', 'http://example.com', '20130102im_/http://example.com')
+>>> repr(WbUrl('20130102im_///example.com'))
+"('replay', '20130102', 'im_', 'http://example.com', '20130102im_/http://example.com')"
->>> repr_unicode(WbUrl('cs_/example.com'))
-('latest_replay', '', 'cs_', 'http://example.com', 'cs_/http://example.com')
+>>> repr(WbUrl('cs_/example.com'))
+"('latest_replay', '', 'cs_', 'http://example.com', 'cs_/http://example.com')"
->>> repr_unicode(WbUrl('https://example.com/xyz'))
-('latest_replay', '', '', 'https://example.com/xyz', 'https://example.com/xyz')
+>>> repr(WbUrl('https://example.com/xyz'))
+"('latest_replay', '', '', 'https://example.com/xyz', 'https://example.com/xyz')"
->>> repr_unicode(WbUrl('https:/example.com/xyz'))
-('latest_replay', '', '', 'https://example.com/xyz', 'https://example.com/xyz')
+>>> repr(WbUrl('https:/example.com/xyz'))
+"('latest_replay', '', '', 'https://example.com/xyz', 'https://example.com/xyz')"
->>> repr_unicode(WbUrl('https://example.com/xyz?a=%2f&b=%2E'))
-('latest_replay', '', '', 'https://example.com/xyz?a=/&b=.', 'https://example.com/xyz?a=/&b=.')
+>>> repr(WbUrl('https://example.com/xyz?a=%2f&b=%2E'))
+"('latest_replay', '', '', 'https://example.com/xyz?a=%2f&b=%2E', 'https://example.com/xyz?a=%2f&b=%2E')"
# Test scheme partially encoded urls
->>> repr_unicode(WbUrl('https%3A//example.com/'))
-('latest_replay', '', '', 'https://example.com/', 'https://example.com/')
+>>> repr(WbUrl('https%3A//example.com/'))
+"('latest_replay', '', '', 'https://example.com/', 'https://example.com/')"
->>> repr_unicode(WbUrl('2014/http%3A%2F%2Fexample.com/'))
-('replay', '2014', '', 'http://example.com/', '2014/http://example.com/')
-
-# Test IDNs
-
-To IRI
->>> print(WbUrl.to_iri(u'https://пример.испытание'))
-https://пример.испытание
-
->>> print(WbUrl.to_iri(u'пример.испытание'))
-пример.испытание
-
->>> print(WbUrl.to_iri('http://' + quote_plus(u'пример.испытание'.encode('utf-8'))))
-http://пример.испытание
-
->>> print(WbUrl.to_iri(u'//пример.испытание/abc/испытание'))
-//пример.испытание/abc/испытание
-
->>> print(WbUrl.to_iri(quote_plus(u'пример.испытание'.encode('utf-8')) + '/abc/' + quote_plus(u'пример'.encode('utf-8'))))
-пример.испытание/abc/пример
-
->>> print(WbUrl.to_iri('https://xn--e1afmkfd.xn--80akhbyknj4f'))
-https://пример.испытание
+>>> repr(WbUrl('2014/http%3A%2F%2Fexample.com/'))
+"('replay', '2014', '', 'http://example.com/', '2014/http://example.com/')"
+# ===== Test IDNs
To URI
>>> print(WbUrl.to_uri(u'https://пример.испытание'))
@@ -69,73 +49,99 @@ xn--e1afmkfd.xn--80akhbyknj4f
http://xn--e1afmkfd.xn--80akhbyknj4f
>>> print(WbUrl.to_uri(u'//пример.испытание/abc/испытание'))
-//xn--e1afmkfd.xn--80akhbyknj4f/abc%2F%D0%B8%D1%81%D0%BF%D1%8B%D1%82%D0%B0%D0%BD%D0%B8%D0%B5
+//xn--e1afmkfd.xn--80akhbyknj4f/abc/%D0%B8%D1%81%D0%BF%D1%8B%D1%82%D0%B0%D0%BD%D0%B8%D0%B5
>>> print(WbUrl.to_uri('//' + quote_plus(u'пример.испытание'.encode('utf-8')) + '/abc/' + quote_plus(u'пример'.encode('utf-8'))))
//xn--e1afmkfd.xn--80akhbyknj4f/abc/%D0%BF%D1%80%D0%B8%D0%BC%D0%B5%D1%80
->>> print(WbUrl.to_uri('https://xn--e1afmkfd.xn--80akhbyknj4f/abc/'))
-https://xn--e1afmkfd.xn--80akhbyknj4f/abc/
+>>> print(WbUrl.to_uri('https://xn--e1afmkfd.xn--80akhbyknj4f/foo/bar?abc=def'))
+https://xn--e1afmkfd.xn--80akhbyknj4f/foo/bar?abc=def
+# truncated
>>> print(WbUrl.to_uri('http://' + quote_plus(u'пример.испытание'.encode('utf-8'))[1:]))
http://xn--d0-olcluwd.xn--80akhbyknj4f
+
+# To %-encoded host uri -- instead of punycode, %-encode host
+
+>>> print(to_uri_pencode(u'https://пример.испытание'))
+https://%D0%BF%D1%80%D0%B8%D0%BC%D0%B5%D1%80.%D0%B8%D1%81%D0%BF%D1%8B%D1%82%D0%B0%D0%BD%D0%B8%D0%B5
+
+>>> print(to_uri_pencode(u'пример.испытание'))
+%D0%BF%D1%80%D0%B8%D0%BC%D0%B5%D1%80.%D0%B8%D1%81%D0%BF%D1%8B%D1%82%D0%B0%D0%BD%D0%B8%D0%B5
+
+>>> print(to_uri_pencode('http://' + quote_plus(u'пример.испытание'.encode('utf-8'))))
+http://%D0%BF%D1%80%D0%B8%D0%BC%D0%B5%D1%80.%D0%B8%D1%81%D0%BF%D1%8B%D1%82%D0%B0%D0%BD%D0%B8%D0%B5
+
+>>> print(to_uri_pencode(u'//пример.испытание/abc/испытание'))
+//%D0%BF%D1%80%D0%B8%D0%BC%D0%B5%D1%80.%D0%B8%D1%81%D0%BF%D1%8B%D1%82%D0%B0%D0%BD%D0%B8%D0%B5/abc/%D0%B8%D1%81%D0%BF%D1%8B%D1%82%D0%B0%D0%BD%D0%B8%D0%B5
+
+>>> print(to_uri_pencode(quote_plus(u'пример.испытание'.encode('utf-8')) + '/abc/' + quote_plus(u'пример'.encode('utf-8'))))
+%D0%BF%D1%80%D0%B8%D0%BC%D0%B5%D1%80.%D0%B8%D1%81%D0%BF%D1%8B%D1%82%D0%B0%D0%BD%D0%B8%D0%B5/abc/%D0%BF%D1%80%D0%B8%D0%BC%D0%B5%D1%80
+
+>>> print(to_uri_pencode('https://xn--e1afmkfd.xn--80akhbyknj4f/foo/bar?abc=def'))
+https://%D0%BF%D1%80%D0%B8%D0%BC%D0%B5%D1%80.%D0%B8%D1%81%D0%BF%D1%8B%D1%82%D0%B0%D0%BD%D0%B8%D0%B5/foo/bar?abc=def
+
+>>> print(to_uri_pencode('http://' + quote_plus(u'пример.испытание'.encode('utf-8'))[1:]))
+http://d0%D1%80%D0%B8%D0%BC%D0%B5%D1%80.%D0%B8%D1%81%D0%BF%D1%8B%D1%82%D0%B0%D0%BD%D0%B8%D0%B5
+
+
# IRI representation
->>> repr_unicode(WbUrl(u'http://пример.испытание'))
-('latest_replay', '', '', 'http://пример.испытание', 'http://пример.испытание')
+>>> repr(WbUrl(u'http://пример.испытание'))
+"('latest_replay', '', '', 'http://xn--e1afmkfd.xn--80akhbyknj4f', 'http://xn--e1afmkfd.xn--80akhbyknj4f')"
->>> repr_unicode(WbUrl(u'https://пример.испытание/abc/'))
-('latest_replay', '', '', 'https://пример.испытание/abc/', 'https://пример.испытание/abc/')
+>>> repr(WbUrl(u'https://пример.испытание/abc/def_ghi/'))
+"('latest_replay', '', '', 'https://xn--e1afmkfd.xn--80akhbyknj4f/abc/def_ghi/', 'https://xn--e1afmkfd.xn--80akhbyknj4f/abc/def_ghi/')"
->>> repr_unicode(WbUrl(u'//пример.испытание/abc/'))
-('latest_replay', '', '', 'http://пример.испытание/abc/', 'http://пример.испытание/abc/')
+>>> repr(WbUrl(u'//пример.испытание/abc/'))
+"('latest_replay', '', '', 'http://xn--e1afmkfd.xn--80akhbyknj4f/abc/', 'http://xn--e1afmkfd.xn--80akhbyknj4f/abc/')"
->>> repr_unicode(WbUrl(u'2014id_/https://пример.испытание/abc'))
-('replay', '2014', 'id_', 'https://пример.испытание/abc', '2014id_/https://пример.испытание/abc')
+>>> repr(WbUrl(u'2014id_/https://пример.испытание/abc'))
+"('replay', '2014', 'id_', 'https://xn--e1afmkfd.xn--80akhbyknj4f/abc', '2014id_/https://xn--e1afmkfd.xn--80akhbyknj4f/abc')"
# percent-encoded form (as sent by browser usually)
->>> repr_unicode(WbUrl('2014id_/http://' + quote_plus(u'пример.испытание'.encode('utf-8')) + '/abc'))
-('replay', '2014', 'id_', 'http://пример.испытание/abc', '2014id_/http://пример.испытание/abc')
+>>> repr(WbUrl('2014id_/http://' + quote_plus(u'пример.испытание'.encode('utf-8')) + '/abc'))
+"('replay', '2014', 'id_', 'http://xn--e1afmkfd.xn--80akhbyknj4f/abc', '2014id_/http://xn--e1afmkfd.xn--80akhbyknj4f/abc')"
# percent-encoded form -- scheme relative
->>> repr_unicode(WbUrl('2014id_///' + quote_plus(u'пример.испытание'.encode('utf-8')) + '/abc'))
-('replay', '2014', 'id_', 'http://пример.испытание/abc', '2014id_/http://пример.испытание/abc')
+>>> repr(WbUrl('2014id_///' + quote_plus(u'пример.испытание'.encode('utf-8')) + '/abc'))
+"('replay', '2014', 'id_', 'http://xn--e1afmkfd.xn--80akhbyknj4f/abc', '2014id_/http://xn--e1afmkfd.xn--80akhbyknj4f/abc')"
# invalid: truncated and superfluous '%', ignore invalid (no exception)
->>> repr_unicode(WbUrl('2014id_/http://' + quote_plus(u'пример.испытание'.encode('utf-8'))[1:] + '%' + '/abc'))
-('replay', '2014', 'id_', 'http://d0ример.испытание%/abc', '2014id_/http://d0ример.испытание%/abc')
+>>> repr(WbUrl('2014id_/http://' + quote_plus(u'пример.испытание'.encode('utf-8'))[1:] + '%' + '/abc'))
+"('replay', '2014', 'id_', 'http://xn--d0-olcluwd.xn--%-7sbpkb3ampk3g/abc', '2014id_/http://xn--d0-olcluwd.xn--%-7sbpkb3ampk3g/abc')"
# Query Urls
# ======================
->>> repr_unicode(WbUrl('*/http://example.com/abc?def=a'))
-('query', '', '', 'http://example.com/abc?def=a', '*/http://example.com/abc?def=a')
+>>> repr(WbUrl('*/http://example.com/abc?def=a'))
+"('query', '', '', 'http://example.com/abc?def=a', '*/http://example.com/abc?def=a')"
->>> repr_unicode(WbUrl('*/http://example.com/abc?def=a*'))
-('url_query', '', '', 'http://example.com/abc?def=a', '*/http://example.com/abc?def=a*')
+>>> repr(WbUrl('*/http://example.com/abc?def=a*'))
+"('url_query', '', '', 'http://example.com/abc?def=a', '*/http://example.com/abc?def=a*')"
->>> repr_unicode(WbUrl('2010*/http://example.com/abc?def=a'))
-('query', '2010', '', 'http://example.com/abc?def=a', '2010*/http://example.com/abc?def=a')
+>>> repr(WbUrl('2010*/http://example.com/abc?def=a'))
+"('query', '2010', '', 'http://example.com/abc?def=a', '2010*/http://example.com/abc?def=a')"
# timestamp range query
->>> repr_unicode(WbUrl('2009-2015*/http://example.com/abc?def=a'))
-('query', '2009', '', 'http://example.com/abc?def=a', '2009-2015*/http://example.com/abc?def=a')
+>>> repr(WbUrl('2009-2015*/http://example.com/abc?def=a'))
+"('query', '2009', '', 'http://example.com/abc?def=a', '2009-2015*/http://example.com/abc?def=a')"
->>> repr_unicode(WbUrl('json/*/http://example.com/abc?def=a'))
-('query', '', 'json', 'http://example.com/abc?def=a', 'json/*/http://example.com/abc?def=a')
+>>> repr(WbUrl('json/*/http://example.com/abc?def=a'))
+"('query', '', 'json', 'http://example.com/abc?def=a', 'json/*/http://example.com/abc?def=a')"
->>> repr_unicode(WbUrl('timemap-link/2011*/http://example.com/abc?def=a'))
-('query', '2011', 'timemap-link', 'http://example.com/abc?def=a', 'timemap-link/2011*/http://example.com/abc?def=a')
+>>> repr(WbUrl('timemap-link/2011*/http://example.com/abc?def=a'))
+"('query', '2011', 'timemap-link', 'http://example.com/abc?def=a', 'timemap-link/2011*/http://example.com/abc?def=a')"
# strip off repeated, likely scheme-agnostic, slashes altogether
->>> repr_unicode(WbUrl('///example.com'))
-('latest_replay', '', '', 'http://example.com', 'http://example.com')
+>>> repr(WbUrl('///example.com'))
+"('latest_replay', '', '', 'http://example.com', 'http://example.com')"
->>> repr_unicode(WbUrl('//example.com/'))
-('latest_replay', '', '', 'http://example.com/', 'http://example.com/')
+>>> repr(WbUrl('//example.com/'))
+"('latest_replay', '', '', 'http://example.com/', 'http://example.com/')"
->>> repr_unicode(WbUrl('/example.com/'))
-('latest_replay', '', '', 'http://example.com/', 'http://example.com/')
+>>> repr(WbUrl('/example.com/'))
+"('latest_replay', '', '', 'http://example.com/', 'http://example.com/')"
# Is_ Tests
>>> u = WbUrl('*/http://example.com/abc?def=a*')
@@ -156,7 +162,7 @@ True
# Error Urls
# ======================
# no longer rejecting this here
-#>>> x = WbUrl('/#$%#/')
+#>>> x = WbUrl('/#$%#/')
Traceback (most recent call last):
Exception: Bad Request Url: http://#$%#/
@@ -180,14 +186,8 @@ from urllib import quote_plus, unquote_plus
from StringIO import StringIO
-def repr_unicode(wburl):
- buff = StringIO()
- buff.write("('{0}', '{1}', '{2}', '".format(wburl.type, wburl.timestamp, wburl.mod))
- buff.write(WbUrl.to_iri(wburl.url))
- buff.write("', '")
- buff.write(wburl.to_str(iri=True))
- buff.write("')")
- print(buff.getvalue())
+def to_uri_pencode(url):
+ return WbUrl.percent_encode_host(WbUrl.to_uri(url))
if __name__ == "__main__":
diff --git a/pywb/rewrite/url_rewriter.py b/pywb/rewrite/url_rewriter.py
index 84d035ea..37a6e095 100644
--- a/pywb/rewrite/url_rewriter.py
+++ b/pywb/rewrite/url_rewriter.py
@@ -52,8 +52,8 @@ class UrlRewriter(object):
is_abs = True
url = 'http:' + url
- # always convert any unicode urls to punycode
- ascii_urls_only = self.rewrite_opts.get('rewrite_ascii_urls_only', False)
+ # %-encode the host by default; keep the punycode host only if 'punycode_links_only' is set
+ peh = not self.rewrite_opts.get('punycode_links_only', False)
# Optimized rewriter for
# -rel urls that don't start with / and
@@ -73,13 +73,7 @@ class UrlRewriter(object):
final_url = self.prefix + wburl.to_str(mod=mod,
url=new_url,
- iri=not ascii_urls_only)
- if not ascii_urls_only:
- try:
- final_url = final_url.encode('utf-8')
- except UnicodeDecodeError:
- pass
-
+ percent_encode=peh)
return final_url
def get_new_url(self, **kwargs):
diff --git a/pywb/rewrite/wburl.py b/pywb/rewrite/wburl.py
index 69c81ebb..50907791 100644
--- a/pywb/rewrite/wburl.py
+++ b/pywb/rewrite/wburl.py
@@ -87,56 +87,52 @@ class WbUrl(BaseWbUrl):
DEFAULT_SCHEME = 'http://'
- #PARTIAL_ENC_RX = re.compile('(https?%3A)?(%2F%2F)?', re.I)
FIRST_PATH = re.compile('(? 1:
- url = scheme_dom[0] + u'/' + dom
+ url = scheme_dom[0] + '/' + dom
else:
url = dom
if len(parts) > 1:
- url += u'/' + parts[1]
+ url += '/' + parts[1]
return url
-
@staticmethod
- def to_uri(url, was_uni=False):
- #if not was_uni:
- # if isinstance(url, unicode):
- # was_uni = True
-
- #if not was_uni and not '%' in url:
- # return url
-
+ def to_uri(url):
+ """ Converts a url to an ascii %-encoded form
+ where:
+ - scheme is ascii,
+ - host is punycode,
+ - and remainder is %-encoded
+ urlsplit is not used, so that urls with a partially
+ %-encoded scheme (e.g. 'https%3A//') are decoded too
+ """
parts = WbUrl.FIRST_PATH.split(url, 1)
- #if not was_uni and not '%' in parts[0]:
- # return url
-
scheme_dom = urllib.unquote_plus(parts[0])
if isinstance(scheme_dom, str):
@@ -146,18 +142,18 @@ class WbUrl(BaseWbUrl):
scheme_dom = scheme_dom.decode('utf-8', 'ignore')
scheme_dom = scheme_dom.rsplit('/', 1)
- dom = scheme_dom[-1]
+ domain = scheme_dom[-1]
- dom = dom.encode('idna')
+ domain = domain.encode('idna')
if len(scheme_dom) > 1:
- url = scheme_dom[0] + '/' + dom
+ url = scheme_dom[0].encode('utf-8') + '/' + domain
else:
- url = dom
+ url = domain
if len(parts) > 1:
if isinstance(parts[1], unicode):
- url += '/' + urllib.quote_plus(parts[1].encode('utf-8'))
+ url += '/' + urllib.quote(parts[1].encode('utf-8'))
else:
url += '/' + parts[1]
@@ -168,10 +164,9 @@ class WbUrl(BaseWbUrl):
def __init__(self, orig_url):
super(WbUrl, self).__init__()
- was_uni = False
if isinstance(orig_url, unicode):
orig_url = orig_url.encode('utf-8')
- was_uni = True
+ orig_url = urllib.quote(orig_url)
self.original_url = orig_url
@@ -179,7 +174,7 @@ class WbUrl(BaseWbUrl):
if not self._init_replay(orig_url):
raise Exception('Invalid WbUrl: ', orig_url)
- self.url = WbUrl.to_uri(self.url, was_uni)
+ self.url = WbUrl.to_uri(self.url)
# protocol agnostic url -> http://
# no protocol -> http://
@@ -249,6 +244,18 @@ class WbUrl(BaseWbUrl):
self.url = new_url
return self.url
+ def get_url(self, url=None, percent_encode=False):
+ if url is not None:
+ url = WbUrl.to_uri(url)
+ else:
+ url = self.url
+
+ if percent_encode:
+ url = WbUrl.percent_encode_host(url)
+
+ return url
+
+
# Str Representation
# ====================
def to_str(self, **overrides):
@@ -256,9 +263,9 @@ class WbUrl(BaseWbUrl):
mod = overrides.get('mod', self.mod)
timestamp = overrides.get('timestamp', self.timestamp)
end_timestamp = overrides.get('end_timestamp', self.end_timestamp)
- url = overrides.get('url', self.url)
- if overrides.get('iri'):
- url = WbUrl.to_iri(url)
+
+ url = self.get_url(overrides.get('url'),
+ overrides.get('percent_encode', False))
return self.to_wburl_str(url=url,
type=type_,
diff --git a/pywb/static/wombat.js b/pywb/static/wombat.js
index 6a2827b9..3e93b112 100644
--- a/pywb/static/wombat.js
+++ b/pywb/static/wombat.js
@@ -323,7 +323,7 @@ _WBWombat = (function() {
return url + this._orig_loc.hash;
}
- href = parser.href;
+ href = parser.getAttribute("href");
var hash = parser.hash;
if (hash) {
diff --git a/pywb/webapp/replay_views.py b/pywb/webapp/replay_views.py
index 5fe22d26..2bdcf730 100644
--- a/pywb/webapp/replay_views.py
+++ b/pywb/webapp/replay_views.py
@@ -241,6 +241,7 @@ class ReplayView(object):
else:
statusline = '302 Internal Redirect'
+ new_url = new_url.encode('utf-8')
status_headers = StatusAndHeaders(statusline,
[('Location', new_url)])