mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
wburl idn: more complete support for idn urls (#66)
Add distinct to_iri() and to_uri() functions in WbUrl. The internal representation is always an ASCII URI; rewriting defaults to the IRI representation unless 'rewrite_ascii_urls_only' is set to true per collection. Add wbrequest.get_url() to get the url as either an IRI or a URI to be passed to templates.
This commit is contained in:
parent
edff3f17fb
commit
695245d9e8
@ -105,6 +105,18 @@ class WbRequest(object):
|
|||||||
|
|
||||||
self._parse_extra()
|
self._parse_extra()
|
||||||
|
|
||||||
|
def get_url(self, url=None):
    """Return the url to hand to templates, as an IRI or as-is.

    :param url: optional url to convert. When omitted (or empty), the url
        of this request's ``wb_url`` is used.
    :return: ``None`` if the request has no ``wb_url``. If the
        ``rewrite_ascii_urls_only`` rewrite option is set, the selected
        url is returned unchanged; otherwise it is passed through
        ``wb_url.to_iri()``.
    """
    if not self.wb_url:
        return None

    if not url:
        url = self.wb_url.url

    if self.urlrewriter.rewrite_opts.get('rewrite_ascii_urls_only'):
        # Return the url that was selected above, not unconditionally the
        # request url: callers may pass a different url (e.g. a capture's
        # original url) and expect that one back.
        return url

    return self.wb_url.to_iri(url)
|
||||||
|
|
||||||
def _is_ajax(self):
|
def _is_ajax(self):
|
||||||
value = self.env.get('HTTP_X_REQUESTED_WITH')
|
value = self.env.get('HTTP_X_REQUESTED_WITH')
|
||||||
if value and value.lower() == 'xmlhttprequest':
|
if value and value.lower() == 'xmlhttprequest':
|
||||||
|
@ -4,94 +4,138 @@
|
|||||||
ur"""
|
ur"""
|
||||||
# Replay Urls
|
# Replay Urls
|
||||||
# ======================
|
# ======================
|
||||||
>>> repr(WbUrl('20131010000506/example.com'))
|
>>> repr_unicode(WbUrl('20131010000506/example.com'))
|
||||||
"('replay', '20131010000506', '', 'http://example.com', '20131010000506/http://example.com')"
|
('replay', '20131010000506', '', 'http://example.com', '20131010000506/http://example.com')
|
||||||
|
|
||||||
>>> repr(WbUrl('20130102im_/https://example.com'))
|
>>> repr_unicode(WbUrl('20130102im_/https://example.com'))
|
||||||
"('replay', '20130102', 'im_', 'https://example.com', '20130102im_/https://example.com')"
|
('replay', '20130102', 'im_', 'https://example.com', '20130102im_/https://example.com')
|
||||||
|
|
||||||
>>> repr(WbUrl('20130102im_/https:/example.com'))
|
>>> repr_unicode(WbUrl('20130102im_/https:/example.com'))
|
||||||
"('replay', '20130102', 'im_', 'https://example.com', '20130102im_/https://example.com')"
|
('replay', '20130102', 'im_', 'https://example.com', '20130102im_/https://example.com')
|
||||||
|
|
||||||
# Protocol agnostic convert to http
|
# Protocol agnostic convert to http
|
||||||
>>> repr(WbUrl('20130102im_///example.com'))
|
>>> repr_unicode(WbUrl('20130102im_///example.com'))
|
||||||
"('replay', '20130102', 'im_', 'http://example.com', '20130102im_/http://example.com')"
|
('replay', '20130102', 'im_', 'http://example.com', '20130102im_/http://example.com')
|
||||||
|
|
||||||
>>> repr(WbUrl('cs_/example.com'))
|
>>> repr_unicode(WbUrl('cs_/example.com'))
|
||||||
"('latest_replay', '', 'cs_', 'http://example.com', 'cs_/http://example.com')"
|
('latest_replay', '', 'cs_', 'http://example.com', 'cs_/http://example.com')
|
||||||
|
|
||||||
>>> repr(WbUrl('https://example.com/xyz'))
|
>>> repr_unicode(WbUrl('https://example.com/xyz'))
|
||||||
"('latest_replay', '', '', 'https://example.com/xyz', 'https://example.com/xyz')"
|
('latest_replay', '', '', 'https://example.com/xyz', 'https://example.com/xyz')
|
||||||
|
|
||||||
>>> repr(WbUrl('https:/example.com/xyz'))
|
>>> repr_unicode(WbUrl('https:/example.com/xyz'))
|
||||||
"('latest_replay', '', '', 'https://example.com/xyz', 'https://example.com/xyz')"
|
('latest_replay', '', '', 'https://example.com/xyz', 'https://example.com/xyz')
|
||||||
|
|
||||||
>>> repr(WbUrl('https://example.com/xyz?a=%2f&b=%2E'))
|
>>> repr_unicode(WbUrl('https://example.com/xyz?a=%2f&b=%2E'))
|
||||||
"('latest_replay', '', '', 'https://example.com/xyz?a=%2f&b=%2E', 'https://example.com/xyz?a=%2f&b=%2E')"
|
('latest_replay', '', '', 'https://example.com/xyz?a=/&b=.', 'https://example.com/xyz?a=/&b=.')
|
||||||
|
|
||||||
# Test scheme partially encoded urls
|
# Test scheme partially encoded urls
|
||||||
>>> repr(WbUrl('https%3A//example.com/'))
|
>>> repr_unicode(WbUrl('https%3A//example.com/'))
|
||||||
"('latest_replay', '', '', 'https://example.com/', 'https://example.com/')"
|
('latest_replay', '', '', 'https://example.com/', 'https://example.com/')
|
||||||
|
|
||||||
>>> repr(WbUrl('2014/http%3A%2F%2Fexample.com/'))
|
>>> repr_unicode(WbUrl('2014/http%3A%2F%2Fexample.com/'))
|
||||||
"('replay', '2014', '', 'http://example.com/', '2014/http://example.com/')"
|
('replay', '2014', '', 'http://example.com/', '2014/http://example.com/')
|
||||||
|
|
||||||
# Test IDNs
|
# Test IDNs
|
||||||
>>> repr(WbUrl(u'http://пример.испытание'))
|
|
||||||
"('latest_replay', '', '', 'http://xn--e1afmkfd.xn--80akhbyknj4f', 'http://xn--e1afmkfd.xn--80akhbyknj4f')"
|
|
||||||
|
|
||||||
>>> repr(WbUrl(u'https://пример.испытание/abc/'))
|
To IRI
|
||||||
"('latest_replay', '', '', 'https://xn--e1afmkfd.xn--80akhbyknj4f/abc/', 'https://xn--e1afmkfd.xn--80akhbyknj4f/abc/')"
|
>>> print(WbUrl.to_iri(u'https://пример.испытание'))
|
||||||
|
https://пример.испытание
|
||||||
|
|
||||||
>>> repr(WbUrl(u'//пример.испытание/abc/'))
|
>>> print(WbUrl.to_iri(u'пример.испытание'))
|
||||||
"('latest_replay', '', '', 'http://xn--e1afmkfd.xn--80akhbyknj4f/abc/', 'http://xn--e1afmkfd.xn--80akhbyknj4f/abc/')"
|
пример.испытание
|
||||||
|
|
||||||
>>> repr(WbUrl(u'2014id_/https://пример.испытание/abc'))
|
>>> print(WbUrl.to_iri('http://' + quote_plus(u'пример.испытание'.encode('utf-8'))))
|
||||||
"('replay', '2014', 'id_', 'https://xn--e1afmkfd.xn--80akhbyknj4f/abc', '2014id_/https://xn--e1afmkfd.xn--80akhbyknj4f/abc')"
|
http://пример.испытание
|
||||||
|
|
||||||
|
>>> print(WbUrl.to_iri(u'//пример.испытание/abc/испытание'))
|
||||||
|
//пример.испытание/abc/испытание
|
||||||
|
|
||||||
|
>>> print(WbUrl.to_iri(quote_plus(u'пример.испытание'.encode('utf-8')) + '/abc/' + quote_plus(u'пример'.encode('utf-8'))))
|
||||||
|
пример.испытание/abc/пример
|
||||||
|
|
||||||
|
>>> print(WbUrl.to_iri('https://xn--e1afmkfd.xn--80akhbyknj4f'))
|
||||||
|
https://пример.испытание
|
||||||
|
|
||||||
|
|
||||||
|
To URI
|
||||||
|
>>> print(WbUrl.to_uri(u'https://пример.испытание'))
|
||||||
|
https://xn--e1afmkfd.xn--80akhbyknj4f
|
||||||
|
|
||||||
|
>>> print(WbUrl.to_uri(u'пример.испытание'))
|
||||||
|
xn--e1afmkfd.xn--80akhbyknj4f
|
||||||
|
|
||||||
|
>>> print(WbUrl.to_uri('http://' + quote_plus(u'пример.испытание'.encode('utf-8'))))
|
||||||
|
http://xn--e1afmkfd.xn--80akhbyknj4f
|
||||||
|
|
||||||
|
>>> print(WbUrl.to_uri(u'//пример.испытание/abc/испытание'))
|
||||||
|
//xn--e1afmkfd.xn--80akhbyknj4f/abc%2F%D0%B8%D1%81%D0%BF%D1%8B%D1%82%D0%B0%D0%BD%D0%B8%D0%B5
|
||||||
|
|
||||||
|
>>> print(WbUrl.to_uri('//' + quote_plus(u'пример.испытание'.encode('utf-8')) + '/abc/' + quote_plus(u'пример'.encode('utf-8'))))
|
||||||
|
//xn--e1afmkfd.xn--80akhbyknj4f/abc/%D0%BF%D1%80%D0%B8%D0%BC%D0%B5%D1%80
|
||||||
|
|
||||||
|
>>> print(WbUrl.to_uri('https://xn--e1afmkfd.xn--80akhbyknj4f/abc/'))
|
||||||
|
https://xn--e1afmkfd.xn--80akhbyknj4f/abc/
|
||||||
|
|
||||||
|
>>> print(WbUrl.to_uri('http://' + quote_plus(u'пример.испытание'.encode('utf-8'))[1:]))
|
||||||
|
http://xn--d0-olcluwd.xn--80akhbyknj4f
|
||||||
|
|
||||||
|
# IRI representation
|
||||||
|
>>> repr_unicode(WbUrl(u'http://пример.испытание'))
|
||||||
|
('latest_replay', '', '', 'http://пример.испытание', 'http://пример.испытание')
|
||||||
|
|
||||||
|
>>> repr_unicode(WbUrl(u'https://пример.испытание/abc/'))
|
||||||
|
('latest_replay', '', '', 'https://пример.испытание/abc/', 'https://пример.испытание/abc/')
|
||||||
|
|
||||||
|
>>> repr_unicode(WbUrl(u'//пример.испытание/abc/'))
|
||||||
|
('latest_replay', '', '', 'http://пример.испытание/abc/', 'http://пример.испытание/abc/')
|
||||||
|
|
||||||
|
>>> repr_unicode(WbUrl(u'2014id_/https://пример.испытание/abc'))
|
||||||
|
('replay', '2014', 'id_', 'https://пример.испытание/abc', '2014id_/https://пример.испытание/abc')
|
||||||
|
|
||||||
# percent-encoded form (as sent by browser usually)
|
# percent-encoded form (as sent by browser usually)
|
||||||
>>> repr(WbUrl('2014id_/http://' + quote_plus(u'пример.испытание'.encode('utf-8')) + '/abc'))
|
>>> repr_unicode(WbUrl('2014id_/http://' + quote_plus(u'пример.испытание'.encode('utf-8')) + '/abc'))
|
||||||
"('replay', '2014', 'id_', 'http://xn--e1afmkfd.xn--80akhbyknj4f/abc', '2014id_/http://xn--e1afmkfd.xn--80akhbyknj4f/abc')"
|
('replay', '2014', 'id_', 'http://пример.испытание/abc', '2014id_/http://пример.испытание/abc')
|
||||||
|
|
||||||
# percent-encoded form -- scheme relative
|
# percent-encoded form -- scheme relative
|
||||||
>>> repr(WbUrl('2014id_///' + quote_plus(u'пример.испытание'.encode('utf-8')) + '/abc'))
|
>>> repr_unicode(WbUrl('2014id_///' + quote_plus(u'пример.испытание'.encode('utf-8')) + '/abc'))
|
||||||
"('replay', '2014', 'id_', 'http://xn--e1afmkfd.xn--80akhbyknj4f/abc', '2014id_/http://xn--e1afmkfd.xn--80akhbyknj4f/abc')"
|
('replay', '2014', 'id_', 'http://пример.испытание/abc', '2014id_/http://пример.испытание/abc')
|
||||||
|
|
||||||
# invalid: truncated and superfluous '%', ignore invalid (no exception)
|
# invalid: truncated and superfluous '%', ignore invalid (no exception)
|
||||||
>>> repr(WbUrl('2014id_/http://' + quote_plus(u'пример.испытание'.encode('utf-8'))[1:] + '%' + '/abc'))
|
>>> repr_unicode(WbUrl('2014id_/http://' + quote_plus(u'пример.испытание'.encode('utf-8'))[1:] + '%' + '/abc'))
|
||||||
"('replay', '2014', 'id_', 'http://xn--d0-olcluwd.xn--%-7sbpkb3ampk3g/abc', '2014id_/http://xn--d0-olcluwd.xn--%-7sbpkb3ampk3g/abc')"
|
('replay', '2014', 'id_', 'http://d0ример.испытание%/abc', '2014id_/http://d0ример.испытание%/abc')
|
||||||
|
|
||||||
|
|
||||||
# Query Urls
|
# Query Urls
|
||||||
# ======================
|
# ======================
|
||||||
>>> repr(WbUrl('*/http://example.com/abc?def=a'))
|
>>> repr_unicode(WbUrl('*/http://example.com/abc?def=a'))
|
||||||
"('query', '', '', 'http://example.com/abc?def=a', '*/http://example.com/abc?def=a')"
|
('query', '', '', 'http://example.com/abc?def=a', '*/http://example.com/abc?def=a')
|
||||||
|
|
||||||
>>> repr(WbUrl('*/http://example.com/abc?def=a*'))
|
>>> repr_unicode(WbUrl('*/http://example.com/abc?def=a*'))
|
||||||
"('url_query', '', '', 'http://example.com/abc?def=a', '*/http://example.com/abc?def=a*')"
|
('url_query', '', '', 'http://example.com/abc?def=a', '*/http://example.com/abc?def=a*')
|
||||||
|
|
||||||
>>> repr(WbUrl('2010*/http://example.com/abc?def=a'))
|
>>> repr_unicode(WbUrl('2010*/http://example.com/abc?def=a'))
|
||||||
"('query', '2010', '', 'http://example.com/abc?def=a', '2010*/http://example.com/abc?def=a')"
|
('query', '2010', '', 'http://example.com/abc?def=a', '2010*/http://example.com/abc?def=a')
|
||||||
|
|
||||||
# timestamp range query
|
# timestamp range query
|
||||||
>>> repr(WbUrl('2009-2015*/http://example.com/abc?def=a'))
|
>>> repr_unicode(WbUrl('2009-2015*/http://example.com/abc?def=a'))
|
||||||
"('query', '2009', '', 'http://example.com/abc?def=a', '2009-2015*/http://example.com/abc?def=a')"
|
('query', '2009', '', 'http://example.com/abc?def=a', '2009-2015*/http://example.com/abc?def=a')
|
||||||
|
|
||||||
>>> repr(WbUrl('json/*/http://example.com/abc?def=a'))
|
>>> repr_unicode(WbUrl('json/*/http://example.com/abc?def=a'))
|
||||||
"('query', '', 'json', 'http://example.com/abc?def=a', 'json/*/http://example.com/abc?def=a')"
|
('query', '', 'json', 'http://example.com/abc?def=a', 'json/*/http://example.com/abc?def=a')
|
||||||
|
|
||||||
>>> repr(WbUrl('timemap-link/2011*/http://example.com/abc?def=a'))
|
>>> repr_unicode(WbUrl('timemap-link/2011*/http://example.com/abc?def=a'))
|
||||||
"('query', '2011', 'timemap-link', 'http://example.com/abc?def=a', 'timemap-link/2011*/http://example.com/abc?def=a')"
|
('query', '2011', 'timemap-link', 'http://example.com/abc?def=a', 'timemap-link/2011*/http://example.com/abc?def=a')
|
||||||
|
|
||||||
# strip off repeated, likely scheme-agnostic, slashes altogether
|
# strip off repeated, likely scheme-agnostic, slashes altogether
|
||||||
>>> repr(WbUrl('///example.com'))
|
>>> repr_unicode(WbUrl('///example.com'))
|
||||||
"('latest_replay', '', '', 'http://example.com', 'http://example.com')"
|
('latest_replay', '', '', 'http://example.com', 'http://example.com')
|
||||||
|
|
||||||
>>> repr(WbUrl('//example.com/'))
|
>>> repr_unicode(WbUrl('//example.com/'))
|
||||||
"('latest_replay', '', '', 'http://example.com/', 'http://example.com/')"
|
('latest_replay', '', '', 'http://example.com/', 'http://example.com/')
|
||||||
|
|
||||||
>>> repr(WbUrl('/example.com/'))
|
>>> repr_unicode(WbUrl('/example.com/'))
|
||||||
"('latest_replay', '', '', 'http://example.com/', 'http://example.com/')"
|
('latest_replay', '', '', 'http://example.com/', 'http://example.com/')
|
||||||
|
|
||||||
# Is_ Tests
|
# Is_ Tests
|
||||||
>>> u = WbUrl('*/http://example.com/abc?def=a*')
|
>>> u = WbUrl('*/http://example.com/abc?def=a*')
|
||||||
@ -131,7 +175,20 @@ Exception: ('Invalid WbUrl: ', '')
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
from pywb.rewrite.wburl import WbUrl
|
from pywb.rewrite.wburl import WbUrl
|
||||||
from urllib import quote_plus
|
from urllib import quote_plus, unquote_plus
|
||||||
|
|
||||||
|
from StringIO import StringIO
|
||||||
|
|
||||||
|
|
||||||
|
def repr_unicode(wburl):
    """Print a tuple-style summary of ``wburl`` with its urls in IRI form.

    The printed fields are, in order: type, timestamp, mod, the url
    converted with ``WbUrl.to_iri()``, and ``wburl.to_str(iri=True)``.
    """
    pieces = [
        "('{0}', '{1}', '{2}', '".format(wburl.type,
                                         wburl.timestamp,
                                         wburl.mod),
        WbUrl.to_iri(wburl.url),
        "', '",
        wburl.to_str(iri=True),
        "')",
    ]
    print(''.join(pieces))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import doctest
|
import doctest
|
||||||
|
@ -52,6 +52,9 @@ class UrlRewriter(object):
|
|||||||
is_abs = True
|
is_abs = True
|
||||||
url = 'http:' + url
|
url = 'http:' + url
|
||||||
|
|
||||||
|
# always convert any unicode urls to punycode
|
||||||
|
ascii_urls_only = self.rewrite_opts.get('rewrite_ascii_urls_only', False)
|
||||||
|
|
||||||
# Optimized rewriter for
|
# Optimized rewriter for
|
||||||
# -rel urls that don't start with / and
|
# -rel urls that don't start with / and
|
||||||
# do not contain ../ and no special mod
|
# do not contain ../ and no special mod
|
||||||
@ -68,7 +71,11 @@ class UrlRewriter(object):
|
|||||||
if mod is None:
|
if mod is None:
|
||||||
mod = wburl.mod
|
mod = wburl.mod
|
||||||
|
|
||||||
final_url = self.prefix + wburl.to_str(mod=mod, url=new_url)
|
final_url = self.prefix + wburl.to_str(mod=mod,
|
||||||
|
url=new_url,
|
||||||
|
iri=not ascii_urls_only)
|
||||||
|
if not ascii_urls_only:
|
||||||
|
final_url = final_url.encode('utf-8')
|
||||||
|
|
||||||
return final_url
|
return final_url
|
||||||
|
|
||||||
|
@ -90,6 +90,79 @@ class WbUrl(BaseWbUrl):
|
|||||||
#PARTIAL_ENC_RX = re.compile('(https?%3A)?(%2F%2F)?', re.I)
|
#PARTIAL_ENC_RX = re.compile('(https?%3A)?(%2F%2F)?', re.I)
|
||||||
FIRST_PATH = re.compile('(?<![:/])/(?![/])')
|
FIRST_PATH = re.compile('(?<![:/])/(?![/])')
|
||||||
|
|
||||||
|
@staticmethod
def to_iri(url):
    """Convert a url to its IRI (unicode) representation.

    A ``str`` url is first percent-decoded with ``urllib.unquote_plus``
    and then decoded as UTF-8 (strict decoding, so invalid UTF-8 raises
    ``UnicodeDecodeError``). The url is split at the first slash matched
    by ``WbUrl.FIRST_PATH``; in the leading part, the text after the last
    '/' is treated as the domain and decoded from IDNA if possible.

    :param url: the url to convert.
    :return: the url with its domain IDNA-decoded where that succeeds;
        the remainder after the first path slash is appended untouched.
    """
    if isinstance(url, str):
        url = urllib.unquote_plus(url)
        url = url.decode('utf-8')

    parts = WbUrl.FIRST_PATH.split(url, 1)

    scheme_dom = parts[0].rsplit(u'/', 1)
    dom = scheme_dom[-1]

    try:
        dom = dom.decode('idna')
    except Exception:
        # Best effort: a domain that cannot be IDNA-decoded (e.g. one that
        # is already non-ascii) is kept as-is. Unlike a bare ``except:``,
        # this does not swallow KeyboardInterrupt or SystemExit.
        pass

    if len(scheme_dom) > 1:
        url = scheme_dom[0] + u'/' + dom
    else:
        url = dom

    if len(parts) > 1:
        url += u'/' + parts[1]

    return url
|
||||||
|
|
||||||
|
|
||||||
|
@staticmethod
def to_uri(url, was_uni=False):
    """Convert a url to a URI with an IDNA-encoded domain.

    The url is split at the first slash matched by ``WbUrl.FIRST_PATH``.
    The leading part (scheme and domain) is percent-decoded with
    ``urllib.unquote_plus``; the text after its last '/' is treated as
    the domain and IDNA-encoded.

    :param url: the url to convert.
    :param was_uni: unused; kept so existing callers that pass it keep
        working.
    :return: the converted url. A ``str`` url whose leading part is not
        changed by percent-decoding is returned unchanged.
    """
    parts = WbUrl.FIRST_PATH.split(url, 1)

    scheme_dom = urllib.unquote_plus(parts[0])

    if isinstance(scheme_dom, str):
        if scheme_dom == parts[0]:
            # Nothing was percent-encoded in the scheme/domain part.
            return url

        # Invalid UTF-8 sequences are dropped rather than raising.
        scheme_dom = scheme_dom.decode('utf-8', 'ignore')

    scheme_dom = scheme_dom.rsplit('/', 1)
    dom = scheme_dom[-1]

    try:
        dom = dom.encode('idna')
    except UnicodeError:
        # The idna codec rejects e.g. empty or over-long labels. Keep the
        # domain as-is instead of failing the whole url conversion.
        pass

    if len(scheme_dom) > 1:
        url = scheme_dom[0] + '/' + dom
    else:
        url = dom

    if len(parts) > 1:
        if isinstance(parts[1], unicode):
            # NOTE(review): quote_plus() has no safe characters by default,
            # so any '/' inside this remainder is encoded as %2F.
            url += '/' + urllib.quote_plus(parts[1].encode('utf-8'))
        else:
            url += '/' + parts[1]

    return url
|
||||||
|
|
||||||
# ======================
|
# ======================
|
||||||
|
|
||||||
def __init__(self, orig_url):
|
def __init__(self, orig_url):
|
||||||
@ -106,30 +179,7 @@ class WbUrl(BaseWbUrl):
|
|||||||
if not self._init_replay(orig_url):
|
if not self._init_replay(orig_url):
|
||||||
raise Exception('Invalid WbUrl: ', orig_url)
|
raise Exception('Invalid WbUrl: ', orig_url)
|
||||||
|
|
||||||
if was_uni or '%' in self.url:
|
self.url = WbUrl.to_uri(self.url, was_uni)
|
||||||
parts = self.FIRST_PATH.split(self.url, 1)
|
|
||||||
|
|
||||||
if was_uni or '%' in parts[0]:
|
|
||||||
if not was_uni:
|
|
||||||
scheme_dom = urllib.unquote_plus(parts[0])
|
|
||||||
else:
|
|
||||||
scheme_dom = parts[0]
|
|
||||||
|
|
||||||
scheme_dom = scheme_dom.rsplit('/', 1)
|
|
||||||
|
|
||||||
dom = scheme_dom[-1]
|
|
||||||
|
|
||||||
dom = dom.decode('utf-8', 'ignore')
|
|
||||||
dom = dom.encode('idna')
|
|
||||||
|
|
||||||
if len(scheme_dom) > 1:
|
|
||||||
self.url = scheme_dom[0] + '/' + dom
|
|
||||||
else:
|
|
||||||
self.url = dom
|
|
||||||
|
|
||||||
if len(parts) > 1:
|
|
||||||
self.url += '/' + parts[1]
|
|
||||||
|
|
||||||
|
|
||||||
# protocol agnostic url -> http://
|
# protocol agnostic url -> http://
|
||||||
# no protocol -> http://
|
# no protocol -> http://
|
||||||
@ -208,6 +258,8 @@ class WbUrl(BaseWbUrl):
|
|||||||
timestamp = overrides.get('timestamp', self.timestamp)
|
timestamp = overrides.get('timestamp', self.timestamp)
|
||||||
end_timestamp = overrides.get('end_timestamp', self.end_timestamp)
|
end_timestamp = overrides.get('end_timestamp', self.end_timestamp)
|
||||||
url = overrides.get('url', self.url)
|
url = overrides.get('url', self.url)
|
||||||
|
if overrides.get('iri'):
|
||||||
|
url = WbUrl.to_iri(url)
|
||||||
|
|
||||||
return self.to_wburl_str(url=url,
|
return self.to_wburl_str(url=url,
|
||||||
type=type_,
|
type=type_,
|
||||||
|
@ -132,9 +132,12 @@ this.load = function() {
|
|||||||
var hash = window.location.hash;
|
var hash = window.location.hash;
|
||||||
|
|
||||||
var loc = window.location.href.replace(window.location.hash, "");
|
var loc = window.location.href.replace(window.location.hash, "");
|
||||||
|
loc = decodeURI(loc);
|
||||||
|
|
||||||
if (wbinfo.top_url && (loc != wbinfo.top_url) && wbinfo.mod != "bn_") {
|
if (wbinfo.top_url && (loc != wbinfo.top_url) && wbinfo.mod != "bn_") {
|
||||||
// Auto-redirect to top frame
|
// Auto-redirect to top frame
|
||||||
|
console.log(wbinfo.top_url);
|
||||||
|
console.log(loc);
|
||||||
window.location.replace(wbinfo.top_url + hash);
|
window.location.replace(wbinfo.top_url + hash);
|
||||||
} else {
|
} else {
|
||||||
// Init Banner (no frame or top frame)
|
// Init Banner (no frame or top frame)
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
{% if rule.js_rewrite_location != 'urls' and include_wombat %}
|
{% if rule.js_rewrite_location != 'urls' and include_wombat %}
|
||||||
<script src='{{ wbrequest.host_prefix }}/{{ static_path }}/wombat.js'> </script>
|
<script src='{{ wbrequest.host_prefix }}/{{ static_path }}/wombat.js'> </script>
|
||||||
<script>
|
<script>
|
||||||
{% set urlsplit = cdx.original | urlsplit %}
|
{% set urlsplit = cdx.url | urlsplit %}
|
||||||
if (window && window._WBWombat && !window._wb_js_inited) {
|
if (window && window._WBWombat && !window._wb_js_inited) {
|
||||||
var _wb_wombat = new _WBWombat("{{ wbrequest.wb_prefix}}",
|
var _wb_wombat = new _WBWombat("{{ wbrequest.wb_prefix}}",
|
||||||
"{{ cdx['timestamp'] if include_ts else ''}}",
|
"{{ cdx['timestamp'] if include_ts else ''}}",
|
||||||
@ -15,7 +15,7 @@
|
|||||||
{% endif %}
|
{% endif %}
|
||||||
<script>
|
<script>
|
||||||
wbinfo = {}
|
wbinfo = {}
|
||||||
wbinfo.url = "{{ cdx.original }}";
|
wbinfo.url = "{{ cdx.url }}";
|
||||||
wbinfo.timestamp = "{{ cdx.timestamp }}";
|
wbinfo.timestamp = "{{ cdx.timestamp }}";
|
||||||
wbinfo.prefix = "{{ wbrequest.wb_prefix }}";
|
wbinfo.prefix = "{{ wbrequest.wb_prefix }}";
|
||||||
wbinfo.mod = "{{ wbrequest.wb_url.mod }}";
|
wbinfo.mod = "{{ wbrequest.wb_url.mod }}";
|
||||||
|
@ -6,14 +6,14 @@ function ts_to_date(ts, is_gmt)
|
|||||||
if (ts.length < 14) {
|
if (ts.length < 14) {
|
||||||
return ts;
|
return ts;
|
||||||
}
|
}
|
||||||
|
|
||||||
var datestr = (ts.substring(0, 4) + "-" +
|
var datestr = (ts.substring(0, 4) + "-" +
|
||||||
ts.substring(4, 6) + "-" +
|
ts.substring(4, 6) + "-" +
|
||||||
ts.substring(6, 8) + "T" +
|
ts.substring(6, 8) + "T" +
|
||||||
ts.substring(8, 10) + ":" +
|
ts.substring(8, 10) + ":" +
|
||||||
ts.substring(10, 12) + ":" +
|
ts.substring(10, 12) + ":" +
|
||||||
ts.substring(12, 14) + "-00:00");
|
ts.substring(12, 14) + "-00:00");
|
||||||
|
|
||||||
var date = new Date(datestr);
|
var date = new Date(datestr);
|
||||||
if (is_gmt) {
|
if (is_gmt) {
|
||||||
return date.toGMTString();
|
return date.toGMTString();
|
||||||
@ -36,12 +36,12 @@ function ts_to_date(ts, is_gmt)
|
|||||||
</tr>
|
</tr>
|
||||||
{% for cdx in cdx_lines %}
|
{% for cdx in cdx_lines %}
|
||||||
<tr style="{{ 'font-weight: bold' if cdx['mimetype'] != 'warc/revisit' else '' }}">
|
<tr style="{{ 'font-weight: bold' if cdx['mimetype'] != 'warc/revisit' else '' }}">
|
||||||
<td><a href="{{ prefix }}{{ cdx.timestamp }}/{{ cdx.original }}">
|
<td><a href="{{ prefix }}{{ cdx.timestamp }}/{{ cdx.url }}">
|
||||||
<script>document.write(ts_to_date("{{ cdx['timestamp']}}", true))</script>
|
<script>document.write(ts_to_date("{{ cdx['timestamp']}}", true))</script>
|
||||||
</a></td>
|
</a></td>
|
||||||
<td>{{ cdx['statuscode'] }}</td>
|
<td>{{ cdx.statuscode }}</td>
|
||||||
<td>{{ cdx['original'] }}</td>
|
<td>{{ cdx.url }}</td>
|
||||||
<td>{{ cdx['filename'] }}</td>
|
<td>{{ cdx.filename }}</td>
|
||||||
</tr>
|
</tr>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
</table>
|
</table>
|
||||||
|
@ -72,7 +72,8 @@ class SearchPageWbUrlHandler(WbUrlHandler):
|
|||||||
return self.handle_request(wbrequest)
|
return self.handle_request(wbrequest)
|
||||||
|
|
||||||
def get_top_frame_params(self, wbrequest, mod=''):
|
def get_top_frame_params(self, wbrequest, mod=''):
|
||||||
embed_url = wbrequest.wb_url.to_str(mod=mod)
|
embed_url = wbrequest.wb_url.to_str(mod=mod, url='')
|
||||||
|
embed_url += wbrequest.get_url()
|
||||||
|
|
||||||
if wbrequest.wb_url.timestamp:
|
if wbrequest.wb_url.timestamp:
|
||||||
timestamp = wbrequest.wb_url.timestamp
|
timestamp = wbrequest.wb_url.timestamp
|
||||||
@ -82,7 +83,7 @@ class SearchPageWbUrlHandler(WbUrlHandler):
|
|||||||
params = dict(embed_url=embed_url,
|
params = dict(embed_url=embed_url,
|
||||||
wbrequest=wbrequest,
|
wbrequest=wbrequest,
|
||||||
timestamp=timestamp,
|
timestamp=timestamp,
|
||||||
url=wbrequest.wb_url.url,
|
url=wbrequest.get_url(),
|
||||||
banner_html=self.banner_html)
|
banner_html=self.banner_html)
|
||||||
|
|
||||||
return params
|
return params
|
||||||
|
@ -3,6 +3,7 @@ from pywb.framework.wbrequestresponse import WbResponse
|
|||||||
from pywb.framework.memento import make_timemap, LINK_FORMAT
|
from pywb.framework.memento import make_timemap, LINK_FORMAT
|
||||||
|
|
||||||
import urlparse
|
import urlparse
|
||||||
|
import urllib
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from os import path
|
from os import path
|
||||||
@ -128,12 +129,16 @@ class HeadInsertView(J2TemplateView):
|
|||||||
def create_insert_func(self, wbrequest,
|
def create_insert_func(self, wbrequest,
|
||||||
include_ts=True):
|
include_ts=True):
|
||||||
|
|
||||||
|
url = wbrequest.get_url()
|
||||||
|
|
||||||
top_url = wbrequest.wb_prefix
|
top_url = wbrequest.wb_prefix
|
||||||
top_url += wbrequest.wb_url.to_str(mod=wbrequest.final_mod)
|
top_url += wbrequest.wb_url.to_str(mod=wbrequest.final_mod, url='')
|
||||||
|
top_url += url
|
||||||
|
|
||||||
include_wombat = not wbrequest.wb_url.is_banner_only
|
include_wombat = not wbrequest.wb_url.is_banner_only
|
||||||
|
|
||||||
def make_head_insert(rule, cdx):
|
def make_head_insert(rule, cdx):
|
||||||
|
cdx['url'] = url
|
||||||
return (self.render_to_string(wbrequest=wbrequest,
|
return (self.render_to_string(wbrequest=wbrequest,
|
||||||
cdx=cdx,
|
cdx=cdx,
|
||||||
top_url=top_url,
|
top_url=top_url,
|
||||||
@ -165,9 +170,14 @@ class HeadInsertView(J2TemplateView):
|
|||||||
#=================================================================
|
#=================================================================
|
||||||
class J2HtmlCapturesView(J2TemplateView):
|
class J2HtmlCapturesView(J2TemplateView):
|
||||||
def render_response(self, wbrequest, cdx_lines, **kwargs):
    """Render the captures view for ``cdx_lines``.

    Each cdx entry gets a ``'url'`` key holding
    ``wbrequest.get_url(url=cdx['original'])`` before the entries are
    passed, as a list, to ``J2TemplateView.render_response`` together
    with the request url, the wb_url type and the wb prefix.
    """
    captures = []
    for cdx in cdx_lines:
        cdx['url'] = wbrequest.get_url(url=cdx['original'])
        captures.append(cdx)

    return J2TemplateView.render_response(self,
                                          cdx_lines=captures,
                                          url=wbrequest.get_url(),
                                          type=wbrequest.wb_url.type,
                                          prefix=wbrequest.wb_prefix,
                                          **kwargs)
|
||||||
|
@ -97,7 +97,7 @@ class TestWb:
|
|||||||
resp = self.testapp.get('/pywb/20140127171238tf_/http://www.iana.org/')
|
resp = self.testapp.get('/pywb/20140127171238tf_/http://www.iana.org/')
|
||||||
|
|
||||||
assert '<iframe ' in resp.body
|
assert '<iframe ' in resp.body
|
||||||
assert '/pywb/20140127171238/http://www.iana.org/' in resp.body
|
assert '/pywb/20140127171238/http://www.iana.org/' in resp.body, resp.body
|
||||||
|
|
||||||
def test_replay_content(self):
|
def test_replay_content(self):
|
||||||
resp = self.testapp.get('/pywb/20140127171238/http://www.iana.org/')
|
resp = self.testapp.get('/pywb/20140127171238/http://www.iana.org/')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user