mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
Merge branch 'develop' for 0.8.2
This commit is contained in:
commit
1eadd35598
@ -1,3 +1,11 @@
|
||||
pywb 0.8.2 changelist
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
* rewrite: fix a redirect loop on pages with a 'www.' prefix. Since canonicalization removes the prefix, a redirect to the 'www.' form of the same url is treated as a self-redirect (for now).
|
||||
|
||||
* memento: ensure the rel=memento url matches the timegate redirect exactly (the urls may differ due to canonicalization, so the actual url is now used instead of the requested url for both)
|
||||
|
||||
|
||||
pywb 0.8.1 changelist
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
PyWb 0.8.1
|
||||
PyWb 0.8.2
|
||||
==========
|
||||
|
||||
.. image:: https://travis-ci.org/ikreymer/pywb.png?branch=master
|
||||
|
@ -74,10 +74,13 @@ class MementoRespMixin(object):
|
||||
is_memento = (wbrequest.wb_url.type == wbrequest.wb_url.REPLAY)
|
||||
|
||||
link = []
|
||||
req_url = wbrequest.wb_url.url
|
||||
|
||||
if is_memento or is_timegate:
|
||||
url = req_url
|
||||
if cdx:
|
||||
ts = cdx['timestamp']
|
||||
url = cdx['original']
|
||||
# for top frame
|
||||
elif wbrequest.wb_url.timestamp:
|
||||
ts = wbrequest.wb_url.timestamp
|
||||
@ -91,13 +94,14 @@ class MementoRespMixin(object):
|
||||
self.status_headers.headers.append(('Memento-Datetime',
|
||||
http_date))
|
||||
|
||||
canon_link = wbrequest.urlrewriter.get_new_url(mod='', timestamp=ts)
|
||||
canon_link = wbrequest.urlrewriter.get_new_url(mod='',
|
||||
timestamp=ts,
|
||||
url=url)
|
||||
|
||||
link.append(self.make_memento_link(canon_link,
|
||||
'memento',
|
||||
http_date))
|
||||
|
||||
req_url = wbrequest.wb_url.url
|
||||
|
||||
if is_memento and is_timegate:
|
||||
link.append(self.make_link(req_url, 'original timegate'))
|
||||
else:
|
||||
|
@ -34,7 +34,7 @@ class CaptureException(WbException):
|
||||
|
||||
#=================================================================
|
||||
class ReplayView(object):
|
||||
STRIP_SCHEME = re.compile('^([\w]+:[/]*)?(.*?)$')
|
||||
STRIP_SCHEME_WWW = re.compile('^([\w]+:[/]*(?:www[\d]*\.)?)?(.*?)$')
|
||||
|
||||
def __init__(self, content_loader, config):
|
||||
self.content_loader = content_loader
|
||||
@ -286,8 +286,8 @@ class ReplayView(object):
|
||||
host = urlsplit(cdx['original']).netloc
|
||||
location_url = host + location_url
|
||||
|
||||
if (ReplayView.strip_scheme(request_url) ==
|
||||
ReplayView.strip_scheme(location_url)):
|
||||
if (ReplayView.strip_scheme_www(request_url) ==
|
||||
ReplayView.strip_scheme_www(location_url)):
|
||||
raise CaptureException('Self Redirect: ' + str(cdx))
|
||||
|
||||
# TODO: reevaluate this, as it may reject valid refreshes of a page
|
||||
@ -307,39 +307,43 @@ class ReplayView(object):
|
||||
request_url = (wbrequest.host_prefix +
|
||||
wbrequest.rel_prefix + str(wbrequest.wb_url))
|
||||
|
||||
if (ReplayView.strip_scheme(request_url) ==
|
||||
ReplayView.strip_scheme(wbrequest.referrer)):
|
||||
if (ReplayView.strip_scheme_www(request_url) ==
|
||||
ReplayView.strip_scheme_www(wbrequest.referrer)):
|
||||
raise CaptureException('Self Redirect via Referrer: ' +
|
||||
str(wbrequest.wb_url))
|
||||
|
||||
@staticmethod
|
||||
def strip_scheme(url):
|
||||
def strip_scheme_www(url):
|
||||
"""
|
||||
>>> ReplayView.strip_scheme('https://example.com') ==\
|
||||
ReplayView.strip_scheme('http://example.com')
|
||||
>>> ReplayView.strip_scheme_www('https://example.com') ==\
|
||||
ReplayView.strip_scheme_www('http://example.com')
|
||||
True
|
||||
|
||||
>>> ReplayView.strip_scheme('https://example.com') ==\
|
||||
ReplayView.strip_scheme('http:/example.com')
|
||||
>>> ReplayView.strip_scheme_www('https://example.com') ==\
|
||||
ReplayView.strip_scheme_www('http:/example.com')
|
||||
True
|
||||
|
||||
>>> ReplayView.strip_scheme('https://example.com') ==\
|
||||
ReplayView.strip_scheme('example.com')
|
||||
>>> ReplayView.strip_scheme_www('https://example.com') ==\
|
||||
ReplayView.strip_scheme_www('example.com')
|
||||
True
|
||||
|
||||
>>> ReplayView.strip_scheme('about://example.com') ==\
|
||||
ReplayView.strip_scheme('example.com')
|
||||
>>> ReplayView.strip_scheme_www('https://example.com') ==\
|
||||
ReplayView.strip_scheme_www('http://www2.example.com')
|
||||
True
|
||||
|
||||
>>> ReplayView.strip_scheme('http://') ==\
|
||||
ReplayView.strip_scheme('')
|
||||
>>> ReplayView.strip_scheme_www('about://example.com') ==\
|
||||
ReplayView.strip_scheme_www('example.com')
|
||||
True
|
||||
|
||||
>>> ReplayView.strip_scheme('#!@?') ==\
|
||||
ReplayView.strip_scheme('#!@?')
|
||||
>>> ReplayView.strip_scheme_www('http://') ==\
|
||||
ReplayView.strip_scheme_www('')
|
||||
True
|
||||
|
||||
>>> ReplayView.strip_scheme_www('#!@?') ==\
|
||||
ReplayView.strip_scheme_www('#!@?')
|
||||
True
|
||||
"""
|
||||
m = ReplayView.STRIP_SCHEME.match(url)
|
||||
m = ReplayView.STRIP_SCHEME_WWW.match(url)
|
||||
match = m.group(2)
|
||||
return match
|
||||
|
||||
|
@ -189,7 +189,7 @@ class J2HtmlCapturesView(J2TemplateView):
|
||||
|
||||
#=================================================================
|
||||
class MementoTimemapView(object):
|
||||
def render_response(self, wbrequest, cdx_lines):
|
||||
def render_response(self, wbrequest, cdx_lines, **kwargs):
|
||||
memento_lines = make_timemap(wbrequest, cdx_lines)
|
||||
return WbResponse.text_stream(memento_lines,
|
||||
content_type=LINK_FORMAT)
|
||||
|
Loading…
x
Reference in New Issue
Block a user