1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

Merge branch 'develop' for 0.8.2

This commit is contained in:
Ilya Kreymer 2015-02-28 09:05:09 -08:00
commit 1eadd35598
6 changed files with 41 additions and 25 deletions

View File

@ -1,3 +1,11 @@
pywb 0.8.2 changelist
~~~~~~~~~~~~~~~~~~~~~
* rewrite: fix redirect loop for pages with a 'www.' prefix. Since canonicalization removes the prefix, a redirect to the 'www.' form of the same url is treated as a self-redirect (for now).
* memento: ensure the rel=memento url matches the timegate redirect exactly (the urls may differ due to canonicalization, so the actual captured url is used instead of the requested url for both)
pywb 0.8.1 changelist
~~~~~~~~~~~~~~~~~~~~~

View File

@ -1,4 +1,4 @@
PyWb 0.8.1
PyWb 0.8.2
==========
.. image:: https://travis-ci.org/ikreymer/pywb.png?branch=master

View File

@ -74,10 +74,13 @@ class MementoRespMixin(object):
is_memento = (wbrequest.wb_url.type == wbrequest.wb_url.REPLAY)
link = []
req_url = wbrequest.wb_url.url
if is_memento or is_timegate:
url = req_url
if cdx:
ts = cdx['timestamp']
url = cdx['original']
# for top frame
elif wbrequest.wb_url.timestamp:
ts = wbrequest.wb_url.timestamp
@ -91,13 +94,14 @@ class MementoRespMixin(object):
self.status_headers.headers.append(('Memento-Datetime',
http_date))
canon_link = wbrequest.urlrewriter.get_new_url(mod='', timestamp=ts)
canon_link = wbrequest.urlrewriter.get_new_url(mod='',
timestamp=ts,
url=url)
link.append(self.make_memento_link(canon_link,
'memento',
http_date))
req_url = wbrequest.wb_url.url
if is_memento and is_timegate:
link.append(self.make_link(req_url, 'original timegate'))
else:

View File

@ -34,7 +34,7 @@ class CaptureException(WbException):
#=================================================================
class ReplayView(object):
STRIP_SCHEME = re.compile('^([\w]+:[/]*)?(.*?)$')
STRIP_SCHEME_WWW = re.compile('^([\w]+:[/]*(?:www[\d]*\.)?)?(.*?)$')
def __init__(self, content_loader, config):
self.content_loader = content_loader
@ -286,8 +286,8 @@ class ReplayView(object):
host = urlsplit(cdx['original']).netloc
location_url = host + location_url
if (ReplayView.strip_scheme(request_url) ==
ReplayView.strip_scheme(location_url)):
if (ReplayView.strip_scheme_www(request_url) ==
ReplayView.strip_scheme_www(location_url)):
raise CaptureException('Self Redirect: ' + str(cdx))
# TODO: reevaluate this, as it may reject valid refreshes of a page
@ -307,39 +307,43 @@ class ReplayView(object):
request_url = (wbrequest.host_prefix +
wbrequest.rel_prefix + str(wbrequest.wb_url))
if (ReplayView.strip_scheme(request_url) ==
ReplayView.strip_scheme(wbrequest.referrer)):
if (ReplayView.strip_scheme_www(request_url) ==
ReplayView.strip_scheme_www(wbrequest.referrer)):
raise CaptureException('Self Redirect via Referrer: ' +
str(wbrequest.wb_url))
@staticmethod
def strip_scheme(url):
def strip_scheme_www(url):
"""
>>> ReplayView.strip_scheme('https://example.com') ==\
ReplayView.strip_scheme('http://example.com')
>>> ReplayView.strip_scheme_www('https://example.com') ==\
ReplayView.strip_scheme_www('http://example.com')
True
>>> ReplayView.strip_scheme('https://example.com') ==\
ReplayView.strip_scheme('http:/example.com')
>>> ReplayView.strip_scheme_www('https://example.com') ==\
ReplayView.strip_scheme_www('http:/example.com')
True
>>> ReplayView.strip_scheme('https://example.com') ==\
ReplayView.strip_scheme('example.com')
>>> ReplayView.strip_scheme_www('https://example.com') ==\
ReplayView.strip_scheme_www('example.com')
True
>>> ReplayView.strip_scheme('about://example.com') ==\
ReplayView.strip_scheme('example.com')
>>> ReplayView.strip_scheme_www('https://example.com') ==\
ReplayView.strip_scheme_www('http://www2.example.com')
True
>>> ReplayView.strip_scheme('http://') ==\
ReplayView.strip_scheme('')
>>> ReplayView.strip_scheme_www('about://example.com') ==\
ReplayView.strip_scheme_www('example.com')
True
>>> ReplayView.strip_scheme('#!@?') ==\
ReplayView.strip_scheme('#!@?')
>>> ReplayView.strip_scheme_www('http://') ==\
ReplayView.strip_scheme_www('')
True
>>> ReplayView.strip_scheme_www('#!@?') ==\
ReplayView.strip_scheme_www('#!@?')
True
"""
m = ReplayView.STRIP_SCHEME.match(url)
m = ReplayView.STRIP_SCHEME_WWW.match(url)
match = m.group(2)
return match

View File

@ -189,7 +189,7 @@ class J2HtmlCapturesView(J2TemplateView):
#=================================================================
class MementoTimemapView(object):
def render_response(self, wbrequest, cdx_lines):
def render_response(self, wbrequest, cdx_lines, **kwargs):
memento_lines = make_timemap(wbrequest, cdx_lines)
return WbResponse.text_stream(memento_lines,
content_type=LINK_FORMAT)

View File

@ -34,7 +34,7 @@ class PyTest(TestCommand):
setup(
name='pywb',
version='0.8.1',
version='0.8.2',
url='https://github.com/ikreymer/pywb',
author='Ilya Kreymer',
author_email='ikreymer@gmail.com',