1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-28 00:25:21 +01:00

Merge branch 'develop' for 0.8.2

This commit is contained in:
Ilya Kreymer 2015-02-28 09:05:09 -08:00
commit 1eadd35598
6 changed files with 41 additions and 25 deletions

View File

@ -1,3 +1,11 @@
pywb 0.8.2 changelist
~~~~~~~~~~~~~~~~~~~~~
* rewrite: fix a redirect loop on pages with a 'www.' prefix. Since canonicalization removes the prefix, a redirect to the 'www.' form of the same url is treated as a self-redirect (for now).
* memento: ensure the rel=memento url matches the timegate redirect exactly (the urls may differ due to canonicalization, so the actual captured url is used instead of the requested url for both)
pywb 0.8.1 changelist pywb 0.8.1 changelist
~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~

View File

@ -1,4 +1,4 @@
PyWb 0.8.1 PyWb 0.8.2
========== ==========
.. image:: https://travis-ci.org/ikreymer/pywb.png?branch=master .. image:: https://travis-ci.org/ikreymer/pywb.png?branch=master

View File

@ -74,10 +74,13 @@ class MementoRespMixin(object):
is_memento = (wbrequest.wb_url.type == wbrequest.wb_url.REPLAY) is_memento = (wbrequest.wb_url.type == wbrequest.wb_url.REPLAY)
link = [] link = []
req_url = wbrequest.wb_url.url
if is_memento or is_timegate: if is_memento or is_timegate:
url = req_url
if cdx: if cdx:
ts = cdx['timestamp'] ts = cdx['timestamp']
url = cdx['original']
# for top frame # for top frame
elif wbrequest.wb_url.timestamp: elif wbrequest.wb_url.timestamp:
ts = wbrequest.wb_url.timestamp ts = wbrequest.wb_url.timestamp
@ -91,13 +94,14 @@ class MementoRespMixin(object):
self.status_headers.headers.append(('Memento-Datetime', self.status_headers.headers.append(('Memento-Datetime',
http_date)) http_date))
canon_link = wbrequest.urlrewriter.get_new_url(mod='', timestamp=ts) canon_link = wbrequest.urlrewriter.get_new_url(mod='',
timestamp=ts,
url=url)
link.append(self.make_memento_link(canon_link, link.append(self.make_memento_link(canon_link,
'memento', 'memento',
http_date)) http_date))
req_url = wbrequest.wb_url.url
if is_memento and is_timegate: if is_memento and is_timegate:
link.append(self.make_link(req_url, 'original timegate')) link.append(self.make_link(req_url, 'original timegate'))
else: else:

View File

@ -34,7 +34,7 @@ class CaptureException(WbException):
#================================================================= #=================================================================
class ReplayView(object): class ReplayView(object):
STRIP_SCHEME = re.compile('^([\w]+:[/]*)?(.*?)$') STRIP_SCHEME_WWW = re.compile('^([\w]+:[/]*(?:www[\d]*\.)?)?(.*?)$')
def __init__(self, content_loader, config): def __init__(self, content_loader, config):
self.content_loader = content_loader self.content_loader = content_loader
@ -286,8 +286,8 @@ class ReplayView(object):
host = urlsplit(cdx['original']).netloc host = urlsplit(cdx['original']).netloc
location_url = host + location_url location_url = host + location_url
if (ReplayView.strip_scheme(request_url) == if (ReplayView.strip_scheme_www(request_url) ==
ReplayView.strip_scheme(location_url)): ReplayView.strip_scheme_www(location_url)):
raise CaptureException('Self Redirect: ' + str(cdx)) raise CaptureException('Self Redirect: ' + str(cdx))
# TODO: reevaluate this, as it may reject valid refreshes of a page # TODO: reevaluate this, as it may reject valid refreshes of a page
@ -307,39 +307,43 @@ class ReplayView(object):
request_url = (wbrequest.host_prefix + request_url = (wbrequest.host_prefix +
wbrequest.rel_prefix + str(wbrequest.wb_url)) wbrequest.rel_prefix + str(wbrequest.wb_url))
if (ReplayView.strip_scheme(request_url) == if (ReplayView.strip_scheme_www(request_url) ==
ReplayView.strip_scheme(wbrequest.referrer)): ReplayView.strip_scheme_www(wbrequest.referrer)):
raise CaptureException('Self Redirect via Referrer: ' + raise CaptureException('Self Redirect via Referrer: ' +
str(wbrequest.wb_url)) str(wbrequest.wb_url))
@staticmethod @staticmethod
def strip_scheme(url): def strip_scheme_www(url):
""" """
>>> ReplayView.strip_scheme('https://example.com') ==\ >>> ReplayView.strip_scheme_www('https://example.com') ==\
ReplayView.strip_scheme('http://example.com') ReplayView.strip_scheme_www('http://example.com')
True True
>>> ReplayView.strip_scheme('https://example.com') ==\ >>> ReplayView.strip_scheme_www('https://example.com') ==\
ReplayView.strip_scheme('http:/example.com') ReplayView.strip_scheme_www('http:/example.com')
True True
>>> ReplayView.strip_scheme('https://example.com') ==\ >>> ReplayView.strip_scheme_www('https://example.com') ==\
ReplayView.strip_scheme('example.com') ReplayView.strip_scheme_www('example.com')
True True
>>> ReplayView.strip_scheme('about://example.com') ==\ >>> ReplayView.strip_scheme_www('https://example.com') ==\
ReplayView.strip_scheme('example.com') ReplayView.strip_scheme_www('http://www2.example.com')
True True
>>> ReplayView.strip_scheme('http://') ==\ >>> ReplayView.strip_scheme_www('about://example.com') ==\
ReplayView.strip_scheme('') ReplayView.strip_scheme_www('example.com')
True True
>>> ReplayView.strip_scheme('#!@?') ==\ >>> ReplayView.strip_scheme_www('http://') ==\
ReplayView.strip_scheme('#!@?') ReplayView.strip_scheme_www('')
True
>>> ReplayView.strip_scheme_www('#!@?') ==\
ReplayView.strip_scheme_www('#!@?')
True True
""" """
m = ReplayView.STRIP_SCHEME.match(url) m = ReplayView.STRIP_SCHEME_WWW.match(url)
match = m.group(2) match = m.group(2)
return match return match

View File

@ -189,7 +189,7 @@ class J2HtmlCapturesView(J2TemplateView):
#================================================================= #=================================================================
class MementoTimemapView(object): class MementoTimemapView(object):
def render_response(self, wbrequest, cdx_lines): def render_response(self, wbrequest, cdx_lines, **kwargs):
memento_lines = make_timemap(wbrequest, cdx_lines) memento_lines = make_timemap(wbrequest, cdx_lines)
return WbResponse.text_stream(memento_lines, return WbResponse.text_stream(memento_lines,
content_type=LINK_FORMAT) content_type=LINK_FORMAT)

View File

@ -34,7 +34,7 @@ class PyTest(TestCommand):
setup( setup(
name='pywb', name='pywb',
version='0.8.1', version='0.8.2',
url='https://github.com/ikreymer/pywb', url='https://github.com/ikreymer/pywb',
author='Ilya Kreymer', author='Ilya Kreymer',
author_email='ikreymer@gmail.com', author_email='ikreymer@gmail.com',