mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-28 00:25:21 +01:00
Merge branch 'develop' for 0.8.2
This commit is contained in:
commit
1eadd35598
@ -1,3 +1,11 @@
|
|||||||
|
pywb 0.8.2 changelist
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
* rewrite: fix a redirect loop affecting pages with a 'www.' prefix. Since canonicalization removes the prefix, a redirect that only adds 'www.' is treated as a self-redirect (for now).
|
||||||
|
|
||||||
|
* memento: ensure the rel=memento url matches the timegate redirect exactly (urls may differ due to canonicalization, so use the actual captured url instead of the requested url for both)
|
||||||
|
|
||||||
|
|
||||||
pywb 0.8.1 changelist
|
pywb 0.8.1 changelist
|
||||||
~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
PyWb 0.8.1
|
PyWb 0.8.2
|
||||||
==========
|
==========
|
||||||
|
|
||||||
.. image:: https://travis-ci.org/ikreymer/pywb.png?branch=master
|
.. image:: https://travis-ci.org/ikreymer/pywb.png?branch=master
|
||||||
|
@ -74,10 +74,13 @@ class MementoRespMixin(object):
|
|||||||
is_memento = (wbrequest.wb_url.type == wbrequest.wb_url.REPLAY)
|
is_memento = (wbrequest.wb_url.type == wbrequest.wb_url.REPLAY)
|
||||||
|
|
||||||
link = []
|
link = []
|
||||||
|
req_url = wbrequest.wb_url.url
|
||||||
|
|
||||||
if is_memento or is_timegate:
|
if is_memento or is_timegate:
|
||||||
|
url = req_url
|
||||||
if cdx:
|
if cdx:
|
||||||
ts = cdx['timestamp']
|
ts = cdx['timestamp']
|
||||||
|
url = cdx['original']
|
||||||
# for top frame
|
# for top frame
|
||||||
elif wbrequest.wb_url.timestamp:
|
elif wbrequest.wb_url.timestamp:
|
||||||
ts = wbrequest.wb_url.timestamp
|
ts = wbrequest.wb_url.timestamp
|
||||||
@ -91,13 +94,14 @@ class MementoRespMixin(object):
|
|||||||
self.status_headers.headers.append(('Memento-Datetime',
|
self.status_headers.headers.append(('Memento-Datetime',
|
||||||
http_date))
|
http_date))
|
||||||
|
|
||||||
canon_link = wbrequest.urlrewriter.get_new_url(mod='', timestamp=ts)
|
canon_link = wbrequest.urlrewriter.get_new_url(mod='',
|
||||||
|
timestamp=ts,
|
||||||
|
url=url)
|
||||||
|
|
||||||
link.append(self.make_memento_link(canon_link,
|
link.append(self.make_memento_link(canon_link,
|
||||||
'memento',
|
'memento',
|
||||||
http_date))
|
http_date))
|
||||||
|
|
||||||
req_url = wbrequest.wb_url.url
|
|
||||||
|
|
||||||
if is_memento and is_timegate:
|
if is_memento and is_timegate:
|
||||||
link.append(self.make_link(req_url, 'original timegate'))
|
link.append(self.make_link(req_url, 'original timegate'))
|
||||||
else:
|
else:
|
||||||
|
@ -34,7 +34,7 @@ class CaptureException(WbException):
|
|||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class ReplayView(object):
|
class ReplayView(object):
|
||||||
STRIP_SCHEME = re.compile('^([\w]+:[/]*)?(.*?)$')
|
STRIP_SCHEME_WWW = re.compile('^([\w]+:[/]*(?:www[\d]*\.)?)?(.*?)$')
|
||||||
|
|
||||||
def __init__(self, content_loader, config):
|
def __init__(self, content_loader, config):
|
||||||
self.content_loader = content_loader
|
self.content_loader = content_loader
|
||||||
@ -286,8 +286,8 @@ class ReplayView(object):
|
|||||||
host = urlsplit(cdx['original']).netloc
|
host = urlsplit(cdx['original']).netloc
|
||||||
location_url = host + location_url
|
location_url = host + location_url
|
||||||
|
|
||||||
if (ReplayView.strip_scheme(request_url) ==
|
if (ReplayView.strip_scheme_www(request_url) ==
|
||||||
ReplayView.strip_scheme(location_url)):
|
ReplayView.strip_scheme_www(location_url)):
|
||||||
raise CaptureException('Self Redirect: ' + str(cdx))
|
raise CaptureException('Self Redirect: ' + str(cdx))
|
||||||
|
|
||||||
# TODO: reevaluate this, as it may reject valid refreshes of a page
|
# TODO: reevaluate this, as it may reject valid refreshes of a page
|
||||||
@ -307,39 +307,43 @@ class ReplayView(object):
|
|||||||
request_url = (wbrequest.host_prefix +
|
request_url = (wbrequest.host_prefix +
|
||||||
wbrequest.rel_prefix + str(wbrequest.wb_url))
|
wbrequest.rel_prefix + str(wbrequest.wb_url))
|
||||||
|
|
||||||
if (ReplayView.strip_scheme(request_url) ==
|
if (ReplayView.strip_scheme_www(request_url) ==
|
||||||
ReplayView.strip_scheme(wbrequest.referrer)):
|
ReplayView.strip_scheme_www(wbrequest.referrer)):
|
||||||
raise CaptureException('Self Redirect via Referrer: ' +
|
raise CaptureException('Self Redirect via Referrer: ' +
|
||||||
str(wbrequest.wb_url))
|
str(wbrequest.wb_url))
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def strip_scheme(url):
|
def strip_scheme_www(url):
|
||||||
"""
|
"""
|
||||||
>>> ReplayView.strip_scheme('https://example.com') ==\
|
>>> ReplayView.strip_scheme_www('https://example.com') ==\
|
||||||
ReplayView.strip_scheme('http://example.com')
|
ReplayView.strip_scheme_www('http://example.com')
|
||||||
True
|
True
|
||||||
|
|
||||||
>>> ReplayView.strip_scheme('https://example.com') ==\
|
>>> ReplayView.strip_scheme_www('https://example.com') ==\
|
||||||
ReplayView.strip_scheme('http:/example.com')
|
ReplayView.strip_scheme_www('http:/example.com')
|
||||||
True
|
True
|
||||||
|
|
||||||
>>> ReplayView.strip_scheme('https://example.com') ==\
|
>>> ReplayView.strip_scheme_www('https://example.com') ==\
|
||||||
ReplayView.strip_scheme('example.com')
|
ReplayView.strip_scheme_www('example.com')
|
||||||
True
|
True
|
||||||
|
|
||||||
>>> ReplayView.strip_scheme('about://example.com') ==\
|
>>> ReplayView.strip_scheme_www('https://example.com') ==\
|
||||||
ReplayView.strip_scheme('example.com')
|
ReplayView.strip_scheme_www('http://www2.example.com')
|
||||||
True
|
True
|
||||||
|
|
||||||
>>> ReplayView.strip_scheme('http://') ==\
|
>>> ReplayView.strip_scheme_www('about://example.com') ==\
|
||||||
ReplayView.strip_scheme('')
|
ReplayView.strip_scheme_www('example.com')
|
||||||
True
|
True
|
||||||
|
|
||||||
>>> ReplayView.strip_scheme('#!@?') ==\
|
>>> ReplayView.strip_scheme_www('http://') ==\
|
||||||
ReplayView.strip_scheme('#!@?')
|
ReplayView.strip_scheme_www('')
|
||||||
|
True
|
||||||
|
|
||||||
|
>>> ReplayView.strip_scheme_www('#!@?') ==\
|
||||||
|
ReplayView.strip_scheme_www('#!@?')
|
||||||
True
|
True
|
||||||
"""
|
"""
|
||||||
m = ReplayView.STRIP_SCHEME.match(url)
|
m = ReplayView.STRIP_SCHEME_WWW.match(url)
|
||||||
match = m.group(2)
|
match = m.group(2)
|
||||||
return match
|
return match
|
||||||
|
|
||||||
|
@ -189,7 +189,7 @@ class J2HtmlCapturesView(J2TemplateView):
|
|||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class MementoTimemapView(object):
|
class MementoTimemapView(object):
|
||||||
def render_response(self, wbrequest, cdx_lines):
|
def render_response(self, wbrequest, cdx_lines, **kwargs):
|
||||||
memento_lines = make_timemap(wbrequest, cdx_lines)
|
memento_lines = make_timemap(wbrequest, cdx_lines)
|
||||||
return WbResponse.text_stream(memento_lines,
|
return WbResponse.text_stream(memento_lines,
|
||||||
content_type=LINK_FORMAT)
|
content_type=LINK_FORMAT)
|
||||||
|
2
setup.py
2
setup.py
@ -34,7 +34,7 @@ class PyTest(TestCommand):
|
|||||||
|
|
||||||
setup(
|
setup(
|
||||||
name='pywb',
|
name='pywb',
|
||||||
version='0.8.1',
|
version='0.8.2',
|
||||||
url='https://github.com/ikreymer/pywb',
|
url='https://github.com/ikreymer/pywb',
|
||||||
author='Ilya Kreymer',
|
author='Ilya Kreymer',
|
||||||
author_email='ikreymer@gmail.com',
|
author_email='ikreymer@gmail.com',
|
||||||
|
Loading…
x
Reference in New Issue
Block a user