mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
replay: change strip_scheme() to strip_scheme_www() to also strip away www. prefix for self-redirect checking, #73
This commit is contained in:
parent
83f8d7d29b
commit
5d80d2d891
@ -34,7 +34,7 @@ class CaptureException(WbException):
|
|||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class ReplayView(object):
|
class ReplayView(object):
|
||||||
STRIP_SCHEME = re.compile('^([\w]+:[/]*)?(.*?)$')
|
# Pattern used to normalise urls before the self-redirect comparisons.
# Group 1: optional scheme prefix ('http://', 'https:/', 'about://', ...).
# The 'www.' / 'wwwN.' host prefix is optional on its own (not nested in
# the scheme group) so it is also dropped from scheme-less urls such as
# 'www.example.com/path'.
# Group 2: the remainder of the url, which is what callers compare.
STRIP_SCHEME_WWW = re.compile(r'^([\w]+:[/]*)?(?:www[\d]*\.)?(.*?)$')
|
||||||
|
|
||||||
def __init__(self, content_loader, config):
|
def __init__(self, content_loader, config):
|
||||||
self.content_loader = content_loader
|
self.content_loader = content_loader
|
||||||
@ -286,8 +286,8 @@ class ReplayView(object):
|
|||||||
host = urlsplit(cdx['original']).netloc
|
host = urlsplit(cdx['original']).netloc
|
||||||
location_url = host + location_url
|
location_url = host + location_url
|
||||||
|
|
||||||
if (ReplayView.strip_scheme(request_url) ==
|
if (ReplayView.strip_scheme_www(request_url) ==
|
||||||
ReplayView.strip_scheme(location_url)):
|
ReplayView.strip_scheme_www(location_url)):
|
||||||
raise CaptureException('Self Redirect: ' + str(cdx))
|
raise CaptureException('Self Redirect: ' + str(cdx))
|
||||||
|
|
||||||
# TODO: reevaluate this, as it may reject valid refreshes of a page
|
# TODO: reevaluate this, as it may reject valid refreshes of a page
|
||||||
@ -307,39 +307,43 @@ class ReplayView(object):
|
|||||||
request_url = (wbrequest.host_prefix +
|
request_url = (wbrequest.host_prefix +
|
||||||
wbrequest.rel_prefix + str(wbrequest.wb_url))
|
wbrequest.rel_prefix + str(wbrequest.wb_url))
|
||||||
|
|
||||||
if (ReplayView.strip_scheme(request_url) ==
|
if (ReplayView.strip_scheme_www(request_url) ==
|
||||||
ReplayView.strip_scheme(wbrequest.referrer)):
|
ReplayView.strip_scheme_www(wbrequest.referrer)):
|
||||||
raise CaptureException('Self Redirect via Referrer: ' +
|
raise CaptureException('Self Redirect via Referrer: ' +
|
||||||
str(wbrequest.wb_url))
|
str(wbrequest.wb_url))
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def strip_scheme(url):
|
def strip_scheme_www(url):
|
||||||
"""
|
"""
|
||||||
>>> ReplayView.strip_scheme('https://example.com') ==\
|
>>> ReplayView.strip_scheme_www('https://example.com') ==\
|
||||||
ReplayView.strip_scheme('http://example.com')
|
ReplayView.strip_scheme_www('http://example.com')
|
||||||
True
|
True
|
||||||
|
|
||||||
>>> ReplayView.strip_scheme('https://example.com') ==\
|
>>> ReplayView.strip_scheme_www('https://example.com') ==\
|
||||||
ReplayView.strip_scheme('http:/example.com')
|
ReplayView.strip_scheme_www('http:/example.com')
|
||||||
True
|
True
|
||||||
|
|
||||||
>>> ReplayView.strip_scheme('https://example.com') ==\
|
>>> ReplayView.strip_scheme_www('https://example.com') ==\
|
||||||
ReplayView.strip_scheme('example.com')
|
ReplayView.strip_scheme_www('example.com')
|
||||||
True
|
True
|
||||||
|
|
||||||
>>> ReplayView.strip_scheme('about://example.com') ==\
|
>>> ReplayView.strip_scheme_www('https://example.com') ==\
|
||||||
ReplayView.strip_scheme('example.com')
|
ReplayView.strip_scheme_www('http://www2.example.com')
|
||||||
True
|
True
|
||||||
|
|
||||||
>>> ReplayView.strip_scheme('http://') ==\
|
>>> ReplayView.strip_scheme_www('about://example.com') ==\
|
||||||
ReplayView.strip_scheme('')
|
ReplayView.strip_scheme_www('example.com')
|
||||||
True
|
True
|
||||||
|
|
||||||
>>> ReplayView.strip_scheme('#!@?') ==\
|
>>> ReplayView.strip_scheme_www('http://') ==\
|
||||||
ReplayView.strip_scheme('#!@?')
|
ReplayView.strip_scheme_www('')
|
||||||
|
True
|
||||||
|
|
||||||
|
>>> ReplayView.strip_scheme_www('#!@?') ==\
|
||||||
|
ReplayView.strip_scheme_www('#!@?')
|
||||||
True
|
True
|
||||||
"""
|
"""
|
||||||
m = ReplayView.STRIP_SCHEME.match(url)
|
m = ReplayView.STRIP_SCHEME_WWW.match(url)
|
||||||
match = m.group(2)
|
match = m.group(2)
|
||||||
return match
|
return match
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user