mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
referer redirect: check against registered routes
js rewriter: only rewrite quoted strings, support relative redirect; Jinja view: add 'host' filter for extracting hostname; css tweak
This commit is contained in:
parent
a757f53bd5
commit
232ac733ab
@ -28,10 +28,7 @@ class ArchivalRouter:
|
|||||||
if env['REL_REQUEST_URI'] in ['/', '/index.html', '/index.htm']:
|
if env['REL_REQUEST_URI'] in ['/', '/index.html', '/index.htm']:
|
||||||
return self.render_home_page()
|
return self.render_home_page()
|
||||||
|
|
||||||
if not self.fallback:
|
return self.fallback(env, self.routes) if self.fallback else None
|
||||||
return None
|
|
||||||
|
|
||||||
return self.fallback(WbRequest.from_uri(None, env))
|
|
||||||
|
|
||||||
|
|
||||||
def render_home_page(self):
|
def render_home_page(self):
|
||||||
@ -76,7 +73,13 @@ class Route:
|
|||||||
|
|
||||||
|
|
||||||
def __call__(self, env, use_abs_prefix):
|
def __call__(self, env, use_abs_prefix):
|
||||||
request_uri = env['REL_REQUEST_URI']
|
wbrequest = self.parse_request(env, use_abs_prefix)
|
||||||
|
return self.handler(wbrequest) if wbrequest else None
|
||||||
|
|
||||||
|
def parse_request(self, env, use_abs_prefix, request_uri = None):
|
||||||
|
if not request_uri:
|
||||||
|
request_uri = env['REL_REQUEST_URI']
|
||||||
|
|
||||||
matcher = self.regex.match(request_uri[1:])
|
matcher = self.regex.match(request_uri[1:])
|
||||||
if not matcher:
|
if not matcher:
|
||||||
return None
|
return None
|
||||||
@ -104,7 +107,8 @@ class Route:
|
|||||||
# Allow for applying of additional filters
|
# Allow for applying of additional filters
|
||||||
self._apply_filters(wbrequest, matcher)
|
self._apply_filters(wbrequest, matcher)
|
||||||
|
|
||||||
return self._handle_request(wbrequest)
|
return wbrequest
|
||||||
|
|
||||||
|
|
||||||
def _apply_filters(self, wbrequest, matcher):
|
def _apply_filters(self, wbrequest, matcher):
|
||||||
for filter in self.filters:
|
for filter in self.filters:
|
||||||
@ -114,9 +118,6 @@ class Route:
|
|||||||
def _custom_init(self, config):
|
def _custom_init(self, config):
|
||||||
self.filters = config.get('filters', [])
|
self.filters = config.get('filters', [])
|
||||||
|
|
||||||
def _handle_request(self, wbrequest):
|
|
||||||
return self.handler(wbrequest)
|
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
#return '* ' + self.regex_str + ' => ' + str(self.handler)
|
#return '* ' + self.regex_str + ' => ' + str(self.handler)
|
||||||
return str(self.handler)
|
return str(self.handler)
|
||||||
@ -143,6 +144,10 @@ class ReferRedirect:
|
|||||||
>>> test_redir('http://localhost:8080/', '/../../other.html', 'http://localhost:8080/coll/20131010/http://example.com/index.html')
|
>>> test_redir('http://localhost:8080/', '/../../other.html', 'http://localhost:8080/coll/20131010/http://example.com/index.html')
|
||||||
'http://localhost:8080/coll/20131010/http://example.com/other.html'
|
'http://localhost:8080/coll/20131010/http://example.com/other.html'
|
||||||
|
|
||||||
|
# Custom collection
|
||||||
|
>>> test_redir('http://localhost:8080/', '/other.html', 'http://localhost:8080/complex/123/20131010/http://example.com/path/page.html', coll='complex/123')
|
||||||
|
'http://localhost:8080/complex/123/20131010/http://example.com/path/other.html'
|
||||||
|
|
||||||
# With timestamp included
|
# With timestamp included
|
||||||
>>> test_redir('http://localhost:8080/', '/20131010/other.html', 'http://localhost:8080/coll/20131010/http://example.com/index.html')
|
>>> test_redir('http://localhost:8080/', '/20131010/other.html', 'http://localhost:8080/coll/20131010/http://example.com/index.html')
|
||||||
'http://localhost:8080/coll/20131010/http://example.com/other.html'
|
'http://localhost:8080/coll/20131010/http://example.com/other.html'
|
||||||
@ -151,6 +156,7 @@ class ReferRedirect:
|
|||||||
>>> test_redir('http://localhost:8080/', '/20131010/path/other.html', 'http://localhost:8080/coll/20131010/http://example.com/some/index.html')
|
>>> test_redir('http://localhost:8080/', '/20131010/path/other.html', 'http://localhost:8080/coll/20131010/http://example.com/some/index.html')
|
||||||
'http://localhost:8080/coll/20131010/http://example.com/path/other.html'
|
'http://localhost:8080/coll/20131010/http://example.com/path/other.html'
|
||||||
|
|
||||||
|
# Wrong Host
|
||||||
>>> test_redir('http://example:8080/', '/other.html', 'http://localhost:8080/coll/20131010/http://example.com/path/page.html')
|
>>> test_redir('http://example:8080/', '/other.html', 'http://localhost:8080/coll/20131010/http://example.com/path/page.html')
|
||||||
False
|
False
|
||||||
|
|
||||||
@ -175,30 +181,48 @@ class ReferRedirect:
|
|||||||
self.match_prefixs = [match_prefixs]
|
self.match_prefixs = [match_prefixs]
|
||||||
|
|
||||||
|
|
||||||
def __call__(self, wbrequest):
|
def __call__(self, env, routes):
|
||||||
if wbrequest.referrer is None:
|
referrer = env.get('HTTP_REFERER')
|
||||||
|
|
||||||
|
# ensure there is a referrer
|
||||||
|
if referrer is None:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if not any (wbrequest.referrer.startswith(i) for i in self.match_prefixs):
|
# ensure referrer starts with one of allowed hosts
|
||||||
|
if not any (referrer.startswith(i) for i in self.match_prefixs):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
ref_split = urlparse.urlsplit(wbrequest.referrer)
|
# get referrer path name
|
||||||
|
ref_split = urlparse.urlsplit(referrer)
|
||||||
|
|
||||||
path = ref_split.path
|
path = ref_split.path
|
||||||
script_name = wbrequest.env['SCRIPT_NAME']
|
|
||||||
|
|
||||||
if not path.startswith(script_name):
|
app_path = env['SCRIPT_NAME']
|
||||||
|
|
||||||
|
if app_path:
|
||||||
|
# must start with current app name, if not root
|
||||||
|
if not path.startswith(app_path):
|
||||||
|
return None
|
||||||
|
|
||||||
|
path = path[len(app_path):]
|
||||||
|
|
||||||
|
|
||||||
|
for route in routes:
|
||||||
|
ref_request = route.parse_request(env, False, request_uri = path)
|
||||||
|
if ref_request:
|
||||||
|
break
|
||||||
|
|
||||||
|
# must have matched one of the routes
|
||||||
|
if not ref_request:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
ref_path = path[len(script_name) + 1:].split('/', 1)
|
# must have a rewriter
|
||||||
|
if not ref_request.urlrewriter:
|
||||||
# No match on any exception
|
|
||||||
try:
|
|
||||||
rewriter = UrlRewriter(ref_path[1], script_name + '/' + ref_path[0] + '/')
|
|
||||||
except Exception:
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
rel_request_uri = wbrequest.request_uri[1:]
|
rewriter = ref_request.urlrewriter
|
||||||
|
|
||||||
|
rel_request_uri = env['REL_REQUEST_URI'][1:]
|
||||||
|
|
||||||
timestamp_path = rewriter.wburl.timestamp + '/'
|
timestamp_path = rewriter.wburl.timestamp + '/'
|
||||||
|
|
||||||
@ -218,12 +242,13 @@ if __name__ == "__main__" or utils.enable_doctests():
|
|||||||
|
|
||||||
import handlers
|
import handlers
|
||||||
|
|
||||||
def test_redir(match_host, request_uri, referrer, script_name = ''):
|
def test_redir(match_host, request_uri, referrer, script_name = '', coll = 'coll'):
|
||||||
env = {'REL_REQUEST_URI': request_uri, 'HTTP_REFERER': referrer, 'SCRIPT_NAME': script_name}
|
env = {'REL_REQUEST_URI': request_uri, 'HTTP_REFERER': referrer, 'SCRIPT_NAME': script_name}
|
||||||
|
routes = [Route(coll, handlers.BaseHandler())]
|
||||||
|
|
||||||
redir = ReferRedirect(match_host)
|
redir = ReferRedirect(match_host)
|
||||||
req = WbRequest.from_uri(request_uri, env)
|
#req = WbRequest.from_uri(request_uri, env)
|
||||||
rep = redir(req)
|
rep = redir(env, routes)
|
||||||
if not rep:
|
if not rep:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
@ -34,6 +34,8 @@ class RegexRewriter:
|
|||||||
|
|
||||||
HTTPX_MATCH_STR = r'https?:\\?/\\?/[A-Za-z0-9:_@.-]+'
|
HTTPX_MATCH_STR = r'https?:\\?/\\?/[A-Za-z0-9:_@.-]+'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
DEFAULT_OP = add_prefix
|
DEFAULT_OP = add_prefix
|
||||||
|
|
||||||
|
|
||||||
@ -101,6 +103,12 @@ class JSRewriter(RegexRewriter):
|
|||||||
>>> test_js(r'location = "http:\\/\\/example.com/abc.html"')
|
>>> test_js(r'location = "http:\\/\\/example.com/abc.html"')
|
||||||
'WB_wombat_location = "/web/20131010im_/http:\\\\/\\\\/example.com/abc.html"'
|
'WB_wombat_location = "/web/20131010im_/http:\\\\/\\\\/example.com/abc.html"'
|
||||||
|
|
||||||
|
>>> test_js(r"location = 'http://example.com/abc.html/'")
|
||||||
|
"WB_wombat_location = '/web/20131010im_/http://example.com/abc.html/'"
|
||||||
|
|
||||||
|
>>> test_js(r'location = http://example.com/abc.html/')
|
||||||
|
'WB_wombat_location = http://example.com/abc.html/'
|
||||||
|
|
||||||
>>> test_js(r'location = /http:\/\/example.com/abc.html/')
|
>>> test_js(r'location = /http:\/\/example.com/abc.html/')
|
||||||
'WB_wombat_location = /http:\\\\/\\\\/example.com/abc.html/'
|
'WB_wombat_location = /http:\\\\/\\\\/example.com/abc.html/'
|
||||||
|
|
||||||
@ -120,8 +128,14 @@ class JSRewriter(RegexRewriter):
|
|||||||
>>> test_js('window.location = "http://example.com/abc.html"; some_func(); ', [('some_func\(\).*', RegexRewriter.comment_out, 0)])
|
>>> test_js('window.location = "http://example.com/abc.html"; some_func(); ', [('some_func\(\).*', RegexRewriter.comment_out, 0)])
|
||||||
'window.WB_wombat_location = "/web/20131010im_/http://example.com/abc.html"; /*some_func(); */'
|
'window.WB_wombat_location = "/web/20131010im_/http://example.com/abc.html"; /*some_func(); */'
|
||||||
|
|
||||||
|
# scheme-agnostic
|
||||||
|
>>> test_js('cool_Location = "//example.com/abc.html" //comment')
|
||||||
|
'cool_Location = "/web/20131010im_///example.com/abc.html" //comment'
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
JS_HTTPX = r'(?<="|\')(?:https?:)?\\?/\\?/[A-Za-z0-9:_@.-]+'
|
||||||
|
|
||||||
def __init__(self, rewriter, extra = []):
|
def __init__(self, rewriter, extra = []):
|
||||||
rules = self._create_rules(rewriter.get_abs_url())
|
rules = self._create_rules(rewriter.get_abs_url())
|
||||||
rules.extend(extra)
|
rules.extend(extra)
|
||||||
@ -131,7 +145,7 @@ class JSRewriter(RegexRewriter):
|
|||||||
|
|
||||||
def _create_rules(self, http_prefix):
|
def _create_rules(self, http_prefix):
|
||||||
return [
|
return [
|
||||||
(r'(?<!/)\b' + RegexRewriter.HTTPX_MATCH_STR, http_prefix, 0),
|
(self.JS_HTTPX, http_prefix, 0),
|
||||||
(r'(?<!/)\blocation\b', 'WB_wombat_', 0),
|
(r'(?<!/)\blocation\b', 'WB_wombat_', 0),
|
||||||
(r'(?<=document\.)domain', 'WB_wombat_', 0),
|
(r'(?<=document\.)domain', 'WB_wombat_', 0),
|
||||||
]
|
]
|
||||||
|
@ -1,18 +1,19 @@
|
|||||||
|
|
||||||
#_wayback_banner
|
#_wayback_banner
|
||||||
{
|
{
|
||||||
display: block !important;
|
display: block !important;
|
||||||
top: 0px !important;
|
top: 0px !important;
|
||||||
left: 0px !important;
|
left: 0px !important;
|
||||||
font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif !important;
|
font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif !important;
|
||||||
position: inherit !important;
|
position: absolute !important;
|
||||||
padding: 4px !important;
|
padding: 4px !important;
|
||||||
width: 100% !important;
|
width: 100% !important;
|
||||||
font-size: 24px !important;
|
font-size: 24px !important;
|
||||||
border: 1px solid !important;
|
border: 1px solid !important;
|
||||||
background-color: lightYellow !important;
|
background-color: lightYellow !important;
|
||||||
color: black !important;
|
color: black !important;
|
||||||
text-align: center !important;
|
text-align: center !important;
|
||||||
z-index: 2147483643 !important;
|
z-index: 2147483643 !important;
|
||||||
|
line-height: normal !important;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3,6 +3,7 @@ import utils
|
|||||||
import wbrequestresponse
|
import wbrequestresponse
|
||||||
import wbexceptions
|
import wbexceptions
|
||||||
import time
|
import time
|
||||||
|
import urlparse
|
||||||
|
|
||||||
from os import path
|
from os import path
|
||||||
from itertools import imap
|
from itertools import imap
|
||||||
@ -38,6 +39,7 @@ class J2TemplateView:
|
|||||||
|
|
||||||
jinja_env = Environment(loader = loader, trim_blocks = True)
|
jinja_env = Environment(loader = loader, trim_blocks = True)
|
||||||
jinja_env.filters['format_ts'] = J2TemplateView.format_ts
|
jinja_env.filters['format_ts'] = J2TemplateView.format_ts
|
||||||
|
jinja_env.filters['host'] = J2TemplateView.get_host
|
||||||
return jinja_env
|
return jinja_env
|
||||||
|
|
||||||
def render_to_string(self, **kwargs):
|
def render_to_string(self, **kwargs):
|
||||||
@ -59,6 +61,9 @@ class J2TemplateView:
|
|||||||
value = utils.timestamp_to_datetime(value)
|
value = utils.timestamp_to_datetime(value)
|
||||||
return time.strftime(format, value)
|
return time.strftime(format, value)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_host(url):
|
||||||
|
return urlparse.urlsplit(url).netloc
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user