1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

rewriting: support setting cookie_scope at collection level

js rewriting: add custom url rewrite option to per-url rewrite rules
This commit is contained in:
Ilya Kreymer 2014-10-06 10:14:45 -07:00
parent f1b3f8c76f
commit 498a864441
8 changed files with 49 additions and 26 deletions

View File

@ -61,7 +61,8 @@ class ArchivalRouter(object):
coll=coll,
use_abs_prefix=use_abs_prefix,
wburl_class=route.handler.get_wburl_type(),
urlrewriter_class=UrlRewriter)
urlrewriter_class=UrlRewriter,
cookie_scope=route.cookie_scope)
# Allow for applying of additional filters
route.apply_filters(wbrequest, matcher)
@ -99,6 +100,7 @@ class Route(object):
self.request_class = request_class
# collection id from regex group (default 0)
self.coll_group = coll_group
self.cookie_scope = config.get('cookie_scope')
self._custom_init(config)
def is_handling(self, request_uri):

View File

@ -37,7 +37,8 @@ class WbRequest(object):
use_abs_prefix=False,
wburl_class=None,
urlrewriter_class=None,
is_proxy=False):
is_proxy=False,
cookie_scope=None):
self.env = env
@ -69,10 +70,12 @@ class WbRequest(object):
# wb_url present and not root page
if wb_url_str != '/' and wburl_class:
self.wb_url = wburl_class(wb_url_str)
self.urlrewriter = urlrewriter_class(self.wb_url, self.wb_prefix,
self.urlrewriter = urlrewriter_class(self.wb_url,
self.wb_prefix,
host_prefix + rel_prefix,
rel_prefix,
env.get('SCRIPT_NAME', '/'))
env.get('SCRIPT_NAME', '/'),
cookie_scope)
else:
# no wb_url, just store blank wb_url
self.wb_url = None

View File

@ -77,8 +77,8 @@ class RootScopeCookieRewriter(WbUrlBaseCookieRewriter):
#=================================================================
def get_cookie_rewriter(rule):
if rule and rule.cookie_scope == 'root':
def get_cookie_rewriter(cookie_scope):
if cookie_scope == 'root':
return RootScopeCookieRewriter
else:
return MinimalScopeCookieRewriter

View File

@ -90,13 +90,19 @@ class RegexRewriter(object):
@staticmethod
def parse_rules_from_config(config):
def parse_rule(obj):
match = obj.get('match')
replace = RegexRewriter.format(obj.get('replace', '{0}'))
group = obj.get('group', 0)
result = (match, replace, group)
return result
return map(parse_rule, config)
def run_parse_rules(rewriter):
def parse_rule(obj):
match = obj.get('match')
if 'rewrite' in obj:
replace = RegexRewriter.archival_rewrite(rewriter)
else:
replace = RegexRewriter.format(obj.get('replace', '{0}'))
group = obj.get('group', 0)
result = (match, replace, group)
return result
return map(parse_rule, config)
return run_parse_rules
#=================================================================

View File

@ -52,11 +52,11 @@ class RewriteRules(BaseRule):
rewriter_cls = self.rewriters[field]
rule_def_tuples = RegexRewriter.parse_rules_from_config(regexs)
#rule_def_tuples = RegexRewriter.parse_rules_from_config(regexs)
parse_rules_func = RegexRewriter.parse_rules_from_config(regexs)
def extend_rewriter_with_regex(urlrewriter):
#import sys
#sys.stderr.write('\n\nEXTEND: ' + str(rule_def_tuples))
rule_def_tuples = parse_rules_func(urlrewriter)
return rewriter_cls(urlrewriter, rule_def_tuples)
self.rewriters[field] = extend_rewriter_with_regex

View File

@ -18,12 +18,14 @@ class UrlRewriter(object):
PROTOCOLS = ['http:', 'https:', 'ftp:', 'mms:', 'rtsp:', 'wais:']
def __init__(self, wburl, prefix, full_prefix=None, rel_prefix=None, root_path=None):
def __init__(self, wburl, prefix, full_prefix=None, rel_prefix=None, root_path=None,
cookie_scope=None):
self.wburl = wburl if isinstance(wburl, WbUrl) else WbUrl(wburl)
self.prefix = prefix
self.full_prefix = full_prefix
self.rel_prefix = rel_prefix if rel_prefix else prefix
self.root_path = root_path if root_path else '/'
self.cookie_scope = cookie_scope
def rewrite(self, url, mod=None):
# if special protocol, no rewriting at all
@ -84,8 +86,12 @@ class UrlRewriter(object):
new_wburl = WbUrl(new_url)
return UrlRewriter(new_wburl, self.prefix)
def get_cookie_rewriter(self, rule=None):
cls = get_cookie_rewriter(rule)
def get_cookie_rewriter(self, scope=None):
# collection scope overrides rule scope?
if self.cookie_scope:
scope = self.cookie_scope
cls = get_cookie_rewriter(scope)
return cls(self)
def __repr__(self):
@ -151,5 +157,5 @@ class HttpsUrlRewriter(UrlRewriter):
def rebase_rewriter(self, new_url):
return self
def get_cookie_rewriter(self, rule=None):
def get_cookie_rewriter(self, scope=None):
return None

View File

@ -45,14 +45,20 @@ rules:
parse_comments: true
# instagram rules
#=================================================================
- url_prefix: 'com,instagram'
- url_prefix: 'com,facebook'
rewrite:
cookie_scope: root
# instagram rules
#=================================================================
- url_prefix: 'net,cloudfront,'
rewrite:
js_regexs:
- match: '\burl\((//[^)]+)\)'
rewrite: true
group: 1
# flickr rules
#=================================================================
- url_prefix: ['com,yimg,l)/g/combo', 'com,yimg,s)/pw/combo', 'com,yahooapis,yui)/combo']

View File

@ -179,7 +179,7 @@ def create_wb_router(passed_config={}):
for name, value in collections.iteritems():
if isinstance(value, BaseHandler):
handler_dict[name] = value
routes.append(Route(name, value))
routes.append(Route(name, value, config=route_config))
continue
route_config = init_route_config(value, config)
@ -187,7 +187,7 @@ def create_wb_router(passed_config={}):
if route_config.get('index_paths') == '$liveweb':
live = create_live_handler(route_config)
handler_dict[name] = live
routes.append(Route(name, live))
routes.append(Route(name, live, config=route_config))
continue
query_handler = init_collection(route_config)