1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-24 06:59:52 +01:00

rewriting: support setting cookie_scope at collection level

js rewriting: add custom url rewrite option to per-url rewrite rules
This commit is contained in:
Ilya Kreymer 2014-10-06 10:14:45 -07:00
parent f1b3f8c76f
commit 498a864441
8 changed files with 49 additions and 26 deletions

View File

@ -61,7 +61,8 @@ class ArchivalRouter(object):
coll=coll, coll=coll,
use_abs_prefix=use_abs_prefix, use_abs_prefix=use_abs_prefix,
wburl_class=route.handler.get_wburl_type(), wburl_class=route.handler.get_wburl_type(),
urlrewriter_class=UrlRewriter) urlrewriter_class=UrlRewriter,
cookie_scope=route.cookie_scope)
# Allow for applying of additional filters # Allow for applying of additional filters
route.apply_filters(wbrequest, matcher) route.apply_filters(wbrequest, matcher)
@ -99,6 +100,7 @@ class Route(object):
self.request_class = request_class self.request_class = request_class
# collection id from regex group (default 0) # collection id from regex group (default 0)
self.coll_group = coll_group self.coll_group = coll_group
self.cookie_scope = config.get('cookie_scope')
self._custom_init(config) self._custom_init(config)
def is_handling(self, request_uri): def is_handling(self, request_uri):

View File

@ -37,7 +37,8 @@ class WbRequest(object):
use_abs_prefix=False, use_abs_prefix=False,
wburl_class=None, wburl_class=None,
urlrewriter_class=None, urlrewriter_class=None,
is_proxy=False): is_proxy=False,
cookie_scope=None):
self.env = env self.env = env
@ -69,10 +70,12 @@ class WbRequest(object):
# wb_url present and not root page # wb_url present and not root page
if wb_url_str != '/' and wburl_class: if wb_url_str != '/' and wburl_class:
self.wb_url = wburl_class(wb_url_str) self.wb_url = wburl_class(wb_url_str)
self.urlrewriter = urlrewriter_class(self.wb_url, self.wb_prefix, self.urlrewriter = urlrewriter_class(self.wb_url,
self.wb_prefix,
host_prefix + rel_prefix, host_prefix + rel_prefix,
rel_prefix, rel_prefix,
env.get('SCRIPT_NAME', '/')) env.get('SCRIPT_NAME', '/'),
cookie_scope)
else: else:
# no wb_url, just store blank wb_url # no wb_url, just store blank wb_url
self.wb_url = None self.wb_url = None

View File

@ -77,8 +77,8 @@ class RootScopeCookieRewriter(WbUrlBaseCookieRewriter):
#================================================================= #=================================================================
def get_cookie_rewriter(rule): def get_cookie_rewriter(cookie_scope):
if rule and rule.cookie_scope == 'root': if cookie_scope == 'root':
return RootScopeCookieRewriter return RootScopeCookieRewriter
else: else:
return MinimalScopeCookieRewriter return MinimalScopeCookieRewriter

View File

@ -90,13 +90,19 @@ class RegexRewriter(object):
@staticmethod @staticmethod
def parse_rules_from_config(config): def parse_rules_from_config(config):
def parse_rule(obj): def run_parse_rules(rewriter):
match = obj.get('match') def parse_rule(obj):
replace = RegexRewriter.format(obj.get('replace', '{0}')) match = obj.get('match')
group = obj.get('group', 0) if 'rewrite' in obj:
result = (match, replace, group) replace = RegexRewriter.archival_rewrite(rewriter)
return result else:
return map(parse_rule, config) replace = RegexRewriter.format(obj.get('replace', '{0}'))
group = obj.get('group', 0)
result = (match, replace, group)
return result
return map(parse_rule, config)
return run_parse_rules
#================================================================= #=================================================================

View File

@ -52,11 +52,11 @@ class RewriteRules(BaseRule):
rewriter_cls = self.rewriters[field] rewriter_cls = self.rewriters[field]
rule_def_tuples = RegexRewriter.parse_rules_from_config(regexs) #rule_def_tuples = RegexRewriter.parse_rules_from_config(regexs)
parse_rules_func = RegexRewriter.parse_rules_from_config(regexs)
def extend_rewriter_with_regex(urlrewriter): def extend_rewriter_with_regex(urlrewriter):
#import sys rule_def_tuples = parse_rules_func(urlrewriter)
#sys.stderr.write('\n\nEXTEND: ' + str(rule_def_tuples))
return rewriter_cls(urlrewriter, rule_def_tuples) return rewriter_cls(urlrewriter, rule_def_tuples)
self.rewriters[field] = extend_rewriter_with_regex self.rewriters[field] = extend_rewriter_with_regex

View File

@ -18,12 +18,14 @@ class UrlRewriter(object):
PROTOCOLS = ['http:', 'https:', 'ftp:', 'mms:', 'rtsp:', 'wais:'] PROTOCOLS = ['http:', 'https:', 'ftp:', 'mms:', 'rtsp:', 'wais:']
def __init__(self, wburl, prefix, full_prefix=None, rel_prefix=None, root_path=None): def __init__(self, wburl, prefix, full_prefix=None, rel_prefix=None, root_path=None,
cookie_scope=None):
self.wburl = wburl if isinstance(wburl, WbUrl) else WbUrl(wburl) self.wburl = wburl if isinstance(wburl, WbUrl) else WbUrl(wburl)
self.prefix = prefix self.prefix = prefix
self.full_prefix = full_prefix self.full_prefix = full_prefix
self.rel_prefix = rel_prefix if rel_prefix else prefix self.rel_prefix = rel_prefix if rel_prefix else prefix
self.root_path = root_path if root_path else '/' self.root_path = root_path if root_path else '/'
self.cookie_scope = cookie_scope
def rewrite(self, url, mod=None): def rewrite(self, url, mod=None):
# if special protocol, no rewriting at all # if special protocol, no rewriting at all
@ -84,8 +86,12 @@ class UrlRewriter(object):
new_wburl = WbUrl(new_url) new_wburl = WbUrl(new_url)
return UrlRewriter(new_wburl, self.prefix) return UrlRewriter(new_wburl, self.prefix)
def get_cookie_rewriter(self, rule=None): def get_cookie_rewriter(self, scope=None):
cls = get_cookie_rewriter(rule) # collection scope overrides rule scope?
if self.cookie_scope:
scope = self.cookie_scope
cls = get_cookie_rewriter(scope)
return cls(self) return cls(self)
def __repr__(self): def __repr__(self):
@ -151,5 +157,5 @@ class HttpsUrlRewriter(UrlRewriter):
def rebase_rewriter(self, new_url): def rebase_rewriter(self, new_url):
return self return self
def get_cookie_rewriter(self, rule=None): def get_cookie_rewriter(self, scope=None):
return None return None

View File

@ -45,14 +45,20 @@ rules:
parse_comments: true parse_comments: true
- url_prefix: 'com,facebook'
# instagram rules
#=================================================================
- url_prefix: 'com,instagram'
rewrite: rewrite:
cookie_scope: root cookie_scope: root
# instagram rules
#=================================================================
- url_prefix: 'net,cloudfront,'
rewrite:
js_regexs:
- match: '\burl\((//[^)]+)\)'
rewrite: true
group: 1
# flickr rules # flickr rules
#================================================================= #=================================================================
- url_prefix: ['com,yimg,l)/g/combo', 'com,yimg,s)/pw/combo', 'com,yahooapis,yui)/combo'] - url_prefix: ['com,yimg,l)/g/combo', 'com,yimg,s)/pw/combo', 'com,yahooapis,yui)/combo']

View File

@ -179,7 +179,7 @@ def create_wb_router(passed_config={}):
for name, value in collections.iteritems(): for name, value in collections.iteritems():
if isinstance(value, BaseHandler): if isinstance(value, BaseHandler):
handler_dict[name] = value handler_dict[name] = value
routes.append(Route(name, value)) routes.append(Route(name, value, config=route_config))
continue continue
route_config = init_route_config(value, config) route_config = init_route_config(value, config)
@ -187,7 +187,7 @@ def create_wb_router(passed_config={}):
if route_config.get('index_paths') == '$liveweb': if route_config.get('index_paths') == '$liveweb':
live = create_live_handler(route_config) live = create_live_handler(route_config)
handler_dict[name] = live handler_dict[name] = live
routes.append(Route(name, live)) routes.append(Route(name, live, config=route_config))
continue continue
query_handler = init_collection(route_config) query_handler = init_collection(route_config)