From 7ac98fbfe241a4532f881aa17ee5ebda00581907 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Sun, 21 Sep 2014 13:23:19 -0700 Subject: [PATCH] cookie rewriter: use relative path for cookie path rewriting, pass relative path to urlrewriter rules: add more rules --- pywb/framework/wbrequestresponse.py | 3 ++- pywb/rewrite/cookie_rewriter.py | 2 +- pywb/rewrite/url_rewriter.py | 6 ++---- pywb/rules.yaml | 15 +++++++++++++-- 4 files changed, 18 insertions(+), 8 deletions(-) diff --git a/pywb/framework/wbrequestresponse.py b/pywb/framework/wbrequestresponse.py index 7c8f6578..17dea3d8 100644 --- a/pywb/framework/wbrequestresponse.py +++ b/pywb/framework/wbrequestresponse.py @@ -70,7 +70,8 @@ class WbRequest(object): if wb_url_str != '/' and wburl_class: self.wb_url = wburl_class(wb_url_str) self.urlrewriter = urlrewriter_class(self.wb_url, self.wb_prefix, - host_prefix + rel_prefix) + host_prefix + rel_prefix, + rel_prefix) else: # no wb_url, just store blank wb_url self.wb_url = None diff --git a/pywb/rewrite/cookie_rewriter.py b/pywb/rewrite/cookie_rewriter.py index 26786245..f4736b91 100644 --- a/pywb/rewrite/cookie_rewriter.py +++ b/pywb/rewrite/cookie_rewriter.py @@ -22,7 +22,7 @@ class WbUrlCookieRewriter(object): # if domain set, no choice but to expand cookie path to root if morsel.get('domain'): del morsel['domain'] - morsel['path'] = self.url_rewriter.prefix + morsel['path'] = self.url_rewriter.rel_prefix # else set cookie to rewritten path elif morsel.get('path'): morsel['path'] = self.url_rewriter.rewrite(morsel['path']) diff --git a/pywb/rewrite/url_rewriter.py b/pywb/rewrite/url_rewriter.py index c89e9a21..c6bd6f9c 100644 --- a/pywb/rewrite/url_rewriter.py +++ b/pywb/rewrite/url_rewriter.py @@ -18,13 +18,11 @@ class UrlRewriter(object): PROTOCOLS = ['http:', 'https:', 'ftp:', 'mms:', 'rtsp:', 'wais:'] - def __init__(self, wburl, prefix, full_prefix=None): + def __init__(self, wburl, prefix, full_prefix=None, rel_prefix=None): self.wburl = wburl if isinstance(wburl, WbUrl) else WbUrl(wburl) self.prefix = prefix self.full_prefix = full_prefix - - #if self.prefix.endswith('/'): - # self.prefix = self.prefix[:-1] + self.rel_prefix = rel_prefix if rel_prefix else prefix def rewrite(self, url, mod=None): # if special protocol, no rewriting at all diff --git a/pywb/rules.yaml b/pywb/rules.yaml index 4e6ac514..76831688 100644 --- a/pywb/rules.yaml +++ b/pywb/rules.yaml @@ -10,7 +10,6 @@ rules: # facebook rules #================================================================= -# - url_prefix: 'com,facebook)/ajax/pagelet/generic.php/profiletimelinesectionpagelet' - url_prefix: 'com,facebook)/ajax/pagelet/generic.php/' fuzzy_lookup: 'com,facebook\)/.*[?&]data=(.*?(?:[&]|query_type[^,]+))' @@ -19,7 +18,19 @@ rules: fuzzy_lookup: '(ft_ent_identifier=[^&]+).*(lsd=[^&]+)' -# not actually needed, fuzzy match is used instead here + - url_prefix: 'com,facebook)/ajax/chat/hovercard/sidebar.php' + + fuzzy_lookup: '(ids\[0\]=[^&]+)' + + - url_prefix: 'com,facebook)/ajax/' + + fuzzy_lookup: '([?&][^_]\w+=[^&]+)+' + + - url_prefix: 'com,facebook)/login.php' + + fuzzy_lookup: '(email=[^&]+).*(lgnrnd=[^&]+).*(lsd=[^&]+)' + + # not actually needed, fuzzy match is used instead here # canonicalize: # match: 'com,facebook\)/.*[?&]data=([^&]+).*' # replace: 'com,facebook)/ajax/pagelet/generic.php/profiletimelinesectionpagelet?data=\1'