1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-24 06:59:52 +01:00

cookie rewriter: use relative path for cookie path rewriting, pass relative path to urlrewriter
rules: add more facebook fuzzy-match rules (ajax/chat/hovercard/sidebar.php, ajax/, login.php)
This commit is contained in:
Ilya Kreymer 2014-09-21 13:23:19 -07:00
parent 9be7074183
commit 7ac98fbfe2
4 changed files with 18 additions and 8 deletions

View File

@ -70,7 +70,8 @@ class WbRequest(object):
if wb_url_str != '/' and wburl_class: if wb_url_str != '/' and wburl_class:
self.wb_url = wburl_class(wb_url_str) self.wb_url = wburl_class(wb_url_str)
self.urlrewriter = urlrewriter_class(self.wb_url, self.wb_prefix, self.urlrewriter = urlrewriter_class(self.wb_url, self.wb_prefix,
host_prefix + rel_prefix) host_prefix + rel_prefix,
rel_prefix)
else: else:
# no wb_url, just store blank wb_url # no wb_url, just store blank wb_url
self.wb_url = None self.wb_url = None

View File

@ -22,7 +22,7 @@ class WbUrlCookieRewriter(object):
# if domain set, no choice but to expand cookie path to root # if domain set, no choice but to expand cookie path to root
if morsel.get('domain'): if morsel.get('domain'):
del morsel['domain'] del morsel['domain']
morsel['path'] = self.url_rewriter.prefix morsel['path'] = self.url_rewriter.rel_prefix
# else set cookie to rewritten path # else set cookie to rewritten path
elif morsel.get('path'): elif morsel.get('path'):
morsel['path'] = self.url_rewriter.rewrite(morsel['path']) morsel['path'] = self.url_rewriter.rewrite(morsel['path'])

View File

@ -18,13 +18,11 @@ class UrlRewriter(object):
PROTOCOLS = ['http:', 'https:', 'ftp:', 'mms:', 'rtsp:', 'wais:'] PROTOCOLS = ['http:', 'https:', 'ftp:', 'mms:', 'rtsp:', 'wais:']
def __init__(self, wburl, prefix, full_prefix=None): def __init__(self, wburl, prefix, full_prefix=None, rel_prefix=None):
self.wburl = wburl if isinstance(wburl, WbUrl) else WbUrl(wburl) self.wburl = wburl if isinstance(wburl, WbUrl) else WbUrl(wburl)
self.prefix = prefix self.prefix = prefix
self.full_prefix = full_prefix self.full_prefix = full_prefix
self.rel_prefix = rel_prefix if rel_prefix else prefix
#if self.prefix.endswith('/'):
# self.prefix = self.prefix[:-1]
def rewrite(self, url, mod=None): def rewrite(self, url, mod=None):
# if special protocol, no rewriting at all # if special protocol, no rewriting at all

View File

@ -10,7 +10,6 @@ rules:
# facebook rules # facebook rules
#================================================================= #=================================================================
# - url_prefix: 'com,facebook)/ajax/pagelet/generic.php/profiletimelinesectionpagelet'
- url_prefix: 'com,facebook)/ajax/pagelet/generic.php/' - url_prefix: 'com,facebook)/ajax/pagelet/generic.php/'
fuzzy_lookup: 'com,facebook\)/.*[?&]data=(.*?(?:[&]|query_type[^,]+))' fuzzy_lookup: 'com,facebook\)/.*[?&]data=(.*?(?:[&]|query_type[^,]+))'
@ -19,7 +18,19 @@ rules:
fuzzy_lookup: '(ft_ent_identifier=[^&]+).*(lsd=[^&]+)' fuzzy_lookup: '(ft_ent_identifier=[^&]+).*(lsd=[^&]+)'
# not actually needed, fuzzy match is used instead here - url_prefix: 'com,facebook)/ajax/chat/hovercard/sidebar.php'
fuzzy_lookup: '(ids\[0\]=[^&]+)'
- url_prefix: 'com,facebook)/ajax/'
fuzzy_lookup: '([?&][^_]\w+=[^&]+)+'
- url_prefix: 'com,facebook)/login.php'
fuzzy_lookup: '(email=[^&]+).*(lgnrnd=[^&]+).*(lsd=[^&]+)'
# not actually needed, fuzzy match is used instead here
# canonicalize: # canonicalize:
# match: 'com,facebook\)/.*[?&]data=([^&]+).*' # match: 'com,facebook\)/.*[?&]data=([^&]+).*'
# replace: 'com,facebook)/ajax/pagelet/generic.php/profiletimelinesectionpagelet?data=\1' # replace: 'com,facebook)/ajax/pagelet/generic.php/profiletimelinesectionpagelet?data=\1'