1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

cookie rewriter: use relative path for cookie path rewriting, pass relative path to urlrewriter
rules: add more rules
This commit is contained in:
Ilya Kreymer 2014-09-21 13:23:19 -07:00
parent 9be7074183
commit 7ac98fbfe2
4 changed files with 18 additions and 8 deletions

View File

@ -70,7 +70,8 @@ class WbRequest(object):
if wb_url_str != '/' and wburl_class:
self.wb_url = wburl_class(wb_url_str)
self.urlrewriter = urlrewriter_class(self.wb_url, self.wb_prefix,
host_prefix + rel_prefix)
host_prefix + rel_prefix,
rel_prefix)
else:
# no wb_url, just store blank wb_url
self.wb_url = None

View File

@ -22,7 +22,7 @@ class WbUrlCookieRewriter(object):
# if domain set, no choice but to expand cookie path to root
if morsel.get('domain'):
del morsel['domain']
morsel['path'] = self.url_rewriter.prefix
morsel['path'] = self.url_rewriter.rel_prefix
# else set cookie to rewritten path
elif morsel.get('path'):
morsel['path'] = self.url_rewriter.rewrite(morsel['path'])

View File

@ -18,13 +18,11 @@ class UrlRewriter(object):
PROTOCOLS = ['http:', 'https:', 'ftp:', 'mms:', 'rtsp:', 'wais:']
def __init__(self, wburl, prefix, full_prefix=None):
def __init__(self, wburl, prefix, full_prefix=None, rel_prefix=None):
self.wburl = wburl if isinstance(wburl, WbUrl) else WbUrl(wburl)
self.prefix = prefix
self.full_prefix = full_prefix
#if self.prefix.endswith('/'):
# self.prefix = self.prefix[:-1]
self.rel_prefix = rel_prefix if rel_prefix else prefix
def rewrite(self, url, mod=None):
# if special protocol, no rewriting at all

View File

@ -10,7 +10,6 @@ rules:
# facebook rules
#=================================================================
# - url_prefix: 'com,facebook)/ajax/pagelet/generic.php/profiletimelinesectionpagelet'
- url_prefix: 'com,facebook)/ajax/pagelet/generic.php/'
fuzzy_lookup: 'com,facebook\)/.*[?&]data=(.*?(?:[&]|query_type[^,]+))'
@ -19,7 +18,19 @@ rules:
fuzzy_lookup: '(ft_ent_identifier=[^&]+).*(lsd=[^&]+)'
# not actually needed, fuzzy match is used instead here
- url_prefix: 'com,facebook)/ajax/chat/hovercard/sidebar.php'
fuzzy_lookup: '(ids\[0\]=[^&]+)'
- url_prefix: 'com,facebook)/ajax/'
fuzzy_lookup: '([?&][^_]\w+=[^&]+)+'
- url_prefix: 'com,facebook)/login.php'
fuzzy_lookup: '(email=[^&]+).*(lgnrnd=[^&]+).*(lsd=[^&]+)'
# not actually needed, fuzzy match is used instead here
# canonicalize:
# match: 'com,facebook\)/.*[?&]data=([^&]+).*'
# replace: 'com,facebook)/ajax/pagelet/generic.php/profiletimelinesectionpagelet?data=\1'