mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 08:04:49 +01:00
Merge branch 'develop' into video
This commit is contained in:
commit
d3ef47342c
@ -55,6 +55,24 @@ class MinimalScopeCookieRewriter(WbUrlBaseCookieRewriter):
|
||||
return morsel
|
||||
|
||||
|
||||
#=================================================================
|
||||
class ExactPathCookieRewriter(WbUrlBaseCookieRewriter):
    """
    Cookie rewriter that strips explicit scope instead of rewriting it.

    Meant for live rewriting without a timestamp, where keeping cookie
    pollution to a minimum matters: a ``Domain`` or ``Path`` attribute
    is never translated, it is simply removed when present.
    """

    def rewrite_cookie(self, name, morsel):
        """
        Strip scope attributes from ``morsel`` and return it.

        A non-empty ``domain`` and a non-empty ``path`` entry are each
        deleted from the morsel in place. The morsel is then passed to
        ``self._remove_age_opts`` (provided by the base class, not
        visible here) before being returned. ``name`` is not used.
        """
        # Same order as before: domain first, then path.
        for scope_attr in ('domain', 'path'):
            if morsel.get(scope_attr):
                del morsel[scope_attr]

        self._remove_age_opts(morsel)
        return morsel
|
||||
#=================================================================
|
||||
class RootScopeCookieRewriter(WbUrlBaseCookieRewriter):
|
||||
"""
|
||||
@ -79,5 +97,7 @@ class RootScopeCookieRewriter(WbUrlBaseCookieRewriter):
|
||||
def get_cookie_rewriter(cookie_scope):
    """
    Pick the cookie rewriter class for the given scope name.

    ``'root'`` selects ``RootScopeCookieRewriter`` and ``'exact'``
    selects ``ExactPathCookieRewriter``; any other value (including
    ``None``) falls back to ``MinimalScopeCookieRewriter``.

    The class itself is returned, not an instance.
    """
    if cookie_scope == 'root':
        return RootScopeCookieRewriter

    if cookie_scope == 'exact':
        return ExactPathCookieRewriter

    # Unrecognized or missing scope: use the minimal-scope default.
    return MinimalScopeCookieRewriter
|
||||
|
@ -185,6 +185,12 @@ class HTMLRewriterMixin(object):
|
||||
elif attr_name == 'crossorigin':
|
||||
attr_name = '_crossorigin'
|
||||
|
||||
# special case: rewrite <link href> unless the link is rel=canonical
|
||||
elif tag == 'link' and attr_name == 'href':
|
||||
if not self.has_attr(tag_attrs, ('rel', 'canonical')):
|
||||
rw_mod = handler.get(attr_name)
|
||||
attr_value = self._rewrite_url(attr_value, rw_mod)
|
||||
|
||||
# special case: meta tag
|
||||
elif (tag == 'meta') and (attr_name == 'content'):
|
||||
if self.has_attr(tag_attrs, ('http-equiv', 'refresh')):
|
||||
|
@ -1,4 +1,5 @@
|
||||
r"""
|
||||
# Default -- MinimalScopeRewriter
|
||||
# No rewriting
|
||||
>>> rewrite_cookie('a=b; c=d;')
|
||||
[('Set-Cookie', 'a=b'), ('Set-Cookie', 'c=d')]
|
||||
@ -23,10 +24,17 @@ r"""
|
||||
>>> rewrite_cookie('abc@def=123')
|
||||
[]
|
||||
|
||||
# ExactPathCookieRewriter
|
||||
>>> rewrite_cookie('some=value; Path=/diff/path/;', urlrewriter, ExactPathCookieRewriter)
|
||||
[('Set-Cookie', 'some=value')]
|
||||
|
||||
>>> rewrite_cookie('some=value; Domain=.example.com; Path=/diff/path/; Max-Age=1500', urlrewriter, ExactPathCookieRewriter)
|
||||
[('Set-Cookie', 'some=value')]
|
||||
|
||||
"""
|
||||
|
||||
|
||||
from pywb.rewrite.cookie_rewriter import MinimalScopeCookieRewriter
|
||||
from pywb.rewrite.cookie_rewriter import MinimalScopeCookieRewriter, ExactPathCookieRewriter
|
||||
from pywb.rewrite.url_rewriter import UrlRewriter
|
||||
|
||||
urlrewriter = UrlRewriter('20131226101010/http://example.com/some/path/index.html', '/pywb/')
|
||||
@ -34,6 +42,6 @@ urlrewriter = UrlRewriter('20131226101010/http://example.com/some/path/index.htm
|
||||
urlrewriter2 = UrlRewriter('em_/http://example.com/', '/preview/')
|
||||
|
||||
|
||||
def rewrite_cookie(cookie_str, rewriter=urlrewriter, cookie_rewriter=MinimalScopeCookieRewriter):
    """
    Doctest helper: rewrite a cookie header string with a chosen rewriter.

    An earlier two-argument definition of this helper was immediately
    shadowed by this one; only this form is kept. Existing two-argument
    calls behave as before because ``cookie_rewriter`` defaults to
    ``MinimalScopeCookieRewriter``.

    :param cookie_str: raw cookie string to rewrite
    :param rewriter: url rewriter handed to the cookie rewriter's
        constructor (defaults to the module-level ``urlrewriter``)
    :param cookie_rewriter: cookie rewriter class to instantiate
    :return: whatever ``cookie_rewriter(rewriter).rewrite(cookie_str)``
        returns (the doctests in this module show a list of
        ``('Set-Cookie', value)`` tuples)
    """
    return cookie_rewriter(rewriter).rewrite(cookie_str)
|
||||
|
||||
|
@ -106,6 +106,10 @@ ur"""
|
||||
>>> parse('<link href="abc.txt"><div>SomeTest</div>', head_insert = '<script>load_stuff();</script>')
|
||||
<link href="/web/20131226101010oe_/http://example.com/some/path/abc.txt"><script>load_stuff();</script><div>SomeTest</div>
|
||||
|
||||
# don't rewrite rel=canonical
|
||||
>>> parse('<link rel=canonical href="http://example.com/">')
|
||||
<link rel="canonical" href="http://example.com/">
|
||||
|
||||
# doctype
|
||||
>>> parse('<!doctype html PUBLIC "public">')
|
||||
<!doctype html PUBLIC "public">
|
||||
|
@ -45,6 +45,16 @@ r"""
|
||||
>>> _test_js('document_domain = "anotherdomain.com"; window.document.domain = "example.com"')
|
||||
'document_domain = "anotherdomain.com"; window.document.WB_wombat_domain = "example.com"'
|
||||
|
||||
# protocol-rel escapes
|
||||
>>> _test_js('"//example.com/"')
|
||||
'"/web/20131010/http://example.com/"'
|
||||
|
||||
>>> _test_js(r'"\/\/example.com/"')
|
||||
'"/web/20131010/http:\\/\\/example.com/"'
|
||||
|
||||
>>> _test_js(r'"\\/\\/example.com/"')
|
||||
'"/web/20131010/http:\\\\/\\\\/example.com/"'
|
||||
|
||||
# custom rules added
|
||||
>>> _test_js('window.location = "http://example.com/abc.html"; some_func(); ', [('some_func\(\).*', RegexRewriter.format('/*{0}*/'), 0)])
|
||||
'window.WB_wombat_location = "/web/20131010/http://example.com/abc.html"; /*some_func(); */'
|
||||
|
@ -50,6 +50,21 @@
|
||||
>>> do_rewrite(r'http:\/\/some-other-site.com', '20101226101112/http://example.com/index.html', 'localhost:8080/')
|
||||
'localhost:8080/20101226101112/http:\\\\/\\\\/some-other-site.com'
|
||||
|
||||
>>> do_rewrite(r'//some-other-site.com', '20101226101112/http://example.com/index.html', 'localhost:8080/')
|
||||
'localhost:8080/20101226101112/http://some-other-site.com'
|
||||
|
||||
>>> do_rewrite(r'\/\/some-other-site.com', '20101226101112/http://example.com/index.html', 'localhost:8080/')
|
||||
'localhost:8080/20101226101112/http:\\\\/\\\\/some-other-site.com'
|
||||
|
||||
>>> do_rewrite(r'\\/\\/some-other-site.com', '20101226101112/http://example.com/index.html', 'localhost:8080/')
|
||||
'localhost:8080/20101226101112/http:\\\\/\\\\/some-other-site.com'
|
||||
|
||||
>>> do_rewrite(r'http:\/\/some-other-site.com', '20101226101112/http://example.com/index.html', 'localhost:8080/')
|
||||
'localhost:8080/20101226101112/http:\\\\/\\\\/some-other-site.com'
|
||||
|
||||
>>> do_rewrite(r'http:\/\/some-other-site.com', '20101226101112/http://example.com/index.html', 'localhost:8080/')
|
||||
'localhost:8080/20101226101112/http:\\\\/\\\\/some-other-site.com'
|
||||
|
||||
>>> do_rewrite('../../other.html', '2020/http://example.com/index.html', '/')
|
||||
'/2020/http://example.com/other.html'
|
||||
|
||||
|
@ -17,7 +17,9 @@ class UrlRewriter(object):
|
||||
|
||||
PROTOCOLS = ['http:', 'https:', 'ftp:', 'mms:', 'rtsp:', 'wais:']
|
||||
|
||||
def __init__(self, wburl, prefix, full_prefix=None, rel_prefix=None,
|
||||
REL_SCHEME = ('//', r'\/\/', r'\\/\\/')
|
||||
|
||||
def __init__(self, wburl, prefix, full_prefix=None, rel_prefix=None,
|
||||
root_path=None, cookie_scope=None):
|
||||
self.wburl = wburl if isinstance(wburl, WbUrl) else WbUrl(wburl)
|
||||
self.prefix = prefix
|
||||
@ -45,7 +47,7 @@ class UrlRewriter(object):
|
||||
|
||||
is_abs = any(url.startswith(x) for x in self.PROTOCOLS)
|
||||
|
||||
if url.startswith('//'):
|
||||
if url.startswith(self.REL_SCHEME):
|
||||
is_abs = True
|
||||
url = 'http:' + url
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user