diff --git a/pywb/rewrite/cookie_rewriter.py b/pywb/rewrite/cookie_rewriter.py index 4724df4c..e9dd80ac 100644 --- a/pywb/rewrite/cookie_rewriter.py +++ b/pywb/rewrite/cookie_rewriter.py @@ -55,6 +55,24 @@ class MinimalScopeCookieRewriter(WbUrlBaseCookieRewriter): return morsel +#================================================================= +class ExactPathCookieRewriter(WbUrlBaseCookieRewriter): + """ + Rewrite cookies only using exact path, useful for live rewrite + without a timestamp and to minimize cookie pollution + + If path or domain present, simply remove + """ + + def rewrite_cookie(self, name, morsel): + if morsel.get('domain'): + del morsel['domain'] + # likewise remove any explicit path + if morsel.get('path'): + del morsel['path'] + + self._remove_age_opts(morsel) + return morsel #================================================================= class RootScopeCookieRewriter(WbUrlBaseCookieRewriter): """ @@ -79,5 +97,7 @@ class RootScopeCookieRewriter(WbUrlBaseCookieRewriter): def get_cookie_rewriter(cookie_scope): if cookie_scope == 'root': return RootScopeCookieRewriter + elif cookie_scope == 'exact': + return ExactPathCookieRewriter else: return MinimalScopeCookieRewriter diff --git a/pywb/rewrite/html_rewriter.py b/pywb/rewrite/html_rewriter.py index b3668521..7acc0f3e 100644 --- a/pywb/rewrite/html_rewriter.py +++ b/pywb/rewrite/html_rewriter.py @@ -185,6 +185,12 @@ class HTMLRewriterMixin(object): elif attr_name == 'crossorigin': attr_name = '_crossorigin' + # special case: don't rewrite the href of a canonical link + elif tag == 'link' and attr_name == 'href': + if not self.has_attr(tag_attrs, ('rel', 'canonical')): + rw_mod = handler.get(attr_name) + attr_value = self._rewrite_url(attr_value, rw_mod) + # special case: meta tag elif (tag == 'meta') and (attr_name == 'content'): if self.has_attr(tag_attrs, ('http-equiv', 'refresh')): diff --git a/pywb/rewrite/test/test_cookie_rewriter.py b/pywb/rewrite/test/test_cookie_rewriter.py index c20f56f9..4f57464f 
100644 --- a/pywb/rewrite/test/test_cookie_rewriter.py +++ b/pywb/rewrite/test/test_cookie_rewriter.py @@ -1,4 +1,5 @@ r""" +# Default -- MinimalScopeCookieRewriter # No rewriting >>> rewrite_cookie('a=b; c=d;') [('Set-Cookie', 'a=b'), ('Set-Cookie', 'c=d')] @@ -23,10 +24,17 @@ r""" >>> rewrite_cookie('abc@def=123') [] +# ExactPathCookieRewriter +>>> rewrite_cookie('some=value; Path=/diff/path/;', urlrewriter, ExactPathCookieRewriter) +[('Set-Cookie', 'some=value')] + +>>> rewrite_cookie('some=value; Domain=.example.com; Path=/diff/path/; Max-Age=1500', urlrewriter, ExactPathCookieRewriter) +[('Set-Cookie', 'some=value')] + """ -from pywb.rewrite.cookie_rewriter import MinimalScopeCookieRewriter +from pywb.rewrite.cookie_rewriter import MinimalScopeCookieRewriter, ExactPathCookieRewriter from pywb.rewrite.url_rewriter import UrlRewriter urlrewriter = UrlRewriter('20131226101010/http://example.com/some/path/index.html', '/pywb/') @@ -34,6 +42,6 @@ urlrewriter = UrlRewriter('20131226101010/http://example.com/some/path/index.htm urlrewriter2 = UrlRewriter('em_/http://example.com/', '/preview/') -def rewrite_cookie(cookie_str, rewriter=urlrewriter): - return MinimalScopeCookieRewriter(rewriter).rewrite(cookie_str) +def rewrite_cookie(cookie_str, rewriter=urlrewriter, cookie_rewriter=MinimalScopeCookieRewriter): + return cookie_rewriter(rewriter).rewrite(cookie_str) diff --git a/pywb/rewrite/test/test_html_rewriter.py b/pywb/rewrite/test/test_html_rewriter.py index a7d9337b..46c1773c 100644 --- a/pywb/rewrite/test/test_html_rewriter.py +++ b/pywb/rewrite/test/test_html_rewriter.py @@ -106,6 +106,10 @@ ur""" >>> parse('