diff --git a/pywb/rewrite/html_rewriter.py b/pywb/rewrite/html_rewriter.py index f0c904c2..618c5191 100644 --- a/pywb/rewrite/html_rewriter.py +++ b/pywb/rewrite/html_rewriter.py @@ -174,6 +174,12 @@ class HTMLRewriterMixin(object): elif attr_name == 'crossorigin': attr_name = '_crossorigin' + # special case: link tag -- rewrite href unless the tag is rel=canonical + elif tag == 'link' and attr_name == 'href': + if not self.has_attr(tag_attrs, ('rel', 'canonical')): + rw_mod = handler.get(attr_name) + attr_value = self._rewrite_url(attr_value, rw_mod) + # special case: meta tag elif (tag == 'meta') and (attr_name == 'content'): if self.has_attr(tag_attrs, ('http-equiv', 'refresh')): diff --git a/pywb/rewrite/test/test_html_rewriter.py b/pywb/rewrite/test/test_html_rewriter.py index 45df4dfb..710fa338 100644 --- a/pywb/rewrite/test/test_html_rewriter.py +++ b/pywb/rewrite/test/test_html_rewriter.py @@ -102,6 +102,10 @@ ur""" >>> parse('
SomeTest
', head_insert = '')
SomeTest
+# don't rewrite rel=canonical +>>> parse('') + + # doctype >>> parse('')