1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

rewrite: don't rewrite rel=canonical links; rewriting needs to be made more configurable (#50)
This commit is contained in:
Ilya Kreymer 2014-11-11 15:34:14 -08:00
parent 49e98e0cdc
commit 388f31e08f
2 changed files with 10 additions and 0 deletions

View File

@ -174,6 +174,12 @@ class HTMLRewriterMixin(object):
elif attr_name == 'crossorigin':
attr_name = '_crossorigin'
# special case: don't rewrite the href of a <link rel="canonical"> tag
elif tag == 'link' and attr_name == 'href':
if not self.has_attr(tag_attrs, ('rel', 'canonical')):
rw_mod = handler.get(attr_name)
attr_value = self._rewrite_url(attr_value, rw_mod)
# special case: meta tag
elif (tag == 'meta') and (attr_name == 'content'):
if self.has_attr(tag_attrs, ('http-equiv', 'refresh')):

View File

@ -102,6 +102,10 @@ ur"""
>>> parse('<link href="abc.txt"><div>SomeTest</div>', head_insert = '<script>load_stuff();</script>')
<link href="/web/20131226101010oe_/http://example.com/some/path/abc.txt"><script>load_stuff();</script><div>SomeTest</div>
# don't rewrite rel=canonical
>>> parse('<link rel=canonical href="http://example.com/">')
<link rel="canonical" href="http://example.com/">
# doctype
>>> parse('<!doctype html PUBLIC "public">')
<!doctype html PUBLIC "public">