diff --git a/pywb/rewrite/html_rewriter.py b/pywb/rewrite/html_rewriter.py index f7575fa5..4866fc5a 100644 --- a/pywb/rewrite/html_rewriter.py +++ b/pywb/rewrite/html_rewriter.py @@ -33,7 +33,8 @@ class HTMLRewriterMixin(object): 'embed': {'src': 'oe_'}, 'head': {'': defmod}, # for head rewriting 'iframe': {'src': 'if_'}, - 'img': {'src': 'im_'}, + 'img': {'src': 'im_', + 'srcset': 'im_'}, 'ins': {'cite': defmod}, 'input': {'src': 'im_'}, 'form': {'action': defmod}, @@ -117,6 +118,11 @@ class HTMLRewriterMixin(object): else: return None + def _rewrite_srcset(self, value, mod=''): + values = value.split(',') + values = map(lambda x: self._rewrite_url(x.strip()), values) + return ', '.join(values) + def _rewrite_css(self, css_content): if css_content: return self.css_rewriter.rewrite(css_content) @@ -169,6 +175,11 @@ class HTMLRewriterMixin(object): elif attr_name == 'style': attr_value = self._rewrite_css(attr_value) + # special case: srcset list + elif attr_name == 'srcset': + rw_mod = handler.get(attr_name, '') + attr_value = self._rewrite_srcset(attr_value, rw_mod) + # special case: disable crossorigin attr # as they may interfere with rewriting semantics elif attr_name == 'crossorigin': diff --git a/pywb/rewrite/test/test_html_rewriter.py b/pywb/rewrite/test/test_html_rewriter.py index 593a4096..96e358ea 100644 --- a/pywb/rewrite/test/test_html_rewriter.py +++ b/pywb/rewrite/test/test_html_rewriter.py @@ -60,6 +60,10 @@ ur""" >>> parse('
')
+# srcset attrib +>>> parse('') + + # Script tag >>> parse('')