1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 08:04:49 +01:00

rewrite: add support for srcset rewriting for img tag

This commit is contained in:
Ilya Kreymer 2014-11-02 16:10:38 -08:00
parent ced94d289d
commit 7aac3aa2dd
2 changed files with 16 additions and 1 deletions

View File

@ -33,7 +33,8 @@ class HTMLRewriterMixin(object):
'embed': {'src': 'oe_'},
'head': {'': defmod}, # for head rewriting
'iframe': {'src': 'if_'},
'img': {'src': 'im_'},
'img': {'src': 'im_',
'srcset': 'im_'},
'ins': {'cite': defmod},
'input': {'src': 'im_'},
'form': {'action': defmod},
@ -117,6 +118,11 @@ class HTMLRewriterMixin(object):
else:
return None
def _rewrite_srcset(self, value, mod=''):
values = value.split(',')
values = map(lambda x: self._rewrite_url(x.strip()), values)
return ', '.join(values)
def _rewrite_css(self, css_content):
if css_content:
return self.css_rewriter.rewrite(css_content)
@ -169,6 +175,11 @@ class HTMLRewriterMixin(object):
elif attr_name == 'style':
attr_value = self._rewrite_css(attr_value)
# special case: srcset list
elif attr_name == 'srcset':
rw_mod = handler.get(attr_name, '')
attr_value = self._rewrite_srcset(attr_value, rw_mod)
# special case: disable the crossorigin attr,
# as it may interfere with rewriting semantics
elif attr_name == 'crossorigin':

View File

@ -60,6 +60,10 @@ ur"""
>>> parse('<div data-url="http://example.com/a/b/c.html" data-some-other-value="http://example.com/img.gif">')
<div data-url="/web/20131226101010oe_/http://example.com/a/b/c.html" data-some-other-value="/web/20131226101010oe_/http://example.com/img.gif">
# srcset attrib
>>> parse('<img srcset="//example.com/1x 1x, //example.com/foo 2x, https://example.com/bar 4x">')
<img srcset="/web/20131226101010/http://example.com/1x 1x, /web/20131226101010/http://example.com/foo 2x, /web/20131226101010/https://example.com/bar 4x">
# Script tag
>>> parse('<script>window.location = "http://example.com/a/b/c.html"</script>')
<script>window.WB_wombat_location = "/web/20131226101010/http://example.com/a/b/c.html"</script>