1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 08:04:49 +01:00

Allow rewriting of empty srcset attributes

Strictly speaking a `srcset` attribute must consist of one or more
strings
(http://w3c.github.io/html/semantics-embedded-content.html#element-attrdef-img-srcset)
However, there are websites out there that specify an empty string as the
value.

This commit makes sure that the rewriting does not break and just
returns an empty string.
This commit is contained in:
chdorner 2016-06-01 11:21:55 +02:00
parent 8e473f01fa
commit b54347f8d1
2 changed files with 5 additions and 0 deletions

View File

@ -211,6 +211,7 @@ class HTMLRewriterMixin(object):
def _rewrite_srcset(self, value, mod=''):
values = value.split(',')
values = map(lambda x: self._rewrite_url(x.strip()), values)
values = [v for v in values if v is not None]
return ', '.join(values)
def _rewrite_css(self, css_content):

View File

@ -115,6 +115,10 @@ r"""
>>> parse('<img srcset="//example.com/1x 1x, //example.com/foo 2x, https://example.com/bar 4x">')
<img srcset="/web/20131226101010///example.com/1x 1x, /web/20131226101010///example.com/foo 2x, /web/20131226101010/https://example.com/bar 4x">
# empty srcset attrib
>>> parse('<img srcset="">')
<img srcset="">
# Script tag
>>> parse('<script>window.location = "http://example.com/a/b/c.html"</script>')
<script>window.WB_wombat_location = "/web/20131226101010/http://example.com/a/b/c.html"</script>