From b54347f8d1ec97ef9a53d96255676a82ec685ec7 Mon Sep 17 00:00:00 2001 From: chdorner Date: Wed, 1 Jun 2016 11:21:55 +0200 Subject: [PATCH] Allow rewriting of empty srcset attributes Strictly speaking, a `srcset` attribute must consist of one or more strings (http://w3c.github.io/html/semantics-embedded-content.html#element-attrdef-img-srcset). However, there are websites out there that specify an empty string as the value. This commit makes sure that the rewriting does not break and just returns an empty string. --- pywb/rewrite/html_rewriter.py | 1 + pywb/rewrite/test/test_html_rewriter.py | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/pywb/rewrite/html_rewriter.py b/pywb/rewrite/html_rewriter.py index e57f8591..171b4e69 100644 --- a/pywb/rewrite/html_rewriter.py +++ b/pywb/rewrite/html_rewriter.py @@ -211,6 +211,7 @@ class HTMLRewriterMixin(object): def _rewrite_srcset(self, value, mod=''): values = value.split(',') values = map(lambda x: self._rewrite_url(x.strip()), values) + values = [v for v in values if v is not None] return ', '.join(values) def _rewrite_css(self, css_content): diff --git a/pywb/rewrite/test/test_html_rewriter.py b/pywb/rewrite/test/test_html_rewriter.py index 818bd114..13063936 100644 --- a/pywb/rewrite/test/test_html_rewriter.py +++ b/pywb/rewrite/test/test_html_rewriter.py @@ -115,6 +115,10 @@ r""" >>> parse('') +# empty srcset attrib +>>> parse('') + + # Script tag >>> parse('')