From 32e9020fd25469c28b2128ec7425b617a1f286fe Mon Sep 17 00:00:00 2001 From: Yasar Date: Mon, 1 Aug 2022 02:31:04 +0200 Subject: [PATCH] html_rewriter: fixed attribute 'srcset' rewriting (#712) Co-authored-by: Yasar Kunduz --- pywb/rewrite/html_rewriter.py | 2 +- pywb/rewrite/test/test_html_rewriter.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/pywb/rewrite/html_rewriter.py b/pywb/rewrite/html_rewriter.py index d31bcd87..c6bd178e 100644 --- a/pywb/rewrite/html_rewriter.py +++ b/pywb/rewrite/html_rewriter.py @@ -177,7 +177,7 @@ class HTMLRewriterMixin(StreamingRewriter): return '' values = (url.strip() for url in re.split(self.SRCSET_REGEX, value) if url) - values = [self._rewrite_url(v.strip()) for v in values] + values = [self._rewrite_url(v.split(' ')[0].strip()) + (' ' + ' '.join(v.split(' ')[1:])).rstrip() for v in values if v] return ', '.join(values) def _rewrite_meta_refresh(self, meta_refresh): diff --git a/pywb/rewrite/test/test_html_rewriter.py b/pywb/rewrite/test/test_html_rewriter.py index 8809441c..7471db9a 100644 --- a/pywb/rewrite/test/test_html_rewriter.py +++ b/pywb/rewrite/test/test_html_rewriter.py @@ -185,6 +185,10 @@ r""" >>> parse('') +# complex srcset attrib +>>> parse('') + + # empty srcset attrib >>> parse('')