1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

html_rewriter: fixed attribute 'srcset' rewriting (#712)

Co-authored-by: Yasar Kunduz <yasar.kunduz@nationaalarchief.nl>
This commit is contained in:
Yasar 2022-08-01 02:31:04 +02:00 committed by GitHub
parent 62633a48c4
commit 32e9020fd2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 5 additions and 1 deletions

View File

@ -177,7 +177,7 @@ class HTMLRewriterMixin(StreamingRewriter):
return ''
values = (url.strip() for url in re.split(self.SRCSET_REGEX, value) if url)
values = [self._rewrite_url(v.strip()) for v in values]
values = [self._rewrite_url(v.split(' ')[0].strip()) + (' ' + ' '.join(v.split(' ')[1:])).rstrip() for v in values if v]
return ', '.join(values)
def _rewrite_meta_refresh(self, meta_refresh):

View File

@ -185,6 +185,10 @@ r"""
>>> parse('<img srcset="//example.com/1x,1x 2w, //example1.com/foo 2x, http://example.com/bar,bar 4x">')
<img srcset="/web/20131226101010///example.com/1x,1x 2w, /web/20131226101010///example1.com/foo 2x, /web/20131226101010/http://example.com/bar,bar 4x">
# complex srcset attrib
>>> parse('<img srcset="http://test.com/yaşar-kunduz.jpg 320w, http://test.com/yaşar-konçalves-273x300.jpg 273w">')
<img srcset="/web/20131226101010/http://test.com/ya%C5%9Far-kunduz.jpg 320w, /web/20131226101010/http://test.com/ya%C5%9Far-konc%CC%A7alves-273x300.jpg 273w">
# empty srcset attrib
>>> parse('<img srcset="">')
<img srcset="">