diff --git a/pywb/rewrite/html_rewriter.py b/pywb/rewrite/html_rewriter.py
index d92e2ef9..ef98df0e 100644
--- a/pywb/rewrite/html_rewriter.py
+++ b/pywb/rewrite/html_rewriter.py
@@ -234,13 +234,28 @@ class HTMLRewriterMixin(StreamingRewriter):
return new_value
+ # Splits a srcset value into image candidates.  First alternative: a
+ # candidate that ends in a width/density descriptor ("url 2x,") is
+ # captured whole, so commas inside its URL survive the split.  Second
+ # alternative: a bare separator, i.e. a comma followed by whitespace or
+ # directly by an absolute http(s) URL.
+ SRCSET_REGEX = re.compile(r'\s*(\S*\s+[\d\.]+[wx]),|(?:\s*,(?:\s+|(?=https?:)))')
+
def _rewrite_srcset(self, value, mod=''):
+ """Pass each candidate of a ``srcset`` value through ``_rewrite_url``.
+
+ :param value: raw attribute value; falsy input yields ``''``
+ :param mod: not referenced in this method body
+ :return: the rewritten candidates joined with ``', '``
+ """
if not value:
return ''
- values = value.split(',')
+ # The capture group makes split() emit None/'' at non-matching
+ # positions; drop those and let the next line strip and rewrite.
+ values = (url for url in self.SRCSET_REGEX.split(value) if url)
values = [self._rewrite_url(v.strip()) for v in values]
- return ','.join(values)
+ return ', '.join(values)
def _rewrite_css(self, css_content):
if css_content:
diff --git a/pywb/rewrite/test/test_html_rewriter.py b/pywb/rewrite/test/test_html_rewriter.py
index 71575d18..896b6e9b 100644
--- a/pywb/rewrite/test/test_html_rewriter.py
+++ b/pywb/rewrite/test/test_html_rewriter.py
@@ -140,9 +140,41 @@ r"""
>>> parse('')
-# srcset attrib
->>> parse('
')
-
+# srcset attrib: simple
+>>> parse('
')
+
+
+# srcset attrib: single comma-containing
+>>> parse('
')
+
+
+# srcset attrib: single comma-containing plus descriptor
+>>> parse('
')
+
+
+# srcset attrib: comma-containing absolute url and relative url, separated by comma and space
+>>> parse('
')
+
+
+# srcset attrib: comma-containing relative url and absolute url, separated by comma and space
+>>> parse('
')
+
+
+# srcset attrib: absolute urls with descriptors, separated by comma (no space)
+>>> parse('
')
+
+
+# srcset attrib: absolute url with descriptor, separated by comma (no space) from absolute url without descriptor
+>>> parse('
')
+
+
+# srcset attrib: absolute url without descriptor, separated by comma (no space) from absolute url with descriptor
+>>> parse('
')
+
+
+# complex srcset attrib
+>>> parse('
')
+
# empty srcset attrib
>>> parse('
')
diff --git a/pywb/static/wombat.js b/pywb/static/wombat.js
index 6e4c2f7b..9d7a14f6 100644
--- a/pywb/static/wombat.js
+++ b/pywb/static/wombat.js
@@ -1373,13 +1373,14 @@ var _WBWombat = function($wbwindow, wbinfo) {
return "";
}
- values = value.split(',');
+ // Filter removes non-truthy values like null, undefined, and ""
+ values = value.split(/\s*(\S*\s+[\d\.]+[wx]),|(?:\s*,(?:\s+|(?=https?:)))/).filter(Boolean);
for (var i = 0; i < values.length; i++) {
values[i] = rewrite_url(values[i].trim());
}
- return values.join(",");
+ return values.join(", ");
}
//============================================