diff --git a/pywb/indexreader.py b/pywb/indexreader.py index 95453c9f..66bde98a 100644 --- a/pywb/indexreader.py +++ b/pywb/indexreader.py @@ -19,7 +19,8 @@ class RemoteCDXServer: 'statuscode': '200', 'timestamp': '20020120142510', 'urlkey': 'com,example)/'} - """ + + """ def __init__(self, serverUrl): self.serverUrl = serverUrl diff --git a/pywb/regexmatch.py b/pywb/regexmatch.py index 53102f21..9f3d4242 100644 --- a/pywb/regexmatch.py +++ b/pywb/regexmatch.py @@ -148,6 +148,9 @@ class CSSRewriter(RegexRewriter): """ + CSS_URL_REGEX = "url\\s*\\(\\s*[\\\\\"']*([^'\"]+)[\\\\\"']*\\s*\\)" + CSS_IMPORT_NO_URL_REGEX = "@import\\s+(?!url)\\(?\\s*['\"]?(?!url[\\s\\(])([\w.:/\\\\-]+)" + def __init__(self, rewriter): rules = self._createRules(rewriter) @@ -156,8 +159,8 @@ class CSSRewriter(RegexRewriter): def _createRules(self, rewriter): return [ - ("url\\s*\\(\\s*[\\\\\"']*([^'\"]+)[\\\\\"']*\\s*\\)", RegexRewriter.archivalRewrite(rewriter), 1), - ("@import\\s+(?!url)\\(?\\s*['\"]?(?!url[\\s\\(])([\w.:/\\\\-]+)", RegexRewriter.archivalRewrite(rewriter), 1), + (CSSRewriter.CSS_URL_REGEX, RegexRewriter.archivalRewrite(rewriter), 1), + (CSSRewriter.CSS_IMPORT_NO_URL_REGEX, RegexRewriter.archivalRewrite(rewriter), 1), ] diff --git a/pywb/wbhtml.py b/pywb/wbhtml.py index 325b4892..946715da 100644 --- a/pywb/wbhtml.py +++ b/pywb/wbhtml.py @@ -10,30 +10,38 @@ from regexmatch import JSRewriter, CSSRewriter #================================================================= class WBHtml(HTMLParser): r""" - >>> WBHtml(rewriter).feed('Text') + >>> parse('Text') Text - >>> WBHtml(rewriter).feed('