From 32962be7c494c31813908193cb960c050888c4c8 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Wed, 10 Apr 2019 10:38:58 -0700 Subject: [PATCH] JSONP Rewriter: Fix regex to match both /* and // comments (#460) * jsonp rewriter: improve regex to match starting /* and // multiline comments, update test * fix regex, add and cleanup jsonp rewriter tests * Fixes #459 --- pywb/rewrite/jsonp_rewriter.py | 6 ++- pywb/rewrite/test/test_content_rewriter.py | 18 ++++++++- pywb/rewrite/test/test_jsonp_rewriter.py | 47 ++++++++++++++++++++-- 3 files changed, 64 insertions(+), 7 deletions(-) diff --git a/pywb/rewrite/jsonp_rewriter.py b/pywb/rewrite/jsonp_rewriter.py index 5a262714..9767fdef 100644 --- a/pywb/rewrite/jsonp_rewriter.py +++ b/pywb/rewrite/jsonp_rewriter.py @@ -4,12 +4,14 @@ from pywb.rewrite.content_rewriter import StreamingRewriter # ============================================================================ class JSONPRewriter(StreamingRewriter): - JSONP = re.compile(r'^(?:\s*\/\*(?:.*)\*\/)*\s*(\w+)\(\{') + #JSONP = re.compile(r'^(?:\s*\/\*(?:.*)\*\/)*\s*(\w+)\(\{') + # Match a single /* and // style comments at the beginning + JSONP = re.compile(r'(?:^[ \t]*(?:(?:\/\*[^\*]*\*\/)|(?:\/\/[^\n]+[\n])))*[ \t]*(\w+)\(\{', re.M) CALLBACK = re.compile(r'[?].*callback=([^&]+)') def rewrite(self, string): # see if json is jsonp, starts with callback func - m_json = self.JSONP.search(string) + m_json = self.JSONP.match(string) if not m_json: return string diff --git a/pywb/rewrite/test/test_content_rewriter.py b/pywb/rewrite/test/test_content_rewriter.py index 2117948b..a20b493c 100644 --- a/pywb/rewrite/test/test_content_rewriter.py +++ b/pywb/rewrite/test/test_content_rewriter.py @@ -459,7 +459,23 @@ class TestContentRewriter(object): def test_rewrite_js_as_json_generic_jsonp(self): headers = {'Content-Type': 'application/json'} - content = '/**/ jsonpCallbackABCDEF({"foo": "bar"});' + content = '/*abc*/ jsonpCallbackABCDEF({"foo": "bar"});' + + headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701js_', + url='http://example.com/path/file?callback=jsonpCallback12345') + + # content-type unchanged + assert ('Content-Type', 'application/json') in headers.headers + + exp = 'jsonpCallback12345({"foo": "bar"});' + assert b''.join(gen).decode('utf-8') == exp + + def test_rewrite_js_as_json_generic_jsonp_multiline_comment(self): + headers = {'Content-Type': 'application/json'} + content = """\ +// A comment +// Another? +jsonpCallbackABCDEF({"foo": "bar"});""" headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701js_', url='http://example.com/path/file?callback=jsonpCallback12345') diff --git a/pywb/rewrite/test/test_jsonp_rewriter.py b/pywb/rewrite/test/test_jsonp_rewriter.py index 64df648b..0e0a3499 100644 --- a/pywb/rewrite/test/test_jsonp_rewriter.py +++ b/pywb/rewrite/test/test_jsonp_rewriter.py @@ -8,13 +8,18 @@ class TestJSONPRewriter(object): cls.rewriter = JSONPRewriter(urlrewriter) urlrewriter = UrlRewriter('20161226/http://example.com/', '/web/', 'https://localhost/web/') - cls.rewriter_no_cb = JSONPRewriter(urlrewriter) + cls.rewriter_missing_cb = JSONPRewriter(urlrewriter) def test_jsonp_rewrite_1(self): string = 'jQuery_1234({"foo": "bar", "some": "data"})' expect = 'jQuery_ABC({"foo": "bar", "some": "data"})' assert self.rewriter.rewrite(string) == expect + def test_jsonp_rewrite_1_with_whitespace(self): + string = ' jQuery_1234({"foo": "bar", "some": "data"})' + expect = 'jQuery_ABC({"foo": "bar", "some": "data"})' + assert self.rewriter.rewrite(string) == expect + def test_jsonp_rewrite_2(self): string = ' /**/ jQuery_1234({"foo": "bar", "some": "data"})' expect = 'jQuery_ABC({"foo": "bar", "some": "data"})' @@ -25,6 +30,34 @@ class TestJSONPRewriter(object): expect = 'jQuery_ABC({"foo": "bar", "some": "data"})' assert self.rewriter.rewrite(string) == expect + def test_jsonp_rewrite_4(self): + string = """// some comment + jQuery_1234({"foo": "bar", "some": "data"})""" + expect = 'jQuery_ABC({"foo": "bar", "some": "data"})' + assert self.rewriter.rewrite(string) == expect + + def test_jsonp_rewrite_5(self): + string = """// some comment + // blah = 4; + jQuery_1234({"foo": "bar", "some": "data"})""" + expect = 'jQuery_ABC({"foo": "bar", "some": "data"})' + assert self.rewriter.rewrite(string) == expect + +# JSONP valid but 'callback=' missing in url tests + def test_no_jsonp_rewrite_missing_callback_1(self): + """ JSONP valid but callback is missing in url + """ + string = 'jQuery_1234({"foo": "bar", "some": "data"})' + assert self.rewriter_missing_cb.rewrite(string) == string + + def test_no_jsonp_rewrite_missing_callback_2(self): + string = """// some comment + jQuery_1234({"foo": "bar", "some": "data"})""" + expect = 'jQuery_ABC({"foo": "bar", "some": "data"})' + assert self.rewriter_missing_cb.rewrite(string) == string + + +# Invalid JSONP Tests def test_no_jsonp_rewrite_1(self): string = ' /* comment jQuery_1234({"foo": "bar", "some": "data"})' assert self.rewriter.rewrite(string) == string @@ -37,8 +70,14 @@ class TestJSONPRewriter(object): string = 'var foo = ({"foo": "bar", "some": "data"})' assert self.rewriter.rewrite(string) == string - def test_no_jsonp_rewrite_no_callback_1(self): - string = 'jQuery_1234({"foo": "bar", "some": "data"})' - assert self.rewriter_no_cb.rewrite(string) == string + def test_jsonp_rewrite_3(self): + string = ' abc /* some comment */ jQuery_1234({"foo": "bar", "some": "data"})' + assert self.rewriter.rewrite(string) == string + + def test_no_jsonp_multiline_rewrite_2(self): + string = """// some comment + blah = 4; + jQuery_1234({"foo": "bar", "some": "data"})""" + assert self.rewriter.rewrite(string) == string