1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

JSONP Rewriter: Fix regex to match both /* and // comments (#460)

* jsonp rewriter: improve regex to match starting /* and // multiline comments, update test

* fix regex, add and cleanup jsonp rewriter tests

* Fixes #459
This commit is contained in:
Ilya Kreymer 2019-04-10 10:38:58 -07:00 committed by GitHub
parent 9448f4fe45
commit 32962be7c4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 64 additions and 7 deletions

View File

@ -4,12 +4,14 @@ from pywb.rewrite.content_rewriter import StreamingRewriter
# ============================================================================ # ============================================================================
class JSONPRewriter(StreamingRewriter): class JSONPRewriter(StreamingRewriter):
JSONP = re.compile(r'^(?:\s*\/\*(?:.*)\*\/)*\s*(\w+)\(\{') #JSONP = re.compile(r'^(?:\s*\/\*(?:.*)\*\/)*\s*(\w+)\(\{')
# Match a single /* and // style comments at the beginning
JSONP = re.compile(r'(?:^[ \t]*(?:(?:\/\*[^\*]*\*\/)|(?:\/\/[^\n]+[\n])))*[ \t]*(\w+)\(\{', re.M)
CALLBACK = re.compile(r'[?].*callback=([^&]+)') CALLBACK = re.compile(r'[?].*callback=([^&]+)')
def rewrite(self, string): def rewrite(self, string):
# see if json is jsonp, starts with callback func # see if json is jsonp, starts with callback func
m_json = self.JSONP.search(string) m_json = self.JSONP.match(string)
if not m_json: if not m_json:
return string return string

View File

@ -459,7 +459,23 @@ class TestContentRewriter(object):
def test_rewrite_js_as_json_generic_jsonp(self): def test_rewrite_js_as_json_generic_jsonp(self):
headers = {'Content-Type': 'application/json'} headers = {'Content-Type': 'application/json'}
content = '/**/ jsonpCallbackABCDEF({"foo": "bar"});' content = '/*abc*/ jsonpCallbackABCDEF({"foo": "bar"});'
headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701js_',
url='http://example.com/path/file?callback=jsonpCallback12345')
# content-type unchanged
assert ('Content-Type', 'application/json') in headers.headers
exp = 'jsonpCallback12345({"foo": "bar"});'
assert b''.join(gen).decode('utf-8') == exp
def test_rewrite_js_as_json_generic_jsonp_multiline_comment(self):
headers = {'Content-Type': 'application/json'}
content = """\
// A comment
// Another?
jsonpCallbackABCDEF({"foo": "bar"});"""
headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701js_', headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701js_',
url='http://example.com/path/file?callback=jsonpCallback12345') url='http://example.com/path/file?callback=jsonpCallback12345')

View File

@ -8,13 +8,18 @@ class TestJSONPRewriter(object):
cls.rewriter = JSONPRewriter(urlrewriter) cls.rewriter = JSONPRewriter(urlrewriter)
urlrewriter = UrlRewriter('20161226/http://example.com/', '/web/', 'https://localhost/web/') urlrewriter = UrlRewriter('20161226/http://example.com/', '/web/', 'https://localhost/web/')
cls.rewriter_no_cb = JSONPRewriter(urlrewriter) cls.rewriter_missing_cb = JSONPRewriter(urlrewriter)
def test_jsonp_rewrite_1(self): def test_jsonp_rewrite_1(self):
string = 'jQuery_1234({"foo": "bar", "some": "data"})' string = 'jQuery_1234({"foo": "bar", "some": "data"})'
expect = 'jQuery_ABC({"foo": "bar", "some": "data"})' expect = 'jQuery_ABC({"foo": "bar", "some": "data"})'
assert self.rewriter.rewrite(string) == expect assert self.rewriter.rewrite(string) == expect
def test_jsonp_rewrite_1_with_whitespace(self):
string = ' jQuery_1234({"foo": "bar", "some": "data"})'
expect = 'jQuery_ABC({"foo": "bar", "some": "data"})'
assert self.rewriter.rewrite(string) == expect
def test_jsonp_rewrite_2(self): def test_jsonp_rewrite_2(self):
string = ' /**/ jQuery_1234({"foo": "bar", "some": "data"})' string = ' /**/ jQuery_1234({"foo": "bar", "some": "data"})'
expect = 'jQuery_ABC({"foo": "bar", "some": "data"})' expect = 'jQuery_ABC({"foo": "bar", "some": "data"})'
@ -25,6 +30,34 @@ class TestJSONPRewriter(object):
expect = 'jQuery_ABC({"foo": "bar", "some": "data"})' expect = 'jQuery_ABC({"foo": "bar", "some": "data"})'
assert self.rewriter.rewrite(string) == expect assert self.rewriter.rewrite(string) == expect
def test_jsonp_rewrite_4(self):
string = """// some comment
jQuery_1234({"foo": "bar", "some": "data"})"""
expect = 'jQuery_ABC({"foo": "bar", "some": "data"})'
assert self.rewriter.rewrite(string) == expect
def test_jsonp_rewrite_5(self):
string = """// some comment
// blah = 4;
jQuery_1234({"foo": "bar", "some": "data"})"""
expect = 'jQuery_ABC({"foo": "bar", "some": "data"})'
assert self.rewriter.rewrite(string) == expect
# JSONP valid but 'callback=' missing in url tests
def test_no_jsonp_rewrite_missing_callback_1(self):
""" JSONP valid but callback is missing in url
"""
string = 'jQuery_1234({"foo": "bar", "some": "data"})'
assert self.rewriter_missing_cb.rewrite(string) == string
def test_no_jsonp_rewrite_missing_callback_2(self):
string = """// some comment
jQuery_1234({"foo": "bar", "some": "data"})"""
expect = 'jQuery_ABC({"foo": "bar", "some": "data"})'
assert self.rewriter_missing_cb.rewrite(string) == string
# Invalid JSONP Tests
def test_no_jsonp_rewrite_1(self): def test_no_jsonp_rewrite_1(self):
string = ' /* comment jQuery_1234({"foo": "bar", "some": "data"})' string = ' /* comment jQuery_1234({"foo": "bar", "some": "data"})'
assert self.rewriter.rewrite(string) == string assert self.rewriter.rewrite(string) == string
@ -37,8 +70,14 @@ class TestJSONPRewriter(object):
string = 'var foo = ({"foo": "bar", "some": "data"})' string = 'var foo = ({"foo": "bar", "some": "data"})'
assert self.rewriter.rewrite(string) == string assert self.rewriter.rewrite(string) == string
def test_no_jsonp_rewrite_no_callback_1(self): def test_jsonp_rewrite_3(self):
string = 'jQuery_1234({"foo": "bar", "some": "data"})' string = ' abc /* some comment */ jQuery_1234({"foo": "bar", "some": "data"})'
assert self.rewriter_no_cb.rewrite(string) == string assert self.rewriter.rewrite(string) == string
def test_no_jsonp_multiline_rewrite_2(self):
string = """// some comment
blah = 4;
jQuery_1234({"foo": "bar", "some": "data"})"""
assert self.rewriter.rewrite(string) == string