1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

jsonp rewriter: expand jsonp matching (#336)

- treat as jsonp if url query contains 'callback=jsonp',
- fuzzy match query containing 'callback=jsonp'
- tests: add test for additional jsonp matching
This commit is contained in:
Ilya Kreymer 2018-05-29 08:57:50 -07:00 committed by GitHub
parent efb7b2db90
commit a138fca5e3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 25 additions and 1 deletions

View File

@ -313,6 +313,11 @@ class StreamingRewriter(object):
class RewriteInfo(object):
TAG_REGEX = re.compile(b'^\s*\<')
JSONP_CONTAINS = ['callback=jQuery',
'callback=jsonp',
'.json?'
]
def __init__(self, record, content_rewriter, url_rewriter, cookie_rewriter=None):
self.record = record
@ -347,12 +352,14 @@ class RewriteInfo(object):
orig_text_type = self.rewrite_types.get(mime)
text_type = self._resolve_text_type(orig_text_type)
url = self.url_rewriter.wburl.url
if text_type in ('guess-text', 'guess-bin'):
text_type = None
if text_type == 'js':
if 'callback=jQuery' in self.url_rewriter.wburl.url or '.json?' in self.url_rewriter.wburl.url:
# determine if url contains strings that indicate jsonp
if any(jsonp_string in url for jsonp_string in self.JSONP_CONTAINS):
text_type = 'json'
if (text_type and orig_text_type != text_type) or text_type == 'html':

View File

@ -250,6 +250,19 @@ class TestContentRewriter(object):
exp = 'jQuery_DEF({"foo": "bar"});'
assert b''.join(gen).decode('utf-8') == exp
def test_rewrite_js_as_json_generic_jsonp(self):
# Checks JSONP handling for a generic 'callback=jsonp...' query: the request
# URL below contains 'callback=jsonp' but neither 'callback=jQuery' nor '.json?'.
headers = {'Content-Type': 'application/json'}
# Recorded body uses a different callback name (jsonpCallbackABCDEF) than the
# one in the request URL, and is prefixed with a '/**/' comment.
content = '/**/ jsonpCallbackABCDEF({"foo": "bar"});'
# NOTE(review): the 'js_' suffix on ts presumably selects the JS rewrite
# modifier -- confirm against rewrite_record.
headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701js_',
url='http://example.com/path/file?callback=jsonpCallback12345')
# content-type unchanged
assert ('Content-Type', 'application/json') in headers.headers
# Expected output carries the callback name from the request URL and no
# leading '/**/' prefix.
exp = 'jsonpCallback12345({"foo": "bar"});'
assert b''.join(gen).decode('utf-8') == exp
def test_rewrite_js_not_json(self):
# callback not set
headers = {}

View File

@ -34,6 +34,10 @@ default_filters:
- match: '[?&]utm_[^=]+=[^&]+(?=&|$)'
replace: ''
# collapse dynamic jsonp callback names: callback=jsonpCallbackXYZ -> callback=jsonp
- match: '[?&](callback=jsonp)[^&]+(?=&|$)'
replace: '\1'
# remove jquery callback dynamic timestamp
- match: '[?&]((?:\w+)=jquery)[\d]+_[\d]+'
replace: '\1'