From 777cc30e82719badfa29373c0497718446f4fad8 Mon Sep 17 00:00:00 2001 From: John Berlin Date: Tue, 5 Feb 2019 18:11:21 -0500 Subject: [PATCH] Updated RewriteInfo._resolve_text_type to recognize the `fr_` rewrite modifier (indicates that the content is from a frameset's frame) (#438) Added a test, test_rewrite_frameset_frame_content, to test_content_rewriter.py for these changes --- pywb/rewrite/content_rewriter.py | 2 +- pywb/rewrite/test/test_content_rewriter.py | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/pywb/rewrite/content_rewriter.py b/pywb/rewrite/content_rewriter.py index ebf5c72b..6887959e 100644 --- a/pywb/rewrite/content_rewriter.py +++ b/pywb/rewrite/content_rewriter.py @@ -434,7 +434,7 @@ class RewriteInfo(object): # if html or no-content type, allow resolving on js, css, # or other templates if text_type in ('guess-text', 'guess-html'): - if not is_js_or_css and mod not in ('if_', 'mp_', 'bn_', ''): + if not is_js_or_css and mod not in ('fr_', 'if_', 'mp_', 'bn_', ''): return None # if application/octet-stream binary, only resolve if in js/css content diff --git a/pywb/rewrite/test/test_content_rewriter.py b/pywb/rewrite/test/test_content_rewriter.py index 59068bc7..2117948b 100644 --- a/pywb/rewrite/test/test_content_rewriter.py +++ b/pywb/rewrite/test/test_content_rewriter.py @@ -524,6 +524,24 @@ class TestContentRewriter(object): assert b''.join(gen).decode('utf-8') == '{"ssid":"5678"}' + def test_rewrite_frameset_frame_content(self): + """Determines if the content rewriter correctly determines that HTML loaded via a frameset's frame, + frame's src url is rewritten with the **fr_** rewrite modifier, is content to be rewritten + """ + headers = {'Content-Type': 'text/html; charset=UTF-8'} + prefix = 'http://localhost:8080/live/' + dt = '20190205180554%s' + content = '' + rw_headers, gen, is_rw = self.rewrite_record(headers, content, ts=dt % 'fr_', + prefix=prefix, + url='http://r-u-ins.tumblr.com/', + is_live='1') + # is_rw should be true indicating the content was rewritten + assert is_rw + assert b''.join(gen).decode('utf-8') == content.replace('href="', 'href="%s%s' % (prefix, dt % 'oe_/')) + assert rw_headers.headers == [('Content-Type', 'text/html; charset=UTF-8')] + def test_custom_live_only(self): headers = {'Content-Type': 'application/json'} content = '{"foo":"bar", "dash": {"on": "true"}, "some": ["list"]'