From 7feb0893eb3ec9a691c080d6fdf02839980c8357 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Tue, 30 Sep 2014 10:57:25 -0700 Subject: [PATCH] rewrite: add 'application/json' to a seperate 'json' regex rewriter type (rewrite links only, no http), can be customized via rules wombat: add rewrite_style for rewriting style attrs query: don't include any filter in latest, custom filter can be used without any other filters tests: fix typos in tests --- pywb/rewrite/header_rewriter.py | 2 ++ pywb/rewrite/regex_rewriters.py | 2 +- pywb/rewrite/rewriterules.py | 1 + pywb/static/wombat.js | 27 ++++++++++++++++++++------- pywb/webapp/query_handler.py | 11 +++++++++-- tests/test_config.yaml | 1 + tests/test_proxy_http_cookie.py | 1 - tests/test_proxy_https_cookie.py | 1 - 8 files changed, 34 insertions(+), 12 deletions(-) diff --git a/pywb/rewrite/header_rewriter.py b/pywb/rewrite/header_rewriter.py index 2d505e88..950817d4 100644 --- a/pywb/rewrite/header_rewriter.py +++ b/pywb/rewrite/header_rewriter.py @@ -28,6 +28,8 @@ class HeaderRewriter: 'application/javascript', 'application/x-javascript'], + 'json': ['application/json'], + 'xml': ['/xml', '+xml', '.xml', '.rss'], } diff --git a/pywb/rewrite/regex_rewriters.py b/pywb/rewrite/regex_rewriters.py index df7a128e..97151190 100644 --- a/pywb/rewrite/regex_rewriters.py +++ b/pywb/rewrite/regex_rewriters.py @@ -123,7 +123,7 @@ class JSLinkAndLocationRewriter(JSLinkOnlyRewriter): def __init__(self, rewriter, rules=[], prefix='WB_wombat_'): rules = rules + [ - (r'(? " + tmp_href); this._autooverride = false; var _set_hash = function(hash) { @@ -495,7 +494,7 @@ WB_wombat_init = (function() { } //============================================ - function rewrite_attr(elem, name) { + function rewrite_attr(elem, name, func) { if (!elem || !elem.getAttribute) { return; } @@ -510,17 +509,31 @@ WB_wombat_init = (function() { return; } - //var orig_value = value; - value = rewrite_url(value); + value = func(value); elem.setAttribute(name, value); } - + + //============================================ + function rewrite_style(value) + { + //console.log("style rewrite: " + value); + + STYLE_REGEX = /(url\s*\(\s*[\\"']*)([^)'"]+)([\\"']*\s*\))/g; + + function style_replacer(match, n1, n2, n3, offset, string) { + return n1 + rewrite_url(n2) + n3; + } + + return value.replace(STYLE_REGEX, style_replacer); + } + //============================================ function rewrite_elem(elem) { - rewrite_attr(elem, "src"); - rewrite_attr(elem, "href"); + rewrite_attr(elem, "src", rewrite_url); + rewrite_attr(elem, "href", rewrite_url); + rewrite_attr(elem, "style", rewrite_style); if (elem && elem.getAttribute && elem.getAttribute("crossorigin")) { elem.removeAttribute("crossorigin"); diff --git a/pywb/webapp/query_handler.py b/pywb/webapp/query_handler.py index 92526179..ee03b8b9 100644 --- a/pywb/webapp/query_handler.py +++ b/pywb/webapp/query_handler.py @@ -76,10 +76,15 @@ class QueryHandler(object): return self.make_cdx_response(wbrequest, cdx_iter, params['output']) def load_cdx(self, wbrequest, params): + print(params) if wbrequest: # add any custom filter from the request if wbrequest.query_filter: - params['filter'].extend(wbrequest.query_filter) + filters = params.get('filter') + if filters: + filters.extend(wbrequest.query_filter) + else: + params['filter'] = wbrequest.query_filter if wbrequest.custom_params: params.update(wbrequest.custom_params) @@ -144,7 +149,9 @@ class QueryHandler(object): wburl.LATEST_REPLAY: {'sort': 'reverse', - 'filter': ['statuscode:[23]..|-'], + # Not appropriate as default + # Should be an option to configure status code filtering in general + # 'filter': ['statuscode:[23]..|-'], 'limit': '1', 'resolveRevisits': True, } diff --git a/tests/test_config.yaml b/tests/test_config.yaml index 2d0fc5b6..ea0f21a9 100644 --- a/tests/test_config.yaml +++ b/tests/test_config.yaml @@ -92,6 +92,7 @@ absoulte_paths: true # : static_routes: static/test/route: pywb/static/ + static/default: pywb/static/ # Enable simple http proxy mode enable_http_proxy: true diff --git a/tests/test_proxy_http_cookie.py b/tests/test_proxy_http_cookie.py index c7330984..a79ef308 100644 --- a/tests/test_proxy_http_cookie.py +++ b/tests/test_proxy_http_cookie.py @@ -94,7 +94,6 @@ class TestProxyHttpCookie: global sesh_key sesh_key = sesh1 - global sesh_key sesh2 = self.session.cookies.get('__pywb_proxy_sesh', domain='.iana.org') assert sesh_key == sesh2 diff --git a/tests/test_proxy_https_cookie.py b/tests/test_proxy_https_cookie.py index 91f24610..bfd1d398 100644 --- a/tests/test_proxy_https_cookie.py +++ b/tests/test_proxy_https_cookie.py @@ -108,7 +108,6 @@ class TestProxyHttpsCookie: global sesh_key sesh_key = sesh1 - global sesh_key sesh2 = self.session.cookies.get('__pywb_proxy_sesh', domain='.iana.org') assert sesh_key == sesh2