1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

rewrite: add 'application/json' to a separate 'json' regex rewriter type (rewrite links only, no

http), can be customized via rules
wombat: add rewrite_style for rewriting style attrs
query: don't include any filter in latest, custom filter can be used
without any other filters
tests: fix typos in tests
This commit is contained in:
Ilya Kreymer 2014-09-30 10:57:25 -07:00
parent 00efe33870
commit 7feb0893eb
8 changed files with 34 additions and 12 deletions

View File

@ -28,6 +28,8 @@ class HeaderRewriter:
'application/javascript',
'application/x-javascript'],
'json': ['application/json'],
'xml': ['/xml', '+xml', '.xml', '.rss'],
}

View File

@ -123,7 +123,7 @@ class JSLinkAndLocationRewriter(JSLinkOnlyRewriter):
def __init__(self, rewriter, rules=[], prefix='WB_wombat_'):
rules = rules + [
(r'(?<!/)\blocation\b', RegexRewriter.add_prefix(prefix), 0),
(r'(?<!/)\blocation\b(?!\":)', RegexRewriter.add_prefix(prefix), 0),
(r'(?<=document\.)domain', RegexRewriter.add_prefix(prefix), 0),
(r'(?<=document\.)referrer', RegexRewriter.add_prefix(prefix), 0),
(r'(?<=document\.)cookie', RegexRewriter.add_prefix(prefix), 0),

View File

@ -22,6 +22,7 @@ class RewriteRules(BaseRule):
self.rewriters['css'] = config.get('css_class', CSSRewriter)
self.rewriters['xml'] = config.get('xml_class', XMLRewriter)
self.rewriters['html'] = config.get('html_class', HTMLRewriter)
self.rewriters['json'] = config.get('json_class', JSLinkOnlyRewriter)
self.parse_comments = config.get('parse_comments', False)

View File

@ -280,7 +280,6 @@ WB_wombat_init = (function() {
var href = extract_orig(this._orig_href);
parser.href = href;
//console.log(this._orig_href + " -> " + tmp_href);
this._autooverride = false;
var _set_hash = function(hash) {
@ -495,7 +494,7 @@ WB_wombat_init = (function() {
}
//============================================
function rewrite_attr(elem, name) {
function rewrite_attr(elem, name, func) {
if (!elem || !elem.getAttribute) {
return;
}
@ -510,17 +509,31 @@ WB_wombat_init = (function() {
return;
}
//var orig_value = value;
value = rewrite_url(value);
value = func(value);
elem.setAttribute(name, value);
}
//============================================
// Rewrite every url(...) reference found in a CSS style string.
//
// value: the raw text of a style attribute (e.g. "background: url('a.png')").
// Returns the same text with the target of each url(...) passed through
// rewrite_url(); the "url(" prefix, any quotes/backslashes and the closing
// ")" are preserved as-is. Empty/missing values are returned unchanged.
function rewrite_style(value)
{
    // Nothing to scan; also avoids calling .replace on null/undefined.
    if (!value) {
        return value;
    }

    // Declared with var on purpose: a bare assignment would create an
    // implicit global (leaking onto the page's window object) and throws
    // a ReferenceError when this code runs in strict mode.
    // Groups: (1) "url(" plus optional quote/backslash, (2) the url itself,
    // (3) optional closing quote/backslash plus ")".
    var style_regex = /(url\s*\(\s*[\\"']*)([^)'"]+)([\\"']*\s*\))/g;

    function style_replacer(match, n1, n2, n3) {
        // Only the url (group 2) is rewritten; delimiters are kept verbatim.
        return n1 + rewrite_url(n2) + n3;
    }

    return value.replace(style_regex, style_replacer);
}
//============================================
function rewrite_elem(elem)
{
rewrite_attr(elem, "src");
rewrite_attr(elem, "href");
rewrite_attr(elem, "src", rewrite_url);
rewrite_attr(elem, "href", rewrite_url);
rewrite_attr(elem, "style", rewrite_style);
if (elem && elem.getAttribute && elem.getAttribute("crossorigin")) {
elem.removeAttribute("crossorigin");

View File

@ -76,10 +76,15 @@ class QueryHandler(object):
return self.make_cdx_response(wbrequest, cdx_iter, params['output'])
def load_cdx(self, wbrequest, params):
print(params)
if wbrequest:
# add any custom filter from the request
if wbrequest.query_filter:
params['filter'].extend(wbrequest.query_filter)
filters = params.get('filter')
if filters:
filters.extend(wbrequest.query_filter)
else:
params['filter'] = wbrequest.query_filter
if wbrequest.custom_params:
params.update(wbrequest.custom_params)
@ -144,7 +149,9 @@ class QueryHandler(object):
wburl.LATEST_REPLAY:
{'sort': 'reverse',
'filter': ['statuscode:[23]..|-'],
# Not appropriate as default
# Should be an option to configure status code filtering in general
# 'filter': ['statuscode:[23]..|-'],
'limit': '1',
'resolveRevisits': True,
}

View File

@ -92,6 +92,7 @@ absoulte_paths: true
# <route>: <package or file path>
static_routes:
static/test/route: pywb/static/
static/default: pywb/static/
# Enable simple http proxy mode
enable_http_proxy: true

View File

@ -94,7 +94,6 @@ class TestProxyHttpCookie:
global sesh_key
sesh_key = sesh1
global sesh_key
sesh2 = self.session.cookies.get('__pywb_proxy_sesh', domain='.iana.org')
assert sesh_key == sesh2

View File

@ -108,7 +108,6 @@ class TestProxyHttpsCookie:
global sesh_key
sesh_key = sesh1
global sesh_key
sesh2 = self.session.cookies.get('__pywb_proxy_sesh', domain='.iana.org')
assert sesh_key == sesh2