mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
rewrite: refactor JS rewriters into separate mixins, allowing for
link-only, location-only, and link + location JS rewriters. The location-only rewriter is new. js_rewrite_location options: all, location, urls (for now)
This commit is contained in:
parent
0495423e86
commit
5a11714b41
@ -35,7 +35,7 @@ class RegexRewriter(object):
|
||||
|
||||
#DEFAULT_OP = add_prefix
|
||||
|
||||
def __init__(self, rules):
|
||||
def __init__(self, rewriter, rules):
|
||||
#rules = self.create_rules(http_prefix)
|
||||
|
||||
# Build regexstr, concatenating regex list
|
||||
@ -106,7 +106,7 @@ class RegexRewriter(object):
|
||||
|
||||
|
||||
#=================================================================
|
||||
class JSLinkOnlyRewriter(RegexRewriter):
|
||||
class JSLinkRewriterMixin(object):
|
||||
"""
|
||||
JS Rewriter which rewrites absolute http://, https:// and // urls
|
||||
at the beginning of a string
|
||||
@ -118,13 +118,14 @@ class JSLinkOnlyRewriter(RegexRewriter):
|
||||
rules = rules + [
|
||||
(self.JS_HTTPX, RegexRewriter.archival_rewrite(rewriter), 0)
|
||||
]
|
||||
super(JSLinkOnlyRewriter, self).__init__(rules)
|
||||
super(JSLinkRewriterMixin, self).__init__(rewriter, rules)
|
||||
|
||||
|
||||
#=================================================================
|
||||
class JSLinkAndLocationRewriter(JSLinkOnlyRewriter):
|
||||
class JSLocationRewriterMixin(object):
|
||||
#class JSLinkAndLocationRewriter(JSLinkOnlyRewriter):
|
||||
"""
|
||||
JS Rewriter which also rewrites location and domain to the
|
||||
JS Rewriter mixin which rewrites location and domain to the
|
||||
specified prefix (default: 'WB_wombat_')
|
||||
"""
|
||||
|
||||
@ -148,7 +149,23 @@ class JSLinkAndLocationRewriter(JSLinkOnlyRewriter):
|
||||
#(r'\b(?:self|window)\b[!=\W]+\b(top)\b',
|
||||
#RegexRewriter.add_prefix(prefix), 1),
|
||||
]
|
||||
super(JSLinkAndLocationRewriter, self).__init__(rewriter, rules)
|
||||
super(JSLocationRewriterMixin, self).__init__(rewriter, rules)
|
||||
|
||||
|
||||
#=================================================================
|
||||
class JSLocationOnlyRewriter(JSLocationRewriterMixin, RegexRewriter):
|
||||
pass
|
||||
|
||||
|
||||
#=================================================================
|
||||
class JSLinkOnlyRewriter(JSLinkRewriterMixin, RegexRewriter):
|
||||
pass
|
||||
|
||||
#=================================================================
|
||||
class JSLinkAndLocationRewriter(JSLocationRewriterMixin,
|
||||
JSLinkRewriterMixin,
|
||||
RegexRewriter):
|
||||
pass
|
||||
|
||||
|
||||
#=================================================================
|
||||
@ -161,7 +178,7 @@ class XMLRewriter(RegexRewriter):
|
||||
def __init__(self, rewriter, extra=[]):
|
||||
rules = self._create_rules(rewriter)
|
||||
|
||||
super(XMLRewriter, self).__init__(rules)
|
||||
super(XMLRewriter, self).__init__(rewriter, rules)
|
||||
|
||||
# custom filter to reject 'xmlns' attr
|
||||
def filter(self, m):
|
||||
@ -189,7 +206,7 @@ class CSSRewriter(RegexRewriter):
|
||||
|
||||
def __init__(self, rewriter):
|
||||
rules = self._create_rules(rewriter)
|
||||
super(CSSRewriter, self).__init__(rules)
|
||||
super(CSSRewriter, self).__init__(rewriter, rules)
|
||||
|
||||
def _create_rules(self, rewriter):
|
||||
return [
|
||||
|
@ -1,7 +1,7 @@
|
||||
from pywb.utils.dsrules import BaseRule
|
||||
|
||||
from regex_rewriters import RegexRewriter, CSSRewriter, XMLRewriter
|
||||
from regex_rewriters import JSLinkAndLocationRewriter, JSLinkOnlyRewriter
|
||||
from regex_rewriters import JSLinkAndLocationRewriter, JSLinkOnlyRewriter, JSLocationOnlyRewriter
|
||||
|
||||
from header_rewriter import HeaderRewriter
|
||||
from html_rewriter import HTMLRewriter
|
||||
@ -27,12 +27,13 @@ class RewriteRules(BaseRule):
|
||||
self.parse_comments = config.get('parse_comments', False)
|
||||
|
||||
# Custom handling for js rewriting, often the most complex
|
||||
self.js_rewrite_location = config.get('js_rewrite_location', True)
|
||||
self.js_rewrite_location = bool(self.js_rewrite_location)
|
||||
self.js_rewrite_location = config.get('js_rewrite_location', 'all')
|
||||
|
||||
# ability to toggle rewriting
|
||||
if self.js_rewrite_location:
|
||||
if self.js_rewrite_location == 'all':
|
||||
js_default_class = JSLinkAndLocationRewriter
|
||||
elif self.js_rewrite_location == 'location':
|
||||
js_default_class = JSLocationOnlyRewriter
|
||||
else:
|
||||
js_default_class = JSLinkOnlyRewriter
|
||||
|
||||
|
@ -3,7 +3,7 @@ r"""
|
||||
# Custom Regex
|
||||
#=================================================================
|
||||
# Test https->http converter (other tests below in subclasses)
|
||||
>>> RegexRewriter([(RegexRewriter.HTTPX_MATCH_STR, RegexRewriter.remove_https, 0)]).rewrite('a = https://example.com; b = http://example.com; c = https://some-url/path/https://embedded.example.com')
|
||||
>>> RegexRewriter(urlrewriter, [(RegexRewriter.HTTPX_MATCH_STR, RegexRewriter.remove_https, 0)]).rewrite('a = https://example.com; b = http://example.com; c = https://some-url/path/https://embedded.example.com')
|
||||
'a = http://example.com; b = http://example.com; c = http://some-url/path/http://embedded.example.com'
|
||||
|
||||
|
||||
|
@ -13,7 +13,7 @@ urlrewriter = UrlRewriter('20131226101010/http://example.com/some/path/index.htm
|
||||
bn_urlrewriter = UrlRewriter('20131226101010bn_/http://example.com/some/path/index.html', '/pywb/')
|
||||
|
||||
def head_insert_func(rule, cdx):
|
||||
if rule.js_rewrite_location == True:
|
||||
if rule.js_rewrite_location != 'urls':
|
||||
return '<script src="/static/default/wombat.js"> </script>'
|
||||
else:
|
||||
return ''
|
||||
@ -26,10 +26,10 @@ def test_local_1():
|
||||
'com,example,test)/')
|
||||
|
||||
# wombat insert added
|
||||
assert '<head><script src="/static/default/wombat.js"> </script>' in buff
|
||||
assert '<head><script src="/static/default/wombat.js"> </script>' in buff, buff
|
||||
|
||||
# location rewritten
|
||||
assert 'window.WB_wombat_location = "/other.html"' in buff
|
||||
# JS location and JS link rewritten
|
||||
assert 'window.WB_wombat_location = "/pywb/20131226101010/http:\/\/example.com/dynamic_page.html"' in buff
|
||||
|
||||
# link rewritten
|
||||
assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff
|
||||
@ -65,7 +65,7 @@ def test_local_no_head_banner_only():
|
||||
# link NOT rewritten
|
||||
assert '"another.html"' in buff
|
||||
|
||||
def test_local_banner_only():
|
||||
def test_local_banner_only_no_rewrite():
|
||||
status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample.html',
|
||||
bn_urlrewriter,
|
||||
head_insert_func,
|
||||
@ -74,13 +74,13 @@ def test_local_banner_only():
|
||||
# wombat insert added
|
||||
assert '<head><script src="/static/default/wombat.js"> </script>' in buff
|
||||
|
||||
# location NOT rewritten
|
||||
assert 'window.location = "/other.html"' in buff
|
||||
# JS location NOT rewritten, JS link NOT rewritten
|
||||
assert 'window.location = "http:\/\/example.com/dynamic_page.html"' in buff, buff
|
||||
|
||||
# link NOT rewritten
|
||||
assert '"another.html"' in buff
|
||||
|
||||
def test_local_2_no_js_location_rewrite():
|
||||
def test_local_2_link_only_rewrite():
|
||||
status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample.html',
|
||||
urlrewriter,
|
||||
head_insert_func,
|
||||
@ -89,13 +89,28 @@ def test_local_2_no_js_location_rewrite():
|
||||
# no wombat insert
|
||||
assert '<head><script src="/static/default/wombat.js"> </script>' not in buff
|
||||
|
||||
# no location rewrite
|
||||
assert 'window.location = "/other.html"' in buff
|
||||
# JS location NOT rewritten, JS link rewritten
|
||||
assert 'window.location = "/pywb/20131226101010/http:\/\/example.com/dynamic_page.html"' in buff
|
||||
|
||||
# still link rewrite
|
||||
assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff
|
||||
|
||||
|
||||
def test_local_2_js_loc_only_rewrite():
|
||||
status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample.html',
|
||||
urlrewriter,
|
||||
head_insert_func,
|
||||
'example,example,test,loconly)/')
|
||||
|
||||
# wombat insert added
|
||||
assert '<script src="/static/default/wombat.js"> </script>' in buff
|
||||
|
||||
# JS location rewritten, JS link NOT rewritten
|
||||
assert 'window.WB_wombat_location = "http:\/\/example.com/dynamic_page.html"' in buff
|
||||
|
||||
# still link rewrite in HTML
|
||||
assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff
|
||||
|
||||
def test_example_1():
|
||||
status_headers, buff = get_rewritten('http://example.com/', urlrewriter, req_headers={'Connection': 'close'})
|
||||
|
||||
|
@ -120,6 +120,11 @@ rules:
|
||||
# testing rules -- not for valid domain
|
||||
#=================================================================
|
||||
# this rule block is a non-existent prefix merely for testing
|
||||
- url_prefix: 'example,example,test,loconly)/'
|
||||
|
||||
rewrite:
|
||||
js_rewrite_location: location
|
||||
|
||||
- url_prefix: 'example,example,test)/'
|
||||
|
||||
canonicalize:
|
||||
@ -131,10 +136,10 @@ rules:
|
||||
- id
|
||||
|
||||
rewrite:
|
||||
js_rewrite_location: False
|
||||
js_rewrite_location: urls
|
||||
|
||||
|
||||
# all domain rules -- fallback to this dataset
|
||||
# all domain rules -- fallback to this dataset
|
||||
#=================================================================
|
||||
# Applies to all urls -- should be last
|
||||
- url_prefix: ''
|
||||
|
@ -1,5 +1,5 @@
|
||||
<!-- WB Insert -->
|
||||
{% if rule.js_rewrite_location and include_wombat %}
|
||||
{% if rule.js_rewrite_location != 'urls' and include_wombat %}
|
||||
<script src='{{ wbrequest.host_prefix }}/{{ static_path }}/wombat.js'> </script>
|
||||
<script>
|
||||
{% set urlsplit = cdx.original | urlsplit %}
|
||||
|
@ -6,7 +6,7 @@
|
||||
<script>
|
||||
var some_val = false;
|
||||
if (some_val) {
|
||||
window.location = "/other.html";
|
||||
window.location = "http:\/\/example.com/dynamic_page.html";
|
||||
}
|
||||
</script>
|
||||
Test Content
|
||||
|
Loading…
x
Reference in New Issue
Block a user