mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
rewrite: refactor JS rewriters into separate mixins, allowing for
link-only, location-only, and link + location JS rewriters. The location-only rewriter is new. js_rewrite_location options: all, location, urls (for now)
This commit is contained in:
parent
b951b304b6
commit
0a6838ac2b
@ -35,7 +35,7 @@ class RegexRewriter(object):
|
|||||||
|
|
||||||
#DEFAULT_OP = add_prefix
|
#DEFAULT_OP = add_prefix
|
||||||
|
|
||||||
def __init__(self, rules):
|
def __init__(self, rewriter, rules):
|
||||||
#rules = self.create_rules(http_prefix)
|
#rules = self.create_rules(http_prefix)
|
||||||
|
|
||||||
# Build regexstr, concatenating regex list
|
# Build regexstr, concatenating regex list
|
||||||
@ -106,7 +106,7 @@ class RegexRewriter(object):
|
|||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class JSLinkOnlyRewriter(RegexRewriter):
|
class JSLinkRewriterMixin(object):
|
||||||
"""
|
"""
|
||||||
JS Rewriter which rewrites absolute http://, https:// and // urls
|
JS Rewriter which rewrites absolute http://, https:// and // urls
|
||||||
at the beginning of a string
|
at the beginning of a string
|
||||||
@ -118,13 +118,14 @@ class JSLinkOnlyRewriter(RegexRewriter):
|
|||||||
rules = rules + [
|
rules = rules + [
|
||||||
(self.JS_HTTPX, RegexRewriter.archival_rewrite(rewriter), 0)
|
(self.JS_HTTPX, RegexRewriter.archival_rewrite(rewriter), 0)
|
||||||
]
|
]
|
||||||
super(JSLinkOnlyRewriter, self).__init__(rules)
|
super(JSLinkRewriterMixin, self).__init__(rewriter, rules)
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class JSLinkAndLocationRewriter(JSLinkOnlyRewriter):
|
class JSLocationRewriterMixin(object):
|
||||||
|
#class JSLinkAndLocationRewriter(JSLinkOnlyRewriter):
|
||||||
"""
|
"""
|
||||||
JS Rewriter which also rewrites location and domain to the
|
JS Rewriter mixin which rewrites location and domain to the
|
||||||
specified prefix (default: 'WB_wombat_')
|
specified prefix (default: 'WB_wombat_')
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@ -148,7 +149,23 @@ class JSLinkAndLocationRewriter(JSLinkOnlyRewriter):
|
|||||||
#(r'\b(?:self|window)\b[!=\W]+\b(top)\b',
|
#(r'\b(?:self|window)\b[!=\W]+\b(top)\b',
|
||||||
#RegexRewriter.add_prefix(prefix), 1),
|
#RegexRewriter.add_prefix(prefix), 1),
|
||||||
]
|
]
|
||||||
super(JSLinkAndLocationRewriter, self).__init__(rewriter, rules)
|
super(JSLocationRewriterMixin, self).__init__(rewriter, rules)
|
||||||
|
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
class JSLocationOnlyRewriter(JSLocationRewriterMixin, RegexRewriter):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
class JSLinkOnlyRewriter(JSLinkRewriterMixin, RegexRewriter):
|
||||||
|
pass
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
class JSLinkAndLocationRewriter(JSLocationRewriterMixin,
|
||||||
|
JSLinkRewriterMixin,
|
||||||
|
RegexRewriter):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
@ -161,7 +178,7 @@ class XMLRewriter(RegexRewriter):
|
|||||||
def __init__(self, rewriter, extra=[]):
|
def __init__(self, rewriter, extra=[]):
|
||||||
rules = self._create_rules(rewriter)
|
rules = self._create_rules(rewriter)
|
||||||
|
|
||||||
super(XMLRewriter, self).__init__(rules)
|
super(XMLRewriter, self).__init__(rewriter, rules)
|
||||||
|
|
||||||
# custom filter to reject 'xmlns' attr
|
# custom filter to reject 'xmlns' attr
|
||||||
def filter(self, m):
|
def filter(self, m):
|
||||||
@ -189,7 +206,7 @@ class CSSRewriter(RegexRewriter):
|
|||||||
|
|
||||||
def __init__(self, rewriter):
|
def __init__(self, rewriter):
|
||||||
rules = self._create_rules(rewriter)
|
rules = self._create_rules(rewriter)
|
||||||
super(CSSRewriter, self).__init__(rules)
|
super(CSSRewriter, self).__init__(rewriter, rules)
|
||||||
|
|
||||||
def _create_rules(self, rewriter):
|
def _create_rules(self, rewriter):
|
||||||
return [
|
return [
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
from pywb.utils.dsrules import BaseRule
|
from pywb.utils.dsrules import BaseRule
|
||||||
|
|
||||||
from regex_rewriters import RegexRewriter, CSSRewriter, XMLRewriter
|
from regex_rewriters import RegexRewriter, CSSRewriter, XMLRewriter
|
||||||
from regex_rewriters import JSLinkAndLocationRewriter, JSLinkOnlyRewriter
|
from regex_rewriters import JSLinkAndLocationRewriter, JSLinkOnlyRewriter, JSLocationOnlyRewriter
|
||||||
|
|
||||||
from header_rewriter import HeaderRewriter
|
from header_rewriter import HeaderRewriter
|
||||||
from html_rewriter import HTMLRewriter
|
from html_rewriter import HTMLRewriter
|
||||||
@ -27,12 +27,13 @@ class RewriteRules(BaseRule):
|
|||||||
self.parse_comments = config.get('parse_comments', False)
|
self.parse_comments = config.get('parse_comments', False)
|
||||||
|
|
||||||
# Custom handling for js rewriting, often the most complex
|
# Custom handling for js rewriting, often the most complex
|
||||||
self.js_rewrite_location = config.get('js_rewrite_location', True)
|
self.js_rewrite_location = config.get('js_rewrite_location', 'all')
|
||||||
self.js_rewrite_location = bool(self.js_rewrite_location)
|
|
||||||
|
|
||||||
# ability to toggle rewriting
|
# ability to toggle rewriting
|
||||||
if self.js_rewrite_location:
|
if self.js_rewrite_location == 'all':
|
||||||
js_default_class = JSLinkAndLocationRewriter
|
js_default_class = JSLinkAndLocationRewriter
|
||||||
|
elif self.js_rewrite_location == 'location':
|
||||||
|
js_default_class = JSLocationOnlyRewriter
|
||||||
else:
|
else:
|
||||||
js_default_class = JSLinkOnlyRewriter
|
js_default_class = JSLinkOnlyRewriter
|
||||||
|
|
||||||
|
@ -3,7 +3,7 @@ r"""
|
|||||||
# Custom Regex
|
# Custom Regex
|
||||||
#=================================================================
|
#=================================================================
|
||||||
# Test https->http converter (other tests below in subclasses)
|
# Test https->http converter (other tests below in subclasses)
|
||||||
>>> RegexRewriter([(RegexRewriter.HTTPX_MATCH_STR, RegexRewriter.remove_https, 0)]).rewrite('a = https://example.com; b = http://example.com; c = https://some-url/path/https://embedded.example.com')
|
>>> RegexRewriter(urlrewriter, [(RegexRewriter.HTTPX_MATCH_STR, RegexRewriter.remove_https, 0)]).rewrite('a = https://example.com; b = http://example.com; c = https://some-url/path/https://embedded.example.com')
|
||||||
'a = http://example.com; b = http://example.com; c = http://some-url/path/http://embedded.example.com'
|
'a = http://example.com; b = http://example.com; c = http://some-url/path/http://embedded.example.com'
|
||||||
|
|
||||||
|
|
||||||
|
@ -13,7 +13,7 @@ urlrewriter = UrlRewriter('20131226101010/http://example.com/some/path/index.htm
|
|||||||
bn_urlrewriter = UrlRewriter('20131226101010bn_/http://example.com/some/path/index.html', '/pywb/')
|
bn_urlrewriter = UrlRewriter('20131226101010bn_/http://example.com/some/path/index.html', '/pywb/')
|
||||||
|
|
||||||
def head_insert_func(rule, cdx):
|
def head_insert_func(rule, cdx):
|
||||||
if rule.js_rewrite_location == True:
|
if rule.js_rewrite_location != 'urls':
|
||||||
return '<script src="/static/default/wombat.js"> </script>'
|
return '<script src="/static/default/wombat.js"> </script>'
|
||||||
else:
|
else:
|
||||||
return ''
|
return ''
|
||||||
@ -26,10 +26,10 @@ def test_local_1():
|
|||||||
'com,example,test)/')
|
'com,example,test)/')
|
||||||
|
|
||||||
# wombat insert added
|
# wombat insert added
|
||||||
assert '<head><script src="/static/default/wombat.js"> </script>' in buff
|
assert '<head><script src="/static/default/wombat.js"> </script>' in buff, buff
|
||||||
|
|
||||||
# location rewritten
|
# JS location and JS link rewritten
|
||||||
assert 'window.WB_wombat_location = "/other.html"' in buff
|
assert 'window.WB_wombat_location = "/pywb/20131226101010/http:\/\/example.com/dynamic_page.html"' in buff
|
||||||
|
|
||||||
# link rewritten
|
# link rewritten
|
||||||
assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff
|
assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff
|
||||||
@ -65,7 +65,7 @@ def test_local_no_head_banner_only():
|
|||||||
# link NOT rewritten
|
# link NOT rewritten
|
||||||
assert '"another.html"' in buff
|
assert '"another.html"' in buff
|
||||||
|
|
||||||
def test_local_banner_only():
|
def test_local_banner_only_no_rewrite():
|
||||||
status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample.html',
|
status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample.html',
|
||||||
bn_urlrewriter,
|
bn_urlrewriter,
|
||||||
head_insert_func,
|
head_insert_func,
|
||||||
@ -74,13 +74,13 @@ def test_local_banner_only():
|
|||||||
# wombat insert added
|
# wombat insert added
|
||||||
assert '<head><script src="/static/default/wombat.js"> </script>' in buff
|
assert '<head><script src="/static/default/wombat.js"> </script>' in buff
|
||||||
|
|
||||||
# location NOT rewritten
|
# JS location NOT rewritten, JS link NOT rewritten
|
||||||
assert 'window.location = "/other.html"' in buff
|
assert 'window.location = "http:\/\/example.com/dynamic_page.html"' in buff, buff
|
||||||
|
|
||||||
# link NOT rewritten
|
# link NOT rewritten
|
||||||
assert '"another.html"' in buff
|
assert '"another.html"' in buff
|
||||||
|
|
||||||
def test_local_2_no_js_location_rewrite():
|
def test_local_2_link_only_rewrite():
|
||||||
status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample.html',
|
status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample.html',
|
||||||
urlrewriter,
|
urlrewriter,
|
||||||
head_insert_func,
|
head_insert_func,
|
||||||
@ -89,13 +89,28 @@ def test_local_2_no_js_location_rewrite():
|
|||||||
# no wombat insert
|
# no wombat insert
|
||||||
assert '<head><script src="/static/default/wombat.js"> </script>' not in buff
|
assert '<head><script src="/static/default/wombat.js"> </script>' not in buff
|
||||||
|
|
||||||
# no location rewrite
|
# JS location NOT rewritten, JS link rewritten
|
||||||
assert 'window.location = "/other.html"' in buff
|
assert 'window.location = "/pywb/20131226101010/http:\/\/example.com/dynamic_page.html"' in buff
|
||||||
|
|
||||||
# still link rewrite
|
# still link rewrite
|
||||||
assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff
|
assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff
|
||||||
|
|
||||||
|
|
||||||
|
def test_local_2_js_loc_only_rewrite():
|
||||||
|
status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample.html',
|
||||||
|
urlrewriter,
|
||||||
|
head_insert_func,
|
||||||
|
'example,example,test,loconly)/')
|
||||||
|
|
||||||
|
# wombat insert added
|
||||||
|
assert '<script src="/static/default/wombat.js"> </script>' in buff
|
||||||
|
|
||||||
|
# JS location rewritten, JS link NOT rewritten
|
||||||
|
assert 'window.WB_wombat_location = "http:\/\/example.com/dynamic_page.html"' in buff
|
||||||
|
|
||||||
|
# still link rewrite in HTML
|
||||||
|
assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff
|
||||||
|
|
||||||
def test_example_1():
|
def test_example_1():
|
||||||
status_headers, buff = get_rewritten('http://example.com/', urlrewriter, req_headers={'Connection': 'close'})
|
status_headers, buff = get_rewritten('http://example.com/', urlrewriter, req_headers={'Connection': 'close'})
|
||||||
|
|
||||||
|
@ -120,6 +120,11 @@ rules:
|
|||||||
# testing rules -- not for valid domain
|
# testing rules -- not for valid domain
|
||||||
#=================================================================
|
#=================================================================
|
||||||
# this rule block is a non-existent prefix merely for testing
|
# this rule block is a non-existent prefix merely for testing
|
||||||
|
- url_prefix: 'example,example,test,loconly)/'
|
||||||
|
|
||||||
|
rewrite:
|
||||||
|
js_rewrite_location: location
|
||||||
|
|
||||||
- url_prefix: 'example,example,test)/'
|
- url_prefix: 'example,example,test)/'
|
||||||
|
|
||||||
canonicalize:
|
canonicalize:
|
||||||
@ -131,10 +136,10 @@ rules:
|
|||||||
- id
|
- id
|
||||||
|
|
||||||
rewrite:
|
rewrite:
|
||||||
js_rewrite_location: False
|
js_rewrite_location: urls
|
||||||
|
|
||||||
|
|
||||||
# all domain rules -- fallback to this dataset
|
# all domain rules -- fallback to this dataset
|
||||||
#=================================================================
|
#=================================================================
|
||||||
# Applies to all urls -- should be last
|
# Applies to all urls -- should be last
|
||||||
- url_prefix: ''
|
- url_prefix: ''
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
<!-- WB Insert -->
|
<!-- WB Insert -->
|
||||||
{% if rule.js_rewrite_location and include_wombat %}
|
{% if rule.js_rewrite_location != 'urls' and include_wombat %}
|
||||||
<script src='{{ wbrequest.host_prefix }}/{{ static_path }}/wombat.js'> </script>
|
<script src='{{ wbrequest.host_prefix }}/{{ static_path }}/wombat.js'> </script>
|
||||||
<script>
|
<script>
|
||||||
{% set urlsplit = cdx.original | urlsplit %}
|
{% set urlsplit = cdx.original | urlsplit %}
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
<script>
|
<script>
|
||||||
var some_val = false;
|
var some_val = false;
|
||||||
if (some_val) {
|
if (some_val) {
|
||||||
window.location = "/other.html";
|
window.location = "http:\/\/example.com/dynamic_page.html";
|
||||||
}
|
}
|
||||||
</script>
|
</script>
|
||||||
Test Content
|
Test Content
|
||||||
|
Loading…
x
Reference in New Issue
Block a user