mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
Merge branch 'develop'
This commit is contained in:
commit
bee70260ac
25
CHANGES.rst
25
CHANGES.rst
@ -1,3 +1,20 @@
|
|||||||
|
pywb 0.6.6 changelist
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
* JS client side improvements: check for double-inits, preserve anchor in wb.js top location redirect
|
||||||
|
|
||||||
|
* JS Rewriters: add mixins for link + location (default), link only, location only rewriting by setting ``js_rewrite_location`` to ``all``, ``urls``, ``location``, respectively.
|
||||||
|
|
||||||
|
(New: location only rewriting does not change JS urls)
|
||||||
|
|
||||||
|
* Beginning of new rewrite options, settable per collection and stored in UrlRewriter. Available options:
|
||||||
|
|
||||||
|
- ``rewrite_base`` - set to False to disable rewriting ``<base href="...">`` tag
|
||||||
|
- ``rewrite_rel_canon`` - set to False to disable rewriting ``<link rel=canonical href="...">``
|
||||||
|
|
||||||
|
* JS rewrite: Don't rewrite location if starting with '$'
|
||||||
|
|
||||||
|
|
||||||
pywb 0.6.5 changelist
|
pywb 0.6.5 changelist
|
||||||
~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
@ -40,17 +57,17 @@ pywb 0.6.3 changelist
|
|||||||
pywb 0.6.2 changelist
|
pywb 0.6.2 changelist
|
||||||
~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
* Invert framed replay paradigm: Canonical page is always without a modifier (instead of with `mp_`), if using frames, the page redirects to `tf_`, and uses replaceState() to change url back to canonical form.
|
* Invert framed replay paradigm: Canonical page is always without a modifier (instead of with ``mp_``), if using frames, the page redirects to ``tf_``, and uses replaceState() to change url back to canonical form.
|
||||||
|
|
||||||
* Enable Memento support for framed replay, include Memento headers in top frame
|
* Enable Memento support for framed replay, include Memento headers in top frame
|
||||||
|
|
||||||
* Easier to customize just the banner html, via `banner_html` setting in the config. Default banner uses ui/banner.html and inserts the script default_banner.js, which creates the banner.
|
* Easier to customize just the banner html, via ``banner_html`` setting in the config. Default banner uses ui/banner.html and inserts the script default_banner.js, which creates the banner.
|
||||||
|
|
||||||
Other implementations may create banner via custom JS or directly insert HTML, as needed. Setting `banner_html: False` will disable the banner.
|
Other implementations may create banner via custom JS or directly insert HTML, as needed. Setting ``banner_html: False`` will disable the banner.
|
||||||
|
|
||||||
* Small improvements to streaming response, read in fixed chunks to allow better streaming from live.
|
* Small improvements to streaming response, read in fixed chunks to allow better streaming from live.
|
||||||
|
|
||||||
* Improved cookie and csrf-token rewriting, including: ability to set `cookie_scope: root` per collection to have all replayed cookies have their Path set to application root.
|
* Improved cookie and csrf-token rewriting, including: ability to set ``cookie_scope: root`` per collection to have all replayed cookies have their Path set to application root.
|
||||||
|
|
||||||
This is useful for replaying sites which share cookies amongst different pages and across archived time ranges.
|
This is useful for replaying sites which share cookies amongst different pages and across archived time ranges.
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
PyWb 0.6.5
|
PyWb 0.6.6
|
||||||
==========
|
==========
|
||||||
|
|
||||||
.. image:: https://travis-ci.org/ikreymer/pywb.png?branch=master
|
.. image:: https://travis-ci.org/ikreymer/pywb.png?branch=master
|
||||||
|
@ -62,7 +62,8 @@ class ArchivalRouter(object):
|
|||||||
use_abs_prefix=use_abs_prefix,
|
use_abs_prefix=use_abs_prefix,
|
||||||
wburl_class=route.handler.get_wburl_type(),
|
wburl_class=route.handler.get_wburl_type(),
|
||||||
urlrewriter_class=UrlRewriter,
|
urlrewriter_class=UrlRewriter,
|
||||||
cookie_scope=route.cookie_scope)
|
cookie_scope=route.cookie_scope,
|
||||||
|
rewrite_opts=route.rewrite_opts)
|
||||||
|
|
||||||
# Allow for applying of additional filters
|
# Allow for applying of additional filters
|
||||||
route.apply_filters(wbrequest, matcher)
|
route.apply_filters(wbrequest, matcher)
|
||||||
@ -101,6 +102,7 @@ class Route(object):
|
|||||||
# collection id from regex group (default 0)
|
# collection id from regex group (default 0)
|
||||||
self.coll_group = coll_group
|
self.coll_group = coll_group
|
||||||
self.cookie_scope = config.get('cookie_scope')
|
self.cookie_scope = config.get('cookie_scope')
|
||||||
|
self.rewrite_opts = config.get('rewrite_opts', {})
|
||||||
self._custom_init(config)
|
self._custom_init(config)
|
||||||
|
|
||||||
def is_handling(self, request_uri):
|
def is_handling(self, request_uri):
|
||||||
|
@ -38,7 +38,8 @@ class WbRequest(object):
|
|||||||
wburl_class=None,
|
wburl_class=None,
|
||||||
urlrewriter_class=None,
|
urlrewriter_class=None,
|
||||||
is_proxy=False,
|
is_proxy=False,
|
||||||
cookie_scope=None):
|
cookie_scope=None,
|
||||||
|
rewrite_opts={}):
|
||||||
|
|
||||||
self.env = env
|
self.env = env
|
||||||
|
|
||||||
@ -77,7 +78,8 @@ class WbRequest(object):
|
|||||||
host_prefix + rel_prefix,
|
host_prefix + rel_prefix,
|
||||||
rel_prefix,
|
rel_prefix,
|
||||||
env.get('SCRIPT_NAME', '/'),
|
env.get('SCRIPT_NAME', '/'),
|
||||||
cookie_scope)
|
cookie_scope,
|
||||||
|
rewrite_opts)
|
||||||
|
|
||||||
self.urlrewriter.deprefix_url()
|
self.urlrewriter.deprefix_url()
|
||||||
else:
|
else:
|
||||||
|
@ -92,6 +92,9 @@ class HTMLRewriterMixin(object):
|
|||||||
|
|
||||||
self.rewrite_tags = self._init_rewrite_tags(defmod)
|
self.rewrite_tags = self._init_rewrite_tags(defmod)
|
||||||
|
|
||||||
|
# get opts from urlrewriter
|
||||||
|
self.opts = url_rewriter.rewrite_opts
|
||||||
|
|
||||||
# ===========================
|
# ===========================
|
||||||
META_REFRESH_REGEX = re.compile('^[\\d.]+\\s*;\\s*url\\s*=\\s*(.+?)\\s*$',
|
META_REFRESH_REGEX = re.compile('^[\\d.]+\\s*;\\s*url\\s*=\\s*(.+?)\\s*$',
|
||||||
re.IGNORECASE | re.MULTILINE)
|
re.IGNORECASE | re.MULTILINE)
|
||||||
@ -174,9 +177,11 @@ class HTMLRewriterMixin(object):
|
|||||||
elif attr_name == 'crossorigin':
|
elif attr_name == 'crossorigin':
|
||||||
attr_name = '_crossorigin'
|
attr_name = '_crossorigin'
|
||||||
|
|
||||||
# special case: link don't rewrite canonical
|
# special case: if rewrite_rel_canon is set to False,
|
||||||
|
# don't rewrite rel=canonical
|
||||||
elif tag == 'link' and attr_name == 'href':
|
elif tag == 'link' and attr_name == 'href':
|
||||||
if not self.has_attr(tag_attrs, ('rel', 'canonical')):
|
if (self.opts.get('rewrite_rel_canon', True) or
|
||||||
|
not self.has_attr(tag_attrs, ('rel', 'canonical'))):
|
||||||
rw_mod = handler.get(attr_name)
|
rw_mod = handler.get(attr_name)
|
||||||
attr_value = self._rewrite_url(attr_value, rw_mod)
|
attr_value = self._rewrite_url(attr_value, rw_mod)
|
||||||
|
|
||||||
@ -191,17 +196,21 @@ class HTMLRewriterMixin(object):
|
|||||||
rw_mod = 'oe_'
|
rw_mod = 'oe_'
|
||||||
attr_value = self._rewrite_url(attr_value, rw_mod)
|
attr_value = self._rewrite_url(attr_value, rw_mod)
|
||||||
|
|
||||||
|
# special case: base tag
|
||||||
|
elif (tag == 'base') and (attr_name == 'href') and attr_value:
|
||||||
|
rw_mod = handler.get(attr_name)
|
||||||
|
base_value = self._rewrite_url(attr_value, rw_mod)
|
||||||
|
if self.opts.get('rewrite_base', True):
|
||||||
|
attr_value = base_value
|
||||||
|
self.url_rewriter = (self.url_rewriter.
|
||||||
|
rebase_rewriter(base_value))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# rewrite url using tag handler
|
# rewrite url using tag handler
|
||||||
rw_mod = handler.get(attr_name)
|
rw_mod = handler.get(attr_name)
|
||||||
if rw_mod is not None:
|
if rw_mod is not None:
|
||||||
attr_value = self._rewrite_url(attr_value, rw_mod)
|
attr_value = self._rewrite_url(attr_value, rw_mod)
|
||||||
|
|
||||||
# special case: base tag
|
|
||||||
if (tag == 'base') and (attr_name == 'href') and attr_value:
|
|
||||||
self.url_rewriter = (self.url_rewriter.
|
|
||||||
rebase_rewriter(attr_value))
|
|
||||||
|
|
||||||
# write the attr!
|
# write the attr!
|
||||||
self._write_attr(attr_name, attr_value)
|
self._write_attr(attr_name, attr_value)
|
||||||
|
|
||||||
|
@ -35,7 +35,7 @@ class RegexRewriter(object):
|
|||||||
|
|
||||||
#DEFAULT_OP = add_prefix
|
#DEFAULT_OP = add_prefix
|
||||||
|
|
||||||
def __init__(self, rules):
|
def __init__(self, rewriter, rules):
|
||||||
#rules = self.create_rules(http_prefix)
|
#rules = self.create_rules(http_prefix)
|
||||||
|
|
||||||
# Build regexstr, concatenating regex list
|
# Build regexstr, concatenating regex list
|
||||||
@ -106,7 +106,7 @@ class RegexRewriter(object):
|
|||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class JSLinkOnlyRewriter(RegexRewriter):
|
class JSLinkRewriterMixin(object):
|
||||||
"""
|
"""
|
||||||
JS Rewriter which rewrites absolute http://, https:// and // urls
|
JS Rewriter which rewrites absolute http://, https:// and // urls
|
||||||
at the beginning of a string
|
at the beginning of a string
|
||||||
@ -118,19 +118,20 @@ class JSLinkOnlyRewriter(RegexRewriter):
|
|||||||
rules = rules + [
|
rules = rules + [
|
||||||
(self.JS_HTTPX, RegexRewriter.archival_rewrite(rewriter), 0)
|
(self.JS_HTTPX, RegexRewriter.archival_rewrite(rewriter), 0)
|
||||||
]
|
]
|
||||||
super(JSLinkOnlyRewriter, self).__init__(rules)
|
super(JSLinkRewriterMixin, self).__init__(rewriter, rules)
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class JSLinkAndLocationRewriter(JSLinkOnlyRewriter):
|
class JSLocationRewriterMixin(object):
|
||||||
|
#class JSLinkAndLocationRewriter(JSLinkOnlyRewriter):
|
||||||
"""
|
"""
|
||||||
JS Rewriter which also rewrites location and domain to the
|
JS Rewriter mixin which rewrites location and domain to the
|
||||||
specified prefix (default: 'WB_wombat_')
|
specified prefix (default: 'WB_wombat_')
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, rewriter, rules=[], prefix='WB_wombat_'):
|
def __init__(self, rewriter, rules=[], prefix='WB_wombat_'):
|
||||||
rules = rules + [
|
rules = rules + [
|
||||||
(r'(?<!/)\blocation\b(?!\":)', RegexRewriter.add_prefix(prefix), 0),
|
(r'(?<![/$])\blocation\b(?!\":)', RegexRewriter.add_prefix(prefix), 0),
|
||||||
(r'(?<=document\.)domain', RegexRewriter.add_prefix(prefix), 0),
|
(r'(?<=document\.)domain', RegexRewriter.add_prefix(prefix), 0),
|
||||||
(r'(?<=document\.)referrer', RegexRewriter.add_prefix(prefix), 0),
|
(r'(?<=document\.)referrer', RegexRewriter.add_prefix(prefix), 0),
|
||||||
(r'(?<=document\.)cookie', RegexRewriter.add_prefix(prefix), 0),
|
(r'(?<=document\.)cookie', RegexRewriter.add_prefix(prefix), 0),
|
||||||
@ -148,7 +149,23 @@ class JSLinkAndLocationRewriter(JSLinkOnlyRewriter):
|
|||||||
#(r'\b(?:self|window)\b[!=\W]+\b(top)\b',
|
#(r'\b(?:self|window)\b[!=\W]+\b(top)\b',
|
||||||
#RegexRewriter.add_prefix(prefix), 1),
|
#RegexRewriter.add_prefix(prefix), 1),
|
||||||
]
|
]
|
||||||
super(JSLinkAndLocationRewriter, self).__init__(rewriter, rules)
|
super(JSLocationRewriterMixin, self).__init__(rewriter, rules)
|
||||||
|
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
class JSLocationOnlyRewriter(JSLocationRewriterMixin, RegexRewriter):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
class JSLinkOnlyRewriter(JSLinkRewriterMixin, RegexRewriter):
|
||||||
|
pass
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
class JSLinkAndLocationRewriter(JSLocationRewriterMixin,
|
||||||
|
JSLinkRewriterMixin,
|
||||||
|
RegexRewriter):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
@ -161,7 +178,7 @@ class XMLRewriter(RegexRewriter):
|
|||||||
def __init__(self, rewriter, extra=[]):
|
def __init__(self, rewriter, extra=[]):
|
||||||
rules = self._create_rules(rewriter)
|
rules = self._create_rules(rewriter)
|
||||||
|
|
||||||
super(XMLRewriter, self).__init__(rules)
|
super(XMLRewriter, self).__init__(rewriter, rules)
|
||||||
|
|
||||||
# custom filter to reject 'xmlns' attr
|
# custom filter to reject 'xmlns' attr
|
||||||
def filter(self, m):
|
def filter(self, m):
|
||||||
@ -189,7 +206,7 @@ class CSSRewriter(RegexRewriter):
|
|||||||
|
|
||||||
def __init__(self, rewriter):
|
def __init__(self, rewriter):
|
||||||
rules = self._create_rules(rewriter)
|
rules = self._create_rules(rewriter)
|
||||||
super(CSSRewriter, self).__init__(rules)
|
super(CSSRewriter, self).__init__(rewriter, rules)
|
||||||
|
|
||||||
def _create_rules(self, rewriter):
|
def _create_rules(self, rewriter):
|
||||||
return [
|
return [
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
from pywb.utils.dsrules import BaseRule
|
from pywb.utils.dsrules import BaseRule
|
||||||
|
|
||||||
from regex_rewriters import RegexRewriter, CSSRewriter, XMLRewriter
|
from regex_rewriters import RegexRewriter, CSSRewriter, XMLRewriter
|
||||||
from regex_rewriters import JSLinkAndLocationRewriter, JSLinkOnlyRewriter
|
from regex_rewriters import JSLinkAndLocationRewriter, JSLinkOnlyRewriter, JSLocationOnlyRewriter
|
||||||
|
|
||||||
from header_rewriter import HeaderRewriter
|
from header_rewriter import HeaderRewriter
|
||||||
from html_rewriter import HTMLRewriter
|
from html_rewriter import HTMLRewriter
|
||||||
@ -27,12 +27,13 @@ class RewriteRules(BaseRule):
|
|||||||
self.parse_comments = config.get('parse_comments', False)
|
self.parse_comments = config.get('parse_comments', False)
|
||||||
|
|
||||||
# Custom handling for js rewriting, often the most complex
|
# Custom handling for js rewriting, often the most complex
|
||||||
self.js_rewrite_location = config.get('js_rewrite_location', True)
|
self.js_rewrite_location = config.get('js_rewrite_location', 'all')
|
||||||
self.js_rewrite_location = bool(self.js_rewrite_location)
|
|
||||||
|
|
||||||
# ability to toggle rewriting
|
# ability to toggle rewriting
|
||||||
if self.js_rewrite_location:
|
if self.js_rewrite_location == 'all':
|
||||||
js_default_class = JSLinkAndLocationRewriter
|
js_default_class = JSLinkAndLocationRewriter
|
||||||
|
elif self.js_rewrite_location == 'location':
|
||||||
|
js_default_class = JSLocationOnlyRewriter
|
||||||
else:
|
else:
|
||||||
js_default_class = JSLinkOnlyRewriter
|
js_default_class = JSLinkOnlyRewriter
|
||||||
|
|
||||||
|
@ -20,13 +20,22 @@ ur"""
|
|||||||
#>>> parse('<input "selected"><img src></div>')
|
#>>> parse('<input "selected"><img src></div>')
|
||||||
#<input "selected"=""><img src=""></div>
|
#<input "selected"=""><img src=""></div>
|
||||||
|
|
||||||
# Base Tests
|
# Base Tests -- w/ rewrite (default)
|
||||||
>>> parse('<html><head><base href="http://example.com/diff/path/file.html"/>')
|
>>> parse('<html><head><base href="http://example.com/diff/path/file.html"/>')
|
||||||
<html><head><base href="/web/20131226101010/http://example.com/diff/path/file.html"/>
|
<html><head><base href="/web/20131226101010/http://example.com/diff/path/file.html"/>
|
||||||
|
|
||||||
>>> parse('<base href="static/"/><img src="image.gif"/>')
|
>>> parse('<base href="static/"/><img src="image.gif"/>')
|
||||||
<base href="/web/20131226101010/http://example.com/some/path/static/"/><img src="/web/20131226101010im_/http://example.com/some/path/static/image.gif"/>
|
<base href="/web/20131226101010/http://example.com/some/path/static/"/><img src="/web/20131226101010im_/http://example.com/some/path/static/image.gif"/>
|
||||||
|
|
||||||
|
# Base Tests -- no rewrite
|
||||||
|
>>> parse('<html><head><base href="http://example.com/diff/path/file.html"/>', urlrewriter=no_base_canon_rewriter)
|
||||||
|
<html><head><base href="http://example.com/diff/path/file.html"/>
|
||||||
|
|
||||||
|
>>> parse('<base href="static/"/><img src="image.gif"/>', urlrewriter=no_base_canon_rewriter)
|
||||||
|
<base href="static/"/><img src="/web/20131226101010im_/http://example.com/some/path/static/image.gif"/>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# HTML Entities
|
# HTML Entities
|
||||||
>>> parse('<a href="">› > ?</div>')
|
>>> parse('<a href="">› > ?</div>')
|
||||||
<a href="">› > ?</div>
|
<a href="">› > ?</div>
|
||||||
@ -102,8 +111,12 @@ ur"""
|
|||||||
>>> parse('<link href="abc.txt"><div>SomeTest</div>', head_insert = '<script>load_stuff();</script>')
|
>>> parse('<link href="abc.txt"><div>SomeTest</div>', head_insert = '<script>load_stuff();</script>')
|
||||||
<link href="/web/20131226101010oe_/http://example.com/some/path/abc.txt"><script>load_stuff();</script><div>SomeTest</div>
|
<link href="/web/20131226101010oe_/http://example.com/some/path/abc.txt"><script>load_stuff();</script><div>SomeTest</div>
|
||||||
|
|
||||||
# don't rewrite rel=canonical
|
# rel=canonical: rewrite (default)
|
||||||
>>> parse('<link rel=canonical href="http://example.com/">')
|
>>> parse('<link rel=canonical href="http://example.com/">')
|
||||||
|
<link rel="canonical" href="/web/20131226101010oe_/http://example.com/">
|
||||||
|
|
||||||
|
# rel=canonical: no_rewrite
|
||||||
|
>>> parse('<link rel=canonical href="http://example.com/">', urlrewriter=no_base_canon_rewriter)
|
||||||
<link rel="canonical" href="http://example.com/">
|
<link rel="canonical" href="http://example.com/">
|
||||||
|
|
||||||
# doctype
|
# doctype
|
||||||
@ -143,7 +156,12 @@ import pprint
|
|||||||
|
|
||||||
urlrewriter = UrlRewriter('20131226101010/http://example.com/some/path/index.html', '/web/')
|
urlrewriter = UrlRewriter('20131226101010/http://example.com/some/path/index.html', '/web/')
|
||||||
|
|
||||||
def parse(data, head_insert = None):
|
no_base_canon_rewriter = UrlRewriter('20131226101010/http://example.com/some/path/index.html',
|
||||||
|
'/web/',
|
||||||
|
rewrite_opts=dict(rewrite_rel_canon=False,
|
||||||
|
rewrite_base=False))
|
||||||
|
|
||||||
|
def parse(data, head_insert=None, urlrewriter=urlrewriter):
|
||||||
parser = HTMLRewriter(urlrewriter, head_insert = head_insert)
|
parser = HTMLRewriter(urlrewriter, head_insert = head_insert)
|
||||||
#data = data.decode('utf-8')
|
#data = data.decode('utf-8')
|
||||||
result = parser.rewrite(data) + parser.close()
|
result = parser.rewrite(data) + parser.close()
|
||||||
|
@ -3,7 +3,7 @@ r"""
|
|||||||
# Custom Regex
|
# Custom Regex
|
||||||
#=================================================================
|
#=================================================================
|
||||||
# Test https->http converter (other tests below in subclasses)
|
# Test https->http converter (other tests below in subclasses)
|
||||||
>>> RegexRewriter([(RegexRewriter.HTTPX_MATCH_STR, RegexRewriter.remove_https, 0)]).rewrite('a = https://example.com; b = http://example.com; c = https://some-url/path/https://embedded.example.com')
|
>>> RegexRewriter(urlrewriter, [(RegexRewriter.HTTPX_MATCH_STR, RegexRewriter.remove_https, 0)]).rewrite('a = https://example.com; b = http://example.com; c = https://some-url/path/https://embedded.example.com')
|
||||||
'a = http://example.com; b = http://example.com; c = http://some-url/path/http://embedded.example.com'
|
'a = http://example.com; b = http://example.com; c = http://some-url/path/http://embedded.example.com'
|
||||||
|
|
||||||
|
|
||||||
|
@ -13,7 +13,7 @@ urlrewriter = UrlRewriter('20131226101010/http://example.com/some/path/index.htm
|
|||||||
bn_urlrewriter = UrlRewriter('20131226101010bn_/http://example.com/some/path/index.html', '/pywb/')
|
bn_urlrewriter = UrlRewriter('20131226101010bn_/http://example.com/some/path/index.html', '/pywb/')
|
||||||
|
|
||||||
def head_insert_func(rule, cdx):
|
def head_insert_func(rule, cdx):
|
||||||
if rule.js_rewrite_location == True:
|
if rule.js_rewrite_location != 'urls':
|
||||||
return '<script src="/static/default/wombat.js"> </script>'
|
return '<script src="/static/default/wombat.js"> </script>'
|
||||||
else:
|
else:
|
||||||
return ''
|
return ''
|
||||||
@ -26,10 +26,10 @@ def test_local_1():
|
|||||||
'com,example,test)/')
|
'com,example,test)/')
|
||||||
|
|
||||||
# wombat insert added
|
# wombat insert added
|
||||||
assert '<head><script src="/static/default/wombat.js"> </script>' in buff
|
assert '<head><script src="/static/default/wombat.js"> </script>' in buff, buff
|
||||||
|
|
||||||
# location rewritten
|
# JS location and JS link rewritten
|
||||||
assert 'window.WB_wombat_location = "/other.html"' in buff
|
assert 'window.WB_wombat_location = "/pywb/20131226101010/http:\/\/example.com/dynamic_page.html"' in buff
|
||||||
|
|
||||||
# link rewritten
|
# link rewritten
|
||||||
assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff
|
assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff
|
||||||
@ -65,7 +65,7 @@ def test_local_no_head_banner_only():
|
|||||||
# link NOT rewritten
|
# link NOT rewritten
|
||||||
assert '"another.html"' in buff
|
assert '"another.html"' in buff
|
||||||
|
|
||||||
def test_local_banner_only():
|
def test_local_banner_only_no_rewrite():
|
||||||
status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample.html',
|
status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample.html',
|
||||||
bn_urlrewriter,
|
bn_urlrewriter,
|
||||||
head_insert_func,
|
head_insert_func,
|
||||||
@ -74,13 +74,13 @@ def test_local_banner_only():
|
|||||||
# wombat insert added
|
# wombat insert added
|
||||||
assert '<head><script src="/static/default/wombat.js"> </script>' in buff
|
assert '<head><script src="/static/default/wombat.js"> </script>' in buff
|
||||||
|
|
||||||
# location NOT rewritten
|
# JS location NOT rewritten, JS link NOT rewritten
|
||||||
assert 'window.location = "/other.html"' in buff
|
assert 'window.location = "http:\/\/example.com/dynamic_page.html"' in buff, buff
|
||||||
|
|
||||||
# link NOT rewritten
|
# link NOT rewritten
|
||||||
assert '"another.html"' in buff
|
assert '"another.html"' in buff
|
||||||
|
|
||||||
def test_local_2_no_js_location_rewrite():
|
def test_local_2_link_only_rewrite():
|
||||||
status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample.html',
|
status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample.html',
|
||||||
urlrewriter,
|
urlrewriter,
|
||||||
head_insert_func,
|
head_insert_func,
|
||||||
@ -89,13 +89,28 @@ def test_local_2_no_js_location_rewrite():
|
|||||||
# no wombat insert
|
# no wombat insert
|
||||||
assert '<head><script src="/static/default/wombat.js"> </script>' not in buff
|
assert '<head><script src="/static/default/wombat.js"> </script>' not in buff
|
||||||
|
|
||||||
# no location rewrite
|
# JS location NOT rewritten, JS link rewritten
|
||||||
assert 'window.location = "/other.html"' in buff
|
assert 'window.location = "/pywb/20131226101010/http:\/\/example.com/dynamic_page.html"' in buff
|
||||||
|
|
||||||
# still link rewrite
|
# still link rewrite
|
||||||
assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff
|
assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff
|
||||||
|
|
||||||
|
|
||||||
|
def test_local_2_js_loc_only_rewrite():
|
||||||
|
status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample.html',
|
||||||
|
urlrewriter,
|
||||||
|
head_insert_func,
|
||||||
|
'example,example,test,loconly)/')
|
||||||
|
|
||||||
|
# wombat insert added
|
||||||
|
assert '<script src="/static/default/wombat.js"> </script>' in buff
|
||||||
|
|
||||||
|
# JS location rewritten, JS link NOT rewritten
|
||||||
|
assert 'window.WB_wombat_location = "http:\/\/example.com/dynamic_page.html"' in buff
|
||||||
|
|
||||||
|
# still link rewrite in HTML
|
||||||
|
assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff
|
||||||
|
|
||||||
def test_example_1():
|
def test_example_1():
|
||||||
status_headers, buff = get_rewritten('http://example.com/', urlrewriter, req_headers={'Connection': 'close'})
|
status_headers, buff = get_rewritten('http://example.com/', urlrewriter, req_headers={'Connection': 'close'})
|
||||||
|
|
||||||
|
@ -20,13 +20,14 @@ class UrlRewriter(object):
|
|||||||
REL_SCHEME = ('//', r'\/\/', r'\\/\\/')
|
REL_SCHEME = ('//', r'\/\/', r'\\/\\/')
|
||||||
|
|
||||||
def __init__(self, wburl, prefix, full_prefix=None, rel_prefix=None,
|
def __init__(self, wburl, prefix, full_prefix=None, rel_prefix=None,
|
||||||
root_path=None, cookie_scope=None):
|
root_path=None, cookie_scope=None, rewrite_opts={}):
|
||||||
self.wburl = wburl if isinstance(wburl, WbUrl) else WbUrl(wburl)
|
self.wburl = wburl if isinstance(wburl, WbUrl) else WbUrl(wburl)
|
||||||
self.prefix = prefix
|
self.prefix = prefix
|
||||||
self.full_prefix = full_prefix
|
self.full_prefix = full_prefix
|
||||||
self.rel_prefix = rel_prefix if rel_prefix else prefix
|
self.rel_prefix = rel_prefix if rel_prefix else prefix
|
||||||
self.root_path = root_path if root_path else '/'
|
self.root_path = root_path if root_path else '/'
|
||||||
self.cookie_scope = cookie_scope
|
self.cookie_scope = cookie_scope
|
||||||
|
self.rewrite_opts = rewrite_opts
|
||||||
|
|
||||||
def rewrite(self, url, mod=None):
|
def rewrite(self, url, mod=None):
|
||||||
# if special protocol, no rewriting at all
|
# if special protocol, no rewriting at all
|
||||||
|
@ -120,6 +120,11 @@ rules:
|
|||||||
# testing rules -- not for valid domain
|
# testing rules -- not for valid domain
|
||||||
#=================================================================
|
#=================================================================
|
||||||
# this rule block is a non-existent prefix merely for testing
|
# this rule block is a non-existent prefix merely for testing
|
||||||
|
- url_prefix: 'example,example,test,loconly)/'
|
||||||
|
|
||||||
|
rewrite:
|
||||||
|
js_rewrite_location: location
|
||||||
|
|
||||||
- url_prefix: 'example,example,test)/'
|
- url_prefix: 'example,example,test)/'
|
||||||
|
|
||||||
canonicalize:
|
canonicalize:
|
||||||
@ -131,7 +136,7 @@ rules:
|
|||||||
- id
|
- id
|
||||||
|
|
||||||
rewrite:
|
rewrite:
|
||||||
js_rewrite_location: False
|
js_rewrite_location: urls
|
||||||
|
|
||||||
|
|
||||||
# all domain rules -- fallback to this dataset
|
# all domain rules -- fallback to this dataset
|
||||||
|
@ -35,6 +35,10 @@ function init_banner() {
|
|||||||
bid = PLAIN_BANNER_ID;
|
bid = PLAIN_BANNER_ID;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!document || !document.body) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (document.getElementById(bid) != null) {
|
if (document.getElementById(bid) != null) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -117,10 +121,21 @@ function notify_top() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
this.load = function() {
|
this.load = function() {
|
||||||
|
if (window._wb_js_inited) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
window._wb_js_inited = true;
|
||||||
|
|
||||||
if ((window.self == window.top) && wbinfo) {
|
if ((window.self == window.top) && wbinfo) {
|
||||||
if (wbinfo.top_url && (window.location.href != wbinfo.top_url) && wbinfo.mod != "bn_") {
|
|
||||||
|
var hash = window.location.hash;
|
||||||
|
|
||||||
|
var loc = window.location.href.replace(window.location.hash, "");
|
||||||
|
|
||||||
|
if (wbinfo.top_url && (loc != wbinfo.top_url) && wbinfo.mod != "bn_") {
|
||||||
// Auto-redirect to top frame
|
// Auto-redirect to top frame
|
||||||
window.location.replace(wbinfo.top_url);
|
window.location.replace(wbinfo.top_url + hash);
|
||||||
} else {
|
} else {
|
||||||
// Init Banner (no frame or top frame)
|
// Init Banner (no frame or top frame)
|
||||||
add_event("readystatechange", init_banner, document);
|
add_event("readystatechange", init_banner, document);
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
<!-- WB Insert -->
|
<!-- WB Insert -->
|
||||||
{% if rule.js_rewrite_location and include_wombat %}
|
{% if rule.js_rewrite_location != 'urls' and include_wombat %}
|
||||||
<script src='{{ wbrequest.host_prefix }}/{{ static_path }}/wombat.js'> </script>
|
<script src='{{ wbrequest.host_prefix }}/{{ static_path }}/wombat.js'> </script>
|
||||||
<script>
|
<script>
|
||||||
{% set urlsplit = cdx.original | urlsplit %}
|
{% set urlsplit = cdx.original | urlsplit %}
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
<script>
|
<script>
|
||||||
var some_val = false;
|
var some_val = false;
|
||||||
if (some_val) {
|
if (some_val) {
|
||||||
window.location = "/other.html";
|
window.location = "http:\/\/example.com/dynamic_page.html";
|
||||||
}
|
}
|
||||||
</script>
|
</script>
|
||||||
Test Content
|
Test Content
|
||||||
|
2
setup.py
2
setup.py
@ -34,7 +34,7 @@ class PyTest(TestCommand):
|
|||||||
|
|
||||||
setup(
|
setup(
|
||||||
name='pywb',
|
name='pywb',
|
||||||
version='0.6.5',
|
version='0.6.6',
|
||||||
url='https://github.com/ikreymer/pywb',
|
url='https://github.com/ikreymer/pywb',
|
||||||
author='Ilya Kreymer',
|
author='Ilya Kreymer',
|
||||||
author_email='ikreymer@gmail.com',
|
author_email='ikreymer@gmail.com',
|
||||||
|
Loading…
x
Reference in New Issue
Block a user