diff --git a/CHANGES.rst b/CHANGES.rst
index 0fe9ae07..6a4f8439 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -1,3 +1,20 @@
+pywb 0.6.6 changelist
+~~~~~~~~~~~~~~~~~~~~~
+
+* JS client side improvements: check for double-inits, preserve anchor in wb.js top location redirect
+
+* JS Rewriters: add mixins for link + location (default), link only, location only rewriting by setting ``js_rewrite_location`` to ``all``, ``urls``, ``location``, respectively.
+
+ (New: location only rewriting does not change JS urls)
+
+* Beginning of new rewrite options, settable per collection and stored in UrlRewriter. Available options:
+
+ - ``rewrite_base`` - set to False to disable rewriting ``<base href="...">`` tag
+ - ``rewrite_rel_canon`` - set to False to disable rewriting ``<link rel="canonical" href="...">``
+
+* JS rewrite: Don't rewrite location if starting with '$'
+
+
pywb 0.6.5 changelist
~~~~~~~~~~~~~~~~~~~~~
@@ -40,17 +57,17 @@ pywb 0.6.3 changelist
pywb 0.6.2 changelist
~~~~~~~~~~~~~~~~~~~~~
-* Invert framed replay paradigm: Canonical page is always without a modifier (instead of with `mp_`), if using frames, the page redirects to `tf_`, and uses replaceState() to change url back to canonical form.
+* Invert framed replay paradigm: Canonical page is always without a modifier (instead of with ``mp_``), if using frames, the page redirects to ``tf_``, and uses replaceState() to change url back to canonical form.
* Enable Memento support for framed replay, include Memento headers in top frame
-* Easier to customize just the banner html, via `banner_html` setting in the config. Default banner uses ui/banner.html and inserts the script default_banner.js, which creates the banner.
+* Easier to customize just the banner html, via ``banner_html`` setting in the config. Default banner uses ui/banner.html and inserts the script default_banner.js, which creates the banner.
- Other implementations may create banner via custom JS or directly insert HTML, as needed. Setting `banner_html: False` will disable the banner.
+ Other implementations may create banner via custom JS or directly insert HTML, as needed. Setting ``banner_html: False`` will disable the banner.
* Small improvements to streaming response, read in fixed chunks to allow better streaming from live.
-* Improved cookie and csrf-token rewriting, including: ability to set `cookie_scope: root` per collection to have all replayed cookies have their Path set to application root.
+* Improved cookie and csrf-token rewriting, including: ability to set ``cookie_scope: root`` per collection to have all replayed cookies have their Path set to application root.
This is useful for replaying sites which share cookies amongst different pages and across archived time ranges.
diff --git a/README.rst b/README.rst
index 5150c8f8..29d50bfb 100644
--- a/README.rst
+++ b/README.rst
@@ -1,4 +1,4 @@
-PyWb 0.6.5
+PyWb 0.6.6
==========
.. image:: https://travis-ci.org/ikreymer/pywb.png?branch=master
@@ -44,7 +44,7 @@ This README contains a basic overview of using pywb. After reading this intro, c
pywb Tools Overview
-----------------------------
-In addition to the standard wayback machine (explained further below), pywb tool suite includes a
+In addition to the standard wayback machine (explained further below), pywb tool suite includes a
number of useful command-line and web server tools. The tools should be available to run after
running ``python setup.py install``:
@@ -58,10 +58,10 @@ running ``python setup.py install``:
for all options.
-* ``cdx-server`` -- a CDX API only server which returns a responses about CDX captures in bulk.
+* ``cdx-server`` -- a CDX API only server which returns responses about CDX captures in bulk.
Includes most of the features of the `original cdx server implementation `_,
updated documentation coming soon.
-
+
* ``proxy-cert-auth`` -- a utility to support proxy mode. It can be used in CA root certificate, or per-host certificate with an existing root cert.
@@ -151,7 +151,7 @@ If you would like to use non-SURT ordered .cdx files, simply add this field to t
::
surt_ordered: false
-
+
UI Customization
"""""""""""""""""""""
diff --git a/pywb/framework/archivalrouter.py b/pywb/framework/archivalrouter.py
index 33230027..3b0b5a6d 100644
--- a/pywb/framework/archivalrouter.py
+++ b/pywb/framework/archivalrouter.py
@@ -62,7 +62,8 @@ class ArchivalRouter(object):
use_abs_prefix=use_abs_prefix,
wburl_class=route.handler.get_wburl_type(),
urlrewriter_class=UrlRewriter,
- cookie_scope=route.cookie_scope)
+ cookie_scope=route.cookie_scope,
+ rewrite_opts=route.rewrite_opts)
# Allow for applying of additional filters
route.apply_filters(wbrequest, matcher)
@@ -101,6 +102,7 @@ class Route(object):
# collection id from regex group (default 0)
self.coll_group = coll_group
self.cookie_scope = config.get('cookie_scope')
+ self.rewrite_opts = config.get('rewrite_opts', {})
self._custom_init(config)
def is_handling(self, request_uri):
diff --git a/pywb/framework/wbrequestresponse.py b/pywb/framework/wbrequestresponse.py
index 06970316..7c48dbb3 100644
--- a/pywb/framework/wbrequestresponse.py
+++ b/pywb/framework/wbrequestresponse.py
@@ -38,7 +38,8 @@ class WbRequest(object):
wburl_class=None,
urlrewriter_class=None,
is_proxy=False,
- cookie_scope=None):
+ cookie_scope=None,
+ rewrite_opts={}):
self.env = env
@@ -77,7 +78,8 @@ class WbRequest(object):
host_prefix + rel_prefix,
rel_prefix,
env.get('SCRIPT_NAME', '/'),
- cookie_scope)
+ cookie_scope,
+ rewrite_opts)
self.urlrewriter.deprefix_url()
else:
diff --git a/pywb/rewrite/html_rewriter.py b/pywb/rewrite/html_rewriter.py
index 618c5191..cae65a89 100644
--- a/pywb/rewrite/html_rewriter.py
+++ b/pywb/rewrite/html_rewriter.py
@@ -92,6 +92,9 @@ class HTMLRewriterMixin(object):
self.rewrite_tags = self._init_rewrite_tags(defmod)
+ # get opts from urlrewriter
+ self.opts = url_rewriter.rewrite_opts
+
# ===========================
META_REFRESH_REGEX = re.compile('^[\\d.]+\\s*;\\s*url\\s*=\\s*(.+?)\\s*$',
re.IGNORECASE | re.MULTILINE)
@@ -174,9 +177,11 @@ class HTMLRewriterMixin(object):
elif attr_name == 'crossorigin':
attr_name = '_crossorigin'
- # special case: link don't rewrite canonical
+ # special case: if rewrite_rel_canon is set to False,
+ # don't rewrite rel=canonical (default is to rewrite it)
elif tag == 'link' and attr_name == 'href':
- if not self.has_attr(tag_attrs, ('rel', 'canonical')):
+ if (self.opts.get('rewrite_rel_canon', True) or
+ not self.has_attr(tag_attrs, ('rel', 'canonical'))):
rw_mod = handler.get(attr_name)
attr_value = self._rewrite_url(attr_value, rw_mod)
@@ -191,17 +196,21 @@ class HTMLRewriterMixin(object):
rw_mod = 'oe_'
attr_value = self._rewrite_url(attr_value, rw_mod)
+ # special case: base tag
+ elif (tag == 'base') and (attr_name == 'href') and attr_value:
+ rw_mod = handler.get(attr_name)
+ base_value = self._rewrite_url(attr_value, rw_mod)
+ if self.opts.get('rewrite_base', True):
+ attr_value = base_value
+ self.url_rewriter = (self.url_rewriter.
+ rebase_rewriter(base_value))
+
else:
# rewrite url using tag handler
rw_mod = handler.get(attr_name)
if rw_mod is not None:
attr_value = self._rewrite_url(attr_value, rw_mod)
- # special case: base tag
- if (tag == 'base') and (attr_name == 'href') and attr_value:
- self.url_rewriter = (self.url_rewriter.
- rebase_rewriter(attr_value))
-
# write the attr!
self._write_attr(attr_name, attr_value)
diff --git a/pywb/rewrite/regex_rewriters.py b/pywb/rewrite/regex_rewriters.py
index 179e06fd..5d680068 100644
--- a/pywb/rewrite/regex_rewriters.py
+++ b/pywb/rewrite/regex_rewriters.py
@@ -35,7 +35,7 @@ class RegexRewriter(object):
#DEFAULT_OP = add_prefix
- def __init__(self, rules):
+ def __init__(self, rewriter, rules):
#rules = self.create_rules(http_prefix)
# Build regexstr, concatenating regex list
@@ -106,7 +106,7 @@ class RegexRewriter(object):
#=================================================================
-class JSLinkOnlyRewriter(RegexRewriter):
+class JSLinkRewriterMixin(object):
"""
JS Rewriter which rewrites absolute http://, https:// and // urls
at the beginning of a string
@@ -118,19 +118,20 @@ class JSLinkOnlyRewriter(RegexRewriter):
rules = rules + [
(self.JS_HTTPX, RegexRewriter.archival_rewrite(rewriter), 0)
]
- super(JSLinkOnlyRewriter, self).__init__(rules)
+ super(JSLinkRewriterMixin, self).__init__(rewriter, rules)
#=================================================================
-class JSLinkAndLocationRewriter(JSLinkOnlyRewriter):
+class JSLocationRewriterMixin(object):
+#class JSLinkAndLocationRewriter(JSLinkOnlyRewriter):
"""
- JS Rewriter which also rewrites location and domain to the
+ JS Rewriter mixin which rewrites location and domain to the
specified prefix (default: 'WB_wombat_')
"""
def __init__(self, rewriter, rules=[], prefix='WB_wombat_'):
rules = rules + [
- (r'(?>> parse('')
#
-# Base Tests
+# Base Tests -- w/ rewrite (default)
>>> parse('')
>>> parse('')
+# Base Tests -- no rewrite
+>>> parse('', urlrewriter=no_base_canon_rewriter)
+
+
+>>> parse('', urlrewriter=no_base_canon_rewriter)
+
+
+
+
# HTML Entities
>>> parse('› > ?')
› > ?
@@ -102,8 +111,12 @@ ur"""
>>> parse('