From 6440e2503f3fcc4534922641dcf5b134a33bdc57 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Sat, 6 Dec 2014 15:22:57 -0800 Subject: [PATCH 1/8] bump version to 0.6.6 --- README.rst | 10 +++++----- setup.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.rst b/README.rst index 30e9979e..43f1bfde 100644 --- a/README.rst +++ b/README.rst @@ -1,4 +1,4 @@ -PyWb 0.6.5 +PyWb 0.6.6 ========== .. image:: https://travis-ci.org/ikreymer/pywb.png?branch=develop @@ -44,7 +44,7 @@ This README contains a basic overview of using pywb. After reading this intro, c pywb Tools Overview ----------------------------- -In addition to the standard wayback machine (explained further below), pywb tool suite includes a +In addition to the standard wayback machine (explained further below), pywb tool suite includes a number of useful command-line and web server tools. The tools should be available to run after running ``python setup.py install``: @@ -58,10 +58,10 @@ running ``python setup.py install``: for all options. -* ``cdx-server`` -- a CDX API only server which returns a responses about CDX captures in bulk. +* ``cdx-server`` -- a CDX API only server which returns a responses about CDX captures in bulk. Includes most of the features of the `original cdx server implementation `_, updated documentation coming soon. - + * ``proxy-cert-auth`` -- a utility to support proxy mode. It can be used in CA root certificate, or per-host certificate with an existing root cert. @@ -151,7 +151,7 @@ If you would like to use non-SURT ordered .cdx files, simply add this field to t :: surt_ordered: false - + UI Customization """"""""""""""""""""" diff --git a/setup.py b/setup.py index b5ef2d26..4a36b078 100755 --- a/setup.py +++ b/setup.py @@ -34,7 +34,7 @@ class PyTest(TestCommand): setup( name='pywb', - version='0.6.5', + version='0.6.6', url='https://github.com/ikreymer/pywb', author='Ilya Kreymer', author_email='ikreymer@gmail.com', From 0495423e86204f461bcb0336e4f6cde0a7cda61b Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Sat, 6 Dec 2014 17:16:35 -0800 Subject: [PATCH 2/8] rewrite: add per-collection rewrite options, settable in 'rewrite_opts' block in each collection. Added rewrite_base to disable rewriting tag and rewrite_rel_canon to disable rewriting link rel=canon. Disabling tag rewrite fixex #51 and new system addresses #50 as well. --- CHANGES.rst | 11 +++++++++++ pywb/framework/archivalrouter.py | 4 +++- pywb/framework/wbrequestresponse.py | 6 ++++-- pywb/rewrite/html_rewriter.py | 23 ++++++++++++++++------- pywb/rewrite/regex_rewriters.py | 2 +- pywb/rewrite/test/test_html_rewriter.py | 24 +++++++++++++++++++++--- pywb/rewrite/url_rewriter.py | 3 ++- 7 files changed, 58 insertions(+), 15 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 0fe9ae07..0be413f1 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,14 @@ +pywb 0.6.6 changelist +~~~~~~~~~~~~~~~~~~~~~ + +* Beginning of new rewrite options, settable per collections and stored in UrlRewriter. Available options: + + - `rewrite_base` - set to False to disable rewriting `` tag + - `rewrite_rel_canon` - set to false to disable rewriting `` + +* JS rewrite: Don't rewrite location if starting with '$' + + pywb 0.6.5 changelist ~~~~~~~~~~~~~~~~~~~~~ diff --git a/pywb/framework/archivalrouter.py b/pywb/framework/archivalrouter.py index 33230027..3b0b5a6d 100644 --- a/pywb/framework/archivalrouter.py +++ b/pywb/framework/archivalrouter.py @@ -62,7 +62,8 @@ class ArchivalRouter(object): use_abs_prefix=use_abs_prefix, wburl_class=route.handler.get_wburl_type(), urlrewriter_class=UrlRewriter, - cookie_scope=route.cookie_scope) + cookie_scope=route.cookie_scope, + rewrite_opts=route.rewrite_opts) # Allow for applying of additional filters route.apply_filters(wbrequest, matcher) @@ -101,6 +102,7 @@ class Route(object): # collection id from regex group (default 0) self.coll_group = coll_group self.cookie_scope = config.get('cookie_scope') + self.rewrite_opts = config.get('rewrite_opts', {}) self._custom_init(config) def is_handling(self, request_uri): diff --git a/pywb/framework/wbrequestresponse.py b/pywb/framework/wbrequestresponse.py index 06970316..7c48dbb3 100644 --- a/pywb/framework/wbrequestresponse.py +++ b/pywb/framework/wbrequestresponse.py @@ -38,7 +38,8 @@ class WbRequest(object): wburl_class=None, urlrewriter_class=None, is_proxy=False, - cookie_scope=None): + cookie_scope=None, + rewrite_opts={}): self.env = env @@ -77,7 +78,8 @@ class WbRequest(object): host_prefix + rel_prefix, rel_prefix, env.get('SCRIPT_NAME', '/'), - cookie_scope) + cookie_scope, + rewrite_opts) self.urlrewriter.deprefix_url() else: diff --git a/pywb/rewrite/html_rewriter.py b/pywb/rewrite/html_rewriter.py index 618c5191..cae65a89 100644 --- a/pywb/rewrite/html_rewriter.py +++ b/pywb/rewrite/html_rewriter.py @@ -92,6 +92,9 @@ class HTMLRewriterMixin(object): self.rewrite_tags = self._init_rewrite_tags(defmod) + # get opts from urlrewriter + self.opts = url_rewriter.rewrite_opts + # =========================== META_REFRESH_REGEX = re.compile('^[\\d.]+\\s*;\\s*url\\s*=\\s*(.+?)\\s*$', re.IGNORECASE | re.MULTILINE) @@ -174,9 +177,11 @@ class HTMLRewriterMixin(object): elif attr_name == 'crossorigin': attr_name = '_crossorigin' - # special case: link don't rewrite canonical + # special case: if rewrite_canon not set, + # don't rewrite rel=canonical elif tag == 'link' and attr_name == 'href': - if not self.has_attr(tag_attrs, ('rel', 'canonical')): + if (self.opts.get('rewrite_rel_canon', True) or + not self.has_attr(tag_attrs, ('rel', 'canonical'))): rw_mod = handler.get(attr_name) attr_value = self._rewrite_url(attr_value, rw_mod) @@ -191,17 +196,21 @@ class HTMLRewriterMixin(object): rw_mod = 'oe_' attr_value = self._rewrite_url(attr_value, rw_mod) + # special case: base tag + elif (tag == 'base') and (attr_name == 'href') and attr_value: + rw_mod = handler.get(attr_name) + base_value = self._rewrite_url(attr_value, rw_mod) + if self.opts.get('rewrite_base', True): + attr_value = base_value + self.url_rewriter = (self.url_rewriter. + rebase_rewriter(base_value)) + else: # rewrite url using tag handler rw_mod = handler.get(attr_name) if rw_mod is not None: attr_value = self._rewrite_url(attr_value, rw_mod) - # special case: base tag - if (tag == 'base') and (attr_name == 'href') and attr_value: - self.url_rewriter = (self.url_rewriter. - rebase_rewriter(attr_value)) - # write the attr! self._write_attr(attr_name, attr_value) diff --git a/pywb/rewrite/regex_rewriters.py b/pywb/rewrite/regex_rewriters.py index 179e06fd..375bca08 100644 --- a/pywb/rewrite/regex_rewriters.py +++ b/pywb/rewrite/regex_rewriters.py @@ -130,7 +130,7 @@ class JSLinkAndLocationRewriter(JSLinkOnlyRewriter): def __init__(self, rewriter, rules=[], prefix='WB_wombat_'): rules = rules + [ - (r'(?>> parse('') # -# Base Tests +# Base Tests -- w/ rewrite (default) >>> parse('') >>> parse('') +# Base Tests -- no rewrite +>>> parse('', urlrewriter=no_base_canon_rewriter) + + +>>> parse('', urlrewriter=no_base_canon_rewriter) + + + + # HTML Entities >>> parse('›   > ?') ›   > ? @@ -102,8 +111,12 @@ ur""" >>> parse('
SomeTest
', head_insert = '')
SomeTest
-# don't rewrite rel=canonical +# rel=canonical: rewrite (default) >>> parse('') + + +# rel=canonical: no_rewrite +>>> parse('', urlrewriter=no_base_canon_rewriter) # doctype @@ -143,7 +156,12 @@ import pprint urlrewriter = UrlRewriter('20131226101010/http://example.com/some/path/index.html', '/web/') -def parse(data, head_insert = None): +no_base_canon_rewriter = UrlRewriter('20131226101010/http://example.com/some/path/index.html', + '/web/', + rewrite_opts=dict(rewrite_rel_canon=False, + rewrite_base=False)) + +def parse(data, head_insert=None, urlrewriter=urlrewriter): parser = HTMLRewriter(urlrewriter, head_insert = head_insert) #data = data.decode('utf-8') result = parser.rewrite(data) + parser.close() diff --git a/pywb/rewrite/url_rewriter.py b/pywb/rewrite/url_rewriter.py index aa87260c..a5cc7952 100644 --- a/pywb/rewrite/url_rewriter.py +++ b/pywb/rewrite/url_rewriter.py @@ -20,13 +20,14 @@ class UrlRewriter(object): REL_SCHEME = ('//', r'\/\/', r'\\/\\/') def __init__(self, wburl, prefix, full_prefix=None, rel_prefix=None, - root_path=None, cookie_scope=None): + root_path=None, cookie_scope=None, rewrite_opts={}): self.wburl = wburl if isinstance(wburl, WbUrl) else WbUrl(wburl) self.prefix = prefix self.full_prefix = full_prefix self.rel_prefix = rel_prefix if rel_prefix else prefix self.root_path = root_path if root_path else '/' self.cookie_scope = cookie_scope + self.rewrite_opts = rewrite_opts def rewrite(self, url, mod=None): # if special protocol, no rewriting at all From b951b304b65d5f5bc51c7bb8afcb46b9e14574b6 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Sat, 6 Dec 2014 17:28:43 -0800 Subject: [PATCH 3/8] Fix quotes in CHANGES.rst --- CHANGES.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 0be413f1..398ace92 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -3,8 +3,8 @@ pywb 0.6.6 changelist * Beginning of new rewrite options, settable per collections and stored in UrlRewriter. Available options: - - `rewrite_base` - set to False to disable rewriting `` tag - - `rewrite_rel_canon` - set to false to disable rewriting `` + - ``rewrite_base`` - set to False to disable rewriting ```` tag + - ``rewrite_rel_canon`` - set to false to disable rewriting ```` * JS rewrite: Don't rewrite location if starting with '$' @@ -51,17 +51,17 @@ pywb 0.6.3 changelist pywb 0.6.2 changelist ~~~~~~~~~~~~~~~~~~~~~ -* Invert framed replay paradigm: Canonical page is always without a modifier (instead of with `mp_`), if using frames, the page redirects to `tf_`, and uses replaceState() to change url back to canonical form. +* Invert framed replay paradigm: Canonical page is always without a modifier (instead of with ``mp_``), if using frames, the page redirects to ``tf_``, and uses replaceState() to change url back to canonical form. * Enable Memento support for framed replay, include Memento headers in top frame -* Easier to customize just the banner html, via `banner_html` setting in the config. Default banner uses ui/banner.html and inserts the script default_banner.js, which creates the banner. +* Easier to customize just the banner html, via ``banner_html`` setting in the config. Default banner uses ui/banner.html and inserts the script default_banner.js, which creates the banner. - Other implementations may create banner via custom JS or directly insert HTML, as needed. Setting `banner_html: False` will disable the banner. + Other implementations may create banner via custom JS or directly insert HTML, as needed. Setting ``banner_html: False`` will disable the banner. * Small improvements to streaming response, read in fixed chunks to allow better streaming from live. -* Improved cookie and csrf-token rewriting, including: ability to set `cookie_scope: root` per collection to have all replayed cookies have their Path set to application root. +* Improved cookie and csrf-token rewriting, including: ability to set ``cookie_scope: root`` per collection to have all replayed cookies have their Path set to application root. This is useful for replaying sites which share cookies amongst different pages and across archived time ranges. From 0a6838ac2b98618d0a0cc7419dd5698b7e7e45ac Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Sun, 7 Dec 2014 21:09:37 -0800 Subject: [PATCH 4/8] rewrite: refactor JS rewriters into seperate mixins, allowing for link only, location only, and link + location JS rewriters. location-only rewriter is new js_rewrite_location options: all, location, urls (for now) --- pywb/rewrite/regex_rewriters.py | 33 +++++++++++++++------ pywb/rewrite/rewriterules.py | 9 +++--- pywb/rewrite/test/test_regex_rewriters.py | 2 +- pywb/rewrite/test/test_rewrite_live.py | 35 ++++++++++++++++------- pywb/rules.yaml | 9 ++++-- pywb/ui/head_insert.html | 2 +- sample_archive/text_content/sample.html | 2 +- 7 files changed, 65 insertions(+), 27 deletions(-) diff --git a/pywb/rewrite/regex_rewriters.py b/pywb/rewrite/regex_rewriters.py index 375bca08..5d680068 100644 --- a/pywb/rewrite/regex_rewriters.py +++ b/pywb/rewrite/regex_rewriters.py @@ -35,7 +35,7 @@ class RegexRewriter(object): #DEFAULT_OP = add_prefix - def __init__(self, rules): + def __init__(self, rewriter, rules): #rules = self.create_rules(http_prefix) # Build regexstr, concatenating regex list @@ -106,7 +106,7 @@ class RegexRewriter(object): #================================================================= -class JSLinkOnlyRewriter(RegexRewriter): +class JSLinkRewriterMixin(object): """ JS Rewriter which rewrites absolute http://, https:// and // urls at the beginning of a string @@ -118,13 +118,14 @@ class JSLinkOnlyRewriter(RegexRewriter): rules = rules + [ (self.JS_HTTPX, RegexRewriter.archival_rewrite(rewriter), 0) ] - super(JSLinkOnlyRewriter, self).__init__(rules) + super(JSLinkRewriterMixin, self).__init__(rewriter, rules) #================================================================= -class JSLinkAndLocationRewriter(JSLinkOnlyRewriter): +class JSLocationRewriterMixin(object): +#class JSLinkAndLocationRewriter(JSLinkOnlyRewriter): """ - JS Rewriter which also rewrites location and domain to the + JS Rewriter mixin which rewrites location and domain to the specified prefix (default: 'WB_wombat_') """ @@ -148,7 +149,23 @@ class JSLinkAndLocationRewriter(JSLinkOnlyRewriter): #(r'\b(?:self|window)\b[!=\W]+\b(top)\b', #RegexRewriter.add_prefix(prefix), 1), ] - super(JSLinkAndLocationRewriter, self).__init__(rewriter, rules) + super(JSLocationRewriterMixin, self).__init__(rewriter, rules) + + +#================================================================= +class JSLocationOnlyRewriter(JSLocationRewriterMixin, RegexRewriter): + pass + + +#================================================================= +class JSLinkOnlyRewriter(JSLinkRewriterMixin, RegexRewriter): + pass + +#================================================================= +class JSLinkAndLocationRewriter(JSLocationRewriterMixin, + JSLinkRewriterMixin, + RegexRewriter): + pass #================================================================= @@ -161,7 +178,7 @@ class XMLRewriter(RegexRewriter): def __init__(self, rewriter, extra=[]): rules = self._create_rules(rewriter) - super(XMLRewriter, self).__init__(rules) + super(XMLRewriter, self).__init__(rewriter, rules) # custom filter to reject 'xmlns' attr def filter(self, m): @@ -189,7 +206,7 @@ class CSSRewriter(RegexRewriter): def __init__(self, rewriter): rules = self._create_rules(rewriter) - super(CSSRewriter, self).__init__(rules) + super(CSSRewriter, self).__init__(rewriter, rules) def _create_rules(self, rewriter): return [ diff --git a/pywb/rewrite/rewriterules.py b/pywb/rewrite/rewriterules.py index 5bc99e3a..583115f7 100644 --- a/pywb/rewrite/rewriterules.py +++ b/pywb/rewrite/rewriterules.py @@ -1,7 +1,7 @@ from pywb.utils.dsrules import BaseRule from regex_rewriters import RegexRewriter, CSSRewriter, XMLRewriter -from regex_rewriters import JSLinkAndLocationRewriter, JSLinkOnlyRewriter +from regex_rewriters import JSLinkAndLocationRewriter, JSLinkOnlyRewriter, JSLocationOnlyRewriter from header_rewriter import HeaderRewriter from html_rewriter import HTMLRewriter @@ -27,12 +27,13 @@ class RewriteRules(BaseRule): self.parse_comments = config.get('parse_comments', False) # Custom handling for js rewriting, often the most complex - self.js_rewrite_location = config.get('js_rewrite_location', True) - self.js_rewrite_location = bool(self.js_rewrite_location) + self.js_rewrite_location = config.get('js_rewrite_location', 'all') # ability to toggle rewriting - if self.js_rewrite_location: + if self.js_rewrite_location == 'all': js_default_class = JSLinkAndLocationRewriter + elif self.js_rewrite_location == 'location': + js_default_class = JSLocationOnlyRewriter else: js_default_class = JSLinkOnlyRewriter diff --git a/pywb/rewrite/test/test_regex_rewriters.py b/pywb/rewrite/test/test_regex_rewriters.py index 92975a7f..19ea5eb6 100644 --- a/pywb/rewrite/test/test_regex_rewriters.py +++ b/pywb/rewrite/test/test_regex_rewriters.py @@ -3,7 +3,7 @@ r""" # Custom Regex #================================================================= # Test https->http converter (other tests below in subclasses) ->>> RegexRewriter([(RegexRewriter.HTTPX_MATCH_STR, RegexRewriter.remove_https, 0)]).rewrite('a = https://example.com; b = http://example.com; c = https://some-url/path/https://embedded.example.com') +>>> RegexRewriter(urlrewriter, [(RegexRewriter.HTTPX_MATCH_STR, RegexRewriter.remove_https, 0)]).rewrite('a = https://example.com; b = http://example.com; c = https://some-url/path/https://embedded.example.com') 'a = http://example.com; b = http://example.com; c = http://some-url/path/http://embedded.example.com' diff --git a/pywb/rewrite/test/test_rewrite_live.py b/pywb/rewrite/test/test_rewrite_live.py index b54138fa..3ea189a4 100644 --- a/pywb/rewrite/test/test_rewrite_live.py +++ b/pywb/rewrite/test/test_rewrite_live.py @@ -13,7 +13,7 @@ urlrewriter = UrlRewriter('20131226101010/http://example.com/some/path/index.htm bn_urlrewriter = UrlRewriter('20131226101010bn_/http://example.com/some/path/index.html', '/pywb/') def head_insert_func(rule, cdx): - if rule.js_rewrite_location == True: + if rule.js_rewrite_location != 'urls': return '' else: return '' @@ -26,10 +26,10 @@ def test_local_1(): 'com,example,test)/') # wombat insert added - assert '' in buff + assert '' in buff, buff - # location rewritten - assert 'window.WB_wombat_location = "/other.html"' in buff + # JS location and JS link rewritten + assert 'window.WB_wombat_location = "/pywb/20131226101010/http:\/\/example.com/dynamic_page.html"' in buff # link rewritten assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff @@ -65,7 +65,7 @@ def test_local_no_head_banner_only(): # link NOT rewritten assert '"another.html"' in buff -def test_local_banner_only(): +def test_local_banner_only_no_rewrite(): status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample.html', bn_urlrewriter, head_insert_func, @@ -74,13 +74,13 @@ def test_local_banner_only(): # wombat insert added assert '' in buff - # location NOT rewritten - assert 'window.location = "/other.html"' in buff + # JS location NOT rewritten, JS link NOT rewritten + assert 'window.location = "http:\/\/example.com/dynamic_page.html"' in buff, buff # link NOT rewritten assert '"another.html"' in buff -def test_local_2_no_js_location_rewrite(): +def test_local_2_link_only_rewrite(): status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample.html', urlrewriter, head_insert_func, @@ -89,13 +89,28 @@ def test_local_2_no_js_location_rewrite(): # no wombat insert assert '' not in buff - # no location rewrite - assert 'window.location = "/other.html"' in buff + # JS location NOT rewritten, JS link rewritten + assert 'window.location = "/pywb/20131226101010/http:\/\/example.com/dynamic_page.html"' in buff # still link rewrite assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff +def test_local_2_js_loc_only_rewrite(): + status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample.html', + urlrewriter, + head_insert_func, + 'example,example,test,loconly)/') + + # wombat insert added + assert '' in buff + + # JS location rewritten, JS link NOT rewritten + assert 'window.WB_wombat_location = "http:\/\/example.com/dynamic_page.html"' in buff + + # still link rewrite in HTML + assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff + def test_example_1(): status_headers, buff = get_rewritten('http://example.com/', urlrewriter, req_headers={'Connection': 'close'}) diff --git a/pywb/rules.yaml b/pywb/rules.yaml index aa80717e..b55d8278 100644 --- a/pywb/rules.yaml +++ b/pywb/rules.yaml @@ -120,6 +120,11 @@ rules: # testing rules -- not for valid domain #================================================================= # this rule block is a non-existent prefix merely for testing + - url_prefix: 'example,example,test,loconly)/' + + rewrite: + js_rewrite_location: location + - url_prefix: 'example,example,test)/' canonicalize: @@ -131,10 +136,10 @@ rules: - id rewrite: - js_rewrite_location: False + js_rewrite_location: urls - # all domain rules -- fallback to this dataset + # all domain rules -- fallback to this dataset #================================================================= # Applies to all urls -- should be last - url_prefix: '' diff --git a/pywb/ui/head_insert.html b/pywb/ui/head_insert.html index 4e53a5d0..812bcfe0 100644 --- a/pywb/ui/head_insert.html +++ b/pywb/ui/head_insert.html @@ -1,5 +1,5 @@ -{% if rule.js_rewrite_location and include_wombat %} +{% if rule.js_rewrite_location != 'urls' and include_wombat %} Test Content From 5129c83095cdb9ee1285e30bdc7b881aecba00a2 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Sun, 7 Dec 2014 21:15:35 -0800 Subject: [PATCH 5/8] update changelist --- CHANGES.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index 398ace92..b3b2bf6d 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,10 @@ pywb 0.6.6 changelist ~~~~~~~~~~~~~~~~~~~~~ +* JS Rewriters: add mixins for link + location (default), link only, location only rewriting by setting ``js_rewrite_location`` to ``all``, ``urls``, ``location``, respectively. + + (New: location only rewriting does not change JS urls) + * Beginning of new rewrite options, settable per collections and stored in UrlRewriter. Available options: - ``rewrite_base`` - set to False to disable rewriting ```` tag From 6d5592f890cfc7b6427fc428b701b89fae24d8d5 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Sun, 7 Dec 2014 23:33:21 -0800 Subject: [PATCH 6/8] wb js: add guard around wb js to prevent double-inits (such as from fragments!) --- pywb/static/wb.js | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/pywb/static/wb.js b/pywb/static/wb.js index 00592bbf..f6b2fb41 100644 --- a/pywb/static/wb.js +++ b/pywb/static/wb.js @@ -35,10 +35,14 @@ function init_banner() { bid = PLAIN_BANNER_ID; } + if (!document || !document.body) { + return; + } + if (document.getElementById(bid) != null) { return; } - + _wb_js.create_banner_element(bid); } @@ -56,14 +60,14 @@ this.ts_to_date = function(ts, is_gmt) if (ts.length < 14) { return ts; } - - var datestr = (ts.substring(0, 4) + "-" + + + var datestr = (ts.substring(0, 4) + "-" + ts.substring(4, 6) + "-" + ts.substring(6, 8) + "T" + ts.substring(8, 10) + ":" + ts.substring(10, 12) + ":" + ts.substring(12, 14) + "-00:00"); - + var date = new Date(datestr); if (is_gmt) { return date.toGMTString(); @@ -117,6 +121,12 @@ function notify_top() { } this.load = function() { + if (window._wb_js_inited) { + return; + } + + window._wb_js_inited = true; + if ((window.self == window.top) && wbinfo) { if (wbinfo.top_url && (window.location.href != wbinfo.top_url) && wbinfo.mod != "bn_") { // Auto-redirect to top frame From 80c5b45178100f4931ba5fb4dedf41668715c5b7 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Sat, 13 Dec 2014 23:12:42 -0800 Subject: [PATCH 7/8] wb.js: don't include anchor when comparing current url with expected top url --- pywb/static/wb.js | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pywb/static/wb.js b/pywb/static/wb.js index f6b2fb41..1974c59f 100644 --- a/pywb/static/wb.js +++ b/pywb/static/wb.js @@ -128,7 +128,10 @@ this.load = function() { window._wb_js_inited = true; if ((window.self == window.top) && wbinfo) { - if (wbinfo.top_url && (window.location.href != wbinfo.top_url) && wbinfo.mod != "bn_") { + + var loc = window.location.href.replace(window.location.hash, ""); + + if (wbinfo.top_url && (loc != wbinfo.top_url) && wbinfo.mod != "bn_") { // Auto-redirect to top frame window.location.replace(wbinfo.top_url); } else { From 09861ada2ce1bdc6ca1c567672500e74219c62b7 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Sat, 13 Dec 2014 23:19:04 -0800 Subject: [PATCH 8/8] wb.js: preserve anchor in location redirect update CHANGELIST --- CHANGES.rst | 2 ++ pywb/static/wb.js | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index b3b2bf6d..6a4f8439 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,8 @@ pywb 0.6.6 changelist ~~~~~~~~~~~~~~~~~~~~~ +* JS client side improvements: check for double-inits, preserve anchor in wb.js top location redirect + * JS Rewriters: add mixins for link + location (default), link only, location only rewriting by setting ``js_rewrite_location`` to ``all``, ``urls``, ``location``, respectively. (New: location only rewriting does not change JS urls) diff --git a/pywb/static/wb.js b/pywb/static/wb.js index 1974c59f..ea14c9fa 100644 --- a/pywb/static/wb.js +++ b/pywb/static/wb.js @@ -129,11 +129,13 @@ this.load = function() { if ((window.self == window.top) && wbinfo) { + var hash = window.location.hash; + var loc = window.location.href.replace(window.location.hash, ""); if (wbinfo.top_url && (loc != wbinfo.top_url) && wbinfo.mod != "bn_") { // Auto-redirect to top frame - window.location.replace(wbinfo.top_url); + window.location.replace(wbinfo.top_url + hash); } else { // Init Banner (no frame or top frame) add_event("readystatechange", init_banner, document);