diff --git a/CHANGES.rst b/CHANGES.rst index f1f494ee..982b0bd2 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,21 @@ pywb 0.8.0 changelist ~~~~~~~~~~~~~~~~~~~~~ +pywb 0.7.8 changelist +~~~~~~~~~~~~~~~~~~~~~ + +* live rewrite fix: When forwarding ``X-Forwarded-Proto`` header, set scheme to actual url scheme to avoid possible redirect loops (#57) + + +pywb 0.7.7 changelist +~~~~~~~~~~~~~~~~~~~~~ + +* client-side rewrite: improved rewriting of all style changes using mutation observers + +* rules: fix YT rewrite rule, add rule for wikimedia + +* cdx-indexer: minor cleanup, add support for custom writer for batched cdx (write_multi_cdx_index) + pywb 0.7.6 changelist ~~~~~~~~~~~~~~~~~~~~~ diff --git a/pywb/rewrite/rewrite_live.py b/pywb/rewrite/rewrite_live.py index e6860d22..dc43d51e 100644 --- a/pywb/rewrite/rewrite_live.py +++ b/pywb/rewrite/rewrite_live.py @@ -75,6 +75,10 @@ class LiveRewriter(object): elif name == 'HTTP_REFERER': continue + elif name == 'HTTP_X_FORWARDED_PROTO': + name = 'X-Forwarded-Proto' + value = splits.scheme + elif name == 'HTTP_COOKIE': name = 'Cookie' value = self._req_cookie_rewrite(urlkey, value) @@ -89,7 +93,7 @@ class LiveRewriter(object): elif name == 'REL_REFERER': name = 'Referer' else: - continue + value = None if value: headers[name] = value diff --git a/pywb/rewrite/test/test_rewrite_live.py b/pywb/rewrite/test/test_rewrite_live.py index 99fb6074..eb4d51d4 100644 --- a/pywb/rewrite/test/test_rewrite_live.py +++ b/pywb/rewrite/test/test_rewrite_live.py @@ -26,6 +26,14 @@ def test_csrf_token_headers(): assert req_headers == {'X-CSRFToken': 'foobar', 'Cookie': 'csrftoken=foobar'} +def test_forwarded_scheme(): + rewriter = LiveRewriter() + env = {'HTTP_X_FORWARDED_PROTO': 'https', 'Other': 'Value'} + + req_headers = rewriter.translate_headers('http://example.com/', 'com,example)/', env) + + assert req_headers == {'X-Forwarded-Proto': 'http'} + def test_req_cookie_rewrite_1(): rewriter = LiveRewriter() env = {'HTTP_COOKIE': 'A=B'} diff --git a/pywb/rules.yaml b/pywb/rules.yaml index 7490a078..f3016efd 100644 --- a/pywb/rules.yaml +++ b/pywb/rules.yaml @@ -65,7 +65,7 @@ rules: fuzzy_lookup: '()' - # instagram rules + # wikimedia rules #================================================================= - url_prefix: 'org,wikimedia,meta)/' @@ -144,7 +144,7 @@ rules: js_regexs: - match: 'window.location' replace: 'WB_wombat_location' - + # youtube rules #================================================================= @@ -213,7 +213,7 @@ rules: - match: 'ytplayer.load\(\);' replace: 'ytplayer.config.args.dash = "0"; ytplayer.config.args.dashmpd = ""; {0}' - - match: 'yt\.setConfig.*PLAYER_CONFIG.*args": {' + - match: 'yt\.setConfig.*PLAYER_CONFIG.*args":\s*{' replace: '{0} "dash": "0", dashmpd: "", ' req_cookie_rewrite: diff --git a/pywb/static/wombat.js b/pywb/static/wombat.js index 684fcc67..07fe6a4d 100644 --- a/pywb/static/wombat.js +++ b/pywb/static/wombat.js @@ -626,6 +626,36 @@ _WBWombat = (function() { window.Worker = undefined; } + + //============================================ + function init_mutation_obs() { + if (!window.MutationObserver) { + return; + } + + var m = new MutationObserver(function(records, observer) + { + for (var i = 0; i < records.length; i++) { + var r = records[i]; + if (r.type == "attributes" && r.attributeName == "style") { + var style = r.target.style.cssText; + if (style.indexOf("url(") > 0) { + var new_style = rewrite_style(style); + if (new_style != style) { + r.target.style.cssText = new_style; + } + } + } + } + }); + + m.observe(document.documentElement, {childList: false, + attributes: true, + subtree: true, + //attributeOldValue: true, + attributeFilter: ["style"]}); + } + //============================================ function rewrite_attr(elem, name, func) { if (!elem || !elem.getAttribute) { @@ -988,6 +1018,9 @@ _WBWombat = (function() { init_ajax_rewrite(); init_worker_override(); + // Init mutation observer (for style only) + init_mutation_obs(); + // setAttribute init_setAttribute_override(); diff --git a/pywb/warc/cdxindexer.py b/pywb/warc/cdxindexer.py index acd492f9..11cf76cd 100644 --- a/pywb/warc/cdxindexer.py +++ b/pywb/warc/cdxindexer.py @@ -107,6 +107,19 @@ def cdx_filename(filename): return remove_ext(filename) + '.cdx' +#================================================================= +def get_cdx_writer_cls(options): + writer_cls = options.get('writer_cls') + + if not writer_cls: + if options.get('sort'): + writer_cls = SortedCDXWriter + else: + writer_cls = CDXWriter + + return writer_cls + + #================================================================= def write_multi_cdx_index(output, inputs, **options): # write one cdx per dir @@ -117,7 +130,7 @@ def write_multi_cdx_index(output, inputs, **options): with open(outpath, 'wb') as outfile: with open(fullpath, 'rb') as infile: - write_cdx_index(outfile, infile, filename, **options) + return write_cdx_index(outfile, infile, filename, **options) # write to one cdx file else: @@ -126,10 +139,7 @@ def write_multi_cdx_index(output, inputs, **options): else: outfile = open(output, 'wb') - if options.get('sort'): - writer_cls = SortedCDXWriter - else: - writer_cls = CDXWriter + writer_cls = get_cdx_writer_cls(options) with writer_cls(outfile, options.get('cdx09')) as writer: for fullpath, filename in iter_file_or_dir(inputs): @@ -139,20 +149,15 @@ def write_multi_cdx_index(output, inputs, **options): for entry in entry_iter: writer.write(entry, filename) + return writer + #================================================================= def write_cdx_index(outfile, infile, filename, **options): - writer_cls = options.get('writer_cls') - if type(filename) is unicode: filename = filename.encode(sys.getfilesystemencoding()) - if writer_cls: - pass - elif options.get('sort'): - writer_cls = SortedCDXWriter - else: - writer_cls = CDXWriter + writer_cls = get_cdx_writer_cls(options) with writer_cls(outfile, options.get('cdx09')) as writer: entry_iter = create_index_iter(infile, **options)