From de403c457e8099a2849a72f37d260c5f0eb14030 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Sat, 3 Jan 2015 13:51:47 -0800 Subject: [PATCH 1/8] update README for 0.7.2 master --- README.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 1d9c8b57..ab3be784 100644 --- a/README.rst +++ b/README.rst @@ -1,10 +1,10 @@ PyWb 0.7.2 ========== -.. image:: https://travis-ci.org/ikreymer/pywb.png?branch=develop +.. image:: https://travis-ci.org/ikreymer/pywb.png?branch=master :target: https://travis-ci.org/ikreymer/pywb -.. image:: https://coveralls.io/repos/ikreymer/pywb/badge.png?branch=develop - :target: https://coveralls.io/r/ikreymer/pywb?branch=develop +.. image:: https://coveralls.io/repos/ikreymer/pywb/badge.png?branch=master + :target: https://coveralls.io/r/ikreymer/pywb?branch=master .. image:: https://img.shields.io/gratipay/ikreymer.svg :target: https://www.gratipay.com/ikreymer/ From c47d3ca925292a093b203d3a42f6084079b1e781 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Tue, 3 Feb 2015 11:14:06 -0800 Subject: [PATCH 2/8] wombat: add mutation observers, addressing #71 and maybe #67 rules: fix regex for yt, add rx for wikimedia --- pywb/rules.yaml | 16 +++++++++++++--- pywb/static/wombat.js | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 3 deletions(-) diff --git a/pywb/rules.yaml b/pywb/rules.yaml index 325bbd9d..f3016efd 100644 --- a/pywb/rules.yaml +++ b/pywb/rules.yaml @@ -65,7 +65,17 @@ rules: fuzzy_lookup: '()' - # flickr rules + # wikimedia rules + #================================================================= + - url_prefix: 'org,wikimedia,meta)/' + + rewrite: + js_regexs: + - match: '\burl\((\\?/\\?/[^)]+)\)' + rewrite: true + group: 1 + + # flickr rules #================================================================= - url_prefix: ['com,yimg,l)/g/combo', 'com,yimg,s)/pw/combo', 'com,yahooapis,yui)/combo'] fuzzy_lookup: '([^/]+(?:\.css|\.js))' @@ -134,7 +144,7 @@ rules: js_regexs: - match: 'window.location' replace: 'WB_wombat_location' - + # youtube rules #================================================================= @@ -203,7 +213,7 @@ rules: - match: 'ytplayer.load\(\);' replace: 'ytplayer.config.args.dash = "0"; ytplayer.config.args.dashmpd = ""; {0}' - - match: 'yt\.setConfig.*PLAYER_CONFIG.*args": {' + - match: 'yt\.setConfig.*PLAYER_CONFIG.*args":\s*{' replace: '{0} "dash": "0", dashmpd: "", ' req_cookie_rewrite: diff --git a/pywb/static/wombat.js b/pywb/static/wombat.js index f0582b43..94587b0b 100644 --- a/pywb/static/wombat.js +++ b/pywb/static/wombat.js @@ -616,6 +616,36 @@ _WBWombat = (function() { window.Worker = undefined; } + + //============================================ + function init_mutation_obs() { + if (!window.MutationObserver) { + return; + } + + var m = new MutationObserver(function(records, observer) + { + for (var i = 0; i < records.length; i++) { + var r = records[i]; + if (r.type == "attributes" && r.attributeName == "style") { + var style = r.target.style.cssText; + if (style.indexOf("url(") > 0) { + var new_style = rewrite_style(style); + if (new_style != style) { + r.target.style.cssText = new_style; + } + } + } + } + }); + + m.observe(document.documentElement, {childList: false, + attributes: true, + subtree: true, + //attributeOldValue: true, + attributeFilter: ["style"]}); + } + //============================================ function rewrite_attr(elem, name, func) { if (!elem || !elem.getAttribute) { @@ -959,6 +989,9 @@ _WBWombat = (function() { init_ajax_rewrite(); init_worker_override(); + // Init mutation observer (for style only) + init_mutation_obs(); + // setAttribute init_setAttribute_override(); From ef98716bd8d3f52ec225869fa4cfb85171cf7971 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Tue, 3 Feb 2015 11:23:12 -0800 Subject: [PATCH 3/8] bump version to 0.7.7 in prep for release --- CHANGES.rst | 8 ++++++++ README.rst | 2 +- setup.py | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 19993af1..3bc77b51 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,11 @@ +pywb 0.7.7 changelist +~~~~~~~~~~~~~~~~~~~~~ + +* client-side rewrite: improved rewriting of all style changes using mutation observers + +* rules: fix YT rewrite rule, add rule for wikimedia + + pywb 0.7.6 changelist ~~~~~~~~~~~~~~~~~~~~~ diff --git a/README.rst b/README.rst index d0ec127d..71f39f6b 100644 --- a/README.rst +++ b/README.rst @@ -1,4 +1,4 @@ -PyWb 0.7.6 +PyWb 0.7.7 ========== .. image:: https://travis-ci.org/ikreymer/pywb.png?branch=master diff --git a/setup.py b/setup.py index 2efb448b..3fa33c63 100755 --- a/setup.py +++ b/setup.py @@ -34,7 +34,7 @@ class PyTest(TestCommand): setup( name='pywb', - version='0.7.6', + version='0.7.7', url='https://github.com/ikreymer/pywb', author='Ilya Kreymer', author_email='ikreymer@gmail.com', From 40fba3c27b2dc79e825af12fb23a3bd56948c680 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Wed, 4 Feb 2015 11:17:26 -0800 Subject: [PATCH 4/8] cdx-indexer: minor cleanup, add custom writer override to write_multi_cdx_index --- CHANGES.rst | 2 ++ pywb/warc/cdxindexer.py | 31 ++++++++++++++++++------------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 3bc77b51..d239eaee 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -5,6 +5,8 @@ pywb 0.7.7 changelist * rules: fix YT rewrite rule, add rule for wikimedia +* cdx-indexer: minor cleanup, add support for custom writer for batched cdx (write_multi_cdx_index) + pywb 0.7.6 changelist ~~~~~~~~~~~~~~~~~~~~~ diff --git a/pywb/warc/cdxindexer.py b/pywb/warc/cdxindexer.py index acd492f9..11cf76cd 100644 --- a/pywb/warc/cdxindexer.py +++ b/pywb/warc/cdxindexer.py @@ -107,6 +107,19 @@ def cdx_filename(filename): return remove_ext(filename) + '.cdx' +#================================================================= +def get_cdx_writer_cls(options): + writer_cls = options.get('writer_cls') + + if not writer_cls: + if options.get('sort'): + writer_cls = SortedCDXWriter + else: + writer_cls = CDXWriter + + return writer_cls + + #================================================================= def write_multi_cdx_index(output, inputs, **options): # write one cdx per dir @@ -117,7 +130,7 @@ def write_multi_cdx_index(output, inputs, **options): with open(outpath, 'wb') as outfile: with open(fullpath, 'rb') as infile: - write_cdx_index(outfile, infile, filename, **options) + return write_cdx_index(outfile, infile, filename, **options) # write to one cdx file else: @@ -126,10 +139,7 @@ def write_multi_cdx_index(output, inputs, **options): else: outfile = open(output, 'wb') - if options.get('sort'): - writer_cls = SortedCDXWriter - else: - writer_cls = CDXWriter + writer_cls = get_cdx_writer_cls(options) with writer_cls(outfile, options.get('cdx09')) as writer: for fullpath, filename in iter_file_or_dir(inputs): @@ -139,20 +149,15 @@ def write_multi_cdx_index(output, inputs, **options): for entry in entry_iter: writer.write(entry, filename) + return writer + #================================================================= def write_cdx_index(outfile, infile, filename, **options): - writer_cls = options.get('writer_cls') - if type(filename) is unicode: filename = filename.encode(sys.getfilesystemencoding()) - if writer_cls: - pass - elif options.get('sort'): - writer_cls = SortedCDXWriter - else: - writer_cls = CDXWriter + writer_cls = get_cdx_writer_cls(options) with writer_cls(outfile, options.get('cdx09')) as writer: entry_iter = create_index_iter(infile, **options) From cdb3dcc3d2894a7bb828019458476bc8faac0a30 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Wed, 4 Feb 2015 14:19:37 -0800 Subject: [PATCH 5/8] rewrite_live: don't forward via or https_x headers, only standard (for now) possible fix for #57 --- pywb/rewrite/rewrite_live.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pywb/rewrite/rewrite_live.py b/pywb/rewrite/rewrite_live.py index e6860d22..99592786 100644 --- a/pywb/rewrite/rewrite_live.py +++ b/pywb/rewrite/rewrite_live.py @@ -75,6 +75,9 @@ class LiveRewriter(object): elif name == 'HTTP_REFERER': continue + elif name.startswith(('HTTP_X_', 'HTTP_VIA')): + continue + elif name == 'HTTP_COOKIE': name = 'Cookie' value = self._req_cookie_rewrite(urlkey, value) @@ -144,6 +147,8 @@ class LiveRewriter(object): else: data = input_ + print(url) + print(req_headers) response = requests.request(method=method, url=url, data=data, From 78812c80853cdf12c30a545df4403aa7faa6b44c Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Wed, 4 Feb 2015 15:17:23 -0800 Subject: [PATCH 6/8] rewrite: more conservative change, only rewrite the X-Forwarded-Proto header for now, #57 --- pywb/rewrite/rewrite_live.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pywb/rewrite/rewrite_live.py b/pywb/rewrite/rewrite_live.py index 99592786..ca2c04a0 100644 --- a/pywb/rewrite/rewrite_live.py +++ b/pywb/rewrite/rewrite_live.py @@ -75,8 +75,9 @@ class LiveRewriter(object): elif name == 'HTTP_REFERER': continue - elif name.startswith(('HTTP_X_', 'HTTP_VIA')): - continue + elif name == 'HTTP_X_FORWARDED_PROTO': + name = 'X-Forwarded-Proto' + value = splits.scheme elif name == 'HTTP_COOKIE': name = 'Cookie' @@ -147,8 +148,6 @@ class LiveRewriter(object): else: data = input_ - print(url) - print(req_headers) response = requests.request(method=method, url=url, data=data, From cc144fdeade6ea29623746695becfa02a617ab3c Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Wed, 4 Feb 2015 21:44:18 -0800 Subject: [PATCH 7/8] rewrite: add basic test for X-Forwarded-Proto #57 --- pywb/rewrite/rewrite_live.py | 2 +- pywb/rewrite/test/test_rewrite_live.py | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/pywb/rewrite/rewrite_live.py b/pywb/rewrite/rewrite_live.py index ca2c04a0..dc43d51e 100644 --- a/pywb/rewrite/rewrite_live.py +++ b/pywb/rewrite/rewrite_live.py @@ -93,7 +93,7 @@ class LiveRewriter(object): elif name == 'REL_REFERER': name = 'Referer' else: - continue + value = None if value: headers[name] = value diff --git a/pywb/rewrite/test/test_rewrite_live.py b/pywb/rewrite/test/test_rewrite_live.py index 99fb6074..eb4d51d4 100644 --- a/pywb/rewrite/test/test_rewrite_live.py +++ b/pywb/rewrite/test/test_rewrite_live.py @@ -26,6 +26,14 @@ def test_csrf_token_headers(): assert req_headers == {'X-CSRFToken': 'foobar', 'Cookie': 'csrftoken=foobar'} +def test_forwarded_scheme(): + rewriter = LiveRewriter() + env = {'HTTP_X_FORWARDED_PROTO': 'https', 'Other': 'Value'} + + req_headers = rewriter.translate_headers('http://example.com/', 'com,example)/', env) + + assert req_headers == {'X-Forwarded-Proto': 'http'} + def test_req_cookie_rewrite_1(): rewriter = LiveRewriter() env = {'HTTP_COOKIE': 'A=B'} From 384e68c84b4cd1e4e124f721e07d18fa42d017a3 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Wed, 4 Feb 2015 21:46:57 -0800 Subject: [PATCH 8/8] bump version to 0.7.8 for latest fix --- CHANGES.rst | 6 ++++++ README.rst | 2 +- setup.py | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index d239eaee..40dcf42d 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,9 @@ +pywb 0.7.8 changelist +~~~~~~~~~~~~~~~~~~~~~ + +* live rewrite fix: When forwarding ``X-Forwarded-Proto`` header, set scheme to actual url scheme to avoid possible redirect loops (#57) + + pywb 0.7.7 changelist ~~~~~~~~~~~~~~~~~~~~~ diff --git a/README.rst b/README.rst index 71f39f6b..d680ae23 100644 --- a/README.rst +++ b/README.rst @@ -1,4 +1,4 @@ -PyWb 0.7.7 +PyWb 0.7.8 ========== .. image:: https://travis-ci.org/ikreymer/pywb.png?branch=master diff --git a/setup.py b/setup.py index 3fa33c63..f0eedcea 100755 --- a/setup.py +++ b/setup.py @@ -34,7 +34,7 @@ class PyTest(TestCommand): setup( name='pywb', - version='0.7.7', + version='0.7.8', url='https://github.com/ikreymer/pywb', author='Ilya Kreymer', author_email='ikreymer@gmail.com',