1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

Merge branch 'master' for 0.7.8 into develop

This commit is contained in:
Ilya Kreymer 2015-02-05 08:45:55 -08:00
commit 78ae86b6b6
6 changed files with 82 additions and 17 deletions

View File

@ -1,6 +1,21 @@
pywb 0.8.0 changelist
~~~~~~~~~~~~~~~~~~~~~
pywb 0.7.8 changelist
~~~~~~~~~~~~~~~~~~~~~
* live rewrite fix: When forwarding ``X-Forwarded-Proto`` header, set scheme to actual url scheme to avoid possible redirect loops (#57)
pywb 0.7.7 changelist
~~~~~~~~~~~~~~~~~~~~~
* client-side rewrite: improved rewriting of all style changes using mutation observers
* rules: fix YT rewrite rule, add rule for wikimedia
* cdx-indexer: minor cleanup, add support for custom writer for batched cdx (write_multi_cdx_index)
pywb 0.7.6 changelist
~~~~~~~~~~~~~~~~~~~~~

View File

@ -75,6 +75,10 @@ class LiveRewriter(object):
elif name == 'HTTP_REFERER':
continue
elif name == 'HTTP_X_FORWARDED_PROTO':
name = 'X-Forwarded-Proto'
value = splits.scheme
elif name == 'HTTP_COOKIE':
name = 'Cookie'
value = self._req_cookie_rewrite(urlkey, value)
@ -89,7 +93,7 @@ class LiveRewriter(object):
elif name == 'REL_REFERER':
name = 'Referer'
else:
continue
value = None
if value:
headers[name] = value

View File

@ -26,6 +26,14 @@ def test_csrf_token_headers():
assert req_headers == {'X-CSRFToken': 'foobar', 'Cookie': 'csrftoken=foobar'}
def test_forwarded_scheme():
    """X-Forwarded-Proto is sent with the target url's scheme.

    The incoming environ carries 'https' while the requested url is
    'http://...'; the translated headers are expected to hold 'http' and
    nothing else (the unrelated 'Other' key is not forwarded).
    """
    environ = {'HTTP_X_FORWARDED_PROTO': 'https', 'Other': 'Value'}
    expected = {'X-Forwarded-Proto': 'http'}

    translated = LiveRewriter().translate_headers('http://example.com/',
                                                  'com,example)/',
                                                  environ)

    assert translated == expected
def test_req_cookie_rewrite_1():
rewriter = LiveRewriter()
env = {'HTTP_COOKIE': 'A=B'}

View File

@ -65,7 +65,7 @@ rules:
fuzzy_lookup: '()'
# instagram rules
# wikimedia rules
#=================================================================
- url_prefix: 'org,wikimedia,meta)/'
@ -144,7 +144,7 @@ rules:
js_regexs:
- match: 'window.location'
replace: 'WB_wombat_location'
# youtube rules
#=================================================================
@ -213,7 +213,7 @@ rules:
- match: 'ytplayer.load\(\);'
replace: 'ytplayer.config.args.dash = "0"; ytplayer.config.args.dashmpd = ""; {0}'
- match: 'yt\.setConfig.*PLAYER_CONFIG.*args": {'
- match: 'yt\.setConfig.*PLAYER_CONFIG.*args":\s*{'
replace: '{0} "dash": "0", dashmpd: "", '
req_cookie_rewrite:

View File

@ -626,6 +626,36 @@ _WBWombat = (function() {
window.Worker = undefined;
}
//============================================
/**
 * Observe inline `style` attribute changes across the whole document and
 * pass any changed style text that contains `url(` through rewrite_style().
 *
 * Does nothing when the browser does not provide MutationObserver.
 */
function init_mutation_obs() {
    if (!window.MutationObserver) {
        return;
    }

    var style_observer = new MutationObserver(function(records) {
        for (var i = 0; i < records.length; i++) {
            var record = records[i];

            if (record.type !== "attributes" || record.attributeName !== "style") {
                continue;
            }

            var style = record.target.style.cssText;

            // indexOf() reports "not found" as -1, so a match at index 0
            // must still count as a match.
            if (style.indexOf("url(") === -1) {
                continue;
            }

            var new_style = rewrite_style(style);

            // Write back only on an actual change: assigning cssText updates
            // the style attribute and so queues another record for this
            // observer. NOTE(review): this relies on rewrite_style() leaving
            // already-rewritten text unchanged -- confirm.
            if (new_style != style) {
                record.target.style.cssText = new_style;
            }
        }
    });

    style_observer.observe(document.documentElement, {
        childList: false,
        attributes: true,
        subtree: true,
        attributeFilter: ["style"]
    });
}
//============================================
function rewrite_attr(elem, name, func) {
if (!elem || !elem.getAttribute) {
@ -988,6 +1018,9 @@ _WBWombat = (function() {
init_ajax_rewrite();
init_worker_override();
// Init mutation observer (for style only)
init_mutation_obs();
// setAttribute
init_setAttribute_override();

View File

@ -107,6 +107,19 @@ def cdx_filename(filename):
return remove_ext(filename) + '.cdx'
#=================================================================
def get_cdx_writer_cls(options):
    """Pick the cdx writer class for the given options mapping.

    A truthy ``options['writer_cls']`` is returned as-is. Otherwise
    ``SortedCDXWriter`` is returned when ``options['sort']`` is truthy,
    and ``CDXWriter`` when it is not.
    """
    custom_cls = options.get('writer_cls')
    if custom_cls:
        return custom_cls

    return SortedCDXWriter if options.get('sort') else CDXWriter
#=================================================================
def write_multi_cdx_index(output, inputs, **options):
# write one cdx per dir
@ -117,7 +130,7 @@ def write_multi_cdx_index(output, inputs, **options):
with open(outpath, 'wb') as outfile:
with open(fullpath, 'rb') as infile:
write_cdx_index(outfile, infile, filename, **options)
return write_cdx_index(outfile, infile, filename, **options)
# write to one cdx file
else:
@ -126,10 +139,7 @@ def write_multi_cdx_index(output, inputs, **options):
else:
outfile = open(output, 'wb')
if options.get('sort'):
writer_cls = SortedCDXWriter
else:
writer_cls = CDXWriter
writer_cls = get_cdx_writer_cls(options)
with writer_cls(outfile, options.get('cdx09')) as writer:
for fullpath, filename in iter_file_or_dir(inputs):
@ -139,20 +149,15 @@ def write_multi_cdx_index(output, inputs, **options):
for entry in entry_iter:
writer.write(entry, filename)
return writer
#=================================================================
def write_cdx_index(outfile, infile, filename, **options):
writer_cls = options.get('writer_cls')
if type(filename) is unicode:
filename = filename.encode(sys.getfilesystemencoding())
if writer_cls:
pass
elif options.get('sort'):
writer_cls = SortedCDXWriter
else:
writer_cls = CDXWriter
writer_cls = get_cdx_writer_cls(options)
with writer_cls(outfile, options.get('cdx09')) as writer:
entry_iter = create_index_iter(infile, **options)