mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
Rewriting fixes for http-only cookies, bad content-length, and document with base (#386)
* Rewriting fixes:
  - Server side, cookie rewriting: if an HttpOnly cookie is set under the mp_/if_ modifier and its path ends with '/', add a Set-Cookie header for all known modifiers.
  - Content-length parsing: improve content-length parsing to support 'content-length: num,num' by parsing out the first number (occasionally seen with range requests when the range is dropped for upstream).
  - wombat: rewrite_elem: use element.ownerDocument for resolving baseUri for parent paths.
  - Tests: add tests for the cookie all-modifier rewrite and for bad content-length parsing (skipped for py2.7).
This commit is contained in:
parent
e6f00ce58d
commit
671dd2c204
@ -175,6 +175,12 @@ class RewriterApp(object):
|
||||
|
||||
content_length = (record.http_headers.
|
||||
get_header('Content-Length'))
|
||||
|
||||
if content_length is None:
|
||||
return
|
||||
|
||||
content_length = content_length.split(',')[0]
|
||||
|
||||
try:
|
||||
content_length = int(content_length)
|
||||
if not range_end:
|
||||
|
@ -26,10 +26,39 @@ class WbUrlBaseCookieRewriter(object):
|
||||
morsel = self.rewrite_cookie(name, morsel)
|
||||
|
||||
self._filter_morsel(morsel)
|
||||
results.append((header, morsel.OutputString()))
|
||||
|
||||
if not self.add_prefix_cookie_for_all_mods(morsel, results, header):
|
||||
value = morsel.OutputString()
|
||||
results.append((header, value))
|
||||
|
||||
return results
|
||||
|
||||
def add_prefix_cookie_for_all_mods(self, morsel, results, header):
    """ If HttpOnly cookie that is set to a path ending in /,
    and current mod is mp_ or if_,
    then assume its meant to be a prefix, and likely needed for
    other content.
    Set cookie with same prefix but for all common modifiers:
    (mp_, cs_, js_, im_, oe_, if_)

    :param morsel: cookie morsel whose ``path`` has already been rewritten;
        its ``path`` is overwritten in place for each emitted modifier and is
        left set to the last one (``if_``) when True is returned
    :param list results: list of ``(header, value)`` tuples, appended to
    :param header: header name to pair with each emitted cookie value
    :return: True if the per-modifier cookies were appended to ``results``,
        False if nothing was appended and the caller should emit the cookie
    """
    curr_mod = self.url_rewriter.wburl.mod
    if curr_mod not in ('mp_', 'if_'):
        return False

    if not morsel.get('httponly'):
        return False

    path = morsel.get('path')
    if not path or not path.endswith('/'):
        return False

    # Without the modifier segment in the path there is nothing to swap;
    # let the caller emit the single cookie instead of six identical copies.
    marker = curr_mod + '/'
    if marker not in path:
        return False

    for mod in ('mp_', 'cs_', 'js_', 'im_', 'oe_', 'if_'):
        # Swap only the first occurrence: a later 'mp_/' or 'if_/' would be
        # part of the original url path and must be left untouched.
        morsel['path'] = path.replace(marker, mod + '/', 1)
        results.append((header, morsel.OutputString()))

    return True
|
||||
|
||||
def _filter_morsel(self, morsel):
|
||||
path = morsel.get('path')
|
||||
if path:
|
||||
|
@ -21,6 +21,7 @@ import os
|
||||
import json
|
||||
import pytest
|
||||
import six
|
||||
import re
|
||||
|
||||
|
||||
# ============================================================================
|
||||
@ -277,6 +278,36 @@ class TestContentRewriter(object):
|
||||
|
||||
assert is_rw == False
|
||||
|
||||
def test_rewrite_cookies_all_mods(self):
    """HttpOnly cookie with a trailing-slash path under mp_ yields six
    Set-Cookie headers, one for each expected modifier."""
    cookie = 'foo=bar; Expires=Wed, 13 Jan 2021 22:23:01 GMT; Path=/some/path/; HttpOnly'
    payload = '\x11\x12\x13\x14'

    rewritten, _gen, is_rw = self.rewrite_record({'Set-Cookie': cookie}, payload, ts='201701mp_')

    assert len(rewritten.headers) == 6

    # Collect the modifier that follows the timestamp in each rewritten path
    seen_mods = set()
    for hdr_name, hdr_value in rewritten.headers:
        assert hdr_name == 'Set-Cookie'
        found = re.search('Path=/prefix/201701([^/]+)', hdr_value)
        seen_mods.add(found.group(1))

    assert seen_mods == {'mp_', 'cs_', 'js_', 'im_', 'oe_', 'if_'}
    assert is_rw == False
|
||||
|
||||
def test_rewrite_http_cookie_no_all_mods_no_slash(self):
    """HttpOnly cookie whose path has no trailing slash yields a single
    Set-Cookie header under mp_."""
    cookie = 'foo=bar; Expires=Wed, 13 Jan 2021 22:23:01 GMT; Path=/some/path; HttpOnly'

    rewritten, _gen, _is_rw = self.rewrite_record({'Set-Cookie': cookie}, 'abcdefg', ts='201701mp_')

    assert len(rewritten.headers) == 1
    first_name = rewritten.headers[0][0]
    assert first_name == 'Set-Cookie'
|
||||
|
||||
def test_rewrite_http_cookie_no_all_mods_wrong_mod(self):
    """HttpOnly cookie with a trailing-slash path yields a single
    Set-Cookie header when the modifier is id_."""
    cookie = 'foo=bar; Expires=Wed, 13 Jan 2021 22:23:01 GMT; Path=/some/path/; HttpOnly'

    rewritten, _gen, _is_rw = self.rewrite_record({'Set-Cookie': cookie}, 'abcdefg', ts='201701id_')

    assert len(rewritten.headers) == 1
    first_name = rewritten.headers[0][0]
    assert first_name == 'Set-Cookie'
|
||||
|
||||
def test_binary_no_content_type(self):
|
||||
headers = {}
|
||||
content = '\x11\x12\x13\x14'
|
||||
|
@ -251,8 +251,8 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
//============================================
|
||||
var rewrite_url = rewrite_url_;
|
||||
|
||||
function rewrite_url_debug(url, use_rel, mod) {
|
||||
var rewritten = rewrite_url_(url, use_rel, mod);
|
||||
function rewrite_url_debug(url, use_rel, mod, doc) {
|
||||
var rewritten = rewrite_url_(url, use_rel, mod, doc);
|
||||
if (url != rewritten) {
|
||||
console.log('REWRITE: ' + url + ' -> ' + rewritten);
|
||||
} else {
|
||||
@ -280,7 +280,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
|
||||
|
||||
//============================================
|
||||
function rewrite_url_(url, use_rel, mod) {
|
||||
function rewrite_url_(url, use_rel, mod, doc) {
|
||||
// If undefined, just return it
|
||||
if (!url) {
|
||||
return url;
|
||||
@ -369,7 +369,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
|
||||
// Use a parser
|
||||
if (url.charAt(0) == ".") {
|
||||
url = resolve_rel_url(url);
|
||||
url = resolve_rel_url(url, doc);
|
||||
}
|
||||
|
||||
// If full url starting with http://, https:// or //
|
||||
@ -1606,7 +1606,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
return;
|
||||
}
|
||||
var mod = rwModForElement(elem, name);
|
||||
new_value = rewrite_url(value, false, mod);
|
||||
new_value = rewrite_url(value, false, mod, elem.ownerDocument);
|
||||
}
|
||||
|
||||
if (new_value != value) {
|
||||
|
@ -108,7 +108,7 @@ class BaseLoader(object):
|
||||
# Try to set content-length, if it is available and valid
|
||||
try:
|
||||
content_len = int(content_len_str)
|
||||
except (KeyError, TypeError):
|
||||
except (ValueError, TypeError):
|
||||
content_len = -1
|
||||
|
||||
if content_len >= 0:
|
||||
|
@ -1,6 +1,7 @@
|
||||
from .base_config_test import BaseConfigTest, fmod_sl
|
||||
from pywb.warcserver.test.testutils import HttpBinLiveTests
|
||||
import pytest
|
||||
import sys
|
||||
|
||||
|
||||
# ============================================================================
|
||||
@ -37,6 +38,26 @@ class TestLiveRewriter(HttpBinLiveTests, BaseConfigTest):
|
||||
resp = self.head('/live/{0}httpbin.org/get?foo=bar', fmod_sl)
|
||||
assert resp.status_int == 200
|
||||
|
||||
@pytest.mark.skipif(sys.version_info < (3,0), reason='does not respond in 2.7')
def test_live_bad_content_length(self, fmod_sl):
    """Live responses with a malformed upstream Content-Length are served
    with status 200 and the asserted Content-Length value."""
    cases = (
        # duplicated 'num,num' value
        ('/live/{0}httpbin.org/response-headers?content-length=149,149', '149'),
        # non-numeric value
        ('/live/{0}httpbin.org/response-headers?Content-Length=xyz', '90'),
    )

    for url, expected_len in cases:
        resp = self.get(url, fmod_sl, status=200)
        assert resp.headers['Content-Length'] == expected_len
|
||||
|
||||
@pytest.mark.skipif(sys.version_info < (3,0), reason='does not respond in 2.7')
def test_live_bad_content_length_with_range(self, fmod_sl):
    """Same malformed Content-Length cases requested with an open-ended
    Range header: expects 206 with the asserted length and Content-Range."""
    cases = (
        ('/live/{0}httpbin.org/response-headers?content-length=149,149',
         '149', 'bytes 0-148/149'),
        ('/live/{0}httpbin.org/response-headers?Content-Length=xyz',
         '90', 'bytes 0-89/90'),
    )

    for url, expected_len, expected_range in cases:
        # fresh headers dict per request, as in the separate calls
        resp = self.get(url, fmod_sl,
                        headers={'Range': 'bytes=0-'}, status=206)
        assert resp.headers['Content-Length'] == expected_len
        assert resp.headers['Content-Range'] == expected_range
|
||||
|
||||
def test_live_live_frame(self):
|
||||
resp = self.testapp.get('/live/http://example.com/')
|
||||
assert resp.status_int == 200
|
||||
|
Loading…
x
Reference in New Issue
Block a user