diff --git a/pywb/apps/rewriterapp.py b/pywb/apps/rewriterapp.py index f596b189..60655aaa 100644 --- a/pywb/apps/rewriterapp.py +++ b/pywb/apps/rewriterapp.py @@ -175,6 +175,12 @@ class RewriterApp(object): content_length = (record.http_headers. get_header('Content-Length')) + + if content_length is None: + return + + content_length = content_length.split(',')[0] + try: content_length = int(content_length) if not range_end: diff --git a/pywb/rewrite/cookie_rewriter.py b/pywb/rewrite/cookie_rewriter.py index 53dd622c..1cfd8a99 100644 --- a/pywb/rewrite/cookie_rewriter.py +++ b/pywb/rewrite/cookie_rewriter.py @@ -26,10 +26,39 @@ class WbUrlBaseCookieRewriter(object): morsel = self.rewrite_cookie(name, morsel) self._filter_morsel(morsel) - results.append((header, morsel.OutputString())) + + if not self.add_prefix_cookie_for_all_mods(morsel, results, header): + value = morsel.OutputString() + results.append((header, value)) return results + def add_prefix_cookie_for_all_mods(self, morsel, results, header): + """ If HttpOnly cookie that is set to a path ending in /, + and current mod is mp_ or if_, + then assume it's meant to be a prefix, and likely needed for + other content. 
+ Set cookie with same prefix but for all common modifiers: + (mp_, cs_, js_, im_, oe_, if_) + """ + curr_mod = self.url_rewriter.wburl.mod + if curr_mod not in ('mp_', 'if_'): + return False + + if not morsel.get('httponly'): + return False + + path = morsel.get('path') + if not path or not path.endswith('/'): + return False + + for mod in ('mp_', 'cs_', 'js_', 'im_', 'oe_', 'if_'): + new_path = path.replace(curr_mod + '/', mod + '/') + morsel['path'] = new_path + results.append((header, morsel.OutputString())) + + return True + def _filter_morsel(self, morsel): path = morsel.get('path') if path: diff --git a/pywb/rewrite/test/test_content_rewriter.py b/pywb/rewrite/test/test_content_rewriter.py index 60cbd265..385896e0 100644 --- a/pywb/rewrite/test/test_content_rewriter.py +++ b/pywb/rewrite/test/test_content_rewriter.py @@ -21,6 +21,7 @@ import os import json import pytest import six +import re # ============================================================================ @@ -277,6 +278,36 @@ class TestContentRewriter(object): assert is_rw == False + def test_rewrite_cookies_all_mods(self): + headers = {'Set-Cookie': 'foo=bar; Expires=Wed, 13 Jan 2021 22:23:01 GMT; Path=/some/path/; HttpOnly'} + content = '\x11\x12\x13\x14' + headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701mp_') + + mods = set() + assert len(headers.headers) == 6 + for name, value in headers.headers: + assert name == 'Set-Cookie' + mods.add(re.search('Path=/prefix/201701([^/]+)', value).group(1)) + + assert mods == {'mp_', 'cs_', 'js_', 'im_', 'oe_', 'if_'} + assert is_rw == False + + def test_rewrite_http_cookie_no_all_mods_no_slash(self): + headers = {'Set-Cookie': 'foo=bar; Expires=Wed, 13 Jan 2021 22:23:01 GMT; Path=/some/path; HttpOnly'} + content = 'abcdefg' + headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701mp_') + + assert len(headers.headers) == 1 + assert headers.headers[0][0] == 'Set-Cookie' + + def 
test_rewrite_http_cookie_no_all_mods_wrong_mod(self): + headers = {'Set-Cookie': 'foo=bar; Expires=Wed, 13 Jan 2021 22:23:01 GMT; Path=/some/path/; HttpOnly'} + content = 'abcdefg' + headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701id_') + + assert len(headers.headers) == 1 + assert headers.headers[0][0] == 'Set-Cookie' + def test_binary_no_content_type(self): headers = {} content = '\x11\x12\x13\x14' diff --git a/pywb/static/wombat.js b/pywb/static/wombat.js index 1109ab0a..f6cefc4e 100644 --- a/pywb/static/wombat.js +++ b/pywb/static/wombat.js @@ -251,8 +251,8 @@ var _WBWombat = function($wbwindow, wbinfo) { //============================================ var rewrite_url = rewrite_url_; - function rewrite_url_debug(url, use_rel, mod) { - var rewritten = rewrite_url_(url, use_rel, mod); + function rewrite_url_debug(url, use_rel, mod, doc) { + var rewritten = rewrite_url_(url, use_rel, mod, doc); if (url != rewritten) { console.log('REWRITE: ' + url + ' -> ' + rewritten); } else { @@ -280,7 +280,7 @@ var _WBWombat = function($wbwindow, wbinfo) { //============================================ - function rewrite_url_(url, use_rel, mod) { + function rewrite_url_(url, use_rel, mod, doc) { // If undefined, just return it if (!url) { return url; @@ -369,7 +369,7 @@ var _WBWombat = function($wbwindow, wbinfo) { // Use a parser if (url.charAt(0) == ".") { - url = resolve_rel_url(url); + url = resolve_rel_url(url, doc); } // If full url starting with http://, https:// or // @@ -1606,7 +1606,7 @@ var _WBWombat = function($wbwindow, wbinfo) { return; } var mod = rwModForElement(elem, name); - new_value = rewrite_url(value, false, mod); + new_value = rewrite_url(value, false, mod, elem.ownerDocument); } if (new_value != value) { diff --git a/pywb/warcserver/resource/responseloader.py b/pywb/warcserver/resource/responseloader.py index e6470176..6aecbb80 100644 --- a/pywb/warcserver/resource/responseloader.py +++ b/pywb/warcserver/resource/responseloader.py 
@@ -108,7 +108,7 @@ class BaseLoader(object): # Try to set content-length, if it is available and valid try: content_len = int(content_len_str) - except (KeyError, TypeError): + except (ValueError, TypeError): content_len = -1 if content_len >= 0: diff --git a/tests/test_live_rewriter.py b/tests/test_live_rewriter.py index 08c6ef71..68d29cb2 100644 --- a/tests/test_live_rewriter.py +++ b/tests/test_live_rewriter.py @@ -1,6 +1,7 @@ from .base_config_test import BaseConfigTest, fmod_sl from pywb.warcserver.test.testutils import HttpBinLiveTests import pytest +import sys # ============================================================================ @@ -37,6 +38,26 @@ class TestLiveRewriter(HttpBinLiveTests, BaseConfigTest): resp = self.head('/live/{0}httpbin.org/get?foo=bar', fmod_sl) assert resp.status_int == 200 + @pytest.mark.skipif(sys.version_info < (3,0), reason='does not respond in 2.7') + def test_live_bad_content_length(self, fmod_sl): + resp = self.get('/live/{0}httpbin.org/response-headers?content-length=149,149', fmod_sl, status=200) + assert resp.headers['Content-Length'] == '149' + + resp = self.get('/live/{0}httpbin.org/response-headers?Content-Length=xyz', fmod_sl, status=200) + assert resp.headers['Content-Length'] == '90' + + @pytest.mark.skipif(sys.version_info < (3,0), reason='does not respond in 2.7') + def test_live_bad_content_length_with_range(self, fmod_sl): + resp = self.get('/live/{0}httpbin.org/response-headers?content-length=149,149', fmod_sl, + headers={'Range': 'bytes=0-'}, status=206) + assert resp.headers['Content-Length'] == '149' + assert resp.headers['Content-Range'] == 'bytes 0-148/149' + + resp = self.get('/live/{0}httpbin.org/response-headers?Content-Length=xyz', fmod_sl, + headers={'Range': 'bytes=0-'}, status=206) + assert resp.headers['Content-Length'] == '90' + assert resp.headers['Content-Range'] == 'bytes 0-89/90' + def test_live_live_frame(self): resp = self.testapp.get('/live/http://example.com/') assert resp.status_int 
== 200