diff --git a/pywb/rewrite/test/test_cookie_rewriter.py b/pywb/rewrite/test/test_cookie_rewriter.py index fa799787..c6a7780d 100644 --- a/pywb/rewrite/test/test_cookie_rewriter.py +++ b/pywb/rewrite/test/test_cookie_rewriter.py @@ -22,10 +22,10 @@ True [('Set-Cookie', 'some=value; Path=/pywb/')] >>> rewrite_cookie('abc=def; Path=file.html; Expires=Wed, 13 Jan 2021 22:23:01 GMT', urlrewriter, 'coll') -[('Set-Cookie', 'abc=def; Path=/pywb/20131226101010/http://example.com/some/path/file.html')] +[('Set-Cookie', 'abc=def; Path=file.html')] # keep Max-Age ->>> rewrite_cookie('abc=def; Path=file.html; Max-Age=1500', urlrewriter2, 'coll') +>>> rewrite_cookie('abc=def; Path=/file.html; Max-Age=1500', urlrewriter2, 'coll') [('Set-Cookie', 'abc=def; Max-Age=1500; Path=/preview/em_/http://example.com/file.html')] # Cookie with invalid chars, not parsed @@ -92,14 +92,14 @@ def rewrite_cookie(cookie_str, rewriter=urlrewriter, scope='default'): @pytest.mark.skipif(sys.version_info < (2,7), reason='Unsupported') def test_with_expires(): # keep expires - res = rewrite_cookie('abc=def; Path=file.html; Expires=Wed, 13 Jan 2021 22:23:01 GMT', urlrewriter2, 'coll') + res = rewrite_cookie('abc=def; Path=/file.html; Expires=Wed, 13 Jan 2021 22:23:01 GMT', urlrewriter2, 'coll') assert len(res) == 1 assert res[0][1].lower() == 'abc=def; expires=wed, 13 jan 2021 22:23:01 gmt; path=/preview/em_/http://example.com/file.html' @pytest.mark.skipif(sys.version_info < (2,7), reason='Unsupported') def test_with_expires_utc_replace(): # keep expires, UTC->GMT - res = rewrite_cookie('abc=def; Path=file.html; Expires=Wed, 13 Jan 2021 22:23:01 UTC', urlrewriter2, 'coll') + res = rewrite_cookie('abc=def; Path=/file.html; Expires=Wed, 13 Jan 2021 22:23:01 UTC', urlrewriter2, 'coll') assert len(res) == 1 assert res[0][1].lower() == 'abc=def; expires=wed, 13 jan 2021 22:23:01 gmt; path=/preview/em_/http://example.com/file.html' @@ -113,14 +113,14 @@ def test_http_secure_flag(): @pytest.mark.skipif(sys.version_info < (2,7), reason='Unsupported') def test_secure_flag_remove(): # Secure Remove - res = rewrite_cookie('abc=def; Path=file.html; HttpOnly; Secure', urlrewriter2, 'coll') + res = rewrite_cookie('abc=def; Path=/file.html; HttpOnly; Secure', urlrewriter2, 'coll') assert len(res) == 1 assert res[0][1].lower() == 'abc=def; httponly; path=/preview/em_/http://example.com/file.html' @pytest.mark.skipif(sys.version_info < (2,7), reason='Unsupported') def test_secure_flag_keep(): # Secure Keep - res = rewrite_cookie('abc=def; Path=file.html; HttpOnly; Secure', urlrewriter3, 'coll') + res = rewrite_cookie('abc=def; Path=/file.html; HttpOnly; Secure', urlrewriter3, 'coll') assert res[0][1].lower() == 'abc=def; httponly; path=/preview/em_/http://example.com/file.html; secure' diff --git a/pywb/rewrite/test/test_header_rewriter.py b/pywb/rewrite/test/test_header_rewriter.py index fc2146d7..e58c6d84 100644 --- a/pywb/rewrite/test/test_header_rewriter.py +++ b/pywb/rewrite/test/test_header_rewriter.py @@ -78,7 +78,7 @@ def _test_head_data(headers, status='200 OK', rewriter=urlrewriter): def test_cookie_headers(): # cookie, host/origin rewriting res = _test_head_data([('Connection', 'close'), - ('Set-Cookie', 'foo=bar; Path=/; abc=def; Path=somefile.html'), + ('Set-Cookie', 'foo=bar; Path=/; abc=def; Path=/somefile.html'), ('Host', 'example.com'), ('Origin', 'https://example.com')]) diff --git a/pywb/rewrite/test/test_html_rewriter.py b/pywb/rewrite/test/test_html_rewriter.py index 04c42f73..d8087555 100644 --- a/pywb/rewrite/test/test_html_rewriter.py +++ b/pywb/rewrite/test/test_html_rewriter.py @@ -8,7 +8,7 @@ r""" #================================================================= >>> parse('Text') -Text +Text >>> parse('
')
@@ -35,8 +35,8 @@ r""" >>> parse('', urlrewriter=full_path_urlrewriter) ->>> parse('') - +>>> parse('') + # ensure trailing slash added >>> parse('') @@ -47,7 +47,7 @@ r""" >>> parse('', urlrewriter=no_base_canon_rewriter) - + # Empty url >>> parse('') @@ -58,7 +58,7 @@ r""" # href on other tags >>> parse('
Text
') -
Text
+
Text
# HTML Entities >>> parse('›   > ?') @@ -148,10 +148,10 @@ r""" >>> parse('
') -
+
->>> parse('
') -
+>>> parse('
') +
>>> parse('') @@ -169,19 +169,19 @@ r""" # Style ->>> parse('') - +>>> parse('') + # Unterminated style tag, handle and auto-terminate >>> parse(' + # Head Insertion ->>> parse('Test', head_insert = '') -Test +>>> parse('Test', head_insert = '') +Test >>> parse('', head_insert = '') - + >>> parse('Test', head_insert = '') Test @@ -189,7 +189,7 @@ r""" >>> parse('
SomeTest
', head_insert = '/* Insert */') /* Insert */
SomeTest
->>> parse('
SomeTest
', head_insert = '') +>>> parse('
SomeTest
', head_insert = '')
SomeTest
>>> parse('Some Text without any tags ', head_insert = '') @@ -236,7 +236,7 @@ r""" # remove extra spaces >>> parse('
Text') -Text +Text >>> parse('Text') Text diff --git a/pywb/rewrite/test/test_regex_rewriters.py b/pywb/rewrite/test/test_regex_rewriters.py index 2f508cd8..2762b9d0 100644 --- a/pywb/rewrite/test/test_regex_rewriters.py +++ b/pywb/rewrite/test/test_regex_rewriters.py @@ -151,7 +151,7 @@ r""" 'background: url(" /web/20131010/http://domain.com/path.html x ")' >>> _test_css("background: url(file.jpeg)") -'background: url(/web/20131010/http://example.com/file.jpeg)' +'background: url(file.jpeg)' >>> _test_css("background:#abc url('/static/styles/../images/layout/logo.png')") "background:#abc url('/web/20131010/http://example.com/static/images/layout/logo.png')" @@ -163,18 +163,18 @@ r""" "background: url('')" >>> _test_css("background: url (\"weirdpath\')") -'background: url ("/web/20131010/http://example.com/weirdpath\')' +'background: url ("weirdpath\')' ->>> _test_css("@import url ('path.css')") +>>> _test_css("@import url ('/path.css')") "@import url ('/web/20131010/http://example.com/path.css')" >>> _test_css("@import url('path.css')") -"@import url('/web/20131010/http://example.com/path.css')" +"@import url('path.css')" >>> _test_css("@import ( 'path.css')") -"@import ( '/web/20131010/http://example.com/path.css')" +"@import ( 'path.css')" ->>> _test_css("@import \"path.css\"") +>>> _test_css("@import \"/path.css\"") '@import "/web/20131010/http://example.com/path.css"' >>> _test_css("@import ('../path.css\"") @@ -184,7 +184,7 @@ r""" '@import (\'/web/20131010/http://example.com/url.css"' >>> _test_css("@import (\"url.css\")") -'@import ("/web/20131010/http://example.com/url.css")' +'@import ("url.css")' >>> _test_css("@import url(/url.css)\n@import url(/anotherurl.css)\n @import url(/and_a_third.css)") '@import url(/web/20131010/http://example.com/url.css)\n@import url(/web/20131010/http://example.com/anotherurl.css)\n @import url(/web/20131010/http://example.com/and_a_third.css)' diff --git a/pywb/rewrite/test/test_rewrite_live.py b/pywb/rewrite/test/test_rewrite_live.py index d3ffc3d8..62fa3bf9 100644 --- a/pywb/rewrite/test/test_rewrite_live.py +++ b/pywb/rewrite/test/test_rewrite_live.py @@ -123,7 +123,7 @@ def test_local_no_head_banner_only(): assert 'window.location = "/other.html"' in buff # link NOT rewritten - assert '"another.html"' in buff + assert '"/some/path/another.html"' in buff def test_local_banner_only_no_rewrite(): status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample.html', @@ -138,7 +138,7 @@ def test_local_banner_only_no_rewrite(): assert 'window.location = "http:\/\/example.com/dynamic_page.html"' in buff, buff # link NOT rewritten - assert '"another.html"' in buff + assert '"/some/path/another.html"' in buff def test_local_2_link_only_rewrite(): status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample.html', diff --git a/pywb/rewrite/test/test_url_rewriter.py b/pywb/rewrite/test/test_url_rewriter.py index ac23051a..da243cd7 100644 --- a/pywb/rewrite/test/test_url_rewriter.py +++ b/pywb/rewrite/test/test_url_rewriter.py @@ -21,19 +21,19 @@ # UrlRewriter tests >>> do_rewrite('other.html', '20131010/http://example.com/path/page.html', 'https://web.archive.org/web/') -'/web/20131010/http://example.com/path/other.html' +'other.html' ->>> do_rewrite('file.js', '20131010/http://example.com/path/page.html', 'https://web.archive.org/web/', 'js_') +>>> do_rewrite('/path/file.js', '20131010/http://example.com/path/page.html', 'https://web.archive.org/web/', 'js_') '/web/20131010js_/http://example.com/path/file.js' ->>> do_rewrite('file.js', '20131010/http://example.com/', '/coll/') +>>> do_rewrite('/file.js', '20131010/http://example.com/', '/coll/') '/coll/20131010/http://example.com/file.js' ->>> do_rewrite('file.js', '20131010/http://example.com', '/coll/', 'js_') +>>> do_rewrite('/file.js', '20131010/http://example.com', '/coll/', 'js_') '/coll/20131010js_/http://example.com/file.js' >>> do_rewrite('file.js', '20131010/http://example.com', '/coll/', '') -'/coll/20131010/http://example.com/file.js' +'file.js' >>> do_rewrite('/other.html', '20130907*/http://example.com/path/page.html', 'http://localhost:8080/coll/') '/coll/20130907*/http://example.com/other.html' @@ -41,8 +41,8 @@ >>> do_rewrite('/other.html', '20130907*/http://example.com/path/page.html', '/coll/') '/coll/20130907*/http://example.com/other.html' ->>> do_rewrite('./other.html', '20130907*/http://example.com/path/page.html', '/coll/') -'/coll/20130907*/http://example.com/path/other.html' +>>> do_rewrite('other.html', '20130907*/http://example.com/path/page.html', '/coll/') +'other.html' >>> do_rewrite('../other.html', '20131112im_/http://example.com/path/page.html', '/coll/') '/coll/20131112im_/http://example.com/other.html' @@ -87,7 +87,7 @@ '2020/http://example.com/other.html' >>> do_rewrite('', '20131010010203/http://example.com/file.html', '/web/') -'/web/20131010010203/http://example.com/file.html' +'' >>> do_rewrite('#anchor', '20131010/http://example.com/path/page.html', 'https://web.archive.org/web/') '#anchor' diff --git a/pywb/rewrite/url_rewriter.py b/pywb/rewrite/url_rewriter.py index 4774bc03..d6fda47f 100644 --- a/pywb/rewrite/url_rewriter.py +++ b/pywb/rewrite/url_rewriter.py @@ -19,6 +19,9 @@ class UrlRewriter(object): REL_SCHEME = ('//', r'\/\/', r'\\/\\/') + PARENT_PATH = '../' + REL_PATH = '/' + def __init__(self, wburl, prefix='', full_prefix=None, rel_prefix=None, root_path=None, cookie_scope=None, rewrite_opts=None): self.wburl = wburl if isinstance(wburl, WbUrl) else WbUrl(wburl) @@ -60,6 +63,11 @@ class UrlRewriter(object): if url.startswith(self.REL_SCHEME): is_abs = True scheme_rel = True + elif (not is_abs and + not url.startswith(self.REL_PATH) and + self.PARENT_PATH not in url): + return url + # if prefix starts with a scheme #if self.prefix_scheme: # url = self.prefix_scheme + ':' + url diff --git a/pywb/static/wombat.js b/pywb/static/wombat.js index 882c996f..b4730072 100644 --- a/pywb/static/wombat.js +++ b/pywb/static/wombat.js @@ -109,7 +109,7 @@ var wombat_internal = function($wbwindow) { } //============================================ - var rewrite_url = rewrite_url_; + var rewrite_url = rewrite_url_debug; function rewrite_url_debug(url, use_rel, mod) { var rewritten = rewrite_url_(url, use_rel, mod); diff --git a/sample_archive/text_content/sample.html b/sample_archive/text_content/sample.html index f2ed6842..fc2d66d0 100644 --- a/sample_archive/text_content/sample.html +++ b/sample_archive/text_content/sample.html @@ -10,5 +10,5 @@ if (some_val) { } Test Content -Some Link +Some Link diff --git a/sample_archive/text_content/sample_no_head.html b/sample_archive/text_content/sample_no_head.html index ed4bc4f3..4242336f 100644 --- a/sample_archive/text_content/sample_no_head.html +++ b/sample_archive/text_content/sample_no_head.html @@ -5,4 +5,4 @@ if (some_val) { } Test Content -Some Link +Some Link