rewrite: remove extra wb_url param from rewrite_content()

The wb_url (needed to get the 'mod') will now come from the urlrewriter.
2025-03-15 00:03:28 +01:00 · 2014-08-04 21:11:46 -07:00 · 2014-08-04 21:11:46 -07:00 · 9e4459ae50
commit 9e4459ae50
parent c3004007d7
7 changed files with 22 additions and 34 deletions
--- a/pywb/rewrite/rewrite_content.py
+++ b/pywb/rewrite/rewrite_content.py
@ -58,10 +58,12 @@ class RewriteContent:

        return (rewritten_headers, stream)

-    def rewrite_content(self, wb_url, urlrewriter, headers, stream,
+    def rewrite_content(self, urlrewriter, headers, stream,
                        head_insert_func=None, urlkey='',
                        cdx=None):

+        wb_url = urlrewriter.wburl
+
        if (wb_url.is_identity or
            (not head_insert_func and wb_url.is_banner_only)):
            status_headers, stream = self.sanitize_content(headers, stream)
--- a/pywb/rewrite/rewrite_live.py
+++ b/pywb/rewrite/rewrite_live.py
@ -119,7 +119,7 @@ class LiveRewriter(object):

        return (status_headers, stream)

-    def fetch_request(self, wb_url, urlrewriter,
+    def fetch_request(self, url, urlrewriter,
                      head_insert_func=None,
                      urlkey=None,
                      env=None,
@ -128,12 +128,6 @@ class LiveRewriter(object):
                      follow_redirects=False,
                      proxies=None):

-        if isinstance(wb_url, str):
-            url = wb_url
-            wb_url = WbUrl(url)
-        else:
-            url = wb_url.url
-
        ts_err = url.split('///')

        if len(ts_err) > 1 and ts_err[0] != 'file:':
@ -167,8 +161,7 @@ class LiveRewriter(object):
              }

        result = (self.rewriter.
-                  rewrite_content(wb_url,
-                                  urlrewriter,
+                  rewrite_content(urlrewriter,
                                  status_headers,
                                  stream,
                                  head_insert_func=head_insert_func,
--- a/pywb/rewrite/test/test_rewrite_live.py
+++ b/pywb/rewrite/test/test_rewrite_live.py
@ -10,6 +10,7 @@ from io import BytesIO
 # As such, the content may change and the test may break

 urlrewriter = UrlRewriter('20131226101010/http://example.com/some/path/index.html', '/pywb/')
+bn_urlrewriter = UrlRewriter('20131226101010bn_/http://example.com/some/path/index.html', '/pywb/')

 def head_insert_func(rule, cdx):
    if rule.js_rewrite_location == True:
@ -35,8 +36,7 @@ def test_local_1():


 def test_local_no_head():
-    wb_url = WbUrl('file://' + get_test_dir() + 'text_content/sample_no_head.html')
-    status_headers, buff = get_rewritten(wb_url,
+    status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample_no_head.html',
                                         urlrewriter,
                                         head_insert_func,
                                         'com,example,test)/')
@ -51,11 +51,8 @@ def test_local_no_head():
    assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff

 def test_local_no_head_banner_only():
-    wb_url = WbUrl('file://' + get_test_dir() + 'text_content/sample_no_head.html')
-    wb_url.mod = 'bn_'
-
-    status_headers, buff = get_rewritten(wb_url,
-                                         urlrewriter,
+    status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample_no_head.html',
+                                         bn_urlrewriter,
                                         head_insert_func,
                                         'com,example,test)/')

@ -69,11 +66,8 @@ def test_local_no_head_banner_only():
    assert '"another.html"' in buff

 def test_local_banner_only():
-    wb_url = WbUrl('file://' + get_test_dir() + 'text_content/sample.html')
-    wb_url.mod = 'bn_'
-
-    status_headers, buff = get_rewritten(wb_url,
-                                         urlrewriter,
+    status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample.html',
+                                         bn_urlrewriter,
                                         head_insert_func,
                                         'com,example,test)/')

@ -129,8 +123,7 @@ def test_example_4_rewrite_err():
    assert status_headers.get_statuscode() == '200'

 def test_example_domain_specific_3():
-    urlrewriter2 = UrlRewriter('20131226101010/http://example.com/some/path/index.html', '/pywb/')
-    status_headers, buff = get_rewritten('http://facebook.com/digitalpreservation', urlrewriter2, follow_redirects=True)
+    status_headers, buff = get_rewritten('http://facebook.com/digitalpreservation', urlrewriter, follow_redirects=True)

    # comment out bootloader
    assert '/* Bootloader.configurePage' in buff
--- a/pywb/rewrite/test/test_url_rewriter.py
+++ b/pywb/rewrite/test/test_url_rewriter.py
@ -76,10 +76,10 @@


 # HttpsUrlRewriter tests
->>> HttpsUrlRewriter(None, None).rewrite('https://example.com/abc')
+>>> HttpsUrlRewriter('http://example.com/', None).rewrite('https://example.com/abc')
 'http://example.com/abc'

->>> HttpsUrlRewriter(None, None).rewrite('http://example.com/abc')
+>>> HttpsUrlRewriter('http://example.com/', None).rewrite('http://example.com/abc')
 'http://example.com/abc'

 """
--- a/pywb/rewrite/url_rewriter.py
+++ b/pywb/rewrite/url_rewriter.py
@ -126,7 +126,7 @@ class UrlRewriter(object):


 #=================================================================
-class HttpsUrlRewriter(object):
+class HttpsUrlRewriter(UrlRewriter):
    """
    A url rewriter which urls that start with https:// to http://
    Other urls/input is unchanged.
@ -135,9 +135,6 @@ class HttpsUrlRewriter(object):
    HTTP = 'http://'
    HTTPS = 'https://'

-    def __init__(self, wburl, prefix, full_prefix=None):
-        pass
-
    def rewrite(self, url, mod=None):
        if url.startswith(self.HTTPS):
            result = self.HTTP + url[len(self.HTTPS):]
--- a/pywb/webapp/live_rewrite_handler.py
+++ b/pywb/webapp/live_rewrite_handler.py
@ -38,6 +38,10 @@ class RewriteHandler(SearchPageWbUrlHandler):
            return self.render_content(wbrequest)

        except Exception as exc:
+            import traceback
+            err_details = traceback.format_exc(exc)
+            print err_details
+
            url = wbrequest.wb_url.url
            msg = 'Could not load the url from the live web: ' + url
            raise LiveResourceException(msg=msg, url=url)
@ -53,8 +57,8 @@ class RewriteHandler(SearchPageWbUrlHandler):
        if ref_wburl_str:
            wbrequest.env['REL_REFERER'] = WbUrl(ref_wburl_str).url

-        wb_url = wbrequest.wb_url
-        result = self.rewriter.fetch_request(wb_url, wbrequest.urlrewriter,
+        result = self.rewriter.fetch_request(wbrequest.wb_url.url,
+                                             wbrequest.urlrewriter,
                                             head_insert_func=head_insert_func,
                                             req_headers=req_headers,
                                             env=wbrequest.env)
--- a/pywb/webapp/replay_views.py
+++ b/pywb/webapp/replay_views.py
@ -130,8 +130,7 @@ class ReplayView(object):
                                create_insert_func(wbrequest))

        result = (self.content_rewriter.
-                  rewrite_content(wbrequest.wb_url,
-                                  urlrewriter,
+                  rewrite_content(urlrewriter,
                                  headers=status_headers,
                                  stream=stream,
                                  head_insert_func=head_insert_func,