mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
rewrite: remove the extra wb_url param from rewrite_content(); the wb_url
is now taken from the urlrewriter, so that its 'mod' is used
This commit is contained in:
parent
c3004007d7
commit
9e4459ae50
@ -58,10 +58,12 @@ class RewriteContent:
|
|||||||
|
|
||||||
return (rewritten_headers, stream)
|
return (rewritten_headers, stream)
|
||||||
|
|
||||||
def rewrite_content(self, wb_url, urlrewriter, headers, stream,
|
def rewrite_content(self, urlrewriter, headers, stream,
|
||||||
head_insert_func=None, urlkey='',
|
head_insert_func=None, urlkey='',
|
||||||
cdx=None):
|
cdx=None):
|
||||||
|
|
||||||
|
wb_url = urlrewriter.wburl
|
||||||
|
|
||||||
if (wb_url.is_identity or
|
if (wb_url.is_identity or
|
||||||
(not head_insert_func and wb_url.is_banner_only)):
|
(not head_insert_func and wb_url.is_banner_only)):
|
||||||
status_headers, stream = self.sanitize_content(headers, stream)
|
status_headers, stream = self.sanitize_content(headers, stream)
|
||||||
|
@ -119,7 +119,7 @@ class LiveRewriter(object):
|
|||||||
|
|
||||||
return (status_headers, stream)
|
return (status_headers, stream)
|
||||||
|
|
||||||
def fetch_request(self, wb_url, urlrewriter,
|
def fetch_request(self, url, urlrewriter,
|
||||||
head_insert_func=None,
|
head_insert_func=None,
|
||||||
urlkey=None,
|
urlkey=None,
|
||||||
env=None,
|
env=None,
|
||||||
@ -128,12 +128,6 @@ class LiveRewriter(object):
|
|||||||
follow_redirects=False,
|
follow_redirects=False,
|
||||||
proxies=None):
|
proxies=None):
|
||||||
|
|
||||||
if isinstance(wb_url, str):
|
|
||||||
url = wb_url
|
|
||||||
wb_url = WbUrl(url)
|
|
||||||
else:
|
|
||||||
url = wb_url.url
|
|
||||||
|
|
||||||
ts_err = url.split('///')
|
ts_err = url.split('///')
|
||||||
|
|
||||||
if len(ts_err) > 1 and ts_err[0] != 'file:':
|
if len(ts_err) > 1 and ts_err[0] != 'file:':
|
||||||
@ -167,8 +161,7 @@ class LiveRewriter(object):
|
|||||||
}
|
}
|
||||||
|
|
||||||
result = (self.rewriter.
|
result = (self.rewriter.
|
||||||
rewrite_content(wb_url,
|
rewrite_content(urlrewriter,
|
||||||
urlrewriter,
|
|
||||||
status_headers,
|
status_headers,
|
||||||
stream,
|
stream,
|
||||||
head_insert_func=head_insert_func,
|
head_insert_func=head_insert_func,
|
||||||
|
@ -10,6 +10,7 @@ from io import BytesIO
|
|||||||
# As such, the content may change and the test may break
|
# As such, the content may change and the test may break
|
||||||
|
|
||||||
urlrewriter = UrlRewriter('20131226101010/http://example.com/some/path/index.html', '/pywb/')
|
urlrewriter = UrlRewriter('20131226101010/http://example.com/some/path/index.html', '/pywb/')
|
||||||
|
bn_urlrewriter = UrlRewriter('20131226101010bn_/http://example.com/some/path/index.html', '/pywb/')
|
||||||
|
|
||||||
def head_insert_func(rule, cdx):
|
def head_insert_func(rule, cdx):
|
||||||
if rule.js_rewrite_location == True:
|
if rule.js_rewrite_location == True:
|
||||||
@ -35,8 +36,7 @@ def test_local_1():
|
|||||||
|
|
||||||
|
|
||||||
def test_local_no_head():
|
def test_local_no_head():
|
||||||
wb_url = WbUrl('file://' + get_test_dir() + 'text_content/sample_no_head.html')
|
status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample_no_head.html',
|
||||||
status_headers, buff = get_rewritten(wb_url,
|
|
||||||
urlrewriter,
|
urlrewriter,
|
||||||
head_insert_func,
|
head_insert_func,
|
||||||
'com,example,test)/')
|
'com,example,test)/')
|
||||||
@ -51,11 +51,8 @@ def test_local_no_head():
|
|||||||
assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff
|
assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff
|
||||||
|
|
||||||
def test_local_no_head_banner_only():
|
def test_local_no_head_banner_only():
|
||||||
wb_url = WbUrl('file://' + get_test_dir() + 'text_content/sample_no_head.html')
|
status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample_no_head.html',
|
||||||
wb_url.mod = 'bn_'
|
bn_urlrewriter,
|
||||||
|
|
||||||
status_headers, buff = get_rewritten(wb_url,
|
|
||||||
urlrewriter,
|
|
||||||
head_insert_func,
|
head_insert_func,
|
||||||
'com,example,test)/')
|
'com,example,test)/')
|
||||||
|
|
||||||
@ -69,11 +66,8 @@ def test_local_no_head_banner_only():
|
|||||||
assert '"another.html"' in buff
|
assert '"another.html"' in buff
|
||||||
|
|
||||||
def test_local_banner_only():
|
def test_local_banner_only():
|
||||||
wb_url = WbUrl('file://' + get_test_dir() + 'text_content/sample.html')
|
status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample.html',
|
||||||
wb_url.mod = 'bn_'
|
bn_urlrewriter,
|
||||||
|
|
||||||
status_headers, buff = get_rewritten(wb_url,
|
|
||||||
urlrewriter,
|
|
||||||
head_insert_func,
|
head_insert_func,
|
||||||
'com,example,test)/')
|
'com,example,test)/')
|
||||||
|
|
||||||
@ -129,8 +123,7 @@ def test_example_4_rewrite_err():
|
|||||||
assert status_headers.get_statuscode() == '200'
|
assert status_headers.get_statuscode() == '200'
|
||||||
|
|
||||||
def test_example_domain_specific_3():
|
def test_example_domain_specific_3():
|
||||||
urlrewriter2 = UrlRewriter('20131226101010/http://example.com/some/path/index.html', '/pywb/')
|
status_headers, buff = get_rewritten('http://facebook.com/digitalpreservation', urlrewriter, follow_redirects=True)
|
||||||
status_headers, buff = get_rewritten('http://facebook.com/digitalpreservation', urlrewriter2, follow_redirects=True)
|
|
||||||
|
|
||||||
# comment out bootloader
|
# comment out bootloader
|
||||||
assert '/* Bootloader.configurePage' in buff
|
assert '/* Bootloader.configurePage' in buff
|
||||||
|
@ -76,10 +76,10 @@
|
|||||||
|
|
||||||
|
|
||||||
# HttpsUrlRewriter tests
|
# HttpsUrlRewriter tests
|
||||||
>>> HttpsUrlRewriter(None, None).rewrite('https://example.com/abc')
|
>>> HttpsUrlRewriter('http://example.com/', None).rewrite('https://example.com/abc')
|
||||||
'http://example.com/abc'
|
'http://example.com/abc'
|
||||||
|
|
||||||
>>> HttpsUrlRewriter(None, None).rewrite('http://example.com/abc')
|
>>> HttpsUrlRewriter('http://example.com/', None).rewrite('http://example.com/abc')
|
||||||
'http://example.com/abc'
|
'http://example.com/abc'
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
@ -126,7 +126,7 @@ class UrlRewriter(object):
|
|||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class HttpsUrlRewriter(object):
|
class HttpsUrlRewriter(UrlRewriter):
|
||||||
"""
|
"""
|
||||||
A url rewriter which converts urls that start with https:// to http://
|
A url rewriter which converts urls that start with https:// to http://
|
||||||
Other urls/input is unchanged.
|
Other urls/input is unchanged.
|
||||||
@ -135,9 +135,6 @@ class HttpsUrlRewriter(object):
|
|||||||
HTTP = 'http://'
|
HTTP = 'http://'
|
||||||
HTTPS = 'https://'
|
HTTPS = 'https://'
|
||||||
|
|
||||||
def __init__(self, wburl, prefix, full_prefix=None):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def rewrite(self, url, mod=None):
|
def rewrite(self, url, mod=None):
|
||||||
if url.startswith(self.HTTPS):
|
if url.startswith(self.HTTPS):
|
||||||
result = self.HTTP + url[len(self.HTTPS):]
|
result = self.HTTP + url[len(self.HTTPS):]
|
||||||
|
@ -38,6 +38,10 @@ class RewriteHandler(SearchPageWbUrlHandler):
|
|||||||
return self.render_content(wbrequest)
|
return self.render_content(wbrequest)
|
||||||
|
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
|
import traceback
|
||||||
|
err_details = traceback.format_exc(exc)
|
||||||
|
print err_details
|
||||||
|
|
||||||
url = wbrequest.wb_url.url
|
url = wbrequest.wb_url.url
|
||||||
msg = 'Could not load the url from the live web: ' + url
|
msg = 'Could not load the url from the live web: ' + url
|
||||||
raise LiveResourceException(msg=msg, url=url)
|
raise LiveResourceException(msg=msg, url=url)
|
||||||
@ -53,8 +57,8 @@ class RewriteHandler(SearchPageWbUrlHandler):
|
|||||||
if ref_wburl_str:
|
if ref_wburl_str:
|
||||||
wbrequest.env['REL_REFERER'] = WbUrl(ref_wburl_str).url
|
wbrequest.env['REL_REFERER'] = WbUrl(ref_wburl_str).url
|
||||||
|
|
||||||
wb_url = wbrequest.wb_url
|
result = self.rewriter.fetch_request(wbrequest.wb_url.url,
|
||||||
result = self.rewriter.fetch_request(wb_url, wbrequest.urlrewriter,
|
wbrequest.urlrewriter,
|
||||||
head_insert_func=head_insert_func,
|
head_insert_func=head_insert_func,
|
||||||
req_headers=req_headers,
|
req_headers=req_headers,
|
||||||
env=wbrequest.env)
|
env=wbrequest.env)
|
||||||
|
@ -130,8 +130,7 @@ class ReplayView(object):
|
|||||||
create_insert_func(wbrequest))
|
create_insert_func(wbrequest))
|
||||||
|
|
||||||
result = (self.content_rewriter.
|
result = (self.content_rewriter.
|
||||||
rewrite_content(wbrequest.wb_url,
|
rewrite_content(urlrewriter,
|
||||||
urlrewriter,
|
|
||||||
headers=status_headers,
|
headers=status_headers,
|
||||||
stream=stream,
|
stream=stream,
|
||||||
head_insert_func=head_insert_func,
|
head_insert_func=head_insert_func,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user