1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-24 06:59:52 +01:00

rewrite: remove the extra wb_url param from rewrite_content(); the wb_url

now comes from the urlrewriter (urlrewriter.wburl) and is used to get the 'mod'
This commit is contained in:
Ilya Kreymer 2014-08-04 21:11:46 -07:00
parent c3004007d7
commit 9e4459ae50
7 changed files with 22 additions and 34 deletions

View File

@ -58,10 +58,12 @@ class RewriteContent:
return (rewritten_headers, stream) return (rewritten_headers, stream)
def rewrite_content(self, wb_url, urlrewriter, headers, stream, def rewrite_content(self, urlrewriter, headers, stream,
head_insert_func=None, urlkey='', head_insert_func=None, urlkey='',
cdx=None): cdx=None):
wb_url = urlrewriter.wburl
if (wb_url.is_identity or if (wb_url.is_identity or
(not head_insert_func and wb_url.is_banner_only)): (not head_insert_func and wb_url.is_banner_only)):
status_headers, stream = self.sanitize_content(headers, stream) status_headers, stream = self.sanitize_content(headers, stream)

View File

@ -119,7 +119,7 @@ class LiveRewriter(object):
return (status_headers, stream) return (status_headers, stream)
def fetch_request(self, wb_url, urlrewriter, def fetch_request(self, url, urlrewriter,
head_insert_func=None, head_insert_func=None,
urlkey=None, urlkey=None,
env=None, env=None,
@ -128,12 +128,6 @@ class LiveRewriter(object):
follow_redirects=False, follow_redirects=False,
proxies=None): proxies=None):
if isinstance(wb_url, str):
url = wb_url
wb_url = WbUrl(url)
else:
url = wb_url.url
ts_err = url.split('///') ts_err = url.split('///')
if len(ts_err) > 1 and ts_err[0] != 'file:': if len(ts_err) > 1 and ts_err[0] != 'file:':
@ -167,8 +161,7 @@ class LiveRewriter(object):
} }
result = (self.rewriter. result = (self.rewriter.
rewrite_content(wb_url, rewrite_content(urlrewriter,
urlrewriter,
status_headers, status_headers,
stream, stream,
head_insert_func=head_insert_func, head_insert_func=head_insert_func,

View File

@ -10,6 +10,7 @@ from io import BytesIO
# As such, the content may change and the test may break # As such, the content may change and the test may break
urlrewriter = UrlRewriter('20131226101010/http://example.com/some/path/index.html', '/pywb/') urlrewriter = UrlRewriter('20131226101010/http://example.com/some/path/index.html', '/pywb/')
bn_urlrewriter = UrlRewriter('20131226101010bn_/http://example.com/some/path/index.html', '/pywb/')
def head_insert_func(rule, cdx): def head_insert_func(rule, cdx):
if rule.js_rewrite_location == True: if rule.js_rewrite_location == True:
@ -35,8 +36,7 @@ def test_local_1():
def test_local_no_head(): def test_local_no_head():
wb_url = WbUrl('file://' + get_test_dir() + 'text_content/sample_no_head.html') status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample_no_head.html',
status_headers, buff = get_rewritten(wb_url,
urlrewriter, urlrewriter,
head_insert_func, head_insert_func,
'com,example,test)/') 'com,example,test)/')
@ -51,11 +51,8 @@ def test_local_no_head():
assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff
def test_local_no_head_banner_only(): def test_local_no_head_banner_only():
wb_url = WbUrl('file://' + get_test_dir() + 'text_content/sample_no_head.html') status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample_no_head.html',
wb_url.mod = 'bn_' bn_urlrewriter,
status_headers, buff = get_rewritten(wb_url,
urlrewriter,
head_insert_func, head_insert_func,
'com,example,test)/') 'com,example,test)/')
@ -69,11 +66,8 @@ def test_local_no_head_banner_only():
assert '"another.html"' in buff assert '"another.html"' in buff
def test_local_banner_only(): def test_local_banner_only():
wb_url = WbUrl('file://' + get_test_dir() + 'text_content/sample.html') status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample.html',
wb_url.mod = 'bn_' bn_urlrewriter,
status_headers, buff = get_rewritten(wb_url,
urlrewriter,
head_insert_func, head_insert_func,
'com,example,test)/') 'com,example,test)/')
@ -129,8 +123,7 @@ def test_example_4_rewrite_err():
assert status_headers.get_statuscode() == '200' assert status_headers.get_statuscode() == '200'
def test_example_domain_specific_3(): def test_example_domain_specific_3():
urlrewriter2 = UrlRewriter('20131226101010/http://example.com/some/path/index.html', '/pywb/') status_headers, buff = get_rewritten('http://facebook.com/digitalpreservation', urlrewriter, follow_redirects=True)
status_headers, buff = get_rewritten('http://facebook.com/digitalpreservation', urlrewriter2, follow_redirects=True)
# comment out bootloader # comment out bootloader
assert '/* Bootloader.configurePage' in buff assert '/* Bootloader.configurePage' in buff

View File

@ -76,10 +76,10 @@
# HttpsUrlRewriter tests # HttpsUrlRewriter tests
>>> HttpsUrlRewriter(None, None).rewrite('https://example.com/abc') >>> HttpsUrlRewriter('http://example.com/', None).rewrite('https://example.com/abc')
'http://example.com/abc' 'http://example.com/abc'
>>> HttpsUrlRewriter(None, None).rewrite('http://example.com/abc') >>> HttpsUrlRewriter('http://example.com/', None).rewrite('http://example.com/abc')
'http://example.com/abc' 'http://example.com/abc'
""" """

View File

@ -126,7 +126,7 @@ class UrlRewriter(object):
#================================================================= #=================================================================
class HttpsUrlRewriter(object): class HttpsUrlRewriter(UrlRewriter):
""" """
A url rewriter which urls that start with https:// to http:// A url rewriter which urls that start with https:// to http://
Other urls/input is unchanged. Other urls/input is unchanged.
@ -135,9 +135,6 @@ class HttpsUrlRewriter(object):
HTTP = 'http://' HTTP = 'http://'
HTTPS = 'https://' HTTPS = 'https://'
def __init__(self, wburl, prefix, full_prefix=None):
pass
def rewrite(self, url, mod=None): def rewrite(self, url, mod=None):
if url.startswith(self.HTTPS): if url.startswith(self.HTTPS):
result = self.HTTP + url[len(self.HTTPS):] result = self.HTTP + url[len(self.HTTPS):]

View File

@ -38,6 +38,10 @@ class RewriteHandler(SearchPageWbUrlHandler):
return self.render_content(wbrequest) return self.render_content(wbrequest)
except Exception as exc: except Exception as exc:
import traceback
err_details = traceback.format_exc(exc)
print err_details
url = wbrequest.wb_url.url url = wbrequest.wb_url.url
msg = 'Could not load the url from the live web: ' + url msg = 'Could not load the url from the live web: ' + url
raise LiveResourceException(msg=msg, url=url) raise LiveResourceException(msg=msg, url=url)
@ -53,8 +57,8 @@ class RewriteHandler(SearchPageWbUrlHandler):
if ref_wburl_str: if ref_wburl_str:
wbrequest.env['REL_REFERER'] = WbUrl(ref_wburl_str).url wbrequest.env['REL_REFERER'] = WbUrl(ref_wburl_str).url
wb_url = wbrequest.wb_url result = self.rewriter.fetch_request(wbrequest.wb_url.url,
result = self.rewriter.fetch_request(wb_url, wbrequest.urlrewriter, wbrequest.urlrewriter,
head_insert_func=head_insert_func, head_insert_func=head_insert_func,
req_headers=req_headers, req_headers=req_headers,
env=wbrequest.env) env=wbrequest.env)

View File

@ -130,8 +130,7 @@ class ReplayView(object):
create_insert_func(wbrequest)) create_insert_func(wbrequest))
result = (self.content_rewriter. result = (self.content_rewriter.
rewrite_content(wbrequest.wb_url, rewrite_content(urlrewriter,
urlrewriter,
headers=status_headers, headers=status_headers,
stream=stream, stream=stream,
head_insert_func=head_insert_func, head_insert_func=head_insert_func,