1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00
pywb/pywb/rewrite/test/test_rewrite_live.py
Ilya Kreymer 0a6838ac2b rewrite: refactor JS rewriters into seperate mixins, allowing for
link only, location only, and link + location JS rewriters.
location-only rewriter is new
js_rewrite_location options: all, location, urls (for now)
2014-12-07 21:13:19 -08:00

166 lines
6.3 KiB
Python

from pywb.rewrite.rewrite_live import LiveRewriter
from pywb.rewrite.url_rewriter import UrlRewriter
from pywb.rewrite.wburl import WbUrl
from pywb import get_test_dir
from io import BytesIO
# This module has some rewriting tests against the 'live web'
# As such, the content may change and the test may break
urlrewriter = UrlRewriter('20131226101010/http://example.com/some/path/index.html', '/pywb/')
bn_urlrewriter = UrlRewriter('20131226101010bn_/http://example.com/some/path/index.html', '/pywb/')
def head_insert_func(rule, cdx):
if rule.js_rewrite_location != 'urls':
return '<script src="/static/default/wombat.js"> </script>'
else:
return ''
def test_local_1():
status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample.html',
urlrewriter,
head_insert_func,
'com,example,test)/')
# wombat insert added
assert '<head><script src="/static/default/wombat.js"> </script>' in buff, buff
# JS location and JS link rewritten
assert 'window.WB_wombat_location = "/pywb/20131226101010/http:\/\/example.com/dynamic_page.html"' in buff
# link rewritten
assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff
def test_local_no_head():
status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample_no_head.html',
urlrewriter,
head_insert_func,
'com,example,test)/')
# wombat insert added
assert '<script src="/static/default/wombat.js"> </script>' in buff
# location rewritten
assert 'window.WB_wombat_location = "/other.html"' in buff
# link rewritten
assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff
def test_local_no_head_banner_only():
status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample_no_head.html',
bn_urlrewriter,
head_insert_func,
'com,example,test)/')
# wombat insert added
assert '<script src="/static/default/wombat.js"> </script>' in buff
# location NOT rewritten
assert 'window.location = "/other.html"' in buff
# link NOT rewritten
assert '"another.html"' in buff
def test_local_banner_only_no_rewrite():
status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample.html',
bn_urlrewriter,
head_insert_func,
'com,example,test)/')
# wombat insert added
assert '<head><script src="/static/default/wombat.js"> </script>' in buff
# JS location NOT rewritten, JS link NOT rewritten
assert 'window.location = "http:\/\/example.com/dynamic_page.html"' in buff, buff
# link NOT rewritten
assert '"another.html"' in buff
def test_local_2_link_only_rewrite():
status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample.html',
urlrewriter,
head_insert_func,
'example,example,test)/nolocation_rewrite')
# no wombat insert
assert '<head><script src="/static/default/wombat.js"> </script>' not in buff
# JS location NOT rewritten, JS link rewritten
assert 'window.location = "/pywb/20131226101010/http:\/\/example.com/dynamic_page.html"' in buff
# still link rewrite
assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff
def test_local_2_js_loc_only_rewrite():
status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample.html',
urlrewriter,
head_insert_func,
'example,example,test,loconly)/')
# wombat insert added
assert '<script src="/static/default/wombat.js"> </script>' in buff
# JS location rewritten, JS link NOT rewritten
assert 'window.WB_wombat_location = "http:\/\/example.com/dynamic_page.html"' in buff
# still link rewrite in HTML
assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff
def test_example_1():
status_headers, buff = get_rewritten('http://example.com/', urlrewriter, req_headers={'Connection': 'close'})
# verify header rewriting
assert (('X-Archive-Orig-connection', 'close') in status_headers.headers), status_headers
assert '/pywb/20131226101010/http://www.iana.org/domains/example' in buff, buff
def test_example_2_redirect():
status_headers, buff = get_rewritten('http://httpbin.org/redirect-to?url=http://example.com/', urlrewriter)
# redirect, no content
assert status_headers.get_statuscode() == '302'
assert len(buff) == 0
def test_example_3_rel():
status_headers, buff = get_rewritten('//example.com/', urlrewriter)
assert status_headers.get_statuscode() == '200'
def test_example_4_rewrite_err():
# may occur in case of rewrite mismatch, the /// gets stripped off
status_headers, buff = get_rewritten('http://localhost:8080///example.com/', urlrewriter)
assert status_headers.get_statuscode() == '200'
def test_example_domain_specific_3():
status_headers, buff = get_rewritten('http://facebook.com/digitalpreservation', urlrewriter, follow_redirects=True)
# comment out bootloader
assert '/* Bootloader.configurePage' in buff
def test_wombat_top():
#status_headers, buff = get_rewritten('https://assets-cdn.github.com/assets/github-0f06d0f46fe7bcfbf31f2380f23aec15ba21b8ec.js', urlrewriter)
status_headers, buff = get_rewritten(get_test_dir() + 'text_content/toptest.js', urlrewriter)
assert 'WB_wombat_top!==window' in buff
def test_post():
buff = BytesIO('ABC=DEF')
env = {'REQUEST_METHOD': 'POST',
'HTTP_ORIGIN': 'http://httpbin.org',
'HTTP_HOST': 'httpbin.org',
'wsgi.input': buff}
status_headers, resp_buff = get_rewritten('http://httpbin.org/post', urlrewriter, env=env)
assert status_headers.get_statuscode() == '200', status_headers
def get_rewritten(*args, **kwargs):
return LiveRewriter().get_rewritten(*args, **kwargs)