1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-26 07:49:24 +01:00
pywb/pywb/framework/test/test_archivalrouter.py
Ilya Kreymer 3a584a1ec3 py3: all tests pass, at last!
but not yet py2... need to resolve encoding in rewriting issues
2016-02-23 13:26:53 -08:00

136 lines
5.9 KiB
Python

"""
# Test WbRequest parsed via a Route
# route with relative path, print resulting wbrequest
>>> _test_route_req(Route('web', WbUrlHandler()), {'REL_REQUEST_URI': '/web/test.example.com', 'SCRIPT_NAME': ''})
{'coll': 'web',
'request_uri': '/web/test.example.com',
'wb_prefix': '/web/',
'wb_url': ('latest_replay', '', '', 'http://test.example.com', 'http://test.example.com')}
# route with absolute path, running at script /my_pywb, print resultingwbrequest
>>> _test_route_req(Route('web', WbUrlHandler()), {'REL_REQUEST_URI': '/web/2013im_/test.example.com', 'SCRIPT_NAME': '/my_pywb', 'HTTP_HOST': 'localhost:8081', 'wsgi.url_scheme': 'https'}, True)
{'coll': 'web',
'request_uri': '/web/2013im_/test.example.com',
'wb_prefix': 'https://localhost:8081/my_pywb/web/',
'wb_url': ('replay', '2013', 'im_', 'http://test.example.com', '2013im_/http://test.example.com')}
# route with no collection
>>> _test_route_req(Route('', BaseHandler()), {'REL_REQUEST_URI': 'http://example.com', 'SCRIPT_NAME': '/pywb'})
{'coll': '',
'request_uri': 'http://example.com',
'wb_prefix': '/pywb/',
'wb_url': None}
# not matching route -- skipped
>>> _test_route_req(Route('web', BaseHandler()), {'REL_REQUEST_URI': '/other/test.example.com', 'SCRIPT_NAME': ''})
# Test Refer Redirects
>>> _test_redir('http://localhost:8080/', '/diff_path/other.html', 'http://localhost:8080/coll/20131010/http://example.com/path/page.html')
'http://localhost:8080/coll/20131010/http://example.com/diff_path/other.html'
>>> _test_redir('http://localhost:8080/', '/../other.html', 'http://localhost:8080/coll/20131010/http://example.com/path/page.html')
'http://localhost:8080/coll/20131010/http://example.com/other.html'
>>> _test_redir('http://localhost:8080/', '/../../other.html', 'http://localhost:8080/coll/20131010/http://example.com/index.html')
'http://localhost:8080/coll/20131010/http://example.com/other.html'
# Custom collection
>>> _test_redir('http://localhost:8080/', '/other.html', 'http://localhost:8080/complex/123/20131010/http://example.com/path/page.html', coll='complex/123')
'http://localhost:8080/complex/123/20131010/http://example.com/other.html'
# With timestamp included
>>> _test_redir('http://localhost:8080/', '/20131010/other.html', 'http://localhost:8080/coll/20131010/http://example.com/index.html')
'http://localhost:8080/coll/20131010/http://example.com/other.html'
# With timestamp included
>>> _test_redir('http://localhost:8080/', '/20131010/path/other.html', 'http://localhost:8080/coll/20131010/http://example.com/some/index.html')
'http://localhost:8080/coll/20131010/http://example.com/path/other.html'
# Wrong Host
>>> _test_redir('http://example.com:8080/', '/other.html', 'http://localhost:8080/coll/20131010/http://example.com/path/page.html')
False
# Right Host
>>> _test_redir('http://example.com:8080/', '/other.html', 'http://example.com:8080/coll/20131010/http://example.com/path/page.html')
'http://example.com:8080/coll/20131010/http://example.com/other.html'
# With custom SCRIPT_NAME
>>> _test_redir('http://localhost:8080/', '/../other.html', 'http://localhost:8080/extra/coll/20131010/http://example.com/path/page.html', '/extra')
'http://localhost:8080/extra/coll/20131010/http://example.com/other.html'
# With custom SCRIPT_NAME + timestamp
>>> _test_redir('http://localhost:8080/', '/20131010/other.html', 'http://localhost:8080/extra/coll/20131010/http://example.com/path/page.html', '/extra')
'http://localhost:8080/extra/coll/20131010/http://example.com/other.html'
# With custom SCRIPT_NAME, bad match
>>> _test_redir('http://localhost:8080/', '/../other.html', 'http://localhost:8080/extra/coll/20131010/http://example.com/path/page.html', '/extr')
False
# With no collection
>>> _test_redir('http://localhost:8080/', '/other.html', 'http://localhost:8080/2013/http://example.com/path/page.html', coll='')
'http://localhost:8080/2013/http://example.com/other.html'
# With SCRIPT_NAME but no collection
>>> _test_redir('http://localhost:8080/', '/other.html', 'http://localhost:8080/pywb-access/http://example.com/path/page.html', '/pywb-access', coll='')
'http://localhost:8080/pywb-access/http://example.com/other.html'
>>> _test_redir('http://localhost:8080/', '/some/example/other.html', 'http://localhost:8080/user/coll/http://example.com/path/page.html', '/user/coll', coll='')
'http://localhost:8080/user/coll/http://example.com/some/example/other.html'
## Test ensure_rel_uri_set
# Simple test:
>>> ArchivalRouter.ensure_rel_uri_set({'PATH_INFO': '/pywb/example.com'})
'/pywb/example.com'
# Test all unecoded special chars and double-quote
# (double-quote must be encoded but not single quote)
>>> ArchivalRouter.ensure_rel_uri_set({'PATH_INFO': "/pywb/example.com/0~!+$&'()*+,;=:\\\""})
"/pywb/example.com/0~!+$&'()*+,;=:%22"
"""
from pywb.framework.archivalrouter import Route, ReferRedirect, ArchivalRouter
from pywb.framework.basehandlers import BaseHandler, WbUrlHandler
import pprint
from six.moves.urllib.parse import urlsplit
def _test_route_req(route, env, abs_path=False):
matcher, coll = route.is_handling(env['REL_REQUEST_URI'])
if not matcher:
return
the_router = ArchivalRouter([route], abs_path=abs_path)
req = the_router.parse_request(route, env, matcher, coll, env['REL_REQUEST_URI'], abs_path)
varlist = vars(req)
the_dict = dict((k, varlist[k]) for k in ('request_uri', 'wb_prefix', 'wb_url', 'coll'))
pprint.pprint(the_dict)
def _test_redir(match_host, request_uri, referrer, script_name='', coll='coll'):
env = {'REL_REQUEST_URI': request_uri, 'HTTP_REFERER': referrer, 'SCRIPT_NAME': script_name}
env['HTTP_HOST'] = urlsplit(match_host).netloc
routes = [Route(coll, WbUrlHandler())]
the_router = ArchivalRouter(routes)
redir = ReferRedirect()
#req = WbRequest.from_uri(request_uri, env)
rep = redir(env, the_router)
if not rep:
return False
return rep.status_headers.get_header('Location')
if __name__ == "__main__":
import doctest
doctest.testmod()