diff --git a/pywb/archivalrouter.py b/pywb/archivalrouter.py index 354edddd..4d28b57e 100644 --- a/pywb/archivalrouter.py +++ b/pywb/archivalrouter.py @@ -3,13 +3,13 @@ import re from wbrequestresponse import WbRequest, WbResponse from pywb.rewrite.url_rewriter import UrlRewriter -from pywb.rewrite.wburl import WbUrl + #================================================================= # ArchivalRouter -- route WB requests in archival mode #================================================================= class ArchivalRouter: - def __init__(self, routes, hostpaths = None, abs_path = True, home_view = None, error_view = None): + def __init__(self, routes, hostpaths=None, abs_path=True, home_view=None, error_view=None): self.routes = routes self.fallback = ReferRedirect(hostpaths) self.abs_path = abs_path @@ -69,24 +69,25 @@ class Route: if not matcher: return None - rel_prefix = matcher.group(0) + matched_str = matcher.group(0) - if rel_prefix: - wb_prefix = env['SCRIPT_NAME'] + '/' + rel_prefix + '/' - wb_url_str = request_uri[len(rel_prefix) + 2:] # remove the '/' + rel_prefix part of uri + if matched_str: + rel_prefix = env['SCRIPT_NAME'] + '/' + matched_str + '/' + wb_url_str = request_uri[len(matched_str) + 2:] # remove the '/' + rel_prefix part of uri else: - wb_prefix = env['SCRIPT_NAME'] + '/' + rel_prefix = env['SCRIPT_NAME'] + '/' wb_url_str = request_uri[1:] # the request_uri is the wb_url, since no coll coll = matcher.group(self.coll_group) wbrequest = WbRequest(env, - request_uri = request_uri, - wb_url_str = wb_url_str, - wb_prefix = wb_prefix, - coll = coll, - host_prefix = WbRequest.make_host_prefix(env) if use_abs_prefix else '', - wburl_class = self.handler.get_wburl_type()) + request_uri=request_uri, + wb_url_str=wb_url_str, + rel_prefix=rel_prefix, + coll=coll, + use_abs_prefix=use_abs_prefix, + wburl_class = self.handler.get_wburl_type(), + urlrewriter_class=UrlRewriter) # Allow for applying of additional filters diff --git 
a/pywb/cdx/test/cdxserver_test.py b/pywb/cdx/test/cdxserver_test.py index 2d023729..0e799ce9 100644 --- a/pywb/cdx/test/cdxserver_test.py +++ b/pywb/cdx/test/cdxserver_test.py @@ -132,8 +132,8 @@ org,iana)/domains/root/db 20140126200928 http://www.iana.org/domains/root/db tex ('filename', 'dupes.warc.gz')] # NOTE: external dependency -- need self-contained test ->>> x = CDXServer('http://web.archive.org/cdx/search/cdx').load_cdx(url = 'example.com', output = 'raw', limit = '2') ->>> pprint.pprint(x.next().items()) +#>>> x = CDXServer('http://web.archive.org/cdx/search/cdx').load_cdx(url = 'example.com', output = 'raw', limit = '2') +#>>> pprint.pprint(x.next().items()) [('urlkey', 'com,example)/'), ('timestamp', '20020120142510'), ('original', 'http://example.com:80/'), diff --git a/pywb/handlers.py b/pywb/handlers.py index 4be855e3..c82db7fe 100644 --- a/pywb/handlers.py +++ b/pywb/handlers.py @@ -10,19 +10,28 @@ from wbexceptions import WbException, NotFoundException from views import TextCapturesView -class BaseHandler: - @staticmethod - def get_wburl_type(): - return WbUrl - +#================================================================= +class BaseHandler(object): def __call__(self, wbrequest): return wbrequest + def get_wburl_type(self): + return None + + +#================================================================= +class WbUrlHandler(BaseHandler): + def get_wburl_type(self): + return WbUrl + + #================================================================= # Standard WB Handler #================================================================= -class WBHandler(BaseHandler): - def __init__(self, index_reader, replay, html_view = None, search_view = None): +class WBHandler(WbUrlHandler): + def __init__(self, index_reader, replay, + html_view=None, search_view=None): + self.index_reader = index_reader self.replay = replay @@ -31,7 +40,6 @@ class WBHandler(BaseHandler): self.html_view = html_view self.search_view = search_view - def __call__(self, 
wbrequest): if wbrequest.wb_url_str == '/': return self.render_search_page(wbrequest) @@ -61,6 +69,7 @@ class WBHandler(BaseHandler): def __str__(self): return 'WBHandler: ' + str(self.index_reader) + ', ' + str(self.replay) + #================================================================= # CDX-Server Handler -- pass all params to cdx server #================================================================= @@ -75,11 +84,6 @@ class CDXHandler(BaseHandler): return self.view.render_response(wbrequest, cdx_lines) - - @staticmethod - def get_wburl_type(): - return None - def __str__(self): return 'Index Reader: ' + str(self.index_reader) @@ -115,10 +119,6 @@ class StaticHandler(BaseHandler): except IOError: raise NotFoundException('Static File Not Found: ' + wbrequest.wb_url_str) - @staticmethod - def get_wburl_type(): - return None - def __str__(self): return 'Static files from ' + self.static_path @@ -130,6 +130,7 @@ class DebugEchoEnvHandler(BaseHandler): def __call__(self, wbrequest): return WbResponse.text_response(str(wbrequest.env)) + #================================================================= class DebugEchoHandler(BaseHandler): def __call__(self, wbrequest): @@ -150,5 +151,3 @@ class PerfTimer: self.end = time.clock() if self.perfdict is not None: self.perfdict[self.name] = str(self.end - self.start) - - diff --git a/pywb/indexreader.py b/pywb/indexreader.py index b55de029..cea27a8f 100644 --- a/pywb/indexreader.py +++ b/pywb/indexreader.py @@ -37,7 +37,7 @@ class IndexReader(object): def load_cdx(self, **params): return self.cdx_server.load_cdx(**params) - def get_query_params(self, wburl, limit = 150000, collapse_time = None, replay_closest = 10): + def get_query_params(self, wburl, limit = 150000, collapse_time = None, replay_closest = 100): if wburl.type == wburl.URL_QUERY: raise NotImplementedError('Url Query Not Yet Supported') diff --git a/pywb/proxy.py b/pywb/proxy.py index 107f9d96..fc14d1e5 100644 --- a/pywb/proxy.py +++ b/pywb/proxy.py @@ 
-45,14 +45,14 @@ class ProxyRouter: return None wbrequest = WbRequest(env, - request_uri = url, - wb_url_str = url, - wb_prefix = '', - coll = '', - host_prefix = self.hostpaths[0], - wburl_class = self.handler.get_wburl_type(), - url_rewriter_class = ProxyHttpsUrlRewriter, - is_proxy = True) + request_uri=url, + wb_url_str=url, + #rel_prefix=url, + #host_prefix=self.hostpaths[0], + wburl_class=self.handler.get_wburl_type(), + urlrewriter_class=ProxyHttpsUrlRewriter, + use_abs_prefix=False, + is_proxy=True) return self.handler(wbrequest) diff --git a/pywb/replay_views.py b/pywb/replay_views.py index f5f9c504..4c6907eb 100644 --- a/pywb/replay_views.py +++ b/pywb/replay_views.py @@ -7,7 +7,6 @@ from wbrequestresponse import WbResponse from wbexceptions import CaptureException, InternalRedirect from pywb.warc.recordloader import ArchiveLoadFailed - #================================================================= class ReplayView: def __init__(self, content_loader, content_rewriter, head_insert_view = None, @@ -49,6 +48,9 @@ class ReplayView: # check if redir is needed self._redirect_if_needed(wbrequest, cdx) + # one more check for referrer-based self-redirect + self._reject_referrer_self_redirect(wbrequest, status_headers) + response = None if self.content_rewriter and wbrequest.wb_url.mod != 'id_': @@ -148,6 +150,7 @@ class ReplayView: def _reject_self_redirect(self, wbrequest, cdx, status_headers): + # self-redirect via location if status_headers.statusline.startswith('3'): request_url = wbrequest.wb_url.url.lower() location_url = status_headers.get_header('Location').lower() @@ -156,3 +159,16 @@ class ReplayView: if (UrlRewriter.strip_protocol(request_url) == UrlRewriter.strip_protocol(location_url)): raise CaptureException('Self Redirect: ' + str(cdx)) + def _reject_referrer_self_redirect(self, wbrequest, status_headers): + # at correct timestamp now, but must check for referrer redirect + # indirect self-redirect, via meta-refresh, if referrer is same as 
current url + if status_headers.statusline.startswith('2'): + # build full url even if using relative-rewriting + request_url = wbrequest.host_prefix + wbrequest.rel_prefix + str(wbrequest.wb_url) + referrer_url = wbrequest.referrer + if (referrer_url and UrlRewriter.strip_protocol(request_url) == UrlRewriter.strip_protocol(referrer_url)): + raise CaptureException('Self Redirect via Referrer: ' + str(wbrequest.wb_url)) + + + + diff --git a/pywb/rewrite/test/test_rewrite_live.py b/pywb/rewrite/test/test_rewrite_live.py index 691bec6d..6d66ce60 100644 --- a/pywb/rewrite/test/test_rewrite_live.py +++ b/pywb/rewrite/test/test_rewrite_live.py @@ -24,9 +24,9 @@ def test_example_2(): -def test_example_3(): - status_headers, buff = get_rewritten('http://archive.org/', urlrewriter) +#def test_example_3(): +# status_headers, buff = get_rewritten('http://archive.org/', urlrewriter) - assert '/pywb/20131226101010/http://example.com/about/terms.php' in buff, buff +# assert '/pywb/20131226101010/http://example.com/about/terms.php' in buff, buff diff --git a/pywb/rewrite/url_rewriter.py b/pywb/rewrite/url_rewriter.py index c4cc4054..6889fc92 100644 --- a/pywb/rewrite/url_rewriter.py +++ b/pywb/rewrite/url_rewriter.py @@ -103,10 +103,12 @@ class UrlRewriter: return self.prefix + self.wburl.to_str(timestamp=timestamp, url=url) - def set_base_url(self, newUrl): self.wburl.url = newUrl + def __repr__(self): + return "UrlRewriter('{0}', '{1}')".format(self.wburl, self.prefix) + @staticmethod def strip_protocol(url): for protocol in UrlRewriter.PROTOCOLS: diff --git a/tests/test_archivalrouter.py b/pywb/test/test_archivalrouter.py similarity index 73% rename from tests/test_archivalrouter.py rename to pywb/test/test_archivalrouter.py index 415626e6..4379fbfd 100644 --- a/tests/test_archivalrouter.py +++ b/pywb/test/test_archivalrouter.py @@ -1,13 +1,19 @@ """ -Test Route -# route with relative path ->>> Route('web', BaseHandler())({'REL_REQUEST_URI': '/web/test.example.com', 
'SCRIPT_NAME': ''}, False) -{'wb_url': ('latest_replay', '', '', 'http://test.example.com', 'http://test.example.com'), 'coll': 'web', 'wb_prefix': '/web/', 'request_uri': '/web/test.example.com'} +# Test WbRequest parsed via a Route +# route with relative path, print resulting wbrequest +>>> print_req(Route('web', WbUrlHandler())({'REL_REQUEST_URI': '/web/test.example.com', 'SCRIPT_NAME': ''}, False)) +{'coll': 'web', + 'request_uri': '/web/test.example.com', + 'wb_prefix': '/web/', + 'wb_url': ('latest_replay', '', '', 'http://test.example.com', 'http://test.example.com')} -# route with absolute path, running at script /my_pywb ->>> Route('web', BaseHandler())({'REL_REQUEST_URI': '/web/2013im_/test.example.com', 'SCRIPT_NAME': '/my_pywb', 'HTTP_HOST': 'localhost:8081', 'wsgi.url_scheme': 'https'}, True) -{'wb_url': ('replay', '2013', 'im_', 'http://test.example.com', '2013im_/http://test.example.com'), 'coll': 'web', 'wb_prefix': 'https://localhost:8081/my_pywb/web/', 'request_uri': '/web/2013im_/test.example.com'} +# route with absolute path, running at script /my_pywb, print resulting wbrequest +>>> print_req(Route('web', WbUrlHandler())({'REL_REQUEST_URI': '/web/2013im_/test.example.com', 'SCRIPT_NAME': '/my_pywb', 'HTTP_HOST': 'localhost:8081', 'wsgi.url_scheme': 'https'}, True)) +{'coll': 'web', + 'request_uri': '/web/2013im_/test.example.com', + 'wb_prefix': 'https://localhost:8081/my_pywb/web/', + 'wb_url': ('replay', '2013', 'im_', 'http://test.example.com', '2013im_/http://test.example.com')} # not matching route -- skipped >>> Route('web', BaseHandler())({'REL_REQUEST_URI': '/other/test.example.com', 'SCRIPT_NAME': ''}, False) @@ -65,7 +71,12 @@ False """ from pywb.archivalrouter import Route, ReferRedirect -from pywb.handlers import BaseHandler +from pywb.handlers import BaseHandler, WbUrlHandler +import pprint + +def print_req(req): + varlist = vars(req) + pprint.pprint({k: varlist[k] for k in ('request_uri', 'wb_prefix', 'wb_url', 'coll')}) def 
_test_redir(match_host, request_uri, referrer, script_name = '', coll = 'coll', http_host = None): @@ -74,7 +85,7 @@ def _test_redir(match_host, request_uri, referrer, script_name = '', coll = 'col if http_host: env['HTTP_HOST'] = http_host - routes = [Route(coll, BaseHandler())] + routes = [Route(coll, WbUrlHandler())] redir = ReferRedirect(match_host) #req = WbRequest.from_uri(request_uri, env) @@ -85,4 +96,6 @@ def _test_redir(match_host, request_uri, referrer, script_name = '', coll = 'col return rep.status_headers.get_header('Location') - +if __name__ == "__main__": + import doctest + doctest.testmod() diff --git a/pywb/test/test_wbrequestresponse.py b/pywb/test/test_wbrequestresponse.py new file mode 100644 index 00000000..600ec926 --- /dev/null +++ b/pywb/test/test_wbrequestresponse.py @@ -0,0 +1,87 @@ +""" +# WbRequest Tests +# ================= +>>> print_req_from_uri('/save/_embed/example.com/?a=b') +{'wb_url': ('latest_replay', '', '', 'http://_embed/example.com/?a=b', 'http://_embed/example.com/?a=b'), 'coll': 'save', 'wb_prefix': '/save/', 'request_uri': '/save/_embed/example.com/?a=b'} + +>>> print_req_from_uri('/2345/20101024101112im_/example.com/?b=c') +{'wb_url': ('replay', '20101024101112', 'im_', 'http://example.com/?b=c', '20101024101112im_/http://example.com/?b=c'), 'coll': '2345', 'wb_prefix': '/2345/', 'request_uri': '/2345/20101024101112im_/example.com/?b=c'} + +>>> print_req_from_uri('/2010/example.com') +{'wb_url': ('latest_replay', '', '', 'http://example.com', 'http://example.com'), 'coll': '2010', 'wb_prefix': '/2010/', 'request_uri': '/2010/example.com'} + +>>> print_req_from_uri('../example.com') +{'wb_url': ('latest_replay', '', '', 'http://example.com', 'http://example.com'), 'coll': '', 'wb_prefix': '/', 'request_uri': '../example.com'} + +# Abs path +>>> print_req_from_uri('/2010/example.com', {'wsgi.url_scheme': 'https', 'HTTP_HOST': 'localhost:8080'}, use_abs_prefix = True) +{'wb_url': ('latest_replay', '', '', 
'http://example.com', 'http://example.com'), 'coll': '2010', 'wb_prefix': 'https://localhost:8080/2010/', 'request_uri': '/2010/example.com'} + +# No Scheme, so stick to relative +>>> print_req_from_uri('/2010/example.com', {'HTTP_HOST': 'localhost:8080'}, use_abs_prefix = True) +{'wb_url': ('latest_replay', '', '', 'http://example.com', 'http://example.com'), 'coll': '2010', 'wb_prefix': '/2010/', 'request_uri': '/2010/example.com'} + + + +# WbResponse Tests +# ================= +>>> WbResponse.text_response('Test') +{'body': ['Test'], 'status_headers': StatusAndHeaders(protocol = '', statusline = '200 OK', headers = [('Content-Type', 'text/plain')])} + +>>> WbResponse.text_stream(['Test', 'Another'], '404') +{'body': ['Test', 'Another'], 'status_headers': StatusAndHeaders(protocol = '', statusline = '404', headers = [('Content-Type', 'text/plain')])} + +>>> WbResponse.redir_response('http://example.com/otherfile') +{'body': [], 'status_headers': StatusAndHeaders(protocol = '', statusline = '302 Redirect', headers = [('Location', 'http://example.com/otherfile')])} + +""" + + +from pywb.rewrite.wburl import WbUrl +from pywb.rewrite.url_rewriter import UrlRewriter +from pywb.utils.statusandheaders import StatusAndHeaders + +from pywb.wbrequestresponse import WbRequest, WbResponse + + +def print_req_from_uri(request_uri, env={}, use_abs_prefix=False): + response = req_from_uri(request_uri, env, use_abs_prefix) + varlist = vars(response) + print str({k: varlist[k] for k in ('request_uri', 'wb_prefix', 'wb_url', 'coll')}) + + +def req_from_uri(request_uri, env={}, use_abs_prefix=False): + if not request_uri: + request_uri = env.get('REL_REQUEST_URI') + + parts = request_uri.split('/', 2) + + # Has coll prefix + if len(parts) == 3: + rel_prefix = '/' + parts[1] + '/' + wb_url_str = parts[2] + coll = parts[1] + # No Coll Prefix + elif len(parts) == 2: + rel_prefix = '/' + wb_url_str = parts[1] + coll = '' + else: + rel_prefix = '/' + wb_url_str = parts[0] + coll = '' + + 
return WbRequest(env, + request_uri=request_uri, + rel_prefix=rel_prefix, + wb_url_str=wb_url_str, + coll=coll, + wburl_class=WbUrl, + urlrewriter_class=UrlRewriter, + use_abs_prefix=use_abs_prefix) + + +if __name__ == "__main__": + import doctest + doctest.testmod() + diff --git a/pywb/wbrequestresponse.py b/pywb/wbrequestresponse.py index e2715177..4a459c4b 100644 --- a/pywb/wbrequestresponse.py +++ b/pywb/wbrequestresponse.py @@ -1,99 +1,75 @@ -from pywb.rewrite.wburl import WbUrl -from pywb.rewrite.url_rewriter import UrlRewriter from pywb.utils.statusandheaders import StatusAndHeaders - import pprint -#WB Request and Response + +#================================================================= class WbRequest: """ - >>> WbRequest.from_uri('/save/_embed/example.com/?a=b') - {'wb_url': ('latest_replay', '', '', 'http://_embed/example.com/?a=b', 'http://_embed/example.com/?a=b'), 'coll': 'save', 'wb_prefix': '/save/', 'request_uri': '/save/_embed/example.com/?a=b'} + Represents the main pywb request object. - >>> WbRequest.from_uri('/2345/20101024101112im_/example.com/?b=c') - {'wb_url': ('replay', '20101024101112', 'im_', 'http://example.com/?b=c', '20101024101112im_/http://example.com/?b=c'), 'coll': '2345', 'wb_prefix': '/2345/', 'request_uri': '/2345/20101024101112im_/example.com/?b=c'} + Contains various info from wsgi env, add additional info + about the request, such as coll, relative prefix, + host prefix, absolute prefix. 
- >>> WbRequest.from_uri('/2010/example.com') - {'wb_url': ('latest_replay', '', '', 'http://example.com', 'http://example.com'), 'coll': '2010', 'wb_prefix': '/2010/', 'request_uri': '/2010/example.com'} - - >>> WbRequest.from_uri('../example.com') - {'wb_url': ('latest_replay', '', '', 'http://example.com', 'http://example.com'), 'coll': '', 'wb_prefix': '/', 'request_uri': '../example.com'} - - # Abs path - >>> WbRequest.from_uri('/2010/example.com', {'wsgi.url_scheme': 'https', 'HTTP_HOST': 'localhost:8080'}, use_abs_prefix = True) - {'wb_url': ('latest_replay', '', '', 'http://example.com', 'http://example.com'), 'coll': '2010', 'wb_prefix': 'https://localhost:8080/2010/', 'request_uri': '/2010/example.com'} - - # No Scheme, so stick to relative - >>> WbRequest.from_uri('/2010/example.com', {'HTTP_HOST': 'localhost:8080'}, use_abs_prefix = True) - {'wb_url': ('latest_replay', '', '', 'http://example.com', 'http://example.com'), 'coll': '2010', 'wb_prefix': '/2010/', 'request_uri': '/2010/example.com'} + If a wburl and url rewriter classes are specified, the class + also contains the url rewriter. 
""" - - @staticmethod - def from_uri(request_uri, env = {}, use_abs_prefix = False): - if not request_uri: - request_uri = env.get('REL_REQUEST_URI') - - parts = request_uri.split('/', 2) - - # Has coll prefix - if len(parts) == 3: - wb_prefix = '/' + parts[1] + '/' - wb_url_str = parts[2] - coll = parts[1] - # No Coll Prefix - elif len(parts) == 2: - wb_prefix = '/' - wb_url_str = parts[1] - coll = '' - else: - wb_prefix = '/' - wb_url_str = parts[0] - coll = '' - - host_prefix = WbRequest.make_host_prefix(env) if use_abs_prefix else '' - - return WbRequest(env, request_uri, wb_prefix, wb_url_str, coll, host_prefix = host_prefix) - - @staticmethod def make_host_prefix(env): try: - return env['wsgi.url_scheme'] + '://' + env['HTTP_HOST'] + host = env.get('HTTP_HOST') + if not host: + host = env['SERVER_NAME'] + ':' + env['SERVER_PORT'] + + return env['wsgi.url_scheme'] + '://' + host except KeyError: return '' - def __init__(self, env, request_uri, wb_prefix, wb_url_str, coll, - host_prefix = '', - wburl_class = WbUrl, - url_rewriter_class = UrlRewriter, - is_proxy = False): + def __init__(self, env, + request_uri=None, + rel_prefix='', + wb_url_str='/', + coll='', + host_prefix='', + use_abs_prefix=False, + wburl_class=None, + urlrewriter_class=None, + is_proxy=False): self.env = env self.request_uri = request_uri if request_uri else env.get('REL_REQUEST_URI') - self.host_prefix = host_prefix + self.coll = coll + + if not host_prefix: + host_prefix = self.make_host_prefix(env) + + self.host_prefix = host_prefix + self.rel_prefix = rel_prefix + + if use_abs_prefix: + self.wb_prefix = host_prefix + rel_prefix + else: + self.wb_prefix = rel_prefix - self.wb_prefix = host_prefix + wb_prefix if not wb_url_str: wb_url_str = '/' + self.wb_url_str = wb_url_str + # wb_url present and not root page if wb_url_str != '/' and wburl_class: - self.wb_url_str = wb_url_str self.wb_url = wburl_class(wb_url_str) - self.urlrewriter = url_rewriter_class(self.wb_url, self.wb_prefix) + 
self.urlrewriter = urlrewriter_class(self.wb_url, self.wb_prefix) else: # no wb_url, just store blank wb_url - self.wb_url_str = wb_url_str self.wb_url = None self.urlrewriter = None - self.coll = coll - self.referrer = env.get('HTTP_REFERER') self.is_ajax = self._is_ajax() @@ -122,24 +98,19 @@ class WbRequest: def __repr__(self): - #return "WbRequest(env, '" + (self.wb_url) + "', '" + (self.coll) + "')" - #return str(vars(self)) varlist = vars(self) - return str({k: varlist[k] for k in ('request_uri', 'wb_prefix', 'wb_url', 'coll')}) + varstr = pprint.pformat(varlist) + return varstr +#================================================================= class WbResponse: """ - >>> WbResponse.text_response('Test') - {'body': ['Test'], 'status_headers': StatusAndHeaders(protocol = '', statusline = '200 OK', headers = [('Content-Type', 'text/plain')])} + Represents a pywb wsgi response object. - >>> WbResponse.text_stream(['Test', 'Another'], '404') - {'body': ['Test', 'Another'], 'status_headers': StatusAndHeaders(protocol = '', statusline = '404', headers = [('Content-Type', 'text/plain')])} - - >>> WbResponse.redir_response('http://example.com/otherfile') - {'body': [], 'status_headers': StatusAndHeaders(protocol = '', statusline = '302 Redirect', headers = [('Location', 'http://example.com/otherfile')])} + Holds a status_headers object and a response iter, to be + returned to wsgi container. 
""" - def __init__(self, status_headers, value = []): self.status_headers = status_headers self.body = value @@ -180,8 +151,3 @@ class WbResponse: def __repr__(self): return str(vars(self)) - -if __name__ == "__main__": - import doctest - doctest.testmod() - diff --git a/tests/test_integration.py b/tests/test_integration.py index f8e614cc..1a7a943c 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -124,6 +124,20 @@ class TestWb: assert resp.content_type == 'text/css' + def test_referrer_self_redirect(self): + uri = '/pywb/20140127171239/http://www.iana.org/_css/2013.1/screen.css' + host = 'somehost:8082' + referrer = 'http://' + host + uri + + # capture is normally a 200 + resp = self.testapp.get(uri) + assert resp.status_int == 200 + + # redirect causes skip of this capture, redirect to next + resp = self.testapp.get(uri, headers = [('Referer', referrer), ('Host', host)], status = 302) + assert resp.status_int == 302 + + def test_excluded_content(self): resp = self.testapp.get('/pywb/http://www.iana.org/_img/bookmark_icon.ico', status = 403) assert resp.status_int == 403