1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

trailing slash fix: add trailing slash, preserving query, if no slash present after hostname (#211)

This commit is contained in:
Ilya Kreymer 2017-04-04 18:10:49 -07:00
parent 7ca5795976
commit f593b5f80f
3 changed files with 36 additions and 19 deletions

View File

@ -23,7 +23,7 @@ from pywb.framework.wbrequestresponse import WbResponse
from pywb.webagg.utils import MementoUtils, buffer_iter
from werkzeug.http import HTTP_STATUS_CODES
from six.moves.urllib.parse import urlencode
from six.moves.urllib.parse import urlencode, urlsplit, urlunsplit
from pywb.urlrewrite.rewriteinputreq import RewriteInputRequest
from pywb.urlrewrite.templateview import JinjaEnv, HeadInsertView, TopFrameView, BaseInsertView
@ -125,9 +125,13 @@ class RewriterApp(object):
full_prefix=full_prefix,
rel_prefix=rel_prefix)
scheme_inx = wb_url.url.find('//')
if wb_url.url.find('/', scheme_inx + 2) < 0:
return WbResponse.redir_response(urlrewriter.rewrite(wb_url.url + '/'))
url_parts = urlsplit(wb_url.url)
if not url_parts.path:
scheme, netloc, path, query, frag = url_parts
path = '/'
url = urlunsplit((scheme, netloc, path, query, frag))
return WbResponse.redir_response(urlrewriter.rewrite(url),
'307 Temporary Redirect')
self.unrewrite_referrer(environ)
@ -211,7 +215,8 @@ class RewriterApp(object):
cdx['url'] = target_uri
if target_uri != wb_url.url and r.headers.get('WebAgg-Fuzzy-Match') == '1':
return WbResponse.redir_response(urlrewriter.rewrite(target_uri))
return WbResponse.redir_response(urlrewriter.rewrite(target_uri),
'307 Temporary Redirect')
self._add_custom_params(cdx, r.headers, kwargs)

View File

@ -106,7 +106,7 @@ class TestWbIntegration(BaseConfigTest):
def test_replay_fuzzy_1(self):
    """Check that a replay request with an extra query string is redirected.

    Requests ``http://www.iana.org/?_=123`` at timestamp 20140127171238 in
    the ``pywb`` collection and expects a redirect whose ``Location`` ends
    with the replay URL for ``http://www.iana.org/`` (no query string).
    """
    resp = self.testapp.get('/pywb/20140127171238mp_/http://www.iana.org/?_=123')
    # The redirect is sent as '307 Temporary Redirect'; asserting 302 as well
    # would make this test impossible to satisfy.
    assert resp.status_int == 307
    assert resp.headers['Location'].endswith('/pywb/20140127171238mp_/http://www.iana.org/')
def test_replay_no_fuzzy_match(self):
@ -121,8 +121,18 @@ class TestWbIntegration(BaseConfigTest):
# assert 'wb.js' in resp.text
# assert '/pywb-nosurt/20140103030321/http://www.iana.org/domains/example' in resp.text
def test_no_slash_redir_1(self):
    """A host-only URL with no trailing slash is redirected to the slashed form.

    Expects a 307 whose ``Location`` ends with the same replay URL plus a
    trailing ``/`` after the hostname.
    """
    response = self.testapp.get('/pywb/20140103030321mp_/http://example.com')
    assert response.status_int == 307
    redirect_target = response.headers['Location']
    assert redirect_target.endswith('/pywb/20140103030321mp_/http://example.com/')
def test_no_slash_redir_2(self):
    """A host-only URL with a query but no slash is redirected, keeping the query.

    Expects a 307 whose ``Location`` ends with the replay URL where ``/`` has
    been inserted between the hostname and ``?example=1``.
    """
    response = self.testapp.get('/pywb/20140103030321mp_/http://example.com?example=1')
    assert response.status_int == 307
    redirect_target = response.headers['Location']
    assert redirect_target.endswith('/pywb/20140103030321mp_/http://example.com/?example=1')
def test_replay_cdxj(self):
resp = self.testapp.get('/pywb-cdxj/20140103030321mp_/http://example.com?example=1')
resp = self.testapp.get('/pywb-cdxj/20140103030321mp_/http://example.com/?example=1')
self._assert_basic_html(resp)
assert '"20140103030321"' in resp.text
@ -130,7 +140,7 @@ class TestWbIntegration(BaseConfigTest):
assert '/pywb-cdxj/20140103030321mp_/http://www.iana.org/domains/example' in resp.text
def test_replay_cdxj_revisit(self):
resp = self.testapp.get('/pywb-cdxj/20140103030341mp_/http://example.com?example=1')
resp = self.testapp.get('/pywb-cdxj/20140103030341mp_/http://example.com/?example=1')
self._assert_basic_html(resp)
assert '"20140103030341"' in resp.text
@ -138,7 +148,7 @@ class TestWbIntegration(BaseConfigTest):
assert '/pywb-cdxj/20140103030341mp_/http://www.iana.org/domains/example' in resp.text
def test_zero_len_revisit(self):
resp = self.testapp.get('/pywb/20140603030341mp_/http://example.com?example=2')
resp = self.testapp.get('/pywb/20140603030341mp_/http://example.com/?example=2')
self._assert_basic_html(resp)
assert '"20140603030341"' in resp.text
@ -181,7 +191,7 @@ class TestWbIntegration(BaseConfigTest):
assert '"/_css/2013.1/screen.css"' in resp.text
def test_replay_identity_1(self):
resp = self.testapp.get('/pywb/20140127171251id_/http://example.com')
resp = self.testapp.get('/pywb/20140127171251id_/http://example.com/')
# no wb header insertion
assert 'wb.js' not in resp.text
@ -235,7 +245,7 @@ class TestWbIntegration(BaseConfigTest):
assert resp.content_length == 0
def test_replay_identity_2_arcgz(self):
resp = self.testapp.get('/pywb/20140216050221id_/http://arc.gz.test.example.com')
resp = self.testapp.get('/pywb/20140216050221id_/http://arc.gz.test.example.com/')
# no wb header insertion
assert 'wb.js' not in resp.text
@ -244,7 +254,7 @@ class TestWbIntegration(BaseConfigTest):
assert '"http://www.iana.org/domains/example"' in resp.text
def test_replay_identity_2_arc(self):
resp = self.testapp.get('/pywb/20140216050221id_/http://arc.test.example.com')
resp = self.testapp.get('/pywb/20140216050221id_/http://arc.test.example.com/')
# no wb header insertion
assert 'wb.js' not in resp.text
@ -350,21 +360,21 @@ class TestWbIntegration(BaseConfigTest):
# assert resp.status_int == 302
def test_not_existant_warc_other_capture(self):
    """Replay at 20140703030321 should be served from the 20140603030341 capture.

    Expects a 200 response whose ``Content-Location`` ends with the replay
    URL of the 20140603030341 capture.
    NOTE(review): per the test name, the requested capture presumably points
    at a WARC that does not exist -- confirm against the test fixtures.
    """
    # Request the slashed form directly; the slash-less URL would only yield
    # the trailing-slash redirect, not the replayed capture.
    resp = self.testapp.get('/pywb/20140703030321mp_/http://example.com/?example=2')
    assert resp.status_int == 200
    assert resp.headers['Content-Location'].endswith('/pywb/20140603030341mp_/http://example.com?example=2')
def test_missing_revisit_other_capture(self):
    """Replay at 20140603030351 should be served from the 20140603030341 capture.

    Expects a 200 response whose ``Content-Location`` ends with the replay
    URL of the 20140603030341 capture.
    NOTE(review): per the test name, the requested capture is presumably a
    revisit record whose original is missing -- confirm against the fixtures.
    """
    # Request the slashed form directly; the slash-less URL would only yield
    # the trailing-slash redirect, not the replayed capture.
    resp = self.testapp.get('/pywb/20140603030351mp_/http://example.com/?example=2')
    assert resp.status_int == 200
    assert resp.headers['Content-Location'].endswith('/pywb/20140603030341mp_/http://example.com?example=2')
def test_not_existant_warc_no_other(self):
    """Replay of ``?example=3`` at 20140703030321 is expected to fail with a 503.

    NOTE(review): per the test name, the capture presumably references a
    missing WARC with no alternative capture to fall back on -- confirm.
    """
    # Only the slashed URL reaches replay; a slash-less request expecting 503
    # would instead receive the 307 trailing-slash redirect.
    resp = self.testapp.get('/pywb/20140703030321mp_/http://example.com/?example=3', status=503)
    assert resp.status_int == 503
def test_missing_revisit_no_other(self):
    """Replay of ``?example=3`` at 20140603030351 is expected to fail with a 503.

    NOTE(review): per the test name, the capture is presumably a revisit with
    a missing original and no alternative capture -- confirm.
    """
    # Only the slashed URL reaches replay; a slash-less request expecting 503
    # would instead receive the 307 trailing-slash redirect.
    resp = self.testapp.get('/pywb/20140603030351mp_/http://example.com/?example=3', status=503)
    assert resp.status_int == 503
def test_live_frame(self):
@ -429,7 +439,7 @@ class TestWbIntegration(BaseConfigTest):
assert 'Excluded' in resp.text
def test_replay_not_found(self):
    """A HEAD request for a URL with no capture returns an HTML 404 response."""
    # Use the slashed URL: the slash-less form would be answered with the
    # 307 trailing-slash redirect before any lookup, not with a 404.
    resp = self.testapp.head('/pywb/mp_/http://not-exist.example.com/', status=404)
    assert resp.content_type == 'text/html'
    assert resp.status_int == 404

View File

@ -32,11 +32,13 @@ class TestLiveRewriter(BaseConfigTest):
assert 'src="http://localhost:80/live/mp_/http://example.com/"' in resp.text, resp.text
def test_live_invalid(self):
    """An invalid live URL is first redirected (307), then rejected with a 400.

    ``http://abcdef`` has no path, so the first response is the
    trailing-slash redirect; following it is expected to produce the 400.
    """
    # Expecting 400 on the initial request contradicts the 307 expected here,
    # so only the redirect-then-follow sequence is issued.
    resp = self.testapp.get('/live/mp_/http://abcdef', status=307)
    resp = resp.follow(status=400)
    assert resp.status_int == 400
def test_live_invalid_2(self):
    """A malformed live URL is first redirected (307), then rejected with a 400."""
    # Expecting 400 on the initial request contradicts the 307 expected here,
    # so only the redirect-then-follow sequence is issued.
    resp = self.testapp.get('/live/mp_/@#$@#$', status=307)
    resp = resp.follow(status=400)
    assert resp.status_int == 400
def test_live_video_info(self):