From 837894a07f2e93142aa5da33ccfa29d08dede9dd Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Wed, 24 Jul 2019 10:47:17 -0700 Subject: [PATCH] Misc fixes for 2.3.2 release (#490) * misc fixes: - ensure SCRIPT_NAME is never empty, fixes #466 - static: if ending in '/' look for '/index.html' - tests: use local httpbin instead of iana.org tests - docker: switch to $VOLUME_DIR before initing collection - ensure static_prefix is set correctly after host prefix - bump version to 2.3.2.dev0 * rules update: fix fuzzy matching, rewriting rules for soundcloud --- docker-entrypoint.sh | 1 + pywb/apps/frontendapp.py | 4 ++-- pywb/apps/rewriterapp.py | 5 ++--- pywb/apps/static_handler.py | 3 +++ pywb/rules.yaml | 13 +++++++++++++ pywb/version.py | 2 +- pywb/warcserver/test/testutils.py | 5 +++++ tests/test_root_coll.py | 16 +++++++++------- 8 files changed, 36 insertions(+), 13 deletions(-) diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh index d669b9ad..787dd13f 100755 --- a/docker-entrypoint.sh +++ b/docker-entrypoint.sh @@ -29,6 +29,7 @@ if [ "$MY_GID" != "$VOLUME_GID" ] || [ "$MY_UID" != "$VOLUME_UID" ]; then else # initialize a collection if defined and not present if [ -n "$INIT_COLLECTION" ] && [ ! -d $VOLUME_DIR/collections/$INIT_COLLECTION ]; then + cd $VOLUME_DIR wb-manager init $INIT_COLLECTION fi diff --git a/pywb/apps/frontendapp.py b/pywb/apps/frontendapp.py index 9626a07f..d3614a37 100644 --- a/pywb/apps/frontendapp.py +++ b/pywb/apps/frontendapp.py @@ -286,7 +286,7 @@ class FrontEndApp(object): view = BaseInsertView(self.rewriterapp.jinja_env, 'search.html') - wb_prefix = environ.get('SCRIPT_NAME') + wb_prefix = environ.get('SCRIPT_NAME', '') if wb_prefix: wb_prefix += '/' @@ -494,7 +494,7 @@ class FrontEndApp(object): try: endpoint, args = urls.match() # store original script_name (original prefix) before modifications are made - environ['pywb.app_prefix'] = environ.get('SCRIPT_NAME') + environ['pywb.app_prefix'] = environ.get('SCRIPT_NAME', '') response = endpoint(environ, **args) return response(environ, start_response) diff --git a/pywb/apps/rewriterapp.py b/pywb/apps/rewriterapp.py index c22bcbfc..d040925c 100644 --- a/pywb/apps/rewriterapp.py +++ b/pywb/apps/rewriterapp.py @@ -221,7 +221,8 @@ class RewriterApp(object): host_prefix = self.get_host_prefix(environ) rel_prefix = self.get_rel_prefix(environ) full_prefix = host_prefix + rel_prefix - pywb_static_prefix = environ.get('pywb.host_prefix', '') + environ.get('pywb.app_prefix', '') + environ.get( + environ['pywb.host_prefix'] = host_prefix + pywb_static_prefix = host_prefix + environ.get('pywb.app_prefix', '') + environ.get( 'pywb.static_prefix', '/static/') is_proxy = ('wsgiprox.proxy_host' in environ) @@ -254,8 +255,6 @@ class RewriterApp(object): urlkey = canonicalize(wb_url.url) - environ['pywb.host_prefix'] = host_prefix - if self.use_js_obj_proxy: content_rw = self.js_proxy_rw else: diff --git a/pywb/apps/static_handler.py b/pywb/apps/static_handler.py index 2ade7aec..c4fcc62b 100644 --- a/pywb/apps/static_handler.py +++ b/pywb/apps/static_handler.py @@ -20,6 +20,9 @@ class StaticHandler(object): def __call__(self, environ, url_str): url = url_str.split('?')[0] + if url.endswith('/'): + url += 'index.html' + full_path = environ.get('pywb.static_dir') if full_path: full_path = os.path.join(full_path, url) diff --git a/pywb/rules.yaml b/pywb/rules.yaml index 53fc531d..d89f2549 100644 --- a/pywb/rules.yaml +++ b/pywb/rules.yaml @@ -278,6 +278,10 @@ rules: # soundcloud #================================================================= + - url_prefix: 'com,sndcdn,cf-media)/' + + fuzzy_lookup: '()' + - url_prefix: 'com,soundcloud,api)/i1/tracks/' rewrite: @@ -287,6 +291,15 @@ rules: replace: '"__hls' + - url_prefix: 'com,soundcloud,api-v2)/' + + rewrite: + live_only: true + js_regexs: + - match: 'hls' + replace: 'mp3' + + # vimeo rules #================================================================= diff --git a/pywb/version.py b/pywb/version.py index f92e3330..780f81d6 100644 --- a/pywb/version.py +++ b/pywb/version.py @@ -1,4 +1,4 @@ -__version__ = '2.3.1' +__version__ = '2.3.2.dev0' if __name__ == '__main__': print(__version__) diff --git a/pywb/warcserver/test/testutils.py b/pywb/warcserver/test/testutils.py index dbf97650..37b956ea 100644 --- a/pywb/warcserver/test/testutils.py +++ b/pywb/warcserver/test/testutils.py @@ -171,6 +171,7 @@ class HttpBinLiveTests(object): cls.httpbin_server = GeventServer(httpbin_app) httpbin_local = 'http://localhost:' + str(cls.httpbin_server.port) + '/' + cls.httpbin_local = httpbin_local def get_load_url(self, params): params['url'] = params['url'].replace('http://test.httpbin.org/', httpbin_local) @@ -181,6 +182,10 @@ class HttpBinLiveTests(object): cls.indexmock = patch('pywb.warcserver.index.indexsource.LiveIndexSource.get_load_url', get_load_url) cls.indexmock.start() + @classmethod + def get_httpbin_url(cls, url): + return url.replace(cls.httpbin_local, 'http://httpbin.org/') + @classmethod def teardown_class(cls): cls.indexmock.stop() diff --git a/tests/test_root_coll.py b/tests/test_root_coll.py index 2fab9db5..3f843a26 100644 --- a/tests/test_root_coll.py +++ b/tests/test_root_coll.py @@ -1,39 +1,41 @@ from .base_config_test import BaseConfigTest, fmod +from pywb.warcserver.test.testutils import HttpBinLiveTests # ============================================================================ -class TestRootColl(BaseConfigTest): +class TestRootColl(HttpBinLiveTests, BaseConfigTest): @classmethod def setup_class(cls): super(TestRootColl, cls).setup_class('config_test_root_coll.yaml') def test_root_replay_ts(self, fmod): - resp = self.get('/20140127171238{0}/http://www.iana.org/', fmod) + resp = self.get('/20140127171238{0}/http://httpbin.org/base64/PGh0bWw+PGJvZHk+PGEgaHJlZj0iL3Rlc3QvcGF0aCI+VGVzdCBVUkw8L2E+PC9ib2R5PjwvaHRtbD4=', fmod) # Body assert '"20140127171238"' in resp.text assert 'wombat.js' in resp.text assert 'WBWombatInit' in resp.text, resp.text assert 'wbinfo.enable_auto_fetch = true;' in resp.text, resp.text - assert '/20140127171238{0}/http://www.iana.org/time-zones"'.format(fmod) in resp.text + assert '/20140127171238{0}/http://httpbin.org/test/path"'.format(fmod) in resp.text def test_root_replay_no_ts(self, fmod): fmod_slash = fmod + '/' if fmod else '' - resp = self.get('/{0}http://www.iana.org/', fmod_slash) + resp = self.get('/{0}http://httpbin.org/base64/PGh0bWw+PGJvZHk+PGEgaHJlZj0iL3Rlc3QvcGF0aCI+VGVzdCBVUkw8L2E+PC9ib2R5PjwvaHRtbD4=', fmod_slash) # Body assert 'request_ts = ""' in resp.text assert 'wombat.js' in resp.text assert 'WBWombatInit' in resp.text, resp.text assert 'wbinfo.enable_auto_fetch = true;' in resp.text, resp.text - assert '/{0}http://www.iana.org/time-zones"'.format(fmod_slash) in resp.text + assert '/{0}http://httpbin.org/test/path"'.format(fmod_slash) in resp.text def test_root_replay_redir(self, fmod): - resp = self.get('/20140128051539{0}/http://www.iana.org/domains/example', fmod) + resp = self.get('/20140128051539{0}/http://httpbin.org/redirect-to?url=http://httpbin.org/get', fmod) assert resp.status_int in (301, 302) - assert resp.headers['Location'] == 'http://localhost:80/20140128051539{0}/https://www.iana.org/domains/reserved'.format(fmod) + location = self.get_httpbin_url(resp.headers['Location']) + assert location == 'http://localhost:80/20140128051539{0}/http://httpbin.org/get'.format(fmod) def test_root_home_search(self): resp = self.testapp.get('/')