1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-14 15:53:28 +01:00

Misc fixes for 2.3.2 release (#490)

* misc fixes:
- ensure SCRIPT_NAME is never None (default to an empty string), fixes #466
- static: if the URL ends in '/', append 'index.html' before looking up the file
- tests: use a local httpbin server instead of iana.org URLs
- docker: switch to $VOLUME_DIR before initializing the collection
- ensure static_prefix is set correctly after host prefix
- bump version to 2.3.2.dev0

* rules update: fix fuzzy matching, rewriting rules for soundcloud
This commit is contained in:
Ilya Kreymer 2019-07-24 10:47:17 -07:00 committed by GitHub
parent d4518ae557
commit 837894a07f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 36 additions and 13 deletions

View File

@ -29,6 +29,7 @@ if [ "$MY_GID" != "$VOLUME_GID" ] || [ "$MY_UID" != "$VOLUME_UID" ]; then
else else
# initialize a collection if defined and not present # initialize a collection if defined and not present
if [ -n "$INIT_COLLECTION" ] && [ ! -d $VOLUME_DIR/collections/$INIT_COLLECTION ]; then if [ -n "$INIT_COLLECTION" ] && [ ! -d $VOLUME_DIR/collections/$INIT_COLLECTION ]; then
cd $VOLUME_DIR
wb-manager init $INIT_COLLECTION wb-manager init $INIT_COLLECTION
fi fi

View File

@ -286,7 +286,7 @@ class FrontEndApp(object):
view = BaseInsertView(self.rewriterapp.jinja_env, 'search.html') view = BaseInsertView(self.rewriterapp.jinja_env, 'search.html')
wb_prefix = environ.get('SCRIPT_NAME') wb_prefix = environ.get('SCRIPT_NAME', '')
if wb_prefix: if wb_prefix:
wb_prefix += '/' wb_prefix += '/'
@ -494,7 +494,7 @@ class FrontEndApp(object):
try: try:
endpoint, args = urls.match() endpoint, args = urls.match()
# store original script_name (original prefix) before modifications are made # store original script_name (original prefix) before modifications are made
environ['pywb.app_prefix'] = environ.get('SCRIPT_NAME') environ['pywb.app_prefix'] = environ.get('SCRIPT_NAME', '')
response = endpoint(environ, **args) response = endpoint(environ, **args)
return response(environ, start_response) return response(environ, start_response)

View File

@ -221,7 +221,8 @@ class RewriterApp(object):
host_prefix = self.get_host_prefix(environ) host_prefix = self.get_host_prefix(environ)
rel_prefix = self.get_rel_prefix(environ) rel_prefix = self.get_rel_prefix(environ)
full_prefix = host_prefix + rel_prefix full_prefix = host_prefix + rel_prefix
pywb_static_prefix = environ.get('pywb.host_prefix', '') + environ.get('pywb.app_prefix', '') + environ.get( environ['pywb.host_prefix'] = host_prefix
pywb_static_prefix = host_prefix + environ.get('pywb.app_prefix', '') + environ.get(
'pywb.static_prefix', '/static/') 'pywb.static_prefix', '/static/')
is_proxy = ('wsgiprox.proxy_host' in environ) is_proxy = ('wsgiprox.proxy_host' in environ)
@ -254,8 +255,6 @@ class RewriterApp(object):
urlkey = canonicalize(wb_url.url) urlkey = canonicalize(wb_url.url)
environ['pywb.host_prefix'] = host_prefix
if self.use_js_obj_proxy: if self.use_js_obj_proxy:
content_rw = self.js_proxy_rw content_rw = self.js_proxy_rw
else: else:

View File

@ -20,6 +20,9 @@ class StaticHandler(object):
def __call__(self, environ, url_str): def __call__(self, environ, url_str):
url = url_str.split('?')[0] url = url_str.split('?')[0]
if url.endswith('/'):
url += 'index.html'
full_path = environ.get('pywb.static_dir') full_path = environ.get('pywb.static_dir')
if full_path: if full_path:
full_path = os.path.join(full_path, url) full_path = os.path.join(full_path, url)

View File

@ -278,6 +278,10 @@ rules:
# soundcloud # soundcloud
#================================================================= #=================================================================
- url_prefix: 'com,sndcdn,cf-media)/'
fuzzy_lookup: '()'
- url_prefix: 'com,soundcloud,api)/i1/tracks/' - url_prefix: 'com,soundcloud,api)/i1/tracks/'
rewrite: rewrite:
@ -287,6 +291,15 @@ rules:
replace: '"__hls' replace: '"__hls'
- url_prefix: 'com,soundcloud,api-v2)/'
rewrite:
live_only: true
js_regexs:
- match: 'hls'
replace: 'mp3'
# vimeo rules # vimeo rules
#================================================================= #=================================================================

View File

@ -1,4 +1,4 @@
__version__ = '2.3.1' __version__ = '2.3.2.dev0'
if __name__ == '__main__': if __name__ == '__main__':
print(__version__) print(__version__)

View File

@ -171,6 +171,7 @@ class HttpBinLiveTests(object):
cls.httpbin_server = GeventServer(httpbin_app) cls.httpbin_server = GeventServer(httpbin_app)
httpbin_local = 'http://localhost:' + str(cls.httpbin_server.port) + '/' httpbin_local = 'http://localhost:' + str(cls.httpbin_server.port) + '/'
cls.httpbin_local = httpbin_local
def get_load_url(self, params): def get_load_url(self, params):
params['url'] = params['url'].replace('http://test.httpbin.org/', httpbin_local) params['url'] = params['url'].replace('http://test.httpbin.org/', httpbin_local)
@ -181,6 +182,10 @@ class HttpBinLiveTests(object):
cls.indexmock = patch('pywb.warcserver.index.indexsource.LiveIndexSource.get_load_url', get_load_url) cls.indexmock = patch('pywb.warcserver.index.indexsource.LiveIndexSource.get_load_url', get_load_url)
cls.indexmock.start() cls.indexmock.start()
@classmethod
def get_httpbin_url(cls, url):
return url.replace(cls.httpbin_local, 'http://httpbin.org/')
@classmethod @classmethod
def teardown_class(cls): def teardown_class(cls):
cls.indexmock.stop() cls.indexmock.stop()

View File

@ -1,39 +1,41 @@
from .base_config_test import BaseConfigTest, fmod from .base_config_test import BaseConfigTest, fmod
from pywb.warcserver.test.testutils import HttpBinLiveTests
# ============================================================================ # ============================================================================
class TestRootColl(BaseConfigTest): class TestRootColl(HttpBinLiveTests, BaseConfigTest):
@classmethod @classmethod
def setup_class(cls): def setup_class(cls):
super(TestRootColl, cls).setup_class('config_test_root_coll.yaml') super(TestRootColl, cls).setup_class('config_test_root_coll.yaml')
def test_root_replay_ts(self, fmod): def test_root_replay_ts(self, fmod):
resp = self.get('/20140127171238{0}/http://www.iana.org/', fmod) resp = self.get('/20140127171238{0}/http://httpbin.org/base64/PGh0bWw+PGJvZHk+PGEgaHJlZj0iL3Rlc3QvcGF0aCI+VGVzdCBVUkw8L2E+PC9ib2R5PjwvaHRtbD4=', fmod)
# Body # Body
assert '"20140127171238"' in resp.text assert '"20140127171238"' in resp.text
assert 'wombat.js' in resp.text assert 'wombat.js' in resp.text
assert 'WBWombatInit' in resp.text, resp.text assert 'WBWombatInit' in resp.text, resp.text
assert 'wbinfo.enable_auto_fetch = true;' in resp.text, resp.text assert 'wbinfo.enable_auto_fetch = true;' in resp.text, resp.text
assert '/20140127171238{0}/http://www.iana.org/time-zones"'.format(fmod) in resp.text assert '/20140127171238{0}/http://httpbin.org/test/path"'.format(fmod) in resp.text
def test_root_replay_no_ts(self, fmod): def test_root_replay_no_ts(self, fmod):
fmod_slash = fmod + '/' if fmod else '' fmod_slash = fmod + '/' if fmod else ''
resp = self.get('/{0}http://www.iana.org/', fmod_slash) resp = self.get('/{0}http://httpbin.org/base64/PGh0bWw+PGJvZHk+PGEgaHJlZj0iL3Rlc3QvcGF0aCI+VGVzdCBVUkw8L2E+PC9ib2R5PjwvaHRtbD4=', fmod_slash)
# Body # Body
assert 'request_ts = ""' in resp.text assert 'request_ts = ""' in resp.text
assert 'wombat.js' in resp.text assert 'wombat.js' in resp.text
assert 'WBWombatInit' in resp.text, resp.text assert 'WBWombatInit' in resp.text, resp.text
assert 'wbinfo.enable_auto_fetch = true;' in resp.text, resp.text assert 'wbinfo.enable_auto_fetch = true;' in resp.text, resp.text
assert '/{0}http://www.iana.org/time-zones"'.format(fmod_slash) in resp.text assert '/{0}http://httpbin.org/test/path"'.format(fmod_slash) in resp.text
def test_root_replay_redir(self, fmod): def test_root_replay_redir(self, fmod):
resp = self.get('/20140128051539{0}/http://www.iana.org/domains/example', fmod) resp = self.get('/20140128051539{0}/http://httpbin.org/redirect-to?url=http://httpbin.org/get', fmod)
assert resp.status_int in (301, 302) assert resp.status_int in (301, 302)
assert resp.headers['Location'] == 'http://localhost:80/20140128051539{0}/https://www.iana.org/domains/reserved'.format(fmod) location = self.get_httpbin_url(resp.headers['Location'])
assert location == 'http://localhost:80/20140128051539{0}/http://httpbin.org/get'.format(fmod)
def test_root_home_search(self): def test_root_home_search(self):
resp = self.testapp.get('/') resp = self.testapp.get('/')