mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
Add ir_ modifier (#759)
* rewrite: add the 'ir_' modifier to support header-only URL rewriting with no content rewriting
* tests: add tests for 'ir_' verifying that the content is identical to 'id_', but that Location headers are rewritten with the 'ir_' modifier
This commit is contained in:
parent
8ef4ff102d
commit
1fddec216d
@ -524,7 +524,7 @@ class RewriteInfo(object):
|
|||||||
if not self.text_type:
|
if not self.text_type:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if self.url_rewriter.wburl.mod == 'id_':
|
if self.is_identity():
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if self.url_rewriter.rewrite_opts.get('is_ajax'):
|
if self.url_rewriter.rewrite_opts.get('is_ajax'):
|
||||||
@ -537,9 +537,11 @@ class RewriteInfo(object):
|
|||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
def is_identity(self):
|
||||||
|
return self.url_rewriter.wburl.mod in ('id_', 'ir_')
|
||||||
|
|
||||||
def is_url_rw(self):
|
def is_url_rw(self):
|
||||||
if self.url_rewriter.wburl.mod in ('id_', 'bn_', 'wkrf_'):
|
if self.url_rewriter.wburl.mod in ('id_', 'bn_', 'wkrf_'):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
@ -102,6 +102,7 @@ class DefaultRewriter(BaseContentRewriter):
|
|||||||
super(DefaultRewriter, self).__init__(rules_file, replay_mod)
|
super(DefaultRewriter, self).__init__(rules_file, replay_mod)
|
||||||
self.all_rewriters = copy.copy(self.DEFAULT_REWRITERS)
|
self.all_rewriters = copy.copy(self.DEFAULT_REWRITERS)
|
||||||
|
|
||||||
|
self.add_prefer_mod('raw', 'ir_')
|
||||||
self.add_prefer_mod('raw', 'id_')
|
self.add_prefer_mod('raw', 'id_')
|
||||||
self.add_prefer_mod('banner-only', 'bn_')
|
self.add_prefer_mod('banner-only', 'bn_')
|
||||||
self.add_prefer_mod('rewritten', replay_mod)
|
self.add_prefer_mod('rewritten', replay_mod)
|
||||||
|
@ -138,7 +138,19 @@ class TestWbIntegration(BaseConfigTest):
|
|||||||
|
|
||||||
def test_replay_redirect(self, fmod):
|
def test_replay_redirect(self, fmod):
|
||||||
resp = self.get('/pywb/2014{0}/http://www.iana.org/domains/example', fmod)
|
resp = self.get('/pywb/2014{0}/http://www.iana.org/domains/example', fmod)
|
||||||
assert resp.headers['Location'].startswith('/pywb/2014{0}/'.format(fmod))
|
assert resp.headers['Location'] == '/pywb/2014{0}/http://www.iana.org/domains/reserved'.format(fmod)
|
||||||
|
assert resp.status_code == 302
|
||||||
|
|
||||||
|
def test_replay_redirect_id(self):
|
||||||
|
resp = self.get('/pywb/2014id_/http://www.iana.org/domains/example', fmod)
|
||||||
|
print(resp.headers['Location'])
|
||||||
|
assert resp.headers['Location'] == '/domains/reserved'
|
||||||
|
assert resp.status_code == 302
|
||||||
|
|
||||||
|
def test_replay_redirect_ir(self):
|
||||||
|
resp = self.get('/pywb/2014ir_/http://www.iana.org/domains/example', fmod)
|
||||||
|
print(resp.headers['Location'])
|
||||||
|
assert resp.headers['Location'] == '/pywb/2014ir_/http://www.iana.org/domains/reserved'
|
||||||
assert resp.status_code == 302
|
assert resp.status_code == 302
|
||||||
|
|
||||||
def test_replay_fuzzy_1(self, fmod):
|
def test_replay_fuzzy_1(self, fmod):
|
||||||
@ -224,6 +236,17 @@ class TestWbIntegration(BaseConfigTest):
|
|||||||
# original unrewritten url present
|
# original unrewritten url present
|
||||||
assert '"http://www.iana.org/domains/example"' in resp.text
|
assert '"http://www.iana.org/domains/example"' in resp.text
|
||||||
|
|
||||||
|
def test_replay_identity_1_ir(self):
|
||||||
|
resp = self.testapp.get('/pywb/20140127171251ir_/http://example.com/')
|
||||||
|
|
||||||
|
# no wb header insertion
|
||||||
|
assert 'wombat.js' not in resp.text
|
||||||
|
|
||||||
|
assert resp.content_length == 1270, resp.content_length
|
||||||
|
|
||||||
|
# original unrewritten url present
|
||||||
|
assert '"http://www.iana.org/domains/example"' in resp.text
|
||||||
|
|
||||||
def test_replay_identity_2_arcgz(self):
|
def test_replay_identity_2_arcgz(self):
|
||||||
resp = self.testapp.get('/pywb/20140216050221id_/http://arc.gz.test.example.com/')
|
resp = self.testapp.get('/pywb/20140216050221id_/http://arc.gz.test.example.com/')
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user