1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

Add ir_ modifier (#759)

* rewrite: add 'ir_' mod to support header-only URL rewriting with no content rewriting
* tests: add tests for ir_ verifying that content is identical to id_, but Location headers are rewritten with the ir_ modifier.
This commit is contained in:
Ilya Kreymer 2022-08-31 18:49:45 -07:00 committed by GitHub
parent 8ef4ff102d
commit 1fddec216d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 29 additions and 3 deletions

View File

@ -524,7 +524,7 @@ class RewriteInfo(object):
if not self.text_type: if not self.text_type:
return False return False
if self.url_rewriter.wburl.mod == 'id_': if self.is_identity():
return False return False
if self.url_rewriter.rewrite_opts.get('is_ajax'): if self.url_rewriter.rewrite_opts.get('is_ajax'):
@ -537,9 +537,11 @@ class RewriteInfo(object):
return True return True
def is_identity(self):
return self.url_rewriter.wburl.mod in ('id_', 'ir_')
def is_url_rw(self): def is_url_rw(self):
if self.url_rewriter.wburl.mod in ('id_', 'bn_', 'wkrf_'): if self.url_rewriter.wburl.mod in ('id_', 'bn_', 'wkrf_'):
return False return False
return True return True

View File

@ -102,6 +102,7 @@ class DefaultRewriter(BaseContentRewriter):
super(DefaultRewriter, self).__init__(rules_file, replay_mod) super(DefaultRewriter, self).__init__(rules_file, replay_mod)
self.all_rewriters = copy.copy(self.DEFAULT_REWRITERS) self.all_rewriters = copy.copy(self.DEFAULT_REWRITERS)
self.add_prefer_mod('raw', 'ir_')
self.add_prefer_mod('raw', 'id_') self.add_prefer_mod('raw', 'id_')
self.add_prefer_mod('banner-only', 'bn_') self.add_prefer_mod('banner-only', 'bn_')
self.add_prefer_mod('rewritten', replay_mod) self.add_prefer_mod('rewritten', replay_mod)

View File

@ -138,7 +138,19 @@ class TestWbIntegration(BaseConfigTest):
def test_replay_redirect(self, fmod): def test_replay_redirect(self, fmod):
resp = self.get('/pywb/2014{0}/http://www.iana.org/domains/example', fmod) resp = self.get('/pywb/2014{0}/http://www.iana.org/domains/example', fmod)
assert resp.headers['Location'].startswith('/pywb/2014{0}/'.format(fmod)) assert resp.headers['Location'] == '/pywb/2014{0}/http://www.iana.org/domains/reserved'.format(fmod)
assert resp.status_code == 302
def test_replay_redirect_id(self):
resp = self.get('/pywb/2014id_/http://www.iana.org/domains/example', fmod)
print(resp.headers['Location'])
assert resp.headers['Location'] == '/domains/reserved'
assert resp.status_code == 302
def test_replay_redirect_ir(self):
resp = self.get('/pywb/2014ir_/http://www.iana.org/domains/example', fmod)
print(resp.headers['Location'])
assert resp.headers['Location'] == '/pywb/2014ir_/http://www.iana.org/domains/reserved'
assert resp.status_code == 302 assert resp.status_code == 302
def test_replay_fuzzy_1(self, fmod): def test_replay_fuzzy_1(self, fmod):
@ -224,6 +236,17 @@ class TestWbIntegration(BaseConfigTest):
# original unrewritten url present # original unrewritten url present
assert '"http://www.iana.org/domains/example"' in resp.text assert '"http://www.iana.org/domains/example"' in resp.text
def test_replay_identity_1_ir(self):
resp = self.testapp.get('/pywb/20140127171251ir_/http://example.com/')
# no wb header insertion
assert 'wombat.js' not in resp.text
assert resp.content_length == 1270, resp.content_length
# original unrewritten url present
assert '"http://www.iana.org/domains/example"' in resp.text
def test_replay_identity_2_arcgz(self): def test_replay_identity_2_arcgz(self):
resp = self.testapp.get('/pywb/20140216050221id_/http://arc.gz.test.example.com/') resp = self.testapp.get('/pywb/20140216050221id_/http://arc.gz.test.example.com/')