From 5f3d37bb44077371c1d3c732f4c605c9c435f5be Mon Sep 17 00:00:00 2001
From: Ilya Kreymer
Date: Mon, 21 May 2018 11:57:43 -0700
Subject: [PATCH] origin header improvement: if Referer header is available, compute Origin from the Referer, not from target url (#329)

(Origin header received will be the pywb host, using Referer will result in more accurate Origin, which may not be the target url)
tests: add tests to verify Origin header with and without Referer
---
 pywb/rewrite/rewriteinputreq.py | 10 ++++++++--
 tests/test_live_rewriter.py     | 19 +++++++++++++++++++
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/pywb/rewrite/rewriteinputreq.py b/pywb/rewrite/rewriteinputreq.py
index 21efb1f5..21e7f94e 100644
--- a/pywb/rewrite/rewriteinputreq.py
+++ b/pywb/rewrite/rewriteinputreq.py
@@ -49,8 +49,14 @@ class RewriteInputRequest(DirectWSGIInputRequest):
 
             elif name == 'HTTP_ORIGIN':
                 name = 'Origin'
-                if self.splits:
-                    value = (self.splits.scheme + '://' + self.splits.netloc)
+                referrer = self.env.get('HTTP_REFERER')
+                if referrer:
+                    splits = urlsplit(referrer)
+                else:
+                    splits = self.splits
+
+                if splits:
+                    value = (splits.scheme + '://' + splits.netloc)
 
             elif name == 'HTTP_X_CSRFTOKEN':
                 name = 'X-CSRFToken'
diff --git a/tests/test_live_rewriter.py b/tests/test_live_rewriter.py
index cd6afb71..08c6ef71 100644
--- a/tests/test_live_rewriter.py
+++ b/tests/test_live_rewriter.py
@@ -64,3 +64,22 @@ class TestLiveRewriter(HttpBinLiveTests, BaseConfigTest):
     def test_deflate(self, fmod_sl):
         resp = self.get('/live/{0}http://httpbin.org/deflate', fmod_sl)
         assert b'"deflated": true' in resp.body
+
+    def test_live_origin_and_referrer(self, fmod_sl):
+        headers = {'Referer': 'http://localhost:80/live/{0}http://example.com/test'.format(fmod_sl),
+                   'Origin': 'http://localhost:80'
+                  }
+
+        resp = self.get('/live/{0}http://httpbin.org/get?test=headers', fmod_sl, headers=headers)
+
+        assert resp.json['headers']['Referer'] == 'http://example.com/test'
+        assert resp.json['headers']['Origin'] == 'http://example.com'
+
+    def test_live_origin_no_referrer(self, fmod_sl):
+        headers = {'Origin': 'http://localhost:80'}
+
+        resp = self.get('/live/{0}http://httpbin.org/get?test=headers', fmod_sl, headers=headers)
+
+        assert resp.json['headers']['Origin'] == 'http://httpbin.org'
+
+