mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
rewrite: add support for rewriting the Cookie request header, to support sites
which require a cookie to be set. The req_cookie_rewrite directive can be set in rules.yaml per url prefix as a list of match/replace regexes.
This commit is contained in:
parent
df94e17305
commit
d9c5345d3c
@ -3,6 +3,8 @@ pywb 0.7.2 changelist
|
||||
|
||||
* Experiment with disabling DASH for YT
|
||||
|
||||
* New ``req_cookie_rewrite`` rewrite directive to rewrite the outgoing ``Cookie`` header; it can be used to force a specific cookie value for a url prefix.
|
||||
|
||||
|
||||
pywb 0.7.1 changelist
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
@ -25,7 +27,7 @@ pywb 0.7.1 changelist
|
||||
- setAttribute override
|
||||
- Date override sets date to replay timestamp
|
||||
- Image() object override
|
||||
- ability to disable dynamic attribute rewriting by setting `_no_rewrite` on an element.
|
||||
- ability to disable dynamic attribute rewriting by setting ``_no_rewrite`` on an element.
|
||||
|
||||
* Type detection: resolve conflict between text/html that is served under js_ mod, resolve if html or js.
|
||||
|
||||
|
@ -50,10 +50,11 @@ class LiveRewriter(object):
|
||||
|
||||
return (status_headers, stream)
|
||||
|
||||
def translate_headers(self, url, env):
|
||||
def translate_headers(self, url, urlkey, env):
|
||||
headers = {}
|
||||
|
||||
splits = urlsplit(url)
|
||||
has_cookies = False
|
||||
|
||||
for name, value in env.iteritems():
|
||||
if name == 'HTTP_HOST':
|
||||
@ -73,6 +74,11 @@ class LiveRewriter(object):
|
||||
elif name == 'HTTP_REFERER':
|
||||
continue
|
||||
|
||||
elif name == 'HTTP_COOKIE':
|
||||
name = 'Cookie'
|
||||
value = self._req_cookie_rewrite(urlkey, value)
|
||||
has_cookies = True
|
||||
|
||||
elif name.startswith('HTTP_'):
|
||||
name = name[5:].title().replace('_', '-')
|
||||
|
||||
@ -87,9 +93,28 @@ class LiveRewriter(object):
|
||||
if value:
|
||||
headers[name] = value
|
||||
|
||||
if not has_cookies:
|
||||
value = self._req_cookie_rewrite(urlkey, '')
|
||||
if value:
|
||||
headers['Cookie'] = value
|
||||
|
||||
return headers
|
||||
|
||||
def _req_cookie_rewrite(self, urlkey, value):
    """Apply the matching rule's ``req_cookie_rewrite`` entries to a Cookie value.

    The first rule matching ``urlkey`` is looked up in
    ``self.rewriter.ruleset``. Each of its ``req_cookie_rewrite`` entries
    is applied in order, so a later entry sees the output of the
    earlier ones.

    :param urlkey: url key used to select the rule
    :param value: current ``Cookie`` header value ('' if the request had none)
    :return: the rewritten value, or ``value`` unchanged when no rule
        matches or the rule has no cookie rewrites
    """
    rule = self.rewriter.ruleset.get_first_match(urlkey)
    if not rule or not rule.req_cookie_rewrite:
        return value

    for cr in rule.req_cookie_rewrite:
        # Only the lookups are guarded: an entry lacking 'rx' or 'replace'
        # is skipped (best-effort), while errors raised by the
        # substitution itself are no longer at risk of being swallowed.
        try:
            rx = cr['rx']
            replace = cr['replace']
        except KeyError:
            continue

        value = rx.sub(replace, value)

    return value
|
||||
|
||||
def fetch_http(self, url,
|
||||
urlkey=None,
|
||||
env=None,
|
||||
req_headers=None,
|
||||
follow_redirects=False,
|
||||
@ -109,7 +134,7 @@ class LiveRewriter(object):
|
||||
method = env['REQUEST_METHOD'].upper()
|
||||
input_ = env['wsgi.input']
|
||||
|
||||
req_headers.update(self.translate_headers(url, env))
|
||||
req_headers.update(self.translate_headers(url, urlkey, env))
|
||||
|
||||
if method in ('POST', 'PUT'):
|
||||
len_ = env.get('CONTENT_LENGTH')
|
||||
@ -155,17 +180,18 @@ class LiveRewriter(object):
|
||||
if url.startswith('//'):
|
||||
url = 'http:' + url
|
||||
|
||||
# explicit urlkey may be passed in (say for testing)
|
||||
if not urlkey:
|
||||
urlkey = canonicalize(url)
|
||||
|
||||
if is_http(url):
|
||||
(status_headers, stream) = self.fetch_http(url, env, req_headers,
|
||||
(status_headers, stream) = self.fetch_http(url, urlkey, env,
|
||||
req_headers,
|
||||
follow_redirects,
|
||||
ignore_proxies)
|
||||
else:
|
||||
(status_headers, stream) = self.fetch_local_file(url)
|
||||
|
||||
# explicit urlkey may be passed in (say for testing)
|
||||
if not urlkey:
|
||||
urlkey = canonicalize(url)
|
||||
|
||||
if timestamp is None:
|
||||
timestamp = datetime_to_timestamp(datetime.datetime.utcnow())
|
||||
|
||||
|
@ -8,6 +8,7 @@ from header_rewriter import HeaderRewriter
|
||||
from html_rewriter import HTMLRewriter
|
||||
|
||||
import itertools
|
||||
import re
|
||||
|
||||
|
||||
#=================================================================
|
||||
@ -47,6 +48,12 @@ class RewriteRules(BaseRule):
|
||||
# cookie rewrite scope
|
||||
self.cookie_scope = config.get('cookie_scope', 'default')
|
||||
|
||||
req_cookie_rewrite = config.get('req_cookie_rewrite', [])
|
||||
for rc in req_cookie_rewrite:
|
||||
rc['rx'] = re.compile(rc.get('match', ''))
|
||||
|
||||
self.req_cookie_rewrite = req_cookie_rewrite
|
||||
|
||||
def _add_custom_regexs(self, field, config):
|
||||
regexs = config.get(field + '_regexs')
|
||||
if not regexs:
|
||||
|
@ -22,10 +22,43 @@ def test_csrf_token_headers():
|
||||
rewriter = LiveRewriter()
|
||||
env = {'HTTP_X_CSRFTOKEN': 'wrong', 'HTTP_COOKIE': 'csrftoken=foobar'}
|
||||
|
||||
req_headers = rewriter.translate_headers('http://example.com/', env)
|
||||
req_headers = rewriter.translate_headers('http://example.com/', 'com,example)/', env)
|
||||
|
||||
assert req_headers == {'X-CSRFToken': 'foobar', 'Cookie': 'csrftoken=foobar'}
|
||||
|
||||
def test_req_cookie_rewrite_1():
    """A request cookie ``A=B`` under the test urlkey yields ``A=B; FOO=&bar=1``."""
    headers = LiveRewriter().translate_headers(
        'test.example.example/',
        'example,example,test)/',
        {'HTTP_COOKIE': 'A=B'})

    expected = {'Cookie': 'A=B; FOO=&bar=1'}
    assert headers == expected
|
||||
|
||||
def test_req_cookie_rewrite_2():
    """A request cookie ``FOO=goo`` under the test urlkey yields ``FOO=&bar=1``."""
    headers = LiveRewriter().translate_headers(
        'test.example.example/',
        'example,example,test)/',
        {'HTTP_COOKIE': 'FOO=goo'})

    expected = {'Cookie': 'FOO=&bar=1'}
    assert headers == expected
|
||||
|
||||
def test_req_cookie_rewrite_3():
    """With no request cookie at all, the test urlkey yields ``; FOO=&bar=1``."""
    headers = LiveRewriter().translate_headers(
        'test.example.example/',
        'example,example,test)/',
        {})

    expected = {'Cookie': '; FOO=&bar=1'}
    assert headers == expected
|
||||
|
||||
def test_local_1():
|
||||
status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample.html',
|
||||
urlrewriter,
|
||||
|
@ -162,7 +162,7 @@ rules:
|
||||
args:
|
||||
- id
|
||||
- itag
|
||||
- mime
|
||||
#- mime
|
||||
|
||||
filter:
|
||||
- '~urlkey:{0}'
|
||||
@ -187,15 +187,24 @@ rules:
|
||||
js_rewrite_location: location
|
||||
|
||||
|
||||
# watch config changes
|
||||
- url_prefix: 'com,youtube)/watch'
|
||||
# watch and embed config changes
|
||||
- url_prefix: 'com,youtube)/'
|
||||
|
||||
rewrite:
|
||||
|
||||
js_regexs:
|
||||
- match: 'ytplayer.load\(\);'
|
||||
replace: 'ytplayer.config.args.dash = "0"; ytplayer.config.args.dashmpd = ""; {0}'
|
||||
|
||||
- match: 'yt\.setConfig.*PLAYER_CONFIG.*args": {'
|
||||
replace: '{0} "dash": "0", dashmpd: "", '
|
||||
|
||||
req_cookie_rewrite:
|
||||
- match: '^(((?!PREF).)*)$'
|
||||
replace: '\1; PREF=f2=40000000'
|
||||
|
||||
- match: '(.*PREF=)([^ ;]*)(.*)'
|
||||
replace: '\1&f2=40000000\3'
|
||||
|
||||
# testing rules -- not for valid domain
|
||||
#=================================================================
|
||||
# this rule block is a non-existent prefix merely for testing
|
||||
@ -217,6 +226,15 @@ rules:
|
||||
rewrite:
|
||||
js_rewrite_location: urls
|
||||
|
||||
req_cookie_rewrite:
|
||||
- match: '^(((?!FOO).)*)$'
|
||||
replace: '\1; FOO=bar=1'
|
||||
|
||||
- match: '(.*FOO=)([^ ;]*)(.*)'
|
||||
replace: '\1&bar=1\3'
|
||||
|
||||
- match: ''
|
||||
invalid_: ''
|
||||
|
||||
# all domain rules -- fallback to this dataset
|
||||
#=================================================================
|
||||
|
Loading…
x
Reference in New Issue
Block a user