From 50bf7d2634c4b252b8ffd7f64cca6f60ed752447 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Sun, 12 Oct 2014 03:07:54 -0700 Subject: [PATCH] rewrite: move extract_client_cookie to utils for access at rewrite root cookie_rewriter: keep max-age add csrf token copying (experimental) update tests --- pywb/framework/proxy_resolvers.py | 8 +++--- pywb/framework/test/test_wbrequestresponse.py | 12 --------- pywb/framework/wbrequestresponse.py | 24 ------------------ pywb/rewrite/cookie_rewriter.py | 25 +++++++++---------- pywb/rewrite/rewrite_live.py | 7 ++++++ pywb/utils/loaders.py | 25 +++++++++++++++++++ pywb/utils/test/test_loaders.py | 13 +++++++++- pywb/webapp/query_handler.py | 1 - 8 files changed, 60 insertions(+), 55 deletions(-) diff --git a/pywb/framework/proxy_resolvers.py b/pywb/framework/proxy_resolvers.py index dd4d4ad1..cbe636ca 100644 --- a/pywb/framework/proxy_resolvers.py +++ b/pywb/framework/proxy_resolvers.py @@ -1,4 +1,5 @@ -from wbrequestresponse import WbResponse, WbRequest +from wbrequestresponse import WbResponse +from pywb.utils.loaders import extract_client_cookie from pywb.utils.statusandheaders import StatusAndHeaders from pywb.rewrite.wburl import WbUrl @@ -193,8 +194,7 @@ class CookieResolver(BaseCollResolver): return self.make_redir_response(wb_url.url) elif server_name.endswith(self.set_prefix): - old_sesh_id = WbRequest.extract_client_cookie(env, - self.cookie_name) + old_sesh_id = extract_client_cookie(env, self.cookie_name) sesh_id = self.create_renew_sesh_id(old_sesh_id) if sesh_id != old_sesh_id: @@ -283,7 +283,7 @@ class CookieResolver(BaseCollResolver): del self.cache[sesh_id + ':t'] def get_coll(self, env): - sesh_id = WbRequest.extract_client_cookie(env, self.cookie_name) + sesh_id = extract_client_cookie(env, self.cookie_name) coll = None ts = None diff --git a/pywb/framework/test/test_wbrequestresponse.py b/pywb/framework/test/test_wbrequestresponse.py index a1e56158..5bbb65b8 100644 --- a/pywb/framework/test/test_wbrequestresponse.py +++ b/pywb/framework/test/test_wbrequestresponse.py @@ -37,18 +37,6 @@ >>> req_from_uri('/web/2010/example.com', {'wsgi.url_scheme': 'http', 'HTTP_HOST': 'localhost:8080'}).extract_referrer_wburl_str() -# cookie extract tests ->>> WbRequest.extract_client_cookie(dict(HTTP_COOKIE='a=b; c=d'), 'a') -'b' - ->>> WbRequest.extract_client_cookie(dict(HTTP_COOKIE='a=b; c=d'), 'c') -'d' - ->>> WbRequest.extract_client_cookie(dict(HTTP_COOKIE='a=b; c=d'), 'x') - ->>> WbRequest.extract_client_cookie({}, 'y') - - # WbResponse Tests # ================= >>> WbResponse.text_response('Test') diff --git a/pywb/framework/wbrequestresponse.py b/pywb/framework/wbrequestresponse.py index ddb5b8d1..7f2c4337 100644 --- a/pywb/framework/wbrequestresponse.py +++ b/pywb/framework/wbrequestresponse.py @@ -136,30 +136,6 @@ class WbRequest(object): if post_query: self.wb_url.url = append_post_query(self.wb_url.url, post_query) - @staticmethod - def extract_client_cookie(env, cookie_name): - cookie_header = env.get('HTTP_COOKIE') - if not cookie_header: - return None - - # attempt to extract cookie_name only - inx = cookie_header.find(cookie_name) - if inx < 0: - return None - - end_inx = cookie_header.find(';', inx) - if end_inx > 0: - value = cookie_header[inx:end_inx] - else: - value = cookie_header[inx:] - - value = value.split('=') - if len(value) < 2: - return None - - value = value[1].strip() - return value - #================================================================= class WbResponse(object): diff --git a/pywb/rewrite/cookie_rewriter.py b/pywb/rewrite/cookie_rewriter.py index 0eb507b6..4724df4c 100644 --- a/pywb/rewrite/cookie_rewriter.py +++ b/pywb/rewrite/cookie_rewriter.py @@ -23,6 +23,15 @@ class WbUrlBaseCookieRewriter(object): return results + def _remove_age_opts(self, morsel): + # remove expires as it refers to archived time + if morsel.get('expires'): + del morsel['expires'] + + # don't use max-age, just expire at end of session + if morsel.get('max-age'): + del morsel['max-age'] + #================================================================= class MinimalScopeCookieRewriter(WbUrlBaseCookieRewriter): @@ -42,14 +51,7 @@ class MinimalScopeCookieRewriter(WbUrlBaseCookieRewriter): elif morsel.get('path'): morsel['path'] = self.url_rewriter.rewrite(morsel['path']) - # remove expires as it refers to archived time - if morsel.get('expires'): - del morsel['expires'] - - # don't use max-age, just expire at end of session - if morsel.get('max-age'): - del morsel['max-age'] - + self._remove_age_opts(morsel) return morsel @@ -64,15 +66,12 @@ class RootScopeCookieRewriter(WbUrlBaseCookieRewriter): def rewrite_cookie(self, name, morsel): # get root path morsel['path'] = self.url_rewriter.root_path - + # remove domain if morsel.get('domain'): del morsel['domain'] - # remove expires as it refers to archived time - if morsel.get('expires'): - del morsel['expires'] - + self._remove_age_opts(morsel) return morsel diff --git a/pywb/rewrite/rewrite_live.py b/pywb/rewrite/rewrite_live.py index be891498..7e72fea6 100644 --- a/pywb/rewrite/rewrite_live.py +++ b/pywb/rewrite/rewrite_live.py @@ -10,6 +10,7 @@ import logging from urlparse import urlsplit from pywb.utils.loaders import is_http, LimitReader, BlockLoader +from pywb.utils.loaders import extract_client_cookie from pywb.utils.timeutils import datetime_to_timestamp from pywb.utils.statusandheaders import StatusAndHeaders from pywb.utils.canonicalize import canonicalize @@ -56,6 +57,12 @@ class LiveRewriter(object): name = 'Origin' value = (splits.scheme + '://' + splits.netloc) + elif name == 'HTTP_X_CSRFTOKEN': + name = 'X-CSRFToken' + cookie_val = extract_client_cookie(env, 'csrftoken') + if cookie_val: + value = cookie_val + elif name.startswith('HTTP_'): name = name[5:].title().replace('_', '-') diff --git a/pywb/utils/loaders.py b/pywb/utils/loaders.py index 107379a2..0e9e3fda 100644 --- a/pywb/utils/loaders.py +++ b/pywb/utils/loaders.py @@ -77,6 +77,31 @@ def append_post_query(url, post_query): return url +#================================================================= +def extract_client_cookie(env, cookie_name): + cookie_header = env.get('HTTP_COOKIE') + if not cookie_header: + return None + + # attempt to extract cookie_name only + inx = cookie_header.find(cookie_name) + if inx < 0: + return None + + end_inx = cookie_header.find(';', inx) + if end_inx > 0: + value = cookie_header[inx:end_inx] + else: + value = cookie_header[inx:] + + value = value.split('=') + if len(value) < 2: + return None + + value = value[1].strip() + return value + + #================================================================= class BlockLoader(object): """ diff --git a/pywb/utils/test/test_loaders.py b/pywb/utils/test/test_loaders.py index 322b9169..12ad7c44 100644 --- a/pywb/utils/test/test_loaders.py +++ b/pywb/utils/test/test_loaders.py @@ -43,6 +43,17 @@ True # test with extra id, ensure 4 parts of the A-B=C-D form are present >>> len(re.split('[-=]', HMACCookieMaker('test', 'test', 5).make('extra'))) 4 + +# cookie extract tests +>>> extract_client_cookie(dict(HTTP_COOKIE='a=b; c=d'), 'a') +'b' + +>>> extract_client_cookie(dict(HTTP_COOKIE='a=b; c=d'), 'c') +'d' + +>>> extract_client_cookie(dict(HTTP_COOKIE='a=b; c=d'), 'x') + +>>> extract_client_cookie({}, 'y') """ @@ -50,7 +61,7 @@ True import re from io import BytesIO from pywb.utils.loaders import BlockLoader, HMACCookieMaker -from pywb.utils.loaders import LimitReader +from pywb.utils.loaders import LimitReader, extract_client_cookie from pywb import get_test_dir diff --git a/pywb/webapp/query_handler.py b/pywb/webapp/query_handler.py index ee03b8b9..2ac80fa7 100644 --- a/pywb/webapp/query_handler.py +++ b/pywb/webapp/query_handler.py @@ -76,7 +76,6 @@ class QueryHandler(object): return self.make_cdx_response(wbrequest, cdx_iter, params['output']) def load_cdx(self, wbrequest, params): - print(params) if wbrequest: # add any custom filter from the request if wbrequest.query_filter: