1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-24 06:59:52 +01:00

rewrite: move extract_client_cookie to utils for access at rewrite

root cookie_rewriter: keep max-age
add csrf token copying (experimental)
update tests
This commit is contained in:
Ilya Kreymer 2014-10-12 03:07:54 -07:00
parent 498a864441
commit 50bf7d2634
8 changed files with 60 additions and 55 deletions

View File

@ -1,4 +1,5 @@
from wbrequestresponse import WbResponse, WbRequest from wbrequestresponse import WbResponse
from pywb.utils.loaders import extract_client_cookie
from pywb.utils.statusandheaders import StatusAndHeaders from pywb.utils.statusandheaders import StatusAndHeaders
from pywb.rewrite.wburl import WbUrl from pywb.rewrite.wburl import WbUrl
@ -193,8 +194,7 @@ class CookieResolver(BaseCollResolver):
return self.make_redir_response(wb_url.url) return self.make_redir_response(wb_url.url)
elif server_name.endswith(self.set_prefix): elif server_name.endswith(self.set_prefix):
old_sesh_id = WbRequest.extract_client_cookie(env, old_sesh_id = extract_client_cookie(env, self.cookie_name)
self.cookie_name)
sesh_id = self.create_renew_sesh_id(old_sesh_id) sesh_id = self.create_renew_sesh_id(old_sesh_id)
if sesh_id != old_sesh_id: if sesh_id != old_sesh_id:
@ -283,7 +283,7 @@ class CookieResolver(BaseCollResolver):
del self.cache[sesh_id + ':t'] del self.cache[sesh_id + ':t']
def get_coll(self, env): def get_coll(self, env):
sesh_id = WbRequest.extract_client_cookie(env, self.cookie_name) sesh_id = extract_client_cookie(env, self.cookie_name)
coll = None coll = None
ts = None ts = None

View File

@ -37,18 +37,6 @@
>>> req_from_uri('/web/2010/example.com', {'wsgi.url_scheme': 'http', 'HTTP_HOST': 'localhost:8080'}).extract_referrer_wburl_str() >>> req_from_uri('/web/2010/example.com', {'wsgi.url_scheme': 'http', 'HTTP_HOST': 'localhost:8080'}).extract_referrer_wburl_str()
# cookie extract tests
>>> WbRequest.extract_client_cookie(dict(HTTP_COOKIE='a=b; c=d'), 'a')
'b'
>>> WbRequest.extract_client_cookie(dict(HTTP_COOKIE='a=b; c=d'), 'c')
'd'
>>> WbRequest.extract_client_cookie(dict(HTTP_COOKIE='a=b; c=d'), 'x')
>>> WbRequest.extract_client_cookie({}, 'y')
# WbResponse Tests # WbResponse Tests
# ================= # =================
>>> WbResponse.text_response('Test') >>> WbResponse.text_response('Test')

View File

@ -136,30 +136,6 @@ class WbRequest(object):
if post_query: if post_query:
self.wb_url.url = append_post_query(self.wb_url.url, post_query) self.wb_url.url = append_post_query(self.wb_url.url, post_query)
@staticmethod
def extract_client_cookie(env, cookie_name):
cookie_header = env.get('HTTP_COOKIE')
if not cookie_header:
return None
# attempt to extract cookie_name only
inx = cookie_header.find(cookie_name)
if inx < 0:
return None
end_inx = cookie_header.find(';', inx)
if end_inx > 0:
value = cookie_header[inx:end_inx]
else:
value = cookie_header[inx:]
value = value.split('=')
if len(value) < 2:
return None
value = value[1].strip()
return value
#================================================================= #=================================================================
class WbResponse(object): class WbResponse(object):

View File

@ -23,6 +23,15 @@ class WbUrlBaseCookieRewriter(object):
return results return results
def _remove_age_opts(self, morsel):
# remove expires as it refers to archived time
if morsel.get('expires'):
del morsel['expires']
# don't use max-age, just expire at end of session
if morsel.get('max-age'):
del morsel['max-age']
#================================================================= #=================================================================
class MinimalScopeCookieRewriter(WbUrlBaseCookieRewriter): class MinimalScopeCookieRewriter(WbUrlBaseCookieRewriter):
@ -42,14 +51,7 @@ class MinimalScopeCookieRewriter(WbUrlBaseCookieRewriter):
elif morsel.get('path'): elif morsel.get('path'):
morsel['path'] = self.url_rewriter.rewrite(morsel['path']) morsel['path'] = self.url_rewriter.rewrite(morsel['path'])
# remove expires as it refers to archived time self._remove_age_opts(morsel)
if morsel.get('expires'):
del morsel['expires']
# don't use max-age, just expire at end of session
if morsel.get('max-age'):
del morsel['max-age']
return morsel return morsel
@ -64,15 +66,12 @@ class RootScopeCookieRewriter(WbUrlBaseCookieRewriter):
def rewrite_cookie(self, name, morsel): def rewrite_cookie(self, name, morsel):
# get root path # get root path
morsel['path'] = self.url_rewriter.root_path morsel['path'] = self.url_rewriter.root_path
# remove domain # remove domain
if morsel.get('domain'): if morsel.get('domain'):
del morsel['domain'] del morsel['domain']
# remove expires as it refers to archived time self._remove_age_opts(morsel)
if morsel.get('expires'):
del morsel['expires']
return morsel return morsel

View File

@ -10,6 +10,7 @@ import logging
from urlparse import urlsplit from urlparse import urlsplit
from pywb.utils.loaders import is_http, LimitReader, BlockLoader from pywb.utils.loaders import is_http, LimitReader, BlockLoader
from pywb.utils.loaders import extract_client_cookie
from pywb.utils.timeutils import datetime_to_timestamp from pywb.utils.timeutils import datetime_to_timestamp
from pywb.utils.statusandheaders import StatusAndHeaders from pywb.utils.statusandheaders import StatusAndHeaders
from pywb.utils.canonicalize import canonicalize from pywb.utils.canonicalize import canonicalize
@ -56,6 +57,12 @@ class LiveRewriter(object):
name = 'Origin' name = 'Origin'
value = (splits.scheme + '://' + splits.netloc) value = (splits.scheme + '://' + splits.netloc)
elif name == 'HTTP_X_CSRFTOKEN':
name = 'X-CSRFToken'
cookie_val = extract_client_cookie(env, 'csrftoken')
if cookie_val:
value = cookie_val
elif name.startswith('HTTP_'): elif name.startswith('HTTP_'):
name = name[5:].title().replace('_', '-') name = name[5:].title().replace('_', '-')

View File

@ -77,6 +77,31 @@ def append_post_query(url, post_query):
return url return url
#=================================================================
def extract_client_cookie(env, cookie_name):
    """Return the value of the cookie ``cookie_name`` sent by the client.

    The raw ``Cookie`` request header is read from ``env['HTTP_COOKIE']``
    and split into ``name=value`` pairs on ``;``.

    :param env: mapping holding the request environment (only the
        ``HTTP_COOKIE`` key is read)
    :param cookie_name: exact name of the cookie to look up
    :return: the stripped cookie value, or ``None`` if the header is
        missing/empty or no ``name=value`` pair has that exact name
    """
    cookie_header = env.get('HTTP_COOKIE')
    if not cookie_header:
        return None

    for pair in cookie_header.split(';'):
        # split on the first '=' only, so values that themselves contain
        # '=' (e.g. base64 padding) are returned whole
        name, sep, value = pair.partition('=')
        # compare the full name: a plain substring search would let 'id'
        # match inside 'sid=...' or inside another cookie's value
        if sep and name.strip() == cookie_name:
            return value.strip()

    return None
#================================================================= #=================================================================
class BlockLoader(object): class BlockLoader(object):
""" """

View File

@ -43,6 +43,17 @@ True
# test with extra id, ensure 4 parts of the A-B=C-D form are present # test with extra id, ensure 4 parts of the A-B=C-D form are present
>>> len(re.split('[-=]', HMACCookieMaker('test', 'test', 5).make('extra'))) >>> len(re.split('[-=]', HMACCookieMaker('test', 'test', 5).make('extra')))
4 4
# cookie extract tests
>>> extract_client_cookie(dict(HTTP_COOKIE='a=b; c=d'), 'a')
'b'
>>> extract_client_cookie(dict(HTTP_COOKIE='a=b; c=d'), 'c')
'd'
>>> extract_client_cookie(dict(HTTP_COOKIE='a=b; c=d'), 'x')
>>> extract_client_cookie({}, 'y')
""" """
@ -50,7 +61,7 @@ True
import re import re
from io import BytesIO from io import BytesIO
from pywb.utils.loaders import BlockLoader, HMACCookieMaker from pywb.utils.loaders import BlockLoader, HMACCookieMaker
from pywb.utils.loaders import LimitReader from pywb.utils.loaders import LimitReader, extract_client_cookie
from pywb import get_test_dir from pywb import get_test_dir

View File

@ -76,7 +76,6 @@ class QueryHandler(object):
return self.make_cdx_response(wbrequest, cdx_iter, params['output']) return self.make_cdx_response(wbrequest, cdx_iter, params['output'])
def load_cdx(self, wbrequest, params): def load_cdx(self, wbrequest, params):
print(params)
if wbrequest: if wbrequest:
# add any custom filter from the request # add any custom filter from the request
if wbrequest.query_filter: if wbrequest.query_filter: