1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-23 06:32:24 +01:00
pywb/urlrewrite/cookies.py

140 lines
3.9 KiB
Python

from pywb.rewrite.cookie_rewriter import WbUrlBaseCookieRewriter
from pywb.utils.timeutils import datetime_to_http_date
from six.moves.http_cookiejar import CookieJar, DefaultCookiePolicy
import redis
import tldextract
import time
import datetime
import six
# =============================================================================
class CookieTracker(object):
    """Look up domain-scoped cookies that were cached in redis.

    Cookies are read from redis hashes named ``cookie_key + '.' + domain``,
    one hash per parent domain of the requested url. Each hash maps a cookie
    name to its value plus any attributes (``value; Path=...; HttpOnly``).
    """

    def __init__(self, redis):
        """Store the redis client used for all lookups.

        :param redis: redis client object; it must provide ``pipeline()``.
        """
        self.redis = redis

    def get_rewriter(self, url_rewriter, cookie_key):
        """Return a ``DomainCacheCookieRewriter`` using this tracker's redis client.

        :param url_rewriter: url rewriter handed through to the cookie rewriter.
        :param cookie_key: prefix of the redis hash names used for caching.
        """
        return DomainCacheCookieRewriter(url_rewriter,
                                         self.redis,
                                         cookie_key)

    def get_cookie_headers(self, url, cookie_key):
        """Collect cached cookies that apply to the parent domains of ``url``.

        :param url: url whose host is split into parent domains.
        :param cookie_key: prefix of the redis hash names to query.
        :returns: ``(cookies, set_cookies)`` where ``cookies`` is a
            ``;``-joined string of ``name=value`` pairs and ``set_cookies``
            is a list of ``('Set-Cookie', value)`` tuples, each with
            ``Max-Age=120`` appended. ``(None, None)`` is returned when
            ``get_subdomains`` yields nothing for ``url``.
        """
        subds = self.get_subdomains(url)
        if not subds:
            return None, None

        # Queue one HGETALL per candidate domain and run them as a single
        # batch; the pipeline is executed exactly once.
        pipe = self.redis.pipeline()
        for subdomain in subds:
            pipe.hgetall(cookie_key + '.' + subdomain)
        all_res = pipe.execute()

        cookies = []
        set_cookies = []

        for res in all_res:
            if not res:
                continue

            for name, value in res.items():
                full = self._to_text(name) + '=' + self._to_text(value)
                # Everything before the first ';' is the bare name=value
                # pair; the remainder are cookie attributes.
                cookies.append(full.split(';')[0])
                set_cookies.append(('Set-Cookie', full + '; Max-Age=120'))

        return ';'.join(cookies), set_cookies

    @staticmethod
    def _to_text(value):
        """Decode ``value`` as UTF-8 if it is bytes, else return it unchanged.

        The redis client may hand back either bytes or already-decoded
        strings depending on how it was configured.
        """
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value

    @staticmethod
    def get_subdomains(url):
        """List every parent domain of the host in ``url``.

        The host is stripped of its left-most label repeatedly until only
        the registered domain (``domain.suffix``) remains; each intermediate
        name is collected. The full host itself is not included.

        :param url: url (or host) passed to ``tldextract.extract``.
        :returns: list of domain names, most specific first, or ``None``
            when the host has no subdomain part.
        """
        tld = tldextract.extract(url)

        if not tld.subdomain:
            return None

        # With an empty suffix (host not under a known public suffix) do not
        # append a dot, otherwise every name would end in '.' and never match
        # a stored cookie domain.
        if tld.suffix:
            main = tld.domain + '.' + tld.suffix
        else:
            main = tld.domain

        full = tld.subdomain + '.' + main

        doms = []
        while full != main:
            full = full.split('.', 1)[1]
            doms.append(full)

        return doms
# =============================================================================
class DomainCacheCookieRewriter(WbUrlBaseCookieRewriter):
    """Cookie rewriter that also caches domain-scoped cookies in redis.

    A cookie carrying a ``Domain`` attribute has that attribute removed and
    is additionally stored in the redis hash ``cookie_key + domain`` (field:
    cookie name, value: cookie value plus selected attributes).
    """

    # Lifetime, in seconds, of each per-domain cookie hash in redis.
    CACHE_EXPIRE_SECS = 120

    # Accepted layouts of the ``Expires`` attribute, tried in order.
    EXPIRES_FORMATS = (
        '%a, %d-%b-%Y %H:%M:%S GMT',
        '%a, %d %b %Y %H:%M:%S GMT',
        '%a, %d-%b-%y %H:%M:%S GMT',
    )

    # Reference point for converting a parsed (naive, UTC) date to a timestamp.
    EPOCH = datetime.datetime(1970, 1, 1)

    def __init__(self, url_rewriter, redis, cookie_key):
        """Set up the rewriter.

        :param url_rewriter: url rewriter passed to the base class and used
            to rewrite cookie paths.
        :param redis: redis client used for caching domain cookies.
        :param cookie_key: prefix of the redis hash names.
        """
        super(DomainCacheCookieRewriter, self).__init__(url_rewriter)
        self.redis = redis
        self.cookie_key = cookie_key

    def rewrite_cookie(self, name, morsel):
        """Rewrite one cookie morsel, caching it in redis if it has a domain.

        The ``domain`` attribute is always removed from ``morsel``. When it
        was set, the cookie value (plus ``Path``, ``HttpOnly`` and ``Secure``
        if present) is stored in redis. A ``path`` attribute, if present, is
        rewritten through ``url_rewriter``.

        :param name: cookie name (unused here; ``morsel.key`` is used).
        :param morsel: cookie morsel; modified in place.
        :returns: the same ``morsel``.
        """
        domain = morsel.pop('domain', '')

        if domain:
            # Lookups use cookie_key + '.' + <domain>, so a Domain attribute
            # given without the leading dot must be stored with one to be
            # found again.
            if not domain.startswith('.'):
                domain = '.' + domain

            parts = [morsel.value]

            path = morsel.get('path')
            if path:
                parts.append('Path=' + path)

            if morsel.get('httponly'):
                parts.append('HttpOnly')

            if morsel.get('secure'):
                parts.append('Secure')

            string = '; '.join(parts)
            redis_key = self.cookie_key + domain

            # redis-py's pipeline helper sends the queued commands when the
            # ``with`` block exits.
            with redis.utils.pipeline(self.redis) as pi:
                pi.hset(redis_key, morsel.key, string)
                pi.expire(redis_key, self.CACHE_EXPIRE_SECS)

        # The path is rewritten whether or not a domain was set.
        if morsel.get('path'):
            morsel['path'] = self.url_rewriter.rewrite(morsel['path'])

        return morsel

    def get_expire_sec(self, morsel):
        """Return the remaining lifetime of a cookie in seconds.

        ``max-age`` takes precedence; otherwise ``expires`` is parsed as a
        GMT/UTC date and the difference to the current time is returned
        (negative if already expired).

        :param morsel: cookie morsel to inspect.
        :returns: ``int`` from ``max-age``, ``float`` computed from
            ``expires``, or ``None`` when neither attribute yields a usable
            value.
        """
        max_age = morsel.get('max-age')
        if max_age:
            try:
                return int(max_age)
            except (TypeError, ValueError):
                # Malformed Max-Age: ignore it and fall back to Expires.
                pass

        expires = morsel.get('expires')
        if not expires:
            return None

        expires = expires.replace(' UTC', ' GMT')

        for fmt in self.EXPIRES_FORMATS:
            try:
                parsed = datetime.datetime.strptime(expires, fmt)
            except ValueError:
                continue

            # ``parsed`` is a naive UTC datetime; subtracting the epoch gives
            # a UTC timestamp independent of the local timezone and DST.
            return (parsed - self.EPOCH).total_seconds() - time.time()

        # No known date layout matched.
        return None
# ============================================================================