1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-25 23:47:47 +01:00
pywb/pywb/utils/timeutils.py
Ilya Kreymer 80dcb6ff27 rewrite: improvements to non-exact replay mode, redir_to_exact option set to false
frames: add request_ts to wbinfo and use that as the timestamp in the top-frame. for exact replay, request_ts == timestamp
for latest replay / no timestamp / memento timegate, redirect to current time instead of time of last capture, while serving
last capture.
timeutils: add timestamp_now() function to return timestamp of current datetime
Add extra tests for this mode
Tracked via #72
2015-02-17 17:51:45 -08:00

289 lines
7.0 KiB
Python

"""
utility functions for converting between
datetime, iso date and 14-digit timestamp
"""
import re
import time
import datetime
import calendar
from itertools import imap
from email.utils import parsedate, formatdate
#=================================================================
# str <-> datetime conversion
#=================================================================
# Regex matching any single non-digit character; used to split a date
# string into its numeric fields.
DATE_TIMESPLIT = re.compile(r'[^\d]')
# strftime format producing a 14-digit timestamp (YYYYMMDDhhmmss).
TIMESTAMP_14 = '%Y%m%d%H%M%S'
#PAD_STAMP_END = '29991231235959'
# Default padding template: timestamps shorter than 6 characters are
# completed with the tail of this string (year 2999, month 12).
PAD_6 = '299912'
def iso_date_to_datetime(string):
    """
    Parse an ISO-style date string into a naive ``datetime.datetime``.

    The string is split on every non-digit character and the resulting
    numeric fields are passed positionally to ``datetime.datetime``
    (year, month, day, hour, minute, second, ...). Separators such as
    ``-``, ``:``, ``T`` and a trailing ``Z`` are therefore only treated
    as delimiters; no timezone conversion is performed.

    Raises ``ValueError`` if a field is not an integer or is out of range
    for ``datetime.datetime``.

    >>> iso_date_to_datetime('2013-12-26T10:11:12Z')
    datetime.datetime(2013, 12, 26, 10, 11, 12)

    >>> iso_date_to_datetime('2013-12-26T10:11:12')
    datetime.datetime(2013, 12, 26, 10, 11, 12)
    """
    nums = DATE_TIMESPLIT.split(string)

    # A trailing separator (e.g. the 'Z' suffix) leaves one empty
    # field at the end of the split result -- drop it.
    if nums[-1] == '':
        nums = nums[:-1]

    # A list comprehension is used instead of itertools.imap, which only
    # exists on Python 2; the result is identical on both major versions.
    return datetime.datetime(*[int(num) for num in nums])
def http_date_to_datetime(string):
    """
    Parse an HTTP date header value into a naive ``datetime.datetime``.

    Only the first six fields returned by ``email.utils.parsedate``
    (year through second) are used.

    >>> http_date_to_datetime('Thu, 26 Dec 2013 09:50:10 GMT')
    datetime.datetime(2013, 12, 26, 9, 50, 10)
    """
    year, month, day, hour, minute, second = parsedate(string)[:6]
    return datetime.datetime(year, month, day, hour, minute, second)
def datetime_to_http_date(the_datetime):
    """
    Format a datetime as an HTTP date string ending in ``GMT``.

    The datetime is first converted to seconds since the epoch via its
    UTC time tuple, then rendered with ``email.utils.formatdate``.

    >>> datetime_to_http_date(datetime.datetime(2013, 12, 26, 9, 50, 10))
    'Thu, 26 Dec 2013 09:50:10 GMT'

    # Verify inverses
    >>> x = 'Thu, 26 Dec 2013 09:50:10 GMT'
    >>> datetime_to_http_date(http_date_to_datetime(x)) == x
    True
    """
    epoch_secs = calendar.timegm(the_datetime.utctimetuple())
    http_date = formatdate(timeval=epoch_secs, localtime=False, usegmt=True)
    return http_date
def datetime_to_timestamp(the_datetime):
    """
    Render a datetime as a 14-digit timestamp string (``TIMESTAMP_14``).

    >>> datetime_to_timestamp(datetime.datetime(2013, 12, 26, 10, 11, 12))
    '20131226101112'
    """
    stamp = the_datetime.strftime(TIMESTAMP_14)
    return stamp
def timestamp_now():
    """
    Return the current UTC time as a 14-digit timestamp string.

    >>> len(timestamp_now())
    14
    """
    current_utc = datetime.datetime.utcnow()
    return datetime_to_timestamp(current_utc)
def iso_date_to_timestamp(string):
    """
    Convert an ISO-style date string to a 14-digit timestamp string.

    >>> iso_date_to_timestamp('2013-12-26T10:11:12Z')
    '20131226101112'

    >>> iso_date_to_timestamp('2013-12-26T10:11:12')
    '20131226101112'
    """
    parsed = iso_date_to_datetime(string)
    return datetime_to_timestamp(parsed)
def http_date_to_timestamp(string):
    """
    Convert an HTTP date header value to a 14-digit timestamp string.

    >>> http_date_to_timestamp('Thu, 26 Dec 2013 09:50:00 GMT')
    '20131226095000'

    >>> http_date_to_timestamp('Sun, 26 Jan 2014 20:08:04 GMT')
    '20140126200804'
    """
    parsed = http_date_to_datetime(string)
    return datetime_to_timestamp(parsed)
# complete a short timestamp from a padding template (default: 6 chars)
def _pad_timestamp(string, pad_str=PAD_6):
    """
    Extend ``string`` to the length of ``pad_str`` using the tail of
    ``pad_str``; strings already that long (or longer) are unchanged.

    >>> _pad_timestamp('20')
    '209912'

    >>> _pad_timestamp('2014')
    '201412'

    >>> _pad_timestamp('20141011')
    '20141011'

    >>> _pad_timestamp('201410110010')
    '201410110010'
    """
    # Slicing past the end of pad_str yields an empty string, so inputs
    # that are already long enough come back with nothing appended.
    missing_tail = pad_str[len(string):]
    return string + missing_tail
def timestamp_to_datetime(string):
    """
    Convert a full, partial or malformed timestamp string to a datetime.

    The string is first padded to at least 6 characters with ``PAD_6``.
    Each field (year, month, day, hour, minute, second) is then read from
    its fixed position and clamped to its valid range. A field that is
    missing (string too short) or not an integer takes the upper bound of
    its range, so partial timestamps resolve to the latest matching
    moment. The year is clamped to 1900..2999 and the day to the length
    of the resolved month. Characters past the 14th are ignored.

    # >14-digit -- rest ignored
    >>> timestamp_to_datetime('2014122609501011')
    datetime.datetime(2014, 12, 26, 9, 50, 10)

    # 14-digit
    >>> timestamp_to_datetime('20141226095010')
    datetime.datetime(2014, 12, 26, 9, 50, 10)

    # 13-digit padding
    >>> timestamp_to_datetime('2014122609501')
    datetime.datetime(2014, 12, 26, 9, 50, 59)

    # 12-digit padding
    >>> timestamp_to_datetime('201412260950')
    datetime.datetime(2014, 12, 26, 9, 50, 59)

    # 11-digit padding
    >>> timestamp_to_datetime('20141226095')
    datetime.datetime(2014, 12, 26, 9, 59, 59)

    # 10-digit padding
    >>> timestamp_to_datetime('2014122609')
    datetime.datetime(2014, 12, 26, 9, 59, 59)

    # 9-digit padding
    >>> timestamp_to_datetime('201412260')
    datetime.datetime(2014, 12, 26, 23, 59, 59)

    # 8-digit padding
    >>> timestamp_to_datetime('20141226')
    datetime.datetime(2014, 12, 26, 23, 59, 59)

    # 7-digit padding
    >>> timestamp_to_datetime('2014122')
    datetime.datetime(2014, 12, 31, 23, 59, 59)

    # 6-digit padding
    >>> timestamp_to_datetime('201410')
    datetime.datetime(2014, 10, 31, 23, 59, 59)

    # 5-digit padding
    >>> timestamp_to_datetime('20141')
    datetime.datetime(2014, 12, 31, 23, 59, 59)

    # 4-digit padding
    >>> timestamp_to_datetime('2014')
    datetime.datetime(2014, 12, 31, 23, 59, 59)

    # 3-digit padding
    >>> timestamp_to_datetime('201')
    datetime.datetime(2019, 12, 31, 23, 59, 59)

    # 2-digit padding
    >>> timestamp_to_datetime('20')
    datetime.datetime(2099, 12, 31, 23, 59, 59)

    # 1-digit padding
    >>> timestamp_to_datetime('2')
    datetime.datetime(2999, 12, 31, 23, 59, 59)

    # 1-digit out-of-range padding
    >>> timestamp_to_datetime('3')
    datetime.datetime(2999, 12, 31, 23, 59, 59)

    # 0-digit padding
    >>> timestamp_to_datetime('')
    datetime.datetime(2999, 12, 31, 23, 59, 59)

    # bad month
    >>> timestamp_to_datetime('20131709005601')
    datetime.datetime(2013, 12, 9, 0, 56, 1)

    # all out of range except minutes
    >>> timestamp_to_datetime('40001965252477')
    datetime.datetime(2999, 12, 31, 23, 24, 59)

    # not a number!
    >>> timestamp_to_datetime('2010abc')
    datetime.datetime(2010, 12, 31, 23, 59, 59)
    """
    # pad to 6 digits
    string = _pad_timestamp(string, PAD_6)

    def clamp(val, min_, max_):
        # Only conversion failures fall back to the upper bound; a bare
        # except here would also swallow KeyboardInterrupt / SystemExit.
        try:
            val = int(val)
        except (TypeError, ValueError):
            return max_
        return max(min_, min(val, max_))

    def extract(string, start, end, min_, max_):
        # A field that is not fully present resolves to its upper bound.
        if len(string) >= end:
            return clamp(string[start:end], min_, max_)
        return max_

    # now parse, clamp to boundary
    year = extract(string, 0, 4, 1900, 2999)
    month = extract(string, 4, 6, 1, 12)
    # the day's upper bound depends on the already-resolved year and month
    day = extract(string, 6, 8, 1, calendar.monthrange(year, month)[1])
    hour = extract(string, 8, 10, 0, 23)
    minute = extract(string, 10, 12, 0, 59)
    second = extract(string, 12, 14, 0, 59)

    return datetime.datetime(year=year,
                             month=month,
                             day=day,
                             hour=hour,
                             minute=minute,
                             second=second)
def timestamp_to_sec(string):
    """
    Convert a timestamp string to integer seconds since the epoch (UTC).

    >>> timestamp_to_sec('20131226095010')
    1388051410

    # rounds to end of 2014
    >>> timestamp_to_sec('2014')
    1420070399
    """
    parsed = timestamp_to_datetime(string)
    return calendar.timegm(parsed.utctimetuple())
def sec_to_timestamp(secs):
    """
    Convert seconds since the epoch to a 14-digit UTC timestamp string.

    >>> sec_to_timestamp(1388051410)
    '20131226095010'

    >>> sec_to_timestamp(1420070399)
    '20141231235959'
    """
    utc_datetime = datetime.datetime.utcfromtimestamp(secs)
    return datetime_to_timestamp(utc_datetime)
def timestamp_to_http_date(string):
    """
    Convert a timestamp string to an HTTP date string ending in ``GMT``.

    >>> timestamp_to_http_date('20131226095000')
    'Thu, 26 Dec 2013 09:50:00 GMT'

    >>> timestamp_to_http_date('20140126200804')
    'Sun, 26 Jan 2014 20:08:04 GMT'
    """
    parsed = timestamp_to_datetime(string)
    return datetime_to_http_date(parsed)
if __name__ == "__main__":
    # Executed as a script: run the doctests embedded in the docstrings above.
    import doctest

    doctest.testmod()