1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

timeutils: timestamp_to_datetime() uses custom timestamp parsing

instead of strptime, so that out-of-range fields in an invalid timestamp are
automatically clamped to the allowed range rather than raising an error.
It also now returns a datetime.datetime, as advertised, instead of a struct_time.
This commit is contained in:
Ilya Kreymer 2014-02-24 16:30:11 -08:00
parent a474335501
commit 7968f360ce
2 changed files with 127 additions and 22 deletions

View File

@ -17,7 +17,8 @@ DATE_TIMESPLIT = re.compile(r'[^\d]')
# strptime/strftime-style format of a full 14-digit timestamp: YYYYMMDDhhmmss
TIMESTAMP_14 = '%Y%m%d%H%M%S'

# End-of-range 14-digit pad (year 2999, Dec 31, 23:59:59); the default pad
# string of the older pad_timestamp() helper.
PAD_STAMP_END = '29991231235959'

# End-of-range 6-digit pad (year 2999, month 12); the default pad string of
# _pad_timestamp(), which guarantees at least a year and a month to parse.
PAD_6 = '299912'
def iso_date_to_datetime(string):
@ -58,41 +59,145 @@ def iso_date_to_timestamp(string):
return datetime_to_timestamp(iso_date_to_datetime(string))
# pad to certain length (default 6)
def _pad_timestamp(string, pad_str=PAD_6):
    """
    Right-pad a partial timestamp using the tail of ``pad_str``.

    If ``string`` is shorter than ``pad_str``, the missing trailing
    characters are copied from the same positions of ``pad_str``.
    A ``string`` that is already at least as long as ``pad_str`` is
    returned unchanged (it is never truncated).

    :param string: timestamp digits, possibly incomplete
    :param pad_str: template supplying the missing trailing characters
    :return: the padded timestamp string

    >>> _pad_timestamp('20')
    '209912'

    >>> _pad_timestamp('2014')
    '201412'

    >>> _pad_timestamp('20141011')
    '20141011'

    >>> _pad_timestamp('201410110010')
    '201410110010'
    """
    str_len = len(string)
    pad_len = len(pad_str)

    if str_len < pad_len:
        # borrow only the positions that the input does not cover
        string = string + pad_str[str_len:]

    return string
def timestamp_to_datetime(string):
    """
    Convert a (possibly partial or invalid) timestamp to a datetime.

    The input is first padded to at least 6 digits (year + month) and is
    then split into fixed-width fields: YYYY MM DD hh mm ss.  Each field
    is clamped to its allowed range instead of raising an error:

    * a field that is missing (input too short) or not a valid integer
      takes the maximum value of its range;
    * a field outside its range is moved to the nearest boundary;
    * the day is clamped to the real length of the resolved month;
    * anything after the 14th character is ignored.

    :param string: timestamp digits, from 0 up to 14 (or more) characters
    :return: a naive ``datetime.datetime``

    # >14-digit -- rest ignored
    >>> timestamp_to_datetime('2014122609501011')
    datetime.datetime(2014, 12, 26, 9, 50, 10)

    # 14-digit
    >>> timestamp_to_datetime('20141226095010')
    datetime.datetime(2014, 12, 26, 9, 50, 10)

    # 13-digit padding
    >>> timestamp_to_datetime('2014122609501')
    datetime.datetime(2014, 12, 26, 9, 50, 59)

    # 12-digit padding
    >>> timestamp_to_datetime('201412260950')
    datetime.datetime(2014, 12, 26, 9, 50, 59)

    # 11-digit padding
    >>> timestamp_to_datetime('20141226095')
    datetime.datetime(2014, 12, 26, 9, 59, 59)

    # 10-digit padding
    >>> timestamp_to_datetime('2014122609')
    datetime.datetime(2014, 12, 26, 9, 59, 59)

    # 9-digit padding
    >>> timestamp_to_datetime('201412260')
    datetime.datetime(2014, 12, 26, 23, 59, 59)

    # 8-digit padding
    >>> timestamp_to_datetime('20141226')
    datetime.datetime(2014, 12, 26, 23, 59, 59)

    # 7-digit padding
    >>> timestamp_to_datetime('2014122')
    datetime.datetime(2014, 12, 31, 23, 59, 59)

    # 6-digit padding
    >>> timestamp_to_datetime('201410')
    datetime.datetime(2014, 10, 31, 23, 59, 59)

    # 5-digit padding
    >>> timestamp_to_datetime('20141')
    datetime.datetime(2014, 12, 31, 23, 59, 59)

    # 4-digit padding
    >>> timestamp_to_datetime('2014')
    datetime.datetime(2014, 12, 31, 23, 59, 59)

    # 3-digit padding
    >>> timestamp_to_datetime('201')
    datetime.datetime(2019, 12, 31, 23, 59, 59)

    # 2-digit padding
    >>> timestamp_to_datetime('20')
    datetime.datetime(2099, 12, 31, 23, 59, 59)

    # 1-digit padding
    >>> timestamp_to_datetime('2')
    datetime.datetime(2999, 12, 31, 23, 59, 59)

    # 1-digit out-of-range padding
    >>> timestamp_to_datetime('3')
    datetime.datetime(2999, 12, 31, 23, 59, 59)

    # 0-digit padding
    >>> timestamp_to_datetime('')
    datetime.datetime(2999, 12, 31, 23, 59, 59)

    # bad month
    >>> timestamp_to_datetime('20131709005601')
    datetime.datetime(2013, 12, 9, 0, 56, 1)

    # all out of range except minutes
    >>> timestamp_to_datetime('40001965252477')
    datetime.datetime(2999, 12, 31, 23, 24, 59)
    """

    # pad to 6 digits so that year and month can always be extracted
    string = _pad_timestamp(string, PAD_6)

    def clamp(val, min_, max_):
        # Non-numeric text falls back to the end of the range; only the
        # conversion errors are caught so real failures still propagate.
        try:
            val = int(val)
        except (ValueError, TypeError):
            return max_

        return max(min_, min(val, max_))

    def extract(string, start, end, min_, max_):
        # A field the input is too short to contain defaults to its maximum.
        if len(string) >= end:
            return clamp(string[start:end], min_, max_)
        else:
            return max_

    # now parse, clamp to boundary
    year = extract(string, 0, 4, 1900, 2999)
    month = extract(string, 4, 6, 1, 12)
    # the day's upper bound depends on the already-clamped year and month
    day = extract(string, 6, 8, 1, calendar.monthrange(year, month)[1])
    hour = extract(string, 8, 10, 0, 23)
    minute = extract(string, 10, 12, 0, 59)
    second = extract(string, 12, 14, 0, 59)

    return datetime.datetime(year=year,
                             month=month,
                             day=day,
                             hour=hour,
                             minute=minute,
                             second=second)
def timestamp_to_sec(string):
@ -104,7 +209,7 @@ def timestamp_to_sec(string):
1420070399
"""
return calendar.timegm(timestamp_to_datetime(string))
return calendar.timegm(timestamp_to_datetime(string).utctimetuple())
if __name__ == "__main__":

View File

@ -56,9 +56,9 @@ class J2TemplateView:
# Filters
@staticmethod
def format_ts(value, format_='%a, %b %d %Y %H:%M:%S'):
    """
    Format a timestamp string for display.

    :param value: timestamp string accepted by
                  ``timeutils.timestamp_to_datetime``
    :param format_: ``strftime`` format for the output (trailing
                    underscore avoids shadowing the ``format`` builtin)
    :return: the formatted date/time string
    """
    # timestamp_to_datetime returns a datetime object, so format it with
    # the object's own strftime() rather than time.strftime()
    value = timeutils.timestamp_to_datetime(value)
    return value.strftime(format_)
@staticmethod
def get_host(url):