mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
timeutils: timestamp_to_datetime() now uses custom timestamp parsing
instead of strptime, so that out-of-range fields in an invalid timestamp are automatically clamped to the allowed range instead of raising an error. It also now returns a datetime.datetime, as advertised, instead of a struct_time.
This commit is contained in:
parent
a474335501
commit
7968f360ce
@ -17,7 +17,8 @@ DATE_TIMESPLIT = re.compile(r'[^\d]')
|
||||
|
||||
TIMESTAMP_14 = '%Y%m%d%H%M%S'
|
||||
|
||||
PAD_STAMP_END = '29991231235959'
|
||||
#PAD_STAMP_END = '29991231235959'
|
||||
PAD_6 = '299912'
|
||||
|
||||
|
||||
def iso_date_to_datetime(string):
|
||||
@ -58,41 +59,145 @@ def iso_date_to_timestamp(string):
|
||||
return datetime_to_timestamp(iso_date_to_datetime(string))
|
||||
|
||||
|
||||
# pad to certain length (default 6)
def _pad_timestamp(string, pad_str=PAD_6):
    """
    Right-pad a partial timestamp with the tail of ``pad_str``.

    If ``string`` is shorter than ``pad_str``, the missing trailing
    characters are copied from the same positions of ``pad_str``.
    A ``string`` that is already at least as long as ``pad_str`` is
    returned unchanged (it is never truncated).

    >>> _pad_timestamp('20')
    '209912'

    >>> _pad_timestamp('2014')
    '201412'

    >>> _pad_timestamp('20141011')
    '20141011'

    >>> _pad_timestamp('201410110010')
    '201410110010'
    """
    # Slicing past the end of pad_str yields '', so inputs that are
    # already long enough pass through untouched.
    return string + pad_str[len(string):]
|
||||
|
||||
|
||||
def timestamp_to_datetime(string):
    """
    Convert a (possibly partial or invalid) timestamp string into a
    ``datetime.datetime``.

    The string is read as fixed-width ``YYYYMMDDhhmmss`` fields. Fields
    that are missing or non-numeric take the maximum value of their range,
    and numeric fields outside their range are clamped to the nearest
    bound, so no error is raised for bad field values. Characters past
    the 14th are ignored.

    # >14-digit -- rest ignored
    >>> timestamp_to_datetime('2014122609501011')
    datetime.datetime(2014, 12, 26, 9, 50, 10)

    # 14-digit
    >>> timestamp_to_datetime('20141226095010')
    datetime.datetime(2014, 12, 26, 9, 50, 10)

    # 13-digit padding
    >>> timestamp_to_datetime('2014122609501')
    datetime.datetime(2014, 12, 26, 9, 50, 59)

    # 12-digit padding
    >>> timestamp_to_datetime('201412260950')
    datetime.datetime(2014, 12, 26, 9, 50, 59)

    # 11-digit padding
    >>> timestamp_to_datetime('20141226095')
    datetime.datetime(2014, 12, 26, 9, 59, 59)

    # 10-digit padding
    >>> timestamp_to_datetime('2014122609')
    datetime.datetime(2014, 12, 26, 9, 59, 59)

    # 9-digit padding
    >>> timestamp_to_datetime('201412260')
    datetime.datetime(2014, 12, 26, 23, 59, 59)

    # 8-digit padding
    >>> timestamp_to_datetime('20141226')
    datetime.datetime(2014, 12, 26, 23, 59, 59)

    # 7-digit padding
    >>> timestamp_to_datetime('2014122')
    datetime.datetime(2014, 12, 31, 23, 59, 59)

    # 6-digit padding
    >>> timestamp_to_datetime('201410')
    datetime.datetime(2014, 10, 31, 23, 59, 59)

    # 5-digit padding
    >>> timestamp_to_datetime('20141')
    datetime.datetime(2014, 12, 31, 23, 59, 59)

    # 4-digit padding
    >>> timestamp_to_datetime('2014')
    datetime.datetime(2014, 12, 31, 23, 59, 59)

    # 3-digit padding
    >>> timestamp_to_datetime('201')
    datetime.datetime(2019, 12, 31, 23, 59, 59)

    # 2-digit padding
    >>> timestamp_to_datetime('20')
    datetime.datetime(2099, 12, 31, 23, 59, 59)

    # 1-digit padding
    >>> timestamp_to_datetime('2')
    datetime.datetime(2999, 12, 31, 23, 59, 59)

    # 1-digit out-of-range padding
    >>> timestamp_to_datetime('3')
    datetime.datetime(2999, 12, 31, 23, 59, 59)

    # 0-digit padding
    >>> timestamp_to_datetime('')
    datetime.datetime(2999, 12, 31, 23, 59, 59)

    # bad month
    >>> timestamp_to_datetime('20131709005601')
    datetime.datetime(2013, 12, 9, 0, 56, 1)

    # all out of range except minutes
    >>> timestamp_to_datetime('40001965252477')
    datetime.datetime(2999, 12, 31, 23, 24, 59)

    """

    # pad to 6 digits so that a partial year/month is completed from the
    # end-of-range pad value before the fields are sliced out
    string = _pad_timestamp(string, PAD_6)

    def clamp(val, min_, max_):
        # A field that is not a valid integer falls back to the end of
        # its range; only conversion errors are caught, so unrelated
        # exceptions (e.g. KeyboardInterrupt) still propagate.
        try:
            val = int(val)
        except (TypeError, ValueError):
            return max_

        return max(min_, min(val, max_))

    def extract(string, start, end, min_, max_):
        # A field that is not fully present defaults to the end of range.
        if len(string) < end:
            return max_

        return clamp(string[start:end], min_, max_)

    # now parse, clamp to boundary
    year = extract(string, 0, 4, 1900, 2999)
    month = extract(string, 4, 6, 1, 12)
    # the upper bound for the day depends on the (already clamped)
    # year and month, so e.g. Feb 31 becomes the last day of February
    day = extract(string, 6, 8, 1, calendar.monthrange(year, month)[1])
    hour = extract(string, 8, 10, 0, 23)
    minute = extract(string, 10, 12, 0, 59)
    second = extract(string, 12, 14, 0, 59)

    return datetime.datetime(year=year,
                             month=month,
                             day=day,
                             hour=hour,
                             minute=minute,
                             second=second)
|
||||
|
||||
|
||||
def timestamp_to_sec(string):
|
||||
@ -104,7 +209,7 @@ def timestamp_to_sec(string):
|
||||
1420070399
|
||||
"""
|
||||
|
||||
return calendar.timegm(timestamp_to_datetime(string))
|
||||
return calendar.timegm(timestamp_to_datetime(string).utctimetuple())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
@ -56,9 +56,9 @@ class J2TemplateView:
|
||||
|
||||
# Filters
|
||||
@staticmethod
def format_ts(value, format_='%a, %b %d %Y %H:%M:%S'):
    """
    Format a timestamp string for display.

    ``value`` is parsed with ``timeutils.timestamp_to_datetime`` and the
    result is rendered with ``strftime`` using ``format_``.

    The parameter is named ``format_`` to avoid shadowing the builtin
    ``format``.
    """
    # strftime is called on the parsed result itself; the older
    # time.strftime(format, value) form expected a struct_time instead.
    parsed = timeutils.timestamp_to_datetime(value)
    return parsed.strftime(format_)
|
||||
|
||||
@staticmethod
|
||||
def get_host(url):
|
||||
|
Loading…
x
Reference in New Issue
Block a user