1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 16:14:48 +01:00
pywb/utils.py
Ilya Kreymer 398e8f1a77 inputrequest: add input request handling (direct wsgi headers) or as a prepared post request
add timemap link output
rename source_name -> source
2016-02-24 14:22:29 -08:00

96 lines
2.7 KiB
Python

import re
import six
from pywb.utils.timeutils import timestamp_to_http_date
# Raw strings are used so that regex escapes such as \s and \w are not
# treated as (invalid) Python string escapes.

# Separates the individual links of a Link header: a comma (plus any
# following whitespace) only counts as a separator when the next
# character is '<', so commas inside a url or a quoted value are kept.
LINK_SPLIT = re.compile(r',\s*(?=[<])')

# Separates one link into its '<url>' segment and its properties.
LINK_SEG_SPLIT = re.compile(r';\s*')

# Captures the text enclosed in angle brackets at the start of a segment.
LINK_URL = re.compile(r'<(.*)>')

# Captures a name="value" property as (name, value); the value must be
# non-empty and the closing quote itself is not matched.
LINK_PROP = re.compile(r'([\w]+)="([^"]+)')
#=================================================================
class MementoUtils(object):
    """Static helpers for parsing Link headers and for generating
    timemap link lines from cdx records."""

    @staticmethod
    def parse_links(link_header, def_name='timemap'):
        """Parse a Link header value into a dict of link descriptions.

        Each link has the form ``<url>; name="value"; ...``. The result
        is built as follows:

        * A link whose ``rel`` contains ``memento`` is appended to the
          list stored under the ``'mementos'`` key; its dict holds
          ``url``, ``rel`` and any other properties.
        * A link with ``rel="self"`` is stored under ``def_name``.
        * A link with any other ``rel`` is stored under that rel value.
          For these keyed links the ``rel`` itself is not copied into
          the link dict.
        * A link without a ``rel`` property is dropped.

        :param link_header: the raw Link header string
        :param def_name: key used for the ``rel="self"`` link
        :return: dict of rel -> link dict, plus a ``'mementos'`` list
                 (always present, possibly empty)
        :raises Exception: if a link does not start with ``<url>`` or a
                           property is not of the form ``name="value"``
        """
        links = LINK_SPLIT.split(link_header)
        results = {}
        mementos = []

        for link in links:
            props = LINK_SEG_SPLIT.split(link)

            # first segment must be the <url> part
            m = LINK_URL.match(props[0])
            if not m:
                raise Exception('Invalid Link Url: ' + props[0])

            result = dict(url=m.group(1))
            key = ''
            is_mem = False

            for prop in props[1:]:
                m = LINK_PROP.match(prop)
                if not m:
                    raise Exception('Invalid prop ' + prop)

                name = m.group(1)
                value = m.group(2)

                if name == 'rel':
                    if 'memento' in value:
                        # memento links are collected in a list and
                        # keep their rel (e.g. 'first memento')
                        is_mem = True
                        result[name] = value
                    elif value == 'self':
                        key = def_name
                    else:
                        key = value
                else:
                    result[name] = value

            # a keyed rel takes precedence over the memento list
            if key:
                results[key] = result
            elif is_mem:
                mementos.append(result)

        results['mementos'] = mementos
        return results

    @staticmethod
    def make_timemap_memento_link(cdx, datetime=None, rel='memento', end=',\n'):
        """Format a single cdx record as one timemap link line.

        :param cdx: mapping read for ``load_url`` (falling back to
                    ``'filename://' + cdx['filename']`` when missing or
                    empty), ``timestamp`` (only when ``datetime`` is not
                    given) and the optional ``source``
        :param datetime: preformatted value for the ``datetime``
                         property; when falsy it is computed from
                         ``cdx['timestamp']`` via timestamp_to_http_date
        :param rel: value of the ``rel`` property
        :param end: text appended after the link (separator)
        :return: ``<url>; rel="..."; datetime="..."; src="..."`` + end
        """
        url = cdx.get('load_url')
        if not url:
            url = 'filename://' + cdx.get('filename')

        memento = '<{0}>; rel="{1}"; datetime="{2}"; src="{3}"' + end

        if not datetime:
            datetime = timestamp_to_http_date(cdx['timestamp'])

        return memento.format(url, rel, datetime, cdx.get('source', ''))

    @staticmethod
    def make_timemap(cdx_iter):
        """Generate timemap link lines for every cdx in an iterator.

        Every link except the final one is emitted with the default
        ``',\\n'`` separator; the final one is emitted with no
        separator. An empty iterator yields nothing.

        :param cdx_iter: iterator (not merely an iterable) of cdx
                         mappings accepted by make_timemap_memento_link
        :return: generator of link strings
        """
        # get first memento as it'll be used for 'from' field
        try:
            first_cdx = next(cdx_iter)
        except StopIteration:
            # nothing to list: end the generator instead of falling
            # through to code that needs the first cdx
            return

        from_date = timestamp_to_http_date(first_cdx['timestamp'])

        # first memento link
        # NOTE(review): with exactly one cdx this link is also the last
        # one, yet it still ends with the ',\n' separator -- confirm
        # whether that is intended.
        yield MementoUtils.make_timemap_memento_link(first_cdx, datetime=from_date)

        # emit each link one step behind the iteration, so that the
        # final one can be written without a trailing separator
        prev_cdx = None

        for cdx in cdx_iter:
            if prev_cdx:
                yield MementoUtils.make_timemap_memento_link(prev_cdx)

            prev_cdx = cdx

        # last memento link, if any
        if prev_cdx:
            yield MementoUtils.make_timemap_memento_link(prev_cdx, end='')