2014-03-14 10:46:20 -07:00
|
|
|
from pywb.utils.wbexception import BadRequestException
|
|
|
|
from pywb.utils.timeutils import http_date_to_timestamp
|
|
|
|
from pywb.utils.timeutils import timestamp_to_http_date
|
|
|
|
|
|
|
|
from wbrequestresponse import WbRequest, WbResponse
|
2014-03-24 14:00:06 -07:00
|
|
|
from pywb.rewrite.wburl import WbUrl
|
|
|
|
|
|
|
|
# Media type placed in the type="..." attribute of rel="timemap" links
# (see MementoRespMixin.make_timemap_link).
LINK_FORMAT = 'application/link-format'
|
2014-03-14 10:46:20 -07:00
|
|
|
|
|
|
|
|
|
|
|
#=================================================================
|
|
|
|
class MementoReqMixin(object):
    """Request mixin that recognises Memento timegate requests.

    Relies on the host class providing ``self.wb_url``, ``self.options``
    and ``self.env``.
    """

    def _parse_extra(self):
        """Flag timegate requests and apply any Accept-Datetime header.

        Does nothing unless the request has a ``wb_url`` whose type is
        ``LATEST_REPLAY``. Otherwise ``options['is_timegate']`` is set and,
        when the ``HTTP_ACCEPT_DATETIME`` environ entry is present, its
        value is converted to a timestamp and set on the ``wb_url``.

        Raises BadRequestException if the header value cannot be converted.
        """
        target = self.wb_url

        # only a "latest replay" url can act as a timegate
        if not target or target.type != target.LATEST_REPLAY:
            return

        self.options['is_timegate'] = True

        requested = self.env.get('HTTP_ACCEPT_DATETIME')
        if requested:
            try:
                replay_ts = http_date_to_timestamp(requested)
            except Exception:
                raise BadRequestException('Invalid Accept-Datetime: ' +
                                          requested)

            # note: this changes from LATEST_REPLAY -> REPLAY
            target.set_replay_timestamp(replay_ts)
|
|
|
|
|
|
|
|
|
|
|
|
#=================================================================
|
|
|
|
class MementoRequest(MementoReqMixin, WbRequest):
    """WbRequest combined with MementoReqMixin; defines no members itself."""
|
|
|
|
|
|
|
|
|
|
|
|
#=================================================================
|
|
|
|
class MementoRespMixin(object):
    """Response mixin that appends Memento-related headers.

    Relies on the host class providing ``self.status_headers.headers``,
    a list of ``(name, value)`` tuples which is appended to in place.
    """

    def _init_derived(self, params):
        """Append Vary, Memento-Datetime and Link headers as applicable.

        :param params: dict-like; the optional ``'wbrequest'`` and ``'cdx'``
            entries are read. No header is added when the request is
            missing or has no ``wb_url``.
        """
        wbrequest = params.get('wbrequest')
        cdx = params.get('cdx')

        if not wbrequest or not wbrequest.wb_url:
            return

        options = wbrequest.options
        mod = options.get('replay_mod', '')
        is_top_frame = options.get('is_top_frame')

        # Read with .get() like every other option, so that a request
        # lacking the key is treated as non-proxy instead of raising
        # KeyError while the headers are being built.
        is_proxy = options.get('is_proxy', False)

        # a top-frame response is never treated as a timegate
        is_timegate = (options.get('is_timegate', False) and
                       not is_top_frame)

        if is_timegate:
            self.status_headers.headers.append(('Vary', 'accept-datetime'))

        # Determine if memento:
        is_memento = False

        # if no cdx included, not a memento, unless top-frame special
        if not cdx:
            # special case: include the headers but except Memento-Datetime
            # since this is really an intermediate resource
            if is_top_frame:
                is_memento = True

        # otherwise, if in proxy mode, then always a memento
        elif is_proxy:
            is_memento = True

        # otherwise only if timestamp replay (and not a timegate)
        elif not is_timegate:
            is_memento = (wbrequest.wb_url.type == wbrequest.wb_url.REPLAY)

        link = []
        req_url = wbrequest.wb_url.url

        if is_memento or is_timegate:
            url = req_url
            if cdx:
                ts = cdx['timestamp']
                url = cdx['url']
            # for top frame
            elif wbrequest.wb_url.timestamp:
                ts = wbrequest.wb_url.timestamp
            else:
                ts = None

            # without a timestamp there is no memento link to advertise
            if ts:
                http_date = timestamp_to_http_date(ts)

                if is_memento:
                    self.status_headers.headers.append(('Memento-Datetime',
                                                        http_date))

                canon_link = wbrequest.urlrewriter.get_new_url(mod=mod,
                                                               timestamp=ts,
                                                               url=url)

                link.append(self.make_memento_link(canon_link,
                                                   'memento',
                                                   http_date))

        if is_memento and is_timegate:
            link.append(self.make_link(req_url, 'original timegate'))
        else:
            link.append(self.make_link(req_url, 'original'))

        # for now, include timemap only in non-proxy mode
        if not is_proxy and (is_memento or is_timegate):
            link.append(self.make_timemap_link(wbrequest))

        if is_memento and not is_timegate:
            timegate = wbrequest.urlrewriter.get_new_url(mod=mod, timestamp='')
            link.append(self.make_link(timegate, 'timegate'))

        self.status_headers.headers.append(('Link', ', '.join(link)))

    def make_link(self, url, type):
        """Return a Link header entry ``<url>; rel="type"``.

        The ``type`` parameter shadows the builtin; the name is kept
        because it is part of the method's signature.
        """
        return '<{0}>; rel="{1}"'.format(url, type)

    def make_memento_link(self, url, type_, dt):
        """Return a Link header entry carrying a ``datetime`` attribute."""
        return '<{0}>; rel="{1}"; datetime="{2}"'.format(url, type_, dt)

    def make_timemap_link(self, wbrequest):
        """Return the rel="timemap" Link entry for the request.

        The target url is obtained from ``wbrequest.urlrewriter`` with
        mod ``'timemap'``, an empty timestamp and the QUERY url type.
        """
        format_ = '<{0}>; rel="timemap"; type="{1}"'

        url = wbrequest.urlrewriter.get_new_url(mod='timemap',
                                                timestamp='',
                                                type=wbrequest.wb_url.QUERY)

        return format_.format(url, LINK_FORMAT)
|
|
|
|
|
2014-03-14 10:46:20 -07:00
|
|
|
|
|
|
|
#=================================================================
|
|
|
|
class MementoResponse(MementoRespMixin, WbResponse):
    """WbResponse combined with MementoRespMixin; defines no members itself."""
|
2014-03-24 14:00:06 -07:00
|
|
|
|
|
|
|
|
|
|
|
#=================================================================
|
2015-04-01 16:21:44 -07:00
|
|
|
def make_timemap_memento_link(cdx, prefix, datetime=None,
                              rel='memento', end=',\n', mod=''):
    """Format one timemap entry for the capture described by ``cdx``.

    :param cdx: mapping; its ``'url'`` and ``'timestamp'`` entries are read
    :param prefix: string prepended to the generated replay url
    :param datetime: value for the ``datetime`` attribute; when falsy it
        is derived from ``cdx['timestamp']``
    :param rel: value for the ``rel`` attribute
    :param end: text appended after the entry
    :param mod: modifier passed through to ``WbUrl.to_wburl_str``
    :return: the formatted entry string
    """
    # `end` is appended before formatting, exactly as the entry is emitted
    template = '<{0}>; rel="{1}"; datetime="{2}"' + end

    replay_path = WbUrl.to_wburl_str(url=cdx['url'],
                                     mod=mod,
                                     timestamp=cdx['timestamp'],
                                     type=WbUrl.REPLAY)
    memento_url = prefix + replay_path

    if datetime:
        http_date = datetime
    else:
        http_date = timestamp_to_http_date(cdx['timestamp'])

    return template.format(memento_url, rel, http_date)
|
|
|
|
|
|
|
|
|
|
|
|
#=================================================================
|
|
|
|
def make_timemap(wbrequest, cdx_lines):
    """Generate the lines of a link-format timemap.

    Yields, in order: the ``self`` timemap link, the ``original`` link,
    the ``timegate`` link, then one ``memento`` link per cdx entry.

    :param wbrequest: request; ``wb_prefix``, ``wb_url`` and ``options``
        are read
    :param cdx_lines: any iterable of cdx mappings (each providing
        ``'url'`` and ``'timestamp'``); it is consumed lazily
    :return: generator of strings; yields nothing when ``cdx_lines``
        is empty
    """
    prefix = wbrequest.wb_prefix
    url = wbrequest.wb_url.url
    mod = wbrequest.options.get('replay_mod', '')

    # Accept any iterable, and use the next() builtin rather than the
    # Python-2-only .next() method.
    cdx_lines = iter(cdx_lines)

    # get first memento as it'll be used for 'from' field
    # An empty input ends the generator explicitly instead of letting
    # StopIteration escape from the generator body.
    first_cdx = next(cdx_lines, None)
    if first_cdx is None:
        return

    from_date = timestamp_to_http_date(first_cdx['timestamp'])

    # timemap link
    timemap = ('<{0}>; rel="self"; ' +
               'type="application/link-format"; from="{1}",\n')

    yield timemap.format(prefix + wbrequest.wb_url.to_str(),
                         from_date)

    # original link
    original = '<{0}>; rel="original",\n'
    yield original.format(url)

    # timegate link
    timegate = '<{0}>; rel="timegate",\n'
    timegate_url = WbUrl.to_wburl_str(url=url,
                                      mod=mod,
                                      type=WbUrl.LATEST_REPLAY)

    yield timegate.format(prefix + timegate_url)

    # first memento link
    # (always emitted with the default ',\n' terminator, even when it
    # turns out to be the only capture)
    yield make_timemap_memento_link(first_cdx, prefix,
                                    datetime=from_date, mod=mod)

    # Emit each entry one step behind the iteration so that the final
    # entry can be written without a trailing separator.
    prev_cdx = None

    for cdx in cdx_lines:
        if prev_cdx:
            yield make_timemap_memento_link(prev_cdx, prefix, mod=mod)

        prev_cdx = cdx

    # last memento link, if any
    if prev_cdx:
        yield make_timemap_memento_link(prev_cdx, prefix, end='', mod=mod)
|