mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-26 07:49:24 +01:00
wombat: - I: function overrides applied by wombat now better appear to be the original new function name same as originals when possible - I: WombatLocation now looks and behaves more like the original Location interface - I: The custom storage class now looks and behaves more like the original Storage - I: SVG image rewriting has been improved: both the href and xlink:href deprecated since SVG2 now rewritten always - I: document.open now handles the case of creation of a new window - I: Request object rewriting of the readonly href property is now correctly handled - I: EventTarget.addEventListener, removeEventListener overrides now preserve the original this argument of the wrapped listener - A: document.close override to ensure wombat is initialized after write or writeln usage - A: reconstruction of <doctype...> in rewriteHTMLComplete IFF it was included in the original string of HTML - A: document.body setter override to ensure rewriting of the new body or frameset - A: Attr.[value, nodeValue, textContent] added setter override to perform URL rewrites - A: SVGElements rewriting of the filter, style, xlink:href, href, and src attributes - A: HTMLTrackElement rewriting of the src attribute of the - A: HTMLQuoteElement and HTMLModElement rewriting of the cite attribute - A: Worklet.addModule: Loads JS module specified by a URL. - A: HTMLHyperlinkElementUtils overrides to the area element - A: ShadowRoot overrides to: innerHTML even though it inherits from DocumentFragment and Node it still has innerHTML getter setter. - A: ShadowRoot, Element, DocumentFragment append, prepend: adds strings of HTML or a new Node inherited from ParentNode - A: StylePropertyMap override: New way to access and set CSS properties. - A: Response.redirecthttps rewriting of the URL argument. 
- A: UIEvent, MouseEvent, TouchEvent, KeyboardEvent, WheelEvent, InputEvent, and CompositionEvent constructor and init{event-name} overrides in order to ensure that wombat's JS Proxy usage does not affect their defined behaviors - A: XSLTProcessor override to ensure its usage is not affected by wombat's JS Proxy usage. - A: navigator.unregisterProtocolHandler: Same override as existing navigator.registerProtocolHandler but from the inverse operation - A: PresentationRequest: Constructor takes a URL or an array of URLs. - A: EventSource and WebSocket override in order to ensure that they do not cause live leaks - A: overrides for the child node interface - Fix: autofetch worker creation of the backing worker when it is operating within an execution context with a null origin tests: - A: 559 tests specific to wombat and client side rewriting pywb: - Fix: a few broken tests due to iana.org requiring a user agent in its requests rewrite: - introduced a new JSWorkerRewriter class in order to support rewriting via wombat workers in the context of all supported worker variants via - ensured rewriter app correctly sets the static prefix ci: - Modified travis.yml to specifically enumerate jobs documentation: - Documented new wombat, wombat proxy mode, wombat workers auto-fetch: - switched to mutation observer when in proxy mode so that the behaviors can operate in tandem with the autofetcher
688 lines
24 KiB
Python
688 lines
24 KiB
Python
import requests
|
|
|
|
from werkzeug.http import HTTP_STATUS_CODES
|
|
from six.moves.urllib.parse import urlencode, urlsplit, urlunsplit
|
|
|
|
from pywb.rewrite.default_rewriter import DefaultRewriter, RewriterWithJSProxy
|
|
|
|
from pywb.rewrite.wburl import WbUrl
|
|
from pywb.rewrite.url_rewriter import UrlRewriter, IdentityUrlRewriter
|
|
|
|
from pywb.utils.wbexception import WbException
|
|
from pywb.utils.canonicalize import canonicalize
|
|
from pywb.utils.loaders import extract_client_cookie
|
|
from pywb.utils.io import BUFF_SIZE, OffsetLimitReader
|
|
from pywb.utils.memento import MementoUtils
|
|
|
|
from warcio.timeutils import http_date_to_timestamp, timestamp_to_http_date
|
|
from warcio.bufferedreaders import BufferedReader
|
|
from warcio.recordloader import ArcWarcRecordLoader
|
|
|
|
from pywb.warcserver.index.cdxobject import CDXObject
|
|
from pywb.apps.wbrequestresponse import WbResponse
|
|
|
|
from pywb.rewrite.rewriteinputreq import RewriteInputRequest
|
|
from pywb.rewrite.templateview import JinjaEnv, HeadInsertView, TopFrameView, BaseInsertView
|
|
|
|
|
|
from io import BytesIO
|
|
from copy import copy
|
|
|
|
import gevent
|
|
import json
|
|
|
|
|
|
# ============================================================================
|
|
class UpstreamException(WbException):
    """WbException that additionally carries an HTTP status code.

    :param status_code: HTTP status code to report for this error
    :param url: url the error relates to
    :param details: error details, handed to ``WbException`` as ``msg``
    """

    def __init__(self, status_code, url, details):
        # the base class receives the details under its ``msg`` keyword
        super(UpstreamException, self).__init__(msg=details, url=url)
        self.status_code = status_code
|
|
|
|
|
|
# ============================================================================
|
|
#class Rewriter(RewriteDASHMixin, RewriteAMFMixin, RewriteContent):
|
|
# pass
|
|
|
|
|
|
# ============================================================================
|
|
class RewriterApp(object):
    """Replay application: loads a record from an upstream url and returns
    it rewritten for replay (see ``render_content``)."""

    # content type requested from upstream when the url modifier is 'vi_'
    VIDEO_INFO_CONTENT_TYPE = 'application/vnd.youtube-dl_formats+json'

    # Content-Security-Policy value used when config has no 'csp-header' entry
    DEFAULT_CSP = "default-src 'unsafe-eval' 'unsafe-inline' 'self' data: blob: mediastream: ws: wss: ; form-action 'self'"
|
|
|
|
def __init__(self, framed_replay=False, jinja_env=None, config=None, paths=None):
    """Set up the rewriters, template views and replay options.

    :param framed_replay: when true, replayed content uses the ``mp_``
        modifier and the empty modifier identifies the top frame
    :param jinja_env: template environment; a ``JinjaEnv`` with a
        ``static_path`` global is created when omitted
    :param config: optional dict of settings; ``None`` is treated as empty
    :param paths: optional dict of upstream url templates keyed by the
        request ``type`` (read by ``get_base_url``)
    """
    self.loader = ArcWarcRecordLoader()

    # Normalise once: every option below is read from self.config, so the
    # default ``config=None`` no longer fails with AttributeError.
    self.config = config or {}
    self.paths = paths or {}

    self.framed_replay = framed_replay

    if framed_replay:
        self.frame_mod = ''
        self.replay_mod = 'mp_'
    else:
        self.frame_mod = None
        self.replay_mod = ''

    # the rewriter still receives the caller's original config object
    self.default_rw = DefaultRewriter(replay_mod=self.replay_mod,
                                      config=config)

    self.js_proxy_rw = RewriterWithJSProxy(replay_mod=self.replay_mod)

    if not jinja_env:
        jinja_env = JinjaEnv(globals={'static_path': 'static'})

    self.jinja_env = jinja_env

    self.redirect_to_exact = self.config.get('redirect_to_exact')

    self.banner_view = BaseInsertView(self.jinja_env, self._html_templ('banner_html'))

    self.head_insert_view = HeadInsertView(self.jinja_env,
                                           self._html_templ('head_insert_html'),
                                           self.banner_view)

    self.frame_insert_view = TopFrameView(self.jinja_env,
                                          self._html_templ('frame_insert_html'),
                                          self.banner_view)

    self.error_view = BaseInsertView(self.jinja_env, self._html_templ('error_html'))
    self.not_found_view = BaseInsertView(self.jinja_env, self._html_templ('not_found_html'))
    self.query_view = BaseInsertView(self.jinja_env, self._html_templ('query_html'))

    self.use_js_obj_proxy = self.config.get('use_js_obj_proxy', True)

    self.cookie_tracker = None

    self.enable_memento = self.config.get('enable_memento')

    # a falsy 'csp-header' setting disables the header entirely
    csp_header = self.config.get('csp-header', self.DEFAULT_CSP)
    if csp_header:
        self.csp_header = ('Content-Security-Policy', csp_header)
    else:
        self.csp_header = None

    # deprecated: Use X-Forwarded-Proto header instead!
    self.force_scheme = self.config.get('force_scheme')
|
|
|
|
def add_csp_header(self, wb_url, status_headers):
    """Append the configured CSP header when ``wb_url`` uses the replay modifier.

    Nothing is added when no CSP header is configured or when the url
    modifier differs from ``self.replay_mod``.
    """
    if not self.csp_header:
        return

    if wb_url.mod == self.replay_mod:
        status_headers.headers.append(self.csp_header)
|
|
|
|
def _html_templ(self, name):
|
|
value = self.config.get(name)
|
|
if not value:
|
|
value = name.replace('_html', '.html')
|
|
return value
|
|
|
|
def is_framed_replay(self, wb_url):
    """Tell whether ``wb_url`` addresses the top frame of a framed replay.

    True only when framed replay is enabled, the url modifier equals the
    frame modifier and the url is a replay url.
    """
    framed = self.framed_replay
    if not framed:
        return framed

    if wb_url.mod != self.frame_mod:
        return False

    return wb_url.is_replay()
|
|
|
|
def _check_accept_dt(self, wb_url, environ):
|
|
is_timegate = False
|
|
if wb_url.is_latest_replay():
|
|
accept_dt = environ.get('HTTP_ACCEPT_DATETIME')
|
|
is_timegate = True
|
|
if accept_dt:
|
|
try:
|
|
wb_url.timestamp = http_date_to_timestamp(accept_dt)
|
|
except:
|
|
raise UpstreamException(400, url=wb_url.url, details='Invalid Accept-Datetime')
|
|
#return WbResponse.text_response('Invalid Accept-Datetime', status='400 Bad Request')
|
|
|
|
wb_url.type = wb_url.REPLAY
|
|
|
|
elif 'pywb_proxy_default_timestamp' in environ:
|
|
wb_url.timestamp = environ['pywb_proxy_default_timestamp']
|
|
wb_url.type = wb_url.REPLAY
|
|
|
|
return is_timegate
|
|
|
|
def _check_range(self, inputreq, wb_url):
|
|
skip_record = False
|
|
range_start = None
|
|
range_end = None
|
|
|
|
rangeres = inputreq.extract_range()
|
|
|
|
if not rangeres:
|
|
return range_start, range_end, skip_record
|
|
|
|
mod_url, start, end, use_206 = rangeres
|
|
|
|
# remove the range and still proxy
|
|
if not use_206:
|
|
return range_start, range_end, skip_record
|
|
|
|
wb_url.url = mod_url
|
|
inputreq.url = mod_url
|
|
|
|
range_start = start
|
|
range_end = end
|
|
|
|
#if start with 0, load from upstream, but add range after
|
|
if start == 0:
|
|
del inputreq.env['HTTP_RANGE']
|
|
else:
|
|
skip_record = True
|
|
|
|
return range_start, range_end, skip_record
|
|
|
|
def _add_range(self, record, wb_url, range_start, range_end):
|
|
if range_end is None and range_start is None:
|
|
return
|
|
|
|
if record.http_headers.get_statuscode() != '200':
|
|
return
|
|
|
|
content_length = (record.http_headers.
|
|
get_header('Content-Length'))
|
|
|
|
if content_length is None:
|
|
return
|
|
|
|
content_length = content_length.split(',')[0]
|
|
|
|
try:
|
|
content_length = int(content_length)
|
|
if not range_end:
|
|
range_end = content_length - 1
|
|
|
|
if range_start >= content_length or range_end >= content_length:
|
|
details = 'Invalid Range: {0} >= {2} or {1} >= {2}'.format(range_start, range_end, content_length)
|
|
try:
|
|
r.raw.close()
|
|
except:
|
|
pass
|
|
|
|
raise UpstreamException(416, url=wb_url.url, details=details)
|
|
|
|
range_len = range_end - range_start + 1
|
|
record.http_headers.add_range(range_start, range_len,
|
|
content_length)
|
|
|
|
record.http_headers.replace_header('Content-Length', str(range_len))
|
|
|
|
record.raw_stream = OffsetLimitReader(record.raw_stream, range_start, range_len)
|
|
return True
|
|
|
|
except (ValueError, TypeError):
|
|
pass
|
|
|
|
def send_redirect(self, new_path, url_parts, urlrewriter):
    """Build a 307 redirect to the same url with its path replaced.

    :param new_path: path to substitute into ``url_parts``
    :param url_parts: 5-part split url (scheme, netloc, path, query, fragment)
    :param urlrewriter: rewriter used to produce the redirect target
    :return: redirect response; carries an ``original`` Link header when
        memento support is enabled
    """
    scheme, netloc, _old_path, query, frag = url_parts
    url = urlunsplit((scheme, netloc, new_path, query, frag))

    resp = WbResponse.redir_response(urlrewriter.rewrite(url),
                                     '307 Temporary Redirect')

    if self.enable_memento:
        resp.status_headers['Link'] = MementoUtils.make_link(url, 'original')

    return resp
|
|
|
|
def render_content(self, wb_url, kwargs, environ):
    """Load the record for ``wb_url`` from upstream and return it rewritten.

    Steps, in order: parse the url and apply Accept-Datetime handling;
    return a custom response (timemap, query or top frame) if one applies;
    choose the url rewriter (identity rewriter in proxy mode); query the
    upstream; redirect when a trailing slash is missing or, if configured,
    when the matched capture differs from the requested one; apply any
    byte range; rewrite the record and attach cookie, memento,
    Content-Location and CSP headers.

    :param wb_url: requested archival url as a string
    :param kwargs: routing arguments, passed on to the upstream url
        builder and the hook methods
    :param environ: WSGI environ (modified in place)
    :return: ``WbResponse``
    :raises UpstreamException: when upstream answers with a status >= 400
    """
    wb_url = wb_url.replace('#', '%23')
    wb_url = WbUrl(wb_url)

    proto = environ.get('HTTP_X_FORWARDED_PROTO', self.force_scheme)

    if proto:
        environ['wsgi.url_scheme'] = proto

    is_timegate = self._check_accept_dt(wb_url, environ)

    host_prefix = self.get_host_prefix(environ)
    rel_prefix = self.get_rel_prefix(environ)
    full_prefix = host_prefix + rel_prefix
    pywb_static_prefix = (environ.get('pywb.host_prefix', '') +
                          environ.get('pywb.app_prefix', '') +
                          environ.get('pywb.static_prefix', '/static/'))
    is_proxy = ('wsgiprox.proxy_host' in environ)

    response = self.handle_custom_response(environ, wb_url,
                                           full_prefix, host_prefix,
                                           kwargs)

    if response:
        return self.format_response(response, wb_url, full_prefix, is_timegate, is_proxy)

    if is_proxy:
        environ['pywb_proxy_magic'] = environ['wsgiprox.proxy_host']
        urlrewriter = IdentityUrlRewriter(wb_url, '')
        framed_replay = False

    else:
        urlrewriter = UrlRewriter(wb_url,
                                  prefix=full_prefix,
                                  full_prefix=full_prefix,
                                  rel_prefix=rel_prefix,
                                  pywb_static_prefix=pywb_static_prefix)

        framed_replay = self.framed_replay

    url_parts = urlsplit(wb_url.url)
    if not url_parts.path:
        return self.send_redirect('/', url_parts, urlrewriter)

    self.unrewrite_referrer(environ, full_prefix)

    urlkey = canonicalize(wb_url.url)

    environ['pywb.host_prefix'] = host_prefix

    if self.use_js_obj_proxy:
        content_rw = self.js_proxy_rw
    else:
        content_rw = self.default_rw

    inputreq = RewriteInputRequest(environ, urlkey, wb_url.url, content_rw)

    inputreq.include_method_query(wb_url.url)

    range_start, range_end, skip_record = self._check_range(inputreq, wb_url)

    setcookie_headers = None
    if self.cookie_tracker:
        cookie_key = self.get_cookie_key(kwargs)
        res = self.cookie_tracker.get_cookie_headers(wb_url.url, urlrewriter, cookie_key)
        inputreq.extra_cookie, setcookie_headers = res

    r = self._do_req(inputreq, wb_url, kwargs, skip_record)

    if r.status_code >= 400:
        error = None
        # reading the error body is best-effort; the status is reported
        # either way
        try:
            error = r.raw.read()
            r.raw.close()
        except Exception:
            pass

        if error:
            # undecodable bytes must not hide the upstream error behind
            # a UnicodeDecodeError
            error = error.decode('utf-8', 'replace')
        else:
            error = ''

        details = dict(args=kwargs, error=error)
        raise UpstreamException(r.status_code, url=wb_url.url, details=details)

    cdx = CDXObject(r.headers.get('Warcserver-Cdx').encode('utf-8'))

    cdx_url_parts = urlsplit(cdx['url'])

    if cdx_url_parts.path.endswith('/') and not url_parts.path.endswith('/'):
        # add trailing slash
        new_path = url_parts.path + '/'

        try:
            r.raw.close()
        except Exception:
            pass

        return self.send_redirect(new_path, url_parts, urlrewriter)

    stream = BufferedReader(r.raw, block_size=BUFF_SIZE)
    record = self.loader.parse_record_stream(stream,
                                             ensure_http_headers=True)

    memento_dt = r.headers.get('Memento-Datetime')
    target_uri = r.headers.get('WARC-Target-URI')

    set_content_loc = False

    # Check if Fuzzy Match
    if target_uri != wb_url.url and cdx.get('is_fuzzy') == '1':
        set_content_loc = True

    # if redir to exact, redir if url or ts are different
    if self.redirect_to_exact:
        if (set_content_loc or
                (wb_url.timestamp != cdx.get('timestamp') and not cdx.get('is_live'))):

            new_url = urlrewriter.get_new_url(url=target_uri,
                                              timestamp=cdx['timestamp'],
                                              mod=wb_url.mod)

            resp = WbResponse.redir_response(new_url, '307 Temporary Redirect')
            if self.enable_memento:
                if is_timegate and not is_proxy:
                    self._add_memento_links(target_uri, full_prefix,
                                            memento_dt, cdx['timestamp'],
                                            resp.status_headers,
                                            is_timegate, is_proxy)

                else:
                    resp.status_headers['Link'] = MementoUtils.make_link(target_uri, 'original')

            # the record body is not served on a redirect: release the
            # streamed upstream response, as the trailing-slash redirect
            # above already does
            try:
                r.raw.close()
            except Exception:
                pass

            return resp

    self._add_custom_params(cdx, r.headers, kwargs, record)

    # a successfully applied range is served with the 'id_' modifier
    if self._add_range(record, wb_url, range_start, range_end):
        wb_url.mod = 'id_'

    is_ajax = self.is_ajax(environ)

    if is_ajax:
        head_insert_func = None
        urlrewriter.rewrite_opts['is_ajax'] = True
    else:
        top_url = self.get_top_url(full_prefix, wb_url, cdx, kwargs)
        head_insert_func = (self.head_insert_view.
                            create_insert_func(wb_url,
                                               full_prefix,
                                               host_prefix,
                                               top_url,
                                               environ,
                                               framed_replay,
                                               coll=kwargs.get('coll', ''),
                                               replay_mod=self.replay_mod,
                                               config=self.config))

    cookie_rewriter = None
    if self.cookie_tracker:
        # cookie_key was assigned above under the same cookie_tracker check
        cookie_rewriter = self.cookie_tracker.get_rewriter(urlrewriter,
                                                           cookie_key)

    urlrewriter.rewrite_opts['ua_string'] = environ.get('HTTP_USER_AGENT')

    result = content_rw(record, urlrewriter, cookie_rewriter, head_insert_func, cdx, environ)

    status_headers, gen, is_rw = result

    if setcookie_headers:
        status_headers.headers.extend(setcookie_headers)

    # a status line without a reason phrase gets a placeholder reason
    if ' ' not in status_headers.statusline:
        status_headers.statusline += ' None'

    if not is_ajax and self.enable_memento:
        self._add_memento_links(cdx['url'], full_prefix,
                                memento_dt, cdx['timestamp'], status_headers,
                                is_timegate, is_proxy, cdx.get('source-coll'))

        set_content_loc = True

    if set_content_loc and not self.redirect_to_exact:
        content_location = urlrewriter.get_new_url(timestamp=cdx['timestamp'],
                                                   url=cdx['url'])
        status_headers.headers.append(('Content-Location', content_location))

    if not is_proxy:
        self.add_csp_header(wb_url, status_headers)

    response = WbResponse(status_headers, gen)

    return response
|
|
|
|
def format_response(self, response, wb_url, full_prefix, is_timegate, is_proxy):
    """Turn a custom response into a ``WbResponse`` with memento headers.

    A value that is not already a ``WbResponse`` is wrapped as a
    ``text/html`` text response. Memento links are added when memento
    support is enabled.
    """
    memento_ts = None

    if not isinstance(response, WbResponse):
        # the outer frame of a framed replay keeps the bare content type
        # and supplies the memento timestamp; everything else is marked
        # as utf-8
        if self.is_framed_replay(wb_url):
            content_type = 'text/html'
            memento_ts = wb_url.timestamp
        else:
            content_type = 'text/html; charset=utf-8'

        response = WbResponse.text_response(response, content_type=content_type)

    if self.enable_memento:
        self._add_memento_links(wb_url.url, full_prefix, None, memento_ts,
                                response.status_headers, is_timegate, is_proxy)

    return response
|
|
|
|
def _add_memento_links(self, url, full_prefix, memento_dt, memento_ts,
|
|
status_headers, is_timegate, is_proxy, coll=None):
|
|
|
|
# memento url + header
|
|
if not memento_dt and memento_ts:
|
|
memento_dt = timestamp_to_http_date(memento_ts)
|
|
|
|
if memento_dt:
|
|
status_headers.headers.append(('Memento-Datetime', memento_dt))
|
|
|
|
if is_proxy:
|
|
memento_url = url
|
|
else:
|
|
memento_url = full_prefix + memento_ts + self.replay_mod
|
|
memento_url += '/' + url
|
|
else:
|
|
memento_url = None
|
|
|
|
timegate_url, timemap_url = self._get_timegate_timemap(url, full_prefix)
|
|
|
|
link = []
|
|
if not is_proxy:
|
|
link.append(MementoUtils.make_link(url, 'original'))
|
|
link.append(MementoUtils.make_link(timegate_url, 'timegate'))
|
|
link.append(MementoUtils.make_link(timemap_url, 'timemap'))
|
|
|
|
if memento_dt:
|
|
link.append(MementoUtils.make_memento_link(memento_url, 'memento', memento_dt, coll))
|
|
|
|
link_str = ', '.join(link)
|
|
|
|
status_headers.headers.append(('Link', link_str))
|
|
|
|
if is_timegate:
|
|
status_headers.headers.append(('Vary', 'accept-datetime'))
|
|
|
|
def _get_timegate_timemap(self, url, full_prefix):
|
|
# timegate url
|
|
timegate_url = full_prefix
|
|
if self.replay_mod:
|
|
timegate_url += self.replay_mod + '/'
|
|
|
|
timegate_url += url
|
|
|
|
# timemap url
|
|
timemap_url = full_prefix + 'timemap/link/' + url
|
|
return timegate_url, timemap_url
|
|
|
|
def get_top_url(self, full_prefix, wb_url, cdx, kwargs):
    """Return the top-frame url: ``full_prefix`` plus ``wb_url`` with an
    empty modifier. ``cdx`` and ``kwargs`` are not used here."""
    return full_prefix + wb_url.to_str(mod='')
|
|
|
|
def handle_error(self, environ, ue):
    """Render the response for an upstream error ``ue``.

    A 404 is rendered with the not-found view. Any other status is
    rendered with the error view, using the reason from
    ``HTTP_STATUS_CODES`` or ``Unknown Error`` if the code is not listed.
    """
    code = ue.status_code

    if code == 404:
        return self._not_found_response(environ, ue.url)

    reason = HTTP_STATUS_CODES.get(code, 'Unknown Error')
    status = str(code) + ' ' + reason

    return self._error_response(environ, ue.url, ue.msg, status=status)
|
|
|
|
def _not_found_response(self, environ, url):
    """Render the not-found template for ``url`` as a 404 html response."""
    body = self.not_found_view.render_to_string(environ, url=url)

    return WbResponse.text_response(body,
                                    status='404 Not Found',
                                    content_type='text/html')
|
|
|
|
def _error_response(self, environ, msg='', details='', status='404 Not Found'):
    """Render the error template as an html response.

    :param msg: passed to the template as ``err_msg``
    :param details: passed to the template as ``err_details``
    :param status: status line of the response
    """
    body = self.error_view.render_to_string(environ,
                                            err_msg=msg,
                                            err_details=details)

    return WbResponse.text_response(body,
                                    status=status,
                                    content_type='text/html')
|
|
|
|
|
|
def _do_req(self, inputreq, wb_url, kwargs, skip_record):
    """POST the reconstructed request to the upstream url.

    :param inputreq: request object providing ``reconstruct_request()``
    :param wb_url: url object supplying url, timestamp and modifier
    :param kwargs: routing arguments used to build the upstream url
    :param skip_record: adds a ``Recorder-Skip: 1`` header when true
    :return: the streamed ``requests`` response
    """
    body = inputreq.reconstruct_request(wb_url.url)

    req_headers = {'Content-Length': str(len(body)),
                   'Content-Type': 'application/request'}

    if skip_record:
        req_headers['Recorder-Skip'] = '1'

    # a latest-replay url asks upstream for the capture closest to now
    closest = 'now' if wb_url.is_latest_replay() else wb_url.timestamp

    params = {}
    params['url'] = wb_url.url
    params['closest'] = closest
    params['matchType'] = 'exact'

    if wb_url.mod == 'vi_':
        params['content_type'] = self.VIDEO_INFO_CONTENT_TYPE

    upstream_url = self.get_upstream_url(wb_url, kwargs, params)

    return requests.post(upstream_url,
                         data=BytesIO(body),
                         headers=req_headers,
                         stream=True)
|
|
|
|
def do_query(self, wb_url, kwargs):
    """GET the upstream index for ``wb_url``.

    The upstream url is built as for a replay request and its
    ``/resource/postreq`` part is replaced with ``/index``. The output
    format comes from ``kwargs['output']`` and defaults to ``json``.

    :return: the ``requests`` response
    """
    params = {}
    params['url'] = wb_url.url
    params['output'] = kwargs.get('output', 'json')
    params['from'] = wb_url.timestamp
    params['to'] = wb_url.end_timestamp

    index_url = self.get_upstream_url(wb_url, kwargs, params)
    index_url = index_url.replace('/resource/postreq', '/index')

    return requests.get(index_url)
|
|
|
|
def make_timemap(self, wb_url, res, full_prefix, output):
    """Build the timemap response from the index query result ``res``.

    An empty body yields a 404. Otherwise the upstream status is passed
    through, and for a 200 in ``link`` output the body is wrapped with the
    timemap header. ``wb_url`` is marked as a query url.
    """
    wb_url.type = wb_url.QUERY

    content_type = res.headers.get('Content-Type')
    text = res.text

    if not text:
        status = '404 Not Found'

    elif res.status_code:
        status = str(res.status_code) + ' ' + res.reason

        if output == 'link' and res.status_code == 200:
            timegate, timemap = self._get_timegate_timemap(wb_url.url, full_prefix)

            text = MementoUtils.wrap_timemap_header(wb_url.url,
                                                    timegate,
                                                    timemap,
                                                    text)

    return WbResponse.text_response(text,
                                    content_type=content_type,
                                    status=status)
|
|
|
|
def handle_timemap(self, wb_url, kwargs, full_prefix):
    """Query the index for ``wb_url`` and return it as a timemap response
    in the format named by ``kwargs['output']``."""
    output_format = kwargs.get('output')
    query_result = self.do_query(wb_url, kwargs)

    return self.make_timemap(wb_url, query_result, full_prefix, output_format)
|
|
|
|
def handle_query(self, environ, wb_url, kwargs, full_prefix):
    """Render the query template for ``wb_url``.

    The prefix given to the template is recomputed from ``environ``; the
    ``full_prefix`` and ``kwargs`` arguments are not used here.
    """
    prefix = self.get_full_prefix(environ)

    return self.query_view.render_to_string(environ,
                                            url=wb_url.url,
                                            prefix=prefix)
|
|
|
|
def get_host_prefix(self, environ):
    """Return ``scheme://host`` for the current request.

    The host is taken from the proxy host, then the ``Host`` header, and
    finally ``SERVER_NAME``, with ``SERVER_PORT`` appended unless it is
    the default port for the scheme.
    """
    scheme = environ['wsgi.url_scheme']

    # proxy host first, then the Host header
    host = environ.get('wsgiprox.proxy_host') or environ.get('HTTP_HOST')

    if not host:
        # if no host
        host = environ['SERVER_NAME']
        default_port = '443' if scheme == 'https' else '80'
        port = environ['SERVER_PORT']
        if port != default_port:
            host += ':' + port

    return scheme + '://' + host
|
|
|
|
def get_rel_prefix(self, environ):
    """Return the application's relative prefix: ``SCRIPT_NAME`` plus ``/``.

    A missing ``SCRIPT_NAME`` is treated as empty. WSGI servers may omit
    the key when the application is mounted at the root, and the previous
    ``None + '/'`` raised TypeError in that case.
    """
    return environ.get('SCRIPT_NAME', '') + '/'
|
|
|
|
def get_full_prefix(self, environ):
    """Return the host prefix followed by the relative prefix."""
    host_prefix = self.get_host_prefix(environ)
    rel_prefix = self.get_rel_prefix(environ)

    return host_prefix + rel_prefix
|
|
|
|
def unrewrite_referrer(self, environ, full_prefix):
    """Replace a rewritten ``Referer`` header with the original url.

    If the referrer starts with ``full_prefix`` and has a remainder, the
    remainder is parsed as a ``WbUrl`` and its url is stored back into
    ``environ``.

    :return: True when the header was replaced, otherwise False
    """
    referrer = environ.get('HTTP_REFERER')

    if not referrer or not referrer.startswith(full_prefix):
        return False

    remainder = referrer[len(full_prefix):]
    if not remainder:
        return False

    environ['HTTP_REFERER'] = WbUrl(remainder).url
    return True
|
|
|
|
def is_ajax(self, environ):
    """Tell whether the request was made via XMLHttpRequest.

    Checks ``X-Requested-With`` and falls back to
    ``X-Pywb-Requested-With``; the comparison is case-insensitive.
    """
    requested_with = (environ.get('HTTP_X_REQUESTED_WITH') or
                      environ.get('HTTP_X_PYWB_REQUESTED_WITH'))

    return bool(requested_with and
                requested_with.lower() == 'xmlhttprequest')
|
|
|
|
def get_base_url(self, wb_url, kwargs):
    """Return the upstream url template selected by ``kwargs['type']``,
    formatted with ``kwargs``. ``wb_url`` is not used here."""
    req_type = kwargs.get('type')
    template = self.paths[req_type]

    return template.format(**kwargs)
|
|
|
|
def get_upstream_url(self, wb_url, kwargs, params):
    """Return the base upstream url with ``params`` appended as a query.

    The parameters are joined with ``&`` when the base url already has a
    query string and with ``?`` otherwise; with no parameters the base url
    is returned unchanged.
    """
    upstream = self.get_base_url(wb_url, kwargs)
    query = urlencode(params, True)

    if not query:
        return upstream

    separator = '&' if '?' in upstream else '?'
    return upstream + separator + query
|
|
|
|
def get_cookie_key(self, kwargs):
    """Return the cookie-tracker key for this request.

    Called only when ``self.cookie_tracker`` is set; this base
    implementation does not provide a key.

    :raises NotImplementedError: always. ``NotImplemented`` is a
        non-callable constant, so the previous ``NotImplemented()``
        raised a confusing TypeError instead.
    """
    raise NotImplementedError()
|
|
|
|
def _add_custom_params(self, cdx, headers, kwargs, record):
|
|
pass
|
|
|
|
def get_top_frame_params(self, wb_url, kwargs):
    """Return extra parameters for the top frame template.

    The base implementation supplies none.
    """
    return None
|
|
|
|
def handle_custom_response(self, environ, wb_url, full_prefix, host_prefix, kwargs):
    """Return a response for requests that are not plain content replay.

    Checked in order: a timemap request (``kwargs['output']`` set), a
    query url, and the top frame of a framed replay.

    :return: the custom response, or None when regular replay applies
    """
    if kwargs.get('output'):
        return self.handle_timemap(wb_url, kwargs, full_prefix)

    if wb_url.is_query():
        return self.handle_query(environ, wb_url, kwargs, full_prefix)

    if not self.is_framed_replay(wb_url):
        return None

    top_frame_params = self.get_top_frame_params(wb_url, kwargs)

    return self.frame_insert_view.get_top_frame(wb_url,
                                                full_prefix,
                                                host_prefix,
                                                environ,
                                                self.frame_mod,
                                                self.replay_mod,
                                                coll='',
                                                extra_params=top_frame_params)
|