1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-24 06:59:52 +01:00

refactoring for better extensibility:

remove BaseContentView, move top-frame functionality to SearchPageWbUrlHandler
remove RewriteLiveView, fold functionality into the handler
move default mod setting into RewriteContent
This commit is contained in:
Ilya Kreymer 2014-08-04 01:18:46 -07:00
parent 160182ec48
commit 8d54153326
8 changed files with 131 additions and 138 deletions

View File

@ -18,11 +18,15 @@ from pywb.utils.bufferedreaders import ChunkedDataReader
#================================================================= #=================================================================
class RewriteContent: class RewriteContent:
def __init__(self, ds_rules_file=None, defmod=''): def __init__(self, ds_rules_file=None, is_framed_replay=False):
self.ruleset = RuleSet(RewriteRules, 'rewrite', self.ruleset = RuleSet(RewriteRules, 'rewrite',
default_rule_config={}, default_rule_config={},
ds_rules_file=ds_rules_file) ds_rules_file=ds_rules_file)
self.defmod = defmod
if is_framed_replay:
self.defmod = 'mp_'
else:
self.defmod = ''
def sanitize_content(self, status_headers, stream): def sanitize_content(self, status_headers, stream):
# remove transfer encoding chunked and wrap in a dechunking stream # remove transfer encoding chunked and wrap in a dechunking stream

View File

@ -21,8 +21,8 @@ from rewrite_content import RewriteContent
#================================================================= #=================================================================
class LiveRewriter(object): class LiveRewriter(object):
def __init__(self, defmod='', default_proxy=None): def __init__(self, is_framed_replay=False, default_proxy=None):
self.rewriter = RewriteContent(defmod=defmod) self.rewriter = RewriteContent(is_framed_replay=is_framed_replay)
self.default_proxy = default_proxy self.default_proxy = default_proxy
if self.default_proxy: if self.default_proxy:
logging.debug('Live Rewrite via proxy ' + self.default_proxy) logging.debug('Live Rewrite via proxy ' + self.default_proxy)
@ -73,7 +73,7 @@ class LiveRewriter(object):
def fetch_http(self, url, def fetch_http(self, url,
env=None, env=None,
req_headers={}, req_headers=None,
follow_redirects=False, follow_redirects=False,
proxies=None): proxies=None):
@ -84,6 +84,9 @@ class LiveRewriter(object):
proxies = {'http': self.default_proxy, proxies = {'http': self.default_proxy,
'https': self.default_proxy} 'https': self.default_proxy}
if not req_headers:
req_headers = {}
if env is not None: if env is not None:
method = env['REQUEST_METHOD'].upper() method = env['REQUEST_METHOD'].upper()
input_ = env['wsgi.input'] input_ = env['wsgi.input']

View File

@ -2,6 +2,8 @@ import pkgutil
import mimetypes import mimetypes
import time import time
from datetime import datetime
from pywb.utils.wbexception import NotFoundException from pywb.utils.wbexception import NotFoundException
from pywb.utils.loaders import BlockLoader from pywb.utils.loaders import BlockLoader
@ -11,8 +13,9 @@ from pywb.framework.wbrequestresponse import WbResponse
from pywb.warc.recordloader import ArcWarcRecordLoader from pywb.warc.recordloader import ArcWarcRecordLoader
from pywb.warc.resolvingloader import ResolvingLoader from pywb.warc.resolvingloader import ResolvingLoader
from views import J2TemplateView, add_env_globals from views import J2TemplateView
from replay_views import ReplayView from replay_views import ReplayView
from pywb.utils.timeutils import datetime_to_timestamp
#================================================================= #=================================================================
@ -26,6 +29,15 @@ class SearchPageWbUrlHandler(WbUrlHandler):
create_template(config.get('search_html'), create_template(config.get('search_html'),
'Search Page')) 'Search Page'))
self.is_frame_mode = config.get('framed_replay', False)
if self.is_frame_mode:
html = config.get('frame_insert_html', 'ui/frame_insert.html')
self.frame_insert_view = (J2TemplateView.
create_template(html, 'Frame Insert'))
else:
self.frame_insert_view = None
def render_search_page(self, wbrequest, **kwargs): def render_search_page(self, wbrequest, **kwargs):
if self.search_view: if self.search_view:
return self.search_view.render_response(wbrequest=wbrequest, return self.search_view.render_response(wbrequest=wbrequest,
@ -34,6 +46,38 @@ class SearchPageWbUrlHandler(WbUrlHandler):
else: else:
return WbResponse.text_response('No Lookup Url Specified') return WbResponse.text_response('No Lookup Url Specified')
def __call__(self, wbrequest):
    """Dispatch a request to the search page, the top frame or the handler.

    The root url ('/') renders the search page.  In frame mode, a
    request that has a wb_url which is not a query and carries no
    modifier, and which is not a proxy-mode request, gets the top-level
    frame page.  Everything else is passed on to ``handle_request``.
    """
    # root search page
    if wbrequest.wb_url_str == '/':
        return self.render_search_page(wbrequest)

    # the top level frame is only rendered in frame mode
    # (not supported in proxy mode)
    wants_top_frame = (self.is_frame_mode and
                       wbrequest.wb_url and
                       not wbrequest.wb_url.is_query() and
                       not wbrequest.wb_url.mod and
                       not wbrequest.options['is_proxy'])

    if not wants_top_frame:
        return self.handle_request(wbrequest)

    frame_params = self.get_top_frame_params(wbrequest)
    return self.frame_insert_view.render_response(**frame_params)
def get_top_frame_params(self, wbrequest):
    """Build the keyword arguments used to render the top-level frame.

    Returns a dict with ``embed_url`` (the request url re-serialized
    with the 'mp_' modifier), the ``wbrequest`` itself, a ``timestamp``,
    the target ``url`` and a ``content_type`` of 'text/html'.
    """
    wb_url = wbrequest.wb_url

    # use the timestamp from the request url when it has one,
    # otherwise fall back to the current UTC time
    timestamp = wb_url.timestamp
    if not timestamp:
        timestamp = datetime_to_timestamp(datetime.utcnow())

    return {
        'embed_url': wb_url.to_str(mod='mp_'),
        'wbrequest': wbrequest,
        'timestamp': timestamp,
        'url': wb_url.url,
        'content_type': 'text/html',
    }
#================================================================= #=================================================================
# Standard WB Handler # Standard WB Handler
@ -52,10 +96,6 @@ class WBHandler(SearchPageWbUrlHandler):
resolving_loader = ResolvingLoader(paths=paths, resolving_loader = ResolvingLoader(paths=paths,
record_loader=record_loader) record_loader=record_loader)
template_globals = config.get('template_globals')
if template_globals:
add_env_globals(template_globals)
self.replay = ReplayView(resolving_loader, config) self.replay = ReplayView(resolving_loader, config)
self.fallback_handler = None self.fallback_handler = None
@ -65,13 +105,9 @@ class WBHandler(SearchPageWbUrlHandler):
if self.fallback_name: if self.fallback_name:
self.fallback_handler = handler_dict.get(self.fallback_name) self.fallback_handler = handler_dict.get(self.fallback_name)
def __call__(self, wbrequest): def handle_request(self, wbrequest):
if wbrequest.wb_url_str == '/':
return self.render_search_page(wbrequest)
try: try:
with PerfTimer(wbrequest.env.get('X_PERF'), 'query') as t: response = self.handle_query(wbrequest)
response = self.index_reader.load_for_request(wbrequest)
except NotFoundException as nfe: except NotFoundException as nfe:
return self.handle_not_found(wbrequest, nfe) return self.handle_not_found(wbrequest, nfe)
@ -81,11 +117,13 @@ class WBHandler(SearchPageWbUrlHandler):
cdx_lines, cdx_callback = response cdx_lines, cdx_callback = response
return self.handle_replay(wbrequest, cdx_lines, cdx_callback) return self.handle_replay(wbrequest, cdx_lines, cdx_callback)
def handle_query(self, wbrequest):
    """Return what ``self.index_reader.load_for_request`` gives for the request."""
    reader = self.index_reader
    return reader.load_for_request(wbrequest)
def handle_replay(self, wbrequest, cdx_lines, cdx_callback): def handle_replay(self, wbrequest, cdx_lines, cdx_callback):
with PerfTimer(wbrequest.env.get('X_PERF'), 'replay') as t: return self.replay.render_content(wbrequest,
return self.replay(wbrequest, cdx_lines,
cdx_lines, cdx_callback)
cdx_callback)
def handle_not_found(self, wbrequest, nfe): def handle_not_found(self, wbrequest, nfe):
if (not self.fallback_handler or if (not self.fallback_handler or
@ -154,19 +192,3 @@ class DebugEchoEnvHandler(BaseHandler): # pragma: no cover
class DebugEchoHandler(BaseHandler): # pragma: no cover class DebugEchoHandler(BaseHandler): # pragma: no cover
def __call__(self, wbrequest): def __call__(self, wbrequest):
return WbResponse.text_response(str(wbrequest)) return WbResponse.text_response(str(wbrequest))
#=================================================================
class PerfTimer:
    """Context manager that times the body of a ``with`` statement.

    On exit the elapsed time (end minus start, converted with ``str``)
    is stored in ``perfdict`` under ``name``.  When ``perfdict`` is
    None the timing is still taken but nothing is recorded.
    """

    def __init__(self, perfdict, name):
        self.perfdict = perfdict
        self.name = name
        # time.clock() was removed in Python 3.8; use perf_counter()
        # where it exists and only fall back to clock() on older
        # interpreters that do not provide it.
        self._clock = getattr(time, 'perf_counter', None) or time.clock

    def __enter__(self):
        self.start = self._clock()
        return self

    def __exit__(self, *args):
        self.end = self._clock()
        if self.perfdict is not None:
            self.perfdict[self.name] = str(self.end - self.start)

View File

@ -2,9 +2,11 @@ from pywb.framework.basehandlers import WbUrlHandler
from pywb.framework.wbrequestresponse import WbResponse from pywb.framework.wbrequestresponse import WbResponse
from pywb.framework.archivalrouter import ArchivalRouter, Route from pywb.framework.archivalrouter import ArchivalRouter, Route
from handlers import StaticHandler, SearchPageWbUrlHandler from pywb.rewrite.rewrite_live import LiveRewriter
from pywb.rewrite.wburl import WbUrl
from replay_views import RewriteLiveView from handlers import StaticHandler, SearchPageWbUrlHandler
from views import HeadInsertView
from pywb.utils.wbexception import WbException from pywb.utils.wbexception import WbException
@ -19,20 +21,44 @@ class LiveResourceException(WbException):
class RewriteHandler(SearchPageWbUrlHandler): class RewriteHandler(SearchPageWbUrlHandler):
def __init__(self, config): def __init__(self, config):
super(RewriteHandler, self).__init__(config) super(RewriteHandler, self).__init__(config)
self.rewrite_view = RewriteLiveView(config)
def __call__(self, wbrequest): default_proxy = config.get('proxyhostport')
if wbrequest.wb_url_str == '/': self.rewriter = LiveRewriter(is_framed_replay=self.is_frame_mode,
return self.render_search_page(wbrequest) default_proxy=default_proxy)
self.head_insert_view = HeadInsertView.init_from_config(config)
def handle_request(self, wbrequest):
try: try:
return self.rewrite_view(wbrequest) return self.render_content(wbrequest)
except Exception as exc: except Exception as exc:
url = wbrequest.wb_url.url url = wbrequest.wb_url.url
msg = 'Could not load the url from the live web: ' + url msg = 'Could not load the url from the live web: ' + url
raise LiveResourceException(msg=msg, url=url) raise LiveResourceException(msg=msg, url=url)
def _live_request_headers(self, wbrequest):
    """Return the headers to send with the live request.

    This implementation ignores ``wbrequest`` and always returns a
    new empty dict.
    """
    return dict()
def render_content(self, wbrequest):
    """Fetch the requested url through ``self.rewriter`` and build a response.

    The head-insert function and the live request headers are prepared
    first.  If the request has a referrer wburl string, the url parsed
    from it is stored in ``wbrequest.env['REL_REFERER']``.  The result
    of ``fetch_request`` is unpacked into ``_make_response``.
    """
    insert_func = self.head_insert_view.create_insert_func(wbrequest)
    live_headers = self._live_request_headers(wbrequest)

    referrer_str = wbrequest.extract_referrer_wburl_str()
    if referrer_str:
        wbrequest.env['REL_REFERER'] = WbUrl(referrer_str).url

    fetched = self.rewriter.fetch_request(wbrequest.wb_url,
                                          wbrequest.urlrewriter,
                                          head_insert_func=insert_func,
                                          req_headers=live_headers,
                                          env=wbrequest.env)

    return self._make_response(wbrequest, *fetched)
def _make_response(self, wbrequest, status_headers, gen, is_rewritten):
    """Wrap ``status_headers`` and ``gen`` in a ``WbResponse``.

    ``wbrequest`` and ``is_rewritten`` are accepted but not used here.
    """
    response = WbResponse(status_headers, gen)
    return response
def __str__(self): def __str__(self):
return 'Live Web Rewrite Handler' return 'Live Web Rewrite Handler'

View File

@ -6,7 +6,7 @@ from pywb.framework.wbrequestresponse import WbRequest
from pywb.framework.memento import MementoRequest from pywb.framework.memento import MementoRequest
from pywb.framework.basehandlers import BaseHandler from pywb.framework.basehandlers import BaseHandler
from views import J2TemplateView from views import J2TemplateView, add_env_globals
from views import J2HtmlCapturesView, HeadInsertView from views import J2HtmlCapturesView, HeadInsertView
from live_rewrite_handler import RewriteHandler from live_rewrite_handler import RewriteHandler
@ -71,7 +71,10 @@ def create_wb_handler(query_handler, config):
#================================================================= #=================================================================
def create_live_handler(config): def create_live_handler(config):
live_handler = RewriteHandler(config) wb_handler_class = config.get('wb_handler_class', RewriteHandler)
live_handler = wb_handler_class(config)
return live_handler return live_handler
@ -92,9 +95,12 @@ def init_collection(route_config):
create_template(route_config.get('query_html'), create_template(route_config.get('query_html'),
'Captures Page')) 'Captures Page'))
server_cls = route_config.get('server_cls')
query_handler = QueryHandler.init_from_config(route_config, query_handler = QueryHandler.init_from_config(route_config,
ds_rules_file, ds_rules_file,
html_view) html_view,
server_cls)
return query_handler return query_handler
@ -162,6 +168,11 @@ def create_wb_router(passed_config={}):
# store live and replay handlers # store live and replay handlers
handler_dict = {} handler_dict = {}
# setup template globals
template_globals = config.get('template_globals')
if template_globals:
add_env_globals(template_globals)
for name, value in collections.iteritems(): for name, value in collections.iteritems():
if isinstance(value, BaseHandler): if isinstance(value, BaseHandler):
handler_dict[name] = value handler_dict[name] = value

View File

@ -1,19 +1,14 @@
import re import re
import datetime
from io import BytesIO from io import BytesIO
from pywb.utils.statusandheaders import StatusAndHeaders from pywb.utils.statusandheaders import StatusAndHeaders
from pywb.utils.wbexception import WbException, NotFoundException from pywb.utils.wbexception import WbException, NotFoundException
from pywb.utils.loaders import LimitReader from pywb.utils.loaders import LimitReader
from pywb.utils.timeutils import datetime_to_timestamp
from pywb.framework.wbrequestresponse import WbResponse from pywb.framework.wbrequestresponse import WbResponse
from pywb.framework.memento import MementoResponse from pywb.framework.memento import MementoResponse
from pywb.rewrite.rewrite_content import RewriteContent from pywb.rewrite.rewrite_content import RewriteContent
from pywb.rewrite.rewrite_live import LiveRewriter
from pywb.rewrite.wburl import WbUrl
from pywb.warc.recordloader import ArchiveLoadFailed from pywb.warc.recordloader import ArchiveLoadFailed
from views import J2TemplateView, add_env_globals from views import J2TemplateView, add_env_globals
@ -32,92 +27,16 @@ class CaptureException(WbException):
#================================================================= #=================================================================
class BaseContentView(object): class ReplayView(object):
def __init__(self, config):
    """Read frame-mode and insert-view settings from ``config``.

    Sets ``is_frame_mode``, ``_mp_mod`` ('mp_' in frame mode, otherwise
    ''), ``head_insert_view`` and ``frame_insert_view`` (None unless
    frame mode is on).  A view supplied directly in ``config`` is used
    as-is; otherwise one is created from the configured (or default)
    template path.
    """
    framed = config.get('framed_replay', False)
    self.is_frame_mode = framed
    self._mp_mod = 'mp_' if framed else ''

    head_view = config.get('head_insert_view')
    if not head_view:
        head_view = HeadInsertView.create_template(
            config.get('head_insert_html', 'ui/head_insert.html'),
            'Head Insert')
    self.head_insert_view = head_view

    # the frame insert view is only needed in frame mode
    frame_view = None
    if framed:
        frame_view = config.get('frame_insert_view')
        if not frame_view:
            frame_view = J2TemplateView.create_template(
                config.get('frame_insert_html', 'ui/frame_insert.html'),
                'Frame Insert')
    self.frame_insert_view = frame_view
def __call__(self, wbrequest, *args):
    """Render the top-level frame, or defer to ``render_content``.

    The frame page is rendered only in frame mode, when the request has
    a wb_url without a modifier and is not a proxy-mode request.  Its
    timestamp is always the current UTC time.
    """
    # render top level frame if in frame mode
    # (not supported in proxy mode)
    show_top_frame = (self.is_frame_mode and
                      wbrequest.wb_url and
                      not wbrequest.wb_url.mod and
                      not wbrequest.options['is_proxy'])

    if not show_top_frame:
        return self.render_content(wbrequest, *args)

    frame_params = dict(
        embed_url=wbrequest.wb_url.to_str(mod=self._mp_mod),
        wbrequest=wbrequest,
        timestamp=datetime_to_timestamp(datetime.datetime.utcnow()),
        url=wbrequest.wb_url.url,
        content_type='text/html')

    return self.frame_insert_view.render_response(**frame_params)
#=================================================================
class RewriteLiveView(BaseContentView):
    """Content view that fetches and rewrites urls via a ``LiveRewriter``."""

    def __init__(self, config):
        super(RewriteLiveView, self).__init__(config)

        # optional proxy taken from the 'proxyhostport' config entry
        self.rewriter = LiveRewriter(
            defmod=self._mp_mod,
            default_proxy=config.get('proxyhostport'))

    def render_content(self, wbrequest, *args):
        """Fetch the request url through the rewriter and return a WbResponse.

        If the request has a referrer wburl string, the url parsed from
        it is stored in ``wbrequest.env['REL_REFERER']`` before fetching.
        """
        insert_func = self.head_insert_view.create_insert_func(wbrequest)

        referrer_str = wbrequest.extract_referrer_wburl_str()
        if referrer_str:
            wbrequest.env['REL_REFERER'] = WbUrl(referrer_str).url

        fetched = self.rewriter.fetch_request(wbrequest.wb_url,
                                              wbrequest.urlrewriter,
                                              head_insert_func=insert_func,
                                              env=wbrequest.env)

        # the third element (is_rewritten) is not used for the response
        status_headers, gen, is_rewritten = fetched
        return WbResponse(status_headers, gen)
#=================================================================
class ReplayView(BaseContentView):
STRIP_SCHEME = re.compile('^([\w]+:[/]*)?(.*?)$') STRIP_SCHEME = re.compile('^([\w]+:[/]*)?(.*?)$')
def __init__(self, content_loader, config): def __init__(self, content_loader, config):
super(ReplayView, self).__init__(config)
self.content_loader = content_loader self.content_loader = content_loader
self.content_rewriter = RewriteContent(defmod=self._mp_mod)
framed = config.get('framed_replay')
self.content_rewriter = RewriteContent(is_framed_replay=framed)
self.head_insert_view = HeadInsertView.init_from_config(config)
self.buffer_response = config.get('buffer_response', True) self.buffer_response = config.get('buffer_response', True)
@ -131,12 +50,12 @@ class ReplayView(BaseContentView):
self._reporter = config.get('reporter') self._reporter = config.get('reporter')
def render_content(self, wbrequest, *args): def render_content(self, wbrequest, cdx_lines, cdx_loader):
last_e = None last_e = None
first = True first = True
cdx_lines = args[0] #cdx_lines = args[0]
cdx_loader = args[1] #cdx_loader = args[1]
# List of already failed w/arcs # List of already failed w/arcs
failed_files = [] failed_files = []

View File

@ -141,6 +141,14 @@ class HeadInsertView(J2TemplateView):
return J2TemplateView.create_template(filename, desc, return J2TemplateView.create_template(filename, desc,
HeadInsertView) HeadInsertView)
@staticmethod
def init_from_config(config):
    """Return the head-insert view described by ``config``.

    A truthy ``config['head_insert_view']`` is returned as-is.
    Otherwise a view is created from ``config['head_insert_html']``,
    which defaults to 'ui/head_insert.html'.
    """
    configured = config.get('head_insert_view')
    if configured:
        return configured

    template_path = config.get('head_insert_html', 'ui/head_insert.html')
    return HeadInsertView.create_template(template_path, 'Head Insert')
#================================================================= #=================================================================
# query views # query views

View File

@ -310,7 +310,7 @@ class TestWb:
def test_excluded_content(self): def test_excluded_content(self):
resp = self.testapp.get('/pywb/http://www.iana.org/_img/bookmark_icon.ico', status = 403) resp = self.testapp.get('/pywb/mp_/http://www.iana.org/_img/bookmark_icon.ico', status = 403)
assert resp.status_int == 403 assert resp.status_int == 403
assert 'Excluded' in resp.body assert 'Excluded' in resp.body
@ -414,7 +414,7 @@ class TestWb:
def test_error(self): def test_error(self):
resp = self.testapp.get('/pywb/?abc', status = 400) resp = self.testapp.get('/pywb/mp_/?abc', status = 400)
assert resp.status_int == 400 assert resp.status_int == 400
assert 'Invalid Url: http://?abc' in resp.body assert 'Invalid Url: http://?abc' in resp.body