From c0f8edf517fa02747c53fb41e2ae5e6c04baa6c1 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Tue, 28 Jan 2014 17:23:44 -0800 Subject: [PATCH] more refactoring: separate top-level handlers (WBHandler) from views (html, text) Add CDXHandler for interfacing with cdx server directly, #12 --- pywb/handlers.py | 72 +++++++++++++++++++++++++++++ pywb/pywb_init.py | 13 ++++-- pywb/{replay.py => replay_views.py} | 36 ++------------- pywb/views.py | 11 ----- 4 files changed, 86 insertions(+), 46 deletions(-) create mode 100644 pywb/handlers.py rename pywb/{replay.py => replay_views.py} (91%) diff --git a/pywb/handlers.py b/pywb/handlers.py new file mode 100644 index 00000000..2e2e77fe --- /dev/null +++ b/pywb/handlers.py @@ -0,0 +1,72 @@ +import views +import utils +import urlparse + +#================================================================= +# Standard WB Handler +#================================================================= +class WBHandler: + def __init__(self, cdx_reader, replay, html_view = None): + self.cdx_reader = cdx_reader + self.replay = replay + self.html_view = html_view + self.text_view = views.TextQueryView() + + def __call__(self, wbrequest): + with utils.PerfTimer(wbrequest.env.get('X_PERF'), 'query') as t: + cdx_lines = self.cdx_reader.load_for_request(wbrequest, parsed_cdx = True) + + # new special modifier to always show cdx index + if wbrequest.wb_url.mod == 'cdx_': + return self.text_view(wbrequest, cdx_lines) + + if (wbrequest.wb_url.type == wbrequest.wb_url.QUERY) or (wbrequest.wb_url.type == wbrequest.wb_url.URL_QUERY): + if not self.html_view: + return self.text_view(wbrequest, cdx_lines) + else: + return self.html_view(wbrequest, cdx_lines) + + with utils.PerfTimer(wbrequest.env.get('X_PERF'), 'replay') as t: + return self.replay(wbrequest, cdx_lines, self.cdx_reader) + + + +#================================================================= +# CDX-Server Handler -- pass all params to cdx server 
+#================================================================= +class CDXHandler: + def __init__(self, cdx_reader, view = None): + self.cdx_reader = cdx_reader + self.view = view if view else views.TextQueryView() + + def __call__(self, wbrequest): + url = wbrequest.wb_url.url + + # use url= param to get actual url + params = urlparse.parse_qs(wbrequest.env['QUERY_STRING']) + + url = params.get('url') + if not url: + raise Exception('Must specify a url= param to query cdx server') + + url = url[0] + + cdx_lines = self.cdx_reader.load_cdx(url, params, parsed_cdx = False) + + return self.view(wbrequest, cdx_lines) + + +#================================================================= +# Debug Handlers +#================================================================= +class DebugEchoEnvHandler: + def __call__(self, wbrequest): + return wbrequestresponse.WbResponse.text_response(str(wbrequest.env)) + +#================================================================= +class DebugEchoHandler: + def __call__(self, wbrequest): + return wbrequestresponse.WbResponse.text_response(str(wbrequest)) + + + diff --git a/pywb/pywb_init.py b/pywb/pywb_init.py index 7570f90e..c7eff2b6 100644 --- a/pywb/pywb_init.py +++ b/pywb/pywb_init.py @@ -1,7 +1,8 @@ import archiveloader import views +import handlers import indexreader -import replay +import replay_views import replay_resolvers import cdxserve from archivalrouter import ArchivalRequestRouter, Route @@ -24,19 +25,23 @@ def pywb_config(head_insert = ''): prefixes = [replay_resolvers.PrefixResolver(test_dir)] # Create rewriting replay handler to rewrite records - replayer = replay.RewritingReplayHandler(resolvers = prefixes, archiveloader = aloader, headInsert = head_insert, buffer_response = True) + replayer = replay_views.RewritingReplayView(resolvers = prefixes, archiveloader = aloader, headInsert = head_insert, buffer_response = True) # Create Jinja2 based html query view html_view = views.J2QueryView('./ui/', 
'query.html') # WB handler which uses the index reader, replayer, and html_view - wb_handler = replay.WBHandler(indexs, replayer, html_view) + wb_handler = handlers.WBHandler(indexs, replayer, html_view) + + # cdx handler + cdx_handler = handlers.CDXHandler(indexs) # Finally, create wb router return ArchivalRequestRouter( { - Route('echo_req', views.DebugEchoView()), # Debug ex: just echo parsed request + Route('echo_req', handlers.DebugEchoHandler()), # Debug ex: just echo parsed request Route('pywb', wb_handler), + Route('cdx', cdx_handler), }, # Specify hostnames that pywb will be running on # This will help catch occasionally missed rewrites that fall-through to the host diff --git a/pywb/replay.py b/pywb/replay_views.py similarity index 91% rename from pywb/replay.py rename to pywb/replay_views.py index e3675209..52fadf55 100644 --- a/pywb/replay.py +++ b/pywb/replay_views.py @@ -5,7 +5,6 @@ import copy import itertools import archiveloader -import views from wbrequestresponse import WbResponse, StatusAndHeaders from wbarchivalurl import ArchivalUrl import utils @@ -17,34 +16,9 @@ import regex_rewriters import wbexceptions -#================================================================= -class WBHandler: - def __init__(self, cdx_reader, replay, html_view = None): - self.cdx_reader = cdx_reader - self.replay = replay - self.html_view = html_view - self.text_view = views.TextQueryView() - - def __call__(self, wbrequest): - with utils.PerfTimer(wbrequest.env.get('X_PERF'), 'query') as t: - cdx_lines = self.cdx_reader.load_for_request(wbrequest, parsed_cdx = True) - - # new special modifier to always show cdx index - if wbrequest.wb_url.mod == 'cdx_': - return self.text_view(wbrequest, cdx_lines) - - if (wbrequest.wb_url.type == wbrequest.wb_url.QUERY) or (wbrequest.wb_url.type == wbrequest.wb_url.URL_QUERY): - if not self.html_view: - return self.text_view(wbrequest, cdx_lines) - else: - return self.html_view(wbrequest, cdx_lines) - - with 
utils.PerfTimer(wbrequest.env.get('X_PERF'), 'replay') as t: - return self.replay(wbrequest, cdx_lines, self.cdx_reader) - #================================================================= -class ReplayHandler(object): +class ReplayView: def __init__(self, resolvers, archiveloader): self.resolvers = resolvers self.loader = archiveloader @@ -238,10 +212,10 @@ class ReplayHandler(object): #================================================================= -class RewritingReplayHandler(ReplayHandler): +class RewritingReplayView(ReplayView): def __init__(self, resolvers, archiveloader, headInsert = None, headerRewriter = None, redir_to_exact = True, buffer_response = False): - ReplayHandler.__init__(self, resolvers, archiveloader) + ReplayView.__init__(self, resolvers, archiveloader) self.headInsert = headInsert if not headerRewriter: headerRewriter = HeaderRewriter() @@ -264,7 +238,7 @@ class RewritingReplayHandler(ReplayHandler): urlrewriter = ArchivalUrlRewriter(wbrequest.wb_url, wbrequest.wb_prefix) wbrequest.urlrewriter = urlrewriter - response = ReplayHandler.__call__(self, wbrequest, index, cdx_reader) + response = ReplayView.__call__(self, wbrequest, index, cdx_reader) if response and response.cdx: self._checkRedir(wbrequest, response.cdx) @@ -419,7 +393,7 @@ class RewritingReplayHandler(ReplayHandler): def doReplay(self, cdx, wbrequest, index, failedFiles): - wbresponse = ReplayHandler.doReplay(self, cdx, wbrequest, index, failedFiles) + wbresponse = ReplayView.doReplay(self, cdx, wbrequest, index, failedFiles) # Check for self redirect if wbresponse.status_headers.statusline.startswith('3'): diff --git a/pywb/views.py b/pywb/views.py index 7e572ccb..c9ea5b9e 100644 --- a/pywb/views.py +++ b/pywb/views.py @@ -36,14 +36,3 @@ class J2QueryView: return wbrequestresponse.WbResponse.text_response(str(response), content_type = 'text/html') -#================================================================= -class DebugEchoView: - def __call__(self, wbrequest): - 
return wbrequestresponse.WbResponse.text_response(str(wbrequest.env)) - -#================================================================= -class DebugEchoView: - def __call__(self, wbrequest): - return wbrequestresponse.WbResponse.text_response(str(wbrequest)) - -