1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

more refactoring: separate top-level handlers (WBHandler) from

views (html, text)
Add CDXHandler for interfacing with cdx server directly, #12
This commit is contained in:
Ilya Kreymer 2014-01-28 17:23:44 -08:00
parent 1a234f2953
commit c0f8edf517
4 changed files with 86 additions and 46 deletions

72
pywb/handlers.py Normal file
View File

@ -0,0 +1,72 @@
import views
import utils
import urlparse
#=================================================================
# Standard WB Handler
#=================================================================
class WBHandler:
    """Top-level wayback handler: load cdx lines, then list or replay them.

    Every request is first resolved through the cdx reader. The parsed
    wb_url then decides whether the cdx lines are shown by a query view
    or passed on to the replay callable.
    """

    def __init__(self, cdx_reader, replay, html_view = None):
        """Remember the collaborators used while dispatching a request.

        cdx_reader -- object whose load_for_request() supplies the cdx lines
        replay     -- callable invoked as replay(wbrequest, cdx_lines, cdx_reader)
        html_view  -- optional callable for query listings; when falsy the
                      text query view is used instead
        """
        self.cdx_reader = cdx_reader
        self.replay = replay
        self.html_view = html_view
        self.text_view = views.TextQueryView()

    def __call__(self, wbrequest):
        """Dispatch wbrequest and return whatever the chosen view returns."""
        # The index lookup runs inside a PerfTimer labelled 'query'
        with utils.PerfTimer(wbrequest.env.get('X_PERF'), 'query'):
            cdx_lines = self.cdx_reader.load_for_request(wbrequest, parsed_cdx = True)

        wb_url = wbrequest.wb_url

        # The special 'cdx_' modifier always shows the cdx index as text
        if wb_url.mod == 'cdx_':
            return self.text_view(wbrequest, cdx_lines)

        is_query = (wb_url.type == wb_url.QUERY) or (wb_url.type == wb_url.URL_QUERY)
        if is_query:
            # Use the html view when one was configured, otherwise plain text
            query_view = self.html_view or self.text_view
            return query_view(wbrequest, cdx_lines)

        # Anything else is replayed, inside a PerfTimer labelled 'replay'
        with utils.PerfTimer(wbrequest.env.get('X_PERF'), 'replay'):
            return self.replay(wbrequest, cdx_lines, self.cdx_reader)
#=================================================================
# CDX-Server Handler -- pass all params to cdx server
#=================================================================
class CDXHandler:
    """Handler that passes the request's query-string params to the cdx reader.

    The url to look up comes from the ``url=`` query parameter, not from
    the parsed wb_url of the request.
    """

    def __init__(self, cdx_reader, view = None):
        """Store the cdx reader and the view used to render its output.

        cdx_reader -- object providing load_cdx(url, params, parsed_cdx=...)
        view       -- callable invoked as view(wbrequest, cdx_lines); when
                      falsy, views.TextQueryView() is used
        """
        self.cdx_reader = cdx_reader
        self.view = view if view else views.TextQueryView()

    def __call__(self, wbrequest):
        """Query the cdx reader using the request's query string.

        Returns the result of self.view(wbrequest, cdx_lines).
        Raises Exception when no non-empty url= parameter is present.
        """
        # A missing QUERY_STRING is treated like an empty one, so the caller
        # gets the explicit "url= required" error below instead of a KeyError.
        query_string = wbrequest.env.get('QUERY_STRING', '')
        params = urlparse.parse_qs(query_string)

        # use url= param to get actual url; parse_qs maps each key to a list
        # of values and omits keys whose value is blank
        urls = params.get('url')
        if not urls:
            raise Exception('Must specify a url= param to query cdx server')

        # Only the first url= value is used; the full params dict is passed on
        cdx_lines = self.cdx_reader.load_cdx(urls[0], params, parsed_cdx = False)
        return self.view(wbrequest, cdx_lines)
#=================================================================
# Debug Handlers
#=================================================================
class DebugEchoEnvHandler:
    """Debug handler: respond with the string form of the request's env."""

    def __call__(self, wbrequest):
        """Return a text response containing str(wbrequest.env)."""
        # This module's top-level imports do not include wbrequestresponse,
        # so import it here; otherwise the call below raises NameError.
        import wbrequestresponse
        return wbrequestresponse.WbResponse.text_response(str(wbrequest.env))
#=================================================================
class DebugEchoHandler:
    """Debug handler: respond with the string form of the parsed request."""

    def __call__(self, wbrequest):
        """Return a text response containing str(wbrequest)."""
        # This module's top-level imports do not include wbrequestresponse,
        # so import it here; otherwise the call below raises NameError.
        import wbrequestresponse
        return wbrequestresponse.WbResponse.text_response(str(wbrequest))

View File

@ -1,7 +1,8 @@
import archiveloader
import views
import handlers
import indexreader
import replay
import replay_views
import replay_resolvers
import cdxserve
from archivalrouter import ArchivalRequestRouter, Route
@ -24,19 +25,23 @@ def pywb_config(head_insert = ''):
prefixes = [replay_resolvers.PrefixResolver(test_dir)]
# Create rewriting replay handler to rewrite records
replayer = replay.RewritingReplayHandler(resolvers = prefixes, archiveloader = aloader, headInsert = head_insert, buffer_response = True)
replayer = replay_views.RewritingReplayView(resolvers = prefixes, archiveloader = aloader, headInsert = head_insert, buffer_response = True)
# Create Jinja2 based html query view
html_view = views.J2QueryView('./ui/', 'query.html')
# WB handler which uses the index reader, replayer, and html_view
wb_handler = replay.WBHandler(indexs, replayer, html_view)
wb_handler = handlers.WBHandler(indexs, replayer, html_view)
# cdx handler
cdx_handler = handlers.CDXHandler(indexs)
# Finally, create wb router
return ArchivalRequestRouter(
{
Route('echo_req', views.DebugEchoView()), # Debug ex: just echo parsed request
Route('echo_req', handlers.DebugEchoHandler()), # Debug ex: just echo parsed request
Route('pywb', wb_handler),
Route('cdx', cdx_handler),
},
# Specify hostnames that pywb will be running on
# This will help catch occasionally missed rewrites that fall-through to the host

View File

@ -5,7 +5,6 @@ import copy
import itertools
import archiveloader
import views
from wbrequestresponse import WbResponse, StatusAndHeaders
from wbarchivalurl import ArchivalUrl
import utils
@ -17,34 +16,9 @@ import regex_rewriters
import wbexceptions
#=================================================================
class WBHandler:
    """Top-level wayback handler: load cdx lines, then list or replay them.

    Every request is first resolved through the cdx reader. The parsed
    wb_url then decides whether the cdx lines are shown by a query view
    or passed on to the replay callable.
    """

    def __init__(self, cdx_reader, replay, html_view = None):
        """Remember the collaborators used while dispatching a request.

        cdx_reader -- object whose load_for_request() supplies the cdx lines
        replay     -- callable invoked as replay(wbrequest, cdx_lines, cdx_reader)
        html_view  -- optional callable for query listings; when falsy the
                      text query view is used instead
        """
        self.cdx_reader = cdx_reader
        self.replay = replay
        self.html_view = html_view
        self.text_view = views.TextQueryView()

    def __call__(self, wbrequest):
        """Dispatch wbrequest and return whatever the chosen view returns."""
        # The index lookup runs inside a PerfTimer labelled 'query'
        with utils.PerfTimer(wbrequest.env.get('X_PERF'), 'query'):
            cdx_lines = self.cdx_reader.load_for_request(wbrequest, parsed_cdx = True)

        wb_url = wbrequest.wb_url

        # The special 'cdx_' modifier always shows the cdx index as text
        if wb_url.mod == 'cdx_':
            return self.text_view(wbrequest, cdx_lines)

        is_query = (wb_url.type == wb_url.QUERY) or (wb_url.type == wb_url.URL_QUERY)
        if is_query:
            # Use the html view when one was configured, otherwise plain text
            query_view = self.html_view or self.text_view
            return query_view(wbrequest, cdx_lines)

        # Anything else is replayed, inside a PerfTimer labelled 'replay'
        with utils.PerfTimer(wbrequest.env.get('X_PERF'), 'replay'):
            return self.replay(wbrequest, cdx_lines, self.cdx_reader)
#=================================================================
class ReplayHandler(object):
class ReplayView:
def __init__(self, resolvers, archiveloader):
self.resolvers = resolvers
self.loader = archiveloader
@ -238,10 +212,10 @@ class ReplayHandler(object):
#=================================================================
class RewritingReplayHandler(ReplayHandler):
class RewritingReplayView(ReplayView):
def __init__(self, resolvers, archiveloader, headInsert = None, headerRewriter = None, redir_to_exact = True, buffer_response = False):
ReplayHandler.__init__(self, resolvers, archiveloader)
ReplayView.__init__(self, resolvers, archiveloader)
self.headInsert = headInsert
if not headerRewriter:
headerRewriter = HeaderRewriter()
@ -264,7 +238,7 @@ class RewritingReplayHandler(ReplayHandler):
urlrewriter = ArchivalUrlRewriter(wbrequest.wb_url, wbrequest.wb_prefix)
wbrequest.urlrewriter = urlrewriter
response = ReplayHandler.__call__(self, wbrequest, index, cdx_reader)
response = ReplayView.__call__(self, wbrequest, index, cdx_reader)
if response and response.cdx:
self._checkRedir(wbrequest, response.cdx)
@ -419,7 +393,7 @@ class RewritingReplayHandler(ReplayHandler):
def doReplay(self, cdx, wbrequest, index, failedFiles):
wbresponse = ReplayHandler.doReplay(self, cdx, wbrequest, index, failedFiles)
wbresponse = ReplayView.doReplay(self, cdx, wbrequest, index, failedFiles)
# Check for self redirect
if wbresponse.status_headers.statusline.startswith('3'):

View File

@ -36,14 +36,3 @@ class J2QueryView:
return wbrequestresponse.WbResponse.text_response(str(response), content_type = 'text/html')
#=================================================================
class DebugEchoView:
    """Debug view: respond with the string form of the request's env."""

    def __call__(self, wbrequest):
        """Return a text response containing str(wbrequest.env)."""
        env_text = str(wbrequest.env)
        return wbrequestresponse.WbResponse.text_response(env_text)
#=================================================================
class DebugEchoView:
    """Debug view: respond with the string form of the parsed request."""

    def __call__(self, wbrequest):
        """Return a text response containing str(wbrequest)."""
        request_text = str(wbrequest)
        return wbrequestresponse.WbResponse.text_response(request_text)