1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-24 06:59:52 +01:00

more refactoring: separate top-level handlers (WBHandler) from

views (html, text)
Add CDXHandler for interfacing with cdx server directly, #12
This commit is contained in:
Ilya Kreymer 2014-01-28 17:23:44 -08:00
parent 1a234f2953
commit c0f8edf517
4 changed files with 86 additions and 46 deletions

72
pywb/handlers.py Normal file
View File

@ -0,0 +1,72 @@
import views
import utils
import urlparse
#=================================================================
# Standard WB Handler
#=================================================================
class WBHandler:
    """Standard wayback handler: look up cdx lines, then show or replay them.

    cdx_reader -- object providing load_for_request(wbrequest, parsed_cdx=...)
    replay     -- callable invoked as replay(wbrequest, cdx_lines, cdx_reader)
    html_view  -- optional callable view used for query results; when it is
                  falsy the plain-text query view is used instead
    """

    def __init__(self, cdx_reader, replay, html_view=None):
        self.cdx_reader = cdx_reader
        self.replay = replay
        self.html_view = html_view
        self.text_view = views.TextQueryView()

    def __call__(self, wbrequest):
        """Dispatch wbrequest to the text view, the query view, or replay."""
        # The index lookup runs inside a PerfTimer labelled 'query'
        with utils.PerfTimer(wbrequest.env.get('X_PERF'), 'query'):
            cdx_lines = self.cdx_reader.load_for_request(wbrequest,
                                                         parsed_cdx=True)

        wb_url = wbrequest.wb_url

        # Special 'cdx_' modifier: always show the cdx index as text
        if wb_url.mod == 'cdx_':
            return self.text_view(wbrequest, cdx_lines)

        # Query-type urls are rendered by a view rather than replayed
        is_query = (wb_url.type == wb_url.QUERY) or (wb_url.type == wb_url.URL_QUERY)
        if is_query:
            query_view = self.html_view or self.text_view
            return query_view(wbrequest, cdx_lines)

        # Everything else is replayed, inside a PerfTimer labelled 'replay'
        with utils.PerfTimer(wbrequest.env.get('X_PERF'), 'replay'):
            return self.replay(wbrequest, cdx_lines, self.cdx_reader)
#=================================================================
# CDX-Server Handler -- pass all params to cdx server
#=================================================================
class CDXHandler:
    """Handler that passes all query-string params through to the cdx reader.

    cdx_reader -- object providing load_cdx(url, params, parsed_cdx=...)
    view       -- optional callable invoked as view(wbrequest, cdx_lines);
                  defaults to views.TextQueryView() when not supplied
    """

    def __init__(self, cdx_reader, view=None):
        self.cdx_reader = cdx_reader
        self.view = view if view else views.TextQueryView()

    def __call__(self, wbrequest):
        """Query the cdx reader for the url named by the url= query param.

        Raises Exception when no non-empty url= param is present.
        """
        # The target url comes only from the url= query param, so the
        # request's wb_url is not consulted here.  QUERY_STRING may be
        # absent from the environ, so default to an empty query.
        params = urlparse.parse_qs(wbrequest.env.get('QUERY_STRING', ''))

        url_values = params.get('url')
        if not url_values:
            raise Exception('Must specify a url= param to query cdx server')

        # parse_qs maps each key to a list of values; use the first url given
        url = url_values[0]

        cdx_lines = self.cdx_reader.load_cdx(url, params, parsed_cdx=False)
        return self.view(wbrequest, cdx_lines)
#=================================================================
# Debug Handlers
#=================================================================
class DebugEchoEnvHandler:
    """Debug handler: echo the request's env back as a text response."""

    def __call__(self, wbrequest):
        """Return a text response containing str(wbrequest.env)."""
        # This module's top-level imports do not include wbrequestresponse,
        # so import it here; otherwise the reference below is a NameError.
        import wbrequestresponse

        return wbrequestresponse.WbResponse.text_response(str(wbrequest.env))
#=================================================================
class DebugEchoHandler:
    """Debug handler: echo the parsed request back as a text response."""

    def __call__(self, wbrequest):
        """Return a text response containing str(wbrequest)."""
        # This module's top-level imports do not include wbrequestresponse,
        # so import it here; otherwise the reference below is a NameError.
        import wbrequestresponse

        return wbrequestresponse.WbResponse.text_response(str(wbrequest))

View File

@ -1,7 +1,8 @@
import archiveloader import archiveloader
import views import views
import handlers
import indexreader import indexreader
import replay import replay_views
import replay_resolvers import replay_resolvers
import cdxserve import cdxserve
from archivalrouter import ArchivalRequestRouter, Route from archivalrouter import ArchivalRequestRouter, Route
@ -24,19 +25,23 @@ def pywb_config(head_insert = ''):
prefixes = [replay_resolvers.PrefixResolver(test_dir)] prefixes = [replay_resolvers.PrefixResolver(test_dir)]
# Create rewriting replay handler to rewrite records # Create rewriting replay handler to rewrite records
replayer = replay.RewritingReplayHandler(resolvers = prefixes, archiveloader = aloader, headInsert = head_insert, buffer_response = True) replayer = replay_views.RewritingReplayView(resolvers = prefixes, archiveloader = aloader, headInsert = head_insert, buffer_response = True)
# Create Jinja2 based html query view # Create Jinja2 based html query view
html_view = views.J2QueryView('./ui/', 'query.html') html_view = views.J2QueryView('./ui/', 'query.html')
# WB handler which uses the index reader, replayer, and html_view # WB handler which uses the index reader, replayer, and html_view
wb_handler = replay.WBHandler(indexs, replayer, html_view) wb_handler = handlers.WBHandler(indexs, replayer, html_view)
# cdx handler
cdx_handler = handlers.CDXHandler(indexs)
# Finally, create wb router # Finally, create wb router
return ArchivalRequestRouter( return ArchivalRequestRouter(
{ {
Route('echo_req', views.DebugEchoView()), # Debug ex: just echo parsed request Route('echo_req', handlers.DebugEchoHandler()), # Debug ex: just echo parsed request
Route('pywb', wb_handler), Route('pywb', wb_handler),
Route('cdx', cdx_handler),
}, },
# Specify hostnames that pywb will be running on # Specify hostnames that pywb will be running on
# This will help catch occasionally missed rewrites that fall-through to the host # This will help catch occasionally missed rewrites that fall-through to the host

View File

@ -5,7 +5,6 @@ import copy
import itertools import itertools
import archiveloader import archiveloader
import views
from wbrequestresponse import WbResponse, StatusAndHeaders from wbrequestresponse import WbResponse, StatusAndHeaders
from wbarchivalurl import ArchivalUrl from wbarchivalurl import ArchivalUrl
import utils import utils
@ -17,34 +16,9 @@ import regex_rewriters
import wbexceptions import wbexceptions
#=================================================================
class WBHandler:
def __init__(self, cdx_reader, replay, html_view = None):
self.cdx_reader = cdx_reader
self.replay = replay
self.html_view = html_view
self.text_view = views.TextQueryView()
def __call__(self, wbrequest):
with utils.PerfTimer(wbrequest.env.get('X_PERF'), 'query') as t:
cdx_lines = self.cdx_reader.load_for_request(wbrequest, parsed_cdx = True)
# new special modifier to always show cdx index
if wbrequest.wb_url.mod == 'cdx_':
return self.text_view(wbrequest, cdx_lines)
if (wbrequest.wb_url.type == wbrequest.wb_url.QUERY) or (wbrequest.wb_url.type == wbrequest.wb_url.URL_QUERY):
if not self.html_view:
return self.text_view(wbrequest, cdx_lines)
else:
return self.html_view(wbrequest, cdx_lines)
with utils.PerfTimer(wbrequest.env.get('X_PERF'), 'replay') as t:
return self.replay(wbrequest, cdx_lines, self.cdx_reader)
#================================================================= #=================================================================
class ReplayHandler(object): class ReplayView:
def __init__(self, resolvers, archiveloader): def __init__(self, resolvers, archiveloader):
self.resolvers = resolvers self.resolvers = resolvers
self.loader = archiveloader self.loader = archiveloader
@ -238,10 +212,10 @@ class ReplayHandler(object):
#================================================================= #=================================================================
class RewritingReplayHandler(ReplayHandler): class RewritingReplayView(ReplayView):
def __init__(self, resolvers, archiveloader, headInsert = None, headerRewriter = None, redir_to_exact = True, buffer_response = False): def __init__(self, resolvers, archiveloader, headInsert = None, headerRewriter = None, redir_to_exact = True, buffer_response = False):
ReplayHandler.__init__(self, resolvers, archiveloader) ReplayView.__init__(self, resolvers, archiveloader)
self.headInsert = headInsert self.headInsert = headInsert
if not headerRewriter: if not headerRewriter:
headerRewriter = HeaderRewriter() headerRewriter = HeaderRewriter()
@ -264,7 +238,7 @@ class RewritingReplayHandler(ReplayHandler):
urlrewriter = ArchivalUrlRewriter(wbrequest.wb_url, wbrequest.wb_prefix) urlrewriter = ArchivalUrlRewriter(wbrequest.wb_url, wbrequest.wb_prefix)
wbrequest.urlrewriter = urlrewriter wbrequest.urlrewriter = urlrewriter
response = ReplayHandler.__call__(self, wbrequest, index, cdx_reader) response = ReplayView.__call__(self, wbrequest, index, cdx_reader)
if response and response.cdx: if response and response.cdx:
self._checkRedir(wbrequest, response.cdx) self._checkRedir(wbrequest, response.cdx)
@ -419,7 +393,7 @@ class RewritingReplayHandler(ReplayHandler):
def doReplay(self, cdx, wbrequest, index, failedFiles): def doReplay(self, cdx, wbrequest, index, failedFiles):
wbresponse = ReplayHandler.doReplay(self, cdx, wbrequest, index, failedFiles) wbresponse = ReplayView.doReplay(self, cdx, wbrequest, index, failedFiles)
# Check for self redirect # Check for self redirect
if wbresponse.status_headers.statusline.startswith('3'): if wbresponse.status_headers.statusline.startswith('3'):

View File

@ -36,14 +36,3 @@ class J2QueryView:
return wbrequestresponse.WbResponse.text_response(str(response), content_type = 'text/html') return wbrequestresponse.WbResponse.text_response(str(response), content_type = 'text/html')
#=================================================================
class DebugEchoView:
def __call__(self, wbrequest):
return wbrequestresponse.WbResponse.text_response(str(wbrequest.env))
#=================================================================
class DebugEchoView:
def __call__(self, wbrequest):
return wbrequestresponse.WbResponse.text_response(str(wbrequest))