mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
more refactoring: separate top-level handlers (WBHandler) from
views (html, text). Add CDXHandler for interfacing with the cdx server directly, #12
This commit is contained in:
parent
1a234f2953
commit
c0f8edf517
72
pywb/handlers.py
Normal file
72
pywb/handlers.py
Normal file
@ -0,0 +1,72 @@
|
||||
import views
|
||||
import utils
|
||||
import urlparse
|
||||
|
||||
#=================================================================
|
||||
# Standard WB Handler
|
||||
#=================================================================
|
||||
class WBHandler:
    """Top-level handler: load the cdx index for a request, then dispatch.

    Depending on the parsed wb_url, the loaded cdx lines are handed either
    to a query view (text or html) or to the replay callable.
    """

    def __init__(self, cdx_reader, replay, html_view = None):
        """Store the collaborators used by __call__.

        cdx_reader -- object providing load_for_request(wbrequest, parsed_cdx=...)
        replay     -- callable invoked as replay(wbrequest, cdx_lines, cdx_reader)
        html_view  -- optional callable for query results; when falsy, the
                      text view is used for queries instead
        """
        self.html_view = html_view
        self.replay = replay
        self.cdx_reader = cdx_reader
        self.text_view = views.TextQueryView()

    def __call__(self, wbrequest):
        """Handle one request and return whatever the selected view/replay returns."""
        # Index lookup, timed under the 'query' label
        with utils.PerfTimer(wbrequest.env.get('X_PERF'), 'query'):
            index_lines = self.cdx_reader.load_for_request(wbrequest, parsed_cdx = True)

        wb_url = wbrequest.wb_url

        # The special 'cdx_' modifier always shows the cdx index as text
        if wb_url.mod == 'cdx_':
            return self.text_view(wbrequest, index_lines)

        # Query requests go to the html view when configured, else the text view
        is_query = (wb_url.type == wb_url.QUERY) or (wb_url.type == wb_url.URL_QUERY)
        if is_query:
            query_view = self.html_view if self.html_view else self.text_view
            return query_view(wbrequest, index_lines)

        # Everything else is replayed, timed under the 'replay' label
        with utils.PerfTimer(wbrequest.env.get('X_PERF'), 'replay'):
            return self.replay(wbrequest, index_lines, self.cdx_reader)
|
||||
|
||||
|
||||
|
||||
#=================================================================
|
||||
# CDX-Server Handler -- pass all params to cdx server
|
||||
#=================================================================
|
||||
class CDXHandler:
    """Handler that passes all query-string params straight to the cdx reader.

    The url to look up is taken from the url= query parameter, not from the
    request path.
    """

    def __init__(self, cdx_reader, view = None):
        """Store the cdx reader and the view used to render results.

        cdx_reader -- object providing load_cdx(url, params, parsed_cdx=...)
        view       -- optional callable invoked as view(wbrequest, cdx_lines);
                      defaults to views.TextQueryView() when not supplied
        """
        self.cdx_reader = cdx_reader
        self.view = view if view else views.TextQueryView()

    def __call__(self, wbrequest):
        """Query the cdx reader using the request's query string.

        Returns the result of calling the view with the request and the
        unparsed cdx lines.

        Raises Exception if no non-empty url= parameter is present.
        """
        # QUERY_STRING may legitimately be absent from a WSGI environ;
        # treat that the same as an empty query string so the caller gets
        # the descriptive error below rather than a KeyError.
        params = urlparse.parse_qs(wbrequest.env.get('QUERY_STRING', ''))

        # use url= param to get actual url (the path-derived wb_url is
        # intentionally not consulted here)
        url = params.get('url')
        if not url:
            raise Exception('Must specify a url= param to query cdx server')

        # parse_qs maps each key to a list of values; use the first url=
        url = url[0]

        cdx_lines = self.cdx_reader.load_cdx(url, params, parsed_cdx = False)

        return self.view(wbrequest, cdx_lines)
|
||||
|
||||
|
||||
#=================================================================
|
||||
# Debug Handlers
|
||||
#=================================================================
|
||||
class DebugEchoEnvHandler:
    """Debug handler: echo the request's environ back as a text response."""

    def __call__(self, wbrequest):
        """Return WbResponse.text_response() of str(wbrequest.env)."""
        # This module's top-level imports do not include wbrequestresponse,
        # so import it here; otherwise the reference below is a NameError.
        import wbrequestresponse

        return wbrequestresponse.WbResponse.text_response(str(wbrequest.env))
|
||||
|
||||
#=================================================================
|
||||
class DebugEchoHandler:
    """Debug handler: echo the parsed request back as a text response."""

    def __call__(self, wbrequest):
        """Return WbResponse.text_response() of str(wbrequest)."""
        # This module's top-level imports do not include wbrequestresponse,
        # so import it here; otherwise the reference below is a NameError.
        import wbrequestresponse

        return wbrequestresponse.WbResponse.text_response(str(wbrequest))
|
||||
|
||||
|
||||
|
@ -1,7 +1,8 @@
|
||||
import archiveloader
|
||||
import views
|
||||
import handlers
|
||||
import indexreader
|
||||
import replay
|
||||
import replay_views
|
||||
import replay_resolvers
|
||||
import cdxserve
|
||||
from archivalrouter import ArchivalRequestRouter, Route
|
||||
@ -24,19 +25,23 @@ def pywb_config(head_insert = ''):
|
||||
prefixes = [replay_resolvers.PrefixResolver(test_dir)]
|
||||
|
||||
# Create rewriting replay handler to rewrite records
|
||||
replayer = replay.RewritingReplayHandler(resolvers = prefixes, archiveloader = aloader, headInsert = head_insert, buffer_response = True)
|
||||
replayer = replay_views.RewritingReplayView(resolvers = prefixes, archiveloader = aloader, headInsert = head_insert, buffer_response = True)
|
||||
|
||||
# Create Jinja2 based html query view
|
||||
html_view = views.J2QueryView('./ui/', 'query.html')
|
||||
|
||||
# WB handler which uses the index reader, replayer, and html_view
|
||||
wb_handler = replay.WBHandler(indexs, replayer, html_view)
|
||||
wb_handler = handlers.WBHandler(indexs, replayer, html_view)
|
||||
|
||||
# cdx handler
|
||||
cdx_handler = handlers.CDXHandler(indexs)
|
||||
|
||||
# Finally, create wb router
|
||||
return ArchivalRequestRouter(
|
||||
{
|
||||
Route('echo_req', views.DebugEchoView()), # Debug ex: just echo parsed request
|
||||
Route('echo_req', handlers.DebugEchoHandler()), # Debug ex: just echo parsed request
|
||||
Route('pywb', wb_handler),
|
||||
Route('cdx', cdx_handler),
|
||||
},
|
||||
# Specify hostnames that pywb will be running on
|
||||
# This will help catch occasionally missed rewrites that fall-through to the host
|
||||
|
@ -5,7 +5,6 @@ import copy
|
||||
import itertools
|
||||
|
||||
import archiveloader
|
||||
import views
|
||||
from wbrequestresponse import WbResponse, StatusAndHeaders
|
||||
from wbarchivalurl import ArchivalUrl
|
||||
import utils
|
||||
@ -17,34 +16,9 @@ import regex_rewriters
|
||||
|
||||
import wbexceptions
|
||||
|
||||
#=================================================================
|
||||
class WBHandler:
|
||||
def __init__(self, cdx_reader, replay, html_view = None):
|
||||
self.cdx_reader = cdx_reader
|
||||
self.replay = replay
|
||||
self.html_view = html_view
|
||||
self.text_view = views.TextQueryView()
|
||||
|
||||
def __call__(self, wbrequest):
|
||||
with utils.PerfTimer(wbrequest.env.get('X_PERF'), 'query') as t:
|
||||
cdx_lines = self.cdx_reader.load_for_request(wbrequest, parsed_cdx = True)
|
||||
|
||||
# new special modifier to always show cdx index
|
||||
if wbrequest.wb_url.mod == 'cdx_':
|
||||
return self.text_view(wbrequest, cdx_lines)
|
||||
|
||||
if (wbrequest.wb_url.type == wbrequest.wb_url.QUERY) or (wbrequest.wb_url.type == wbrequest.wb_url.URL_QUERY):
|
||||
if not self.html_view:
|
||||
return self.text_view(wbrequest, cdx_lines)
|
||||
else:
|
||||
return self.html_view(wbrequest, cdx_lines)
|
||||
|
||||
with utils.PerfTimer(wbrequest.env.get('X_PERF'), 'replay') as t:
|
||||
return self.replay(wbrequest, cdx_lines, self.cdx_reader)
|
||||
|
||||
|
||||
#=================================================================
|
||||
class ReplayHandler(object):
|
||||
class ReplayView:
|
||||
def __init__(self, resolvers, archiveloader):
|
||||
self.resolvers = resolvers
|
||||
self.loader = archiveloader
|
||||
@ -238,10 +212,10 @@ class ReplayHandler(object):
|
||||
|
||||
|
||||
#=================================================================
|
||||
class RewritingReplayHandler(ReplayHandler):
|
||||
class RewritingReplayView(ReplayView):
|
||||
|
||||
def __init__(self, resolvers, archiveloader, headInsert = None, headerRewriter = None, redir_to_exact = True, buffer_response = False):
|
||||
ReplayHandler.__init__(self, resolvers, archiveloader)
|
||||
ReplayView.__init__(self, resolvers, archiveloader)
|
||||
self.headInsert = headInsert
|
||||
if not headerRewriter:
|
||||
headerRewriter = HeaderRewriter()
|
||||
@ -264,7 +238,7 @@ class RewritingReplayHandler(ReplayHandler):
|
||||
urlrewriter = ArchivalUrlRewriter(wbrequest.wb_url, wbrequest.wb_prefix)
|
||||
wbrequest.urlrewriter = urlrewriter
|
||||
|
||||
response = ReplayHandler.__call__(self, wbrequest, index, cdx_reader)
|
||||
response = ReplayView.__call__(self, wbrequest, index, cdx_reader)
|
||||
|
||||
if response and response.cdx:
|
||||
self._checkRedir(wbrequest, response.cdx)
|
||||
@ -419,7 +393,7 @@ class RewritingReplayHandler(ReplayHandler):
|
||||
|
||||
|
||||
def doReplay(self, cdx, wbrequest, index, failedFiles):
|
||||
wbresponse = ReplayHandler.doReplay(self, cdx, wbrequest, index, failedFiles)
|
||||
wbresponse = ReplayView.doReplay(self, cdx, wbrequest, index, failedFiles)
|
||||
|
||||
# Check for self redirect
|
||||
if wbresponse.status_headers.statusline.startswith('3'):
|
@ -36,14 +36,3 @@ class J2QueryView:
|
||||
return wbrequestresponse.WbResponse.text_response(str(response), content_type = 'text/html')
|
||||
|
||||
|
||||
#=================================================================
|
||||
class DebugEchoView:
|
||||
def __call__(self, wbrequest):
|
||||
return wbrequestresponse.WbResponse.text_response(str(wbrequest.env))
|
||||
|
||||
#=================================================================
|
||||
class DebugEchoView:
|
||||
def __call__(self, wbrequest):
|
||||
return wbrequestresponse.WbResponse.text_response(str(wbrequest))
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user