mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
more refactoring: separate top-level handlers (WBHandler) from
views (html, text). Add CDXHandler for interfacing with cdx server directly, #12
This commit is contained in:
parent
1a234f2953
commit
c0f8edf517
72
pywb/handlers.py
Normal file
72
pywb/handlers.py
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
import views
|
||||||
|
import utils
|
||||||
|
import urlparse
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
# Standard WB Handler
|
||||||
|
#=================================================================
|
||||||
|
class WBHandler:
    """Top-level wayback handler.

    Loads the cdx lines for a request through ``cdx_reader`` and then hands
    them to one of three callables: the plain-text query view, the optional
    html query view, or the ``replay`` callable.
    """

    def __init__(self, cdx_reader, replay, html_view = None):
        """Store the collaborators used by ``__call__``.

        cdx_reader -- object providing ``load_for_request(wbrequest, parsed_cdx=...)``
        replay     -- callable invoked as ``replay(wbrequest, cdx_lines, cdx_reader)``
        html_view  -- optional callable invoked as ``html_view(wbrequest, cdx_lines)``;
                      when falsy, query requests fall back to the text view
        """
        self.cdx_reader = cdx_reader
        self.replay = replay
        self.html_view = html_view
        self.text_view = views.TextQueryView()

    def __call__(self, wbrequest):
        """Return the response for ``wbrequest``.

        The index lookup and the replay step are each wrapped in a
        ``utils.PerfTimer`` context (presumably timing them when the
        ``X_PERF`` environ entry is set -- confirm in ``utils``).
        """
        query_timer = utils.PerfTimer(wbrequest.env.get('X_PERF'), 'query')
        with query_timer:
            cdx_lines = self.cdx_reader.load_for_request(wbrequest,
                                                         parsed_cdx = True)

        # The special 'cdx_' modifier always shows the raw cdx index as text.
        if wbrequest.wb_url.mod == 'cdx_':
            return self.text_view(wbrequest, cdx_lines)

        is_query = (wbrequest.wb_url.type == wbrequest.wb_url.QUERY or
                    wbrequest.wb_url.type == wbrequest.wb_url.URL_QUERY)

        if is_query:
            # Prefer the html view when one was configured, else plain text.
            query_view = self.html_view or self.text_view
            return query_view(wbrequest, cdx_lines)

        # Anything that is not a query is replayed.
        replay_timer = utils.PerfTimer(wbrequest.env.get('X_PERF'), 'replay')
        with replay_timer:
            return self.replay(wbrequest, cdx_lines, self.cdx_reader)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
# CDX-Server Handler -- pass all params to cdx server
|
||||||
|
#=================================================================
|
||||||
|
class CDXHandler:
    """Handler that forwards the request's query parameters to the cdx reader.

    The target url is taken from the ``url=`` query parameter, and the full
    parsed query string is passed through to ``cdx_reader.load_cdx``.
    """

    def __init__(self, cdx_reader, view = None):
        """Store the cdx reader and the view used to render its output.

        cdx_reader -- object providing ``load_cdx(url, params, parsed_cdx=...)``
        view       -- optional callable invoked as ``view(wbrequest, cdx_lines)``;
                      defaults to ``views.TextQueryView()`` when falsy
        """
        self.cdx_reader = cdx_reader
        self.view = view if view else views.TextQueryView()

    def __call__(self, wbrequest):
        """Query the cdx reader for the ``url=`` param and render the result.

        Raises ``Exception`` when no non-empty ``url=`` parameter is present.
        """
        # WSGI allows QUERY_STRING to be absent from the environ, so default
        # to an empty string: a missing query then produces the explicit
        # "Must specify a url=" error below instead of a bare KeyError.
        query_string = wbrequest.env.get('QUERY_STRING', '')

        # parse_qs maps each parameter name to a *list* of values.
        params = urlparse.parse_qs(query_string)

        urls = params.get('url')
        if not urls:
            raise Exception('Must specify a url= param to query cdx server')

        # Only the first url= value is used as the lookup key; the complete
        # params dict (still holding lists) is passed along unchanged.
        cdx_lines = self.cdx_reader.load_cdx(urls[0], params, parsed_cdx = False)

        return self.view(wbrequest, cdx_lines)
|
||||||
|
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
# Debug Handlers
|
||||||
|
#=================================================================
|
||||||
|
class DebugEchoEnvHandler:
    """Debug handler that echoes the request's environ as a text response."""

    def __call__(self, wbrequest):
        """Return ``str(wbrequest.env)`` wrapped in a text response."""
        # Imported locally because wbrequestresponse is not among this
        # module's top-level imports; without this, the reference below
        # raised NameError on every call.
        import wbrequestresponse

        return wbrequestresponse.WbResponse.text_response(str(wbrequest.env))
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
class DebugEchoHandler:
    """Debug handler that echoes the parsed request as a text response."""

    def __call__(self, wbrequest):
        """Return ``str(wbrequest)`` wrapped in a text response."""
        # Imported locally because wbrequestresponse is not among this
        # module's top-level imports; without this, the reference below
        # raised NameError on every call.
        import wbrequestresponse

        return wbrequestresponse.WbResponse.text_response(str(wbrequest))
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,7 +1,8 @@
|
|||||||
import archiveloader
|
import archiveloader
|
||||||
import views
|
import views
|
||||||
|
import handlers
|
||||||
import indexreader
|
import indexreader
|
||||||
import replay
|
import replay_views
|
||||||
import replay_resolvers
|
import replay_resolvers
|
||||||
import cdxserve
|
import cdxserve
|
||||||
from archivalrouter import ArchivalRequestRouter, Route
|
from archivalrouter import ArchivalRequestRouter, Route
|
||||||
@ -24,19 +25,23 @@ def pywb_config(head_insert = ''):
|
|||||||
prefixes = [replay_resolvers.PrefixResolver(test_dir)]
|
prefixes = [replay_resolvers.PrefixResolver(test_dir)]
|
||||||
|
|
||||||
# Create rewriting replay handler to rewrite records
|
# Create rewriting replay handler to rewrite records
|
||||||
replayer = replay.RewritingReplayHandler(resolvers = prefixes, archiveloader = aloader, headInsert = head_insert, buffer_response = True)
|
replayer = replay_views.RewritingReplayView(resolvers = prefixes, archiveloader = aloader, headInsert = head_insert, buffer_response = True)
|
||||||
|
|
||||||
# Create Jinja2 based html query view
|
# Create Jinja2 based html query view
|
||||||
html_view = views.J2QueryView('./ui/', 'query.html')
|
html_view = views.J2QueryView('./ui/', 'query.html')
|
||||||
|
|
||||||
# WB handler which uses the index reader, replayer, and html_view
|
# WB handler which uses the index reader, replayer, and html_view
|
||||||
wb_handler = replay.WBHandler(indexs, replayer, html_view)
|
wb_handler = handlers.WBHandler(indexs, replayer, html_view)
|
||||||
|
|
||||||
|
# cdx handler
|
||||||
|
cdx_handler = handlers.CDXHandler(indexs)
|
||||||
|
|
||||||
# Finally, create wb router
|
# Finally, create wb router
|
||||||
return ArchivalRequestRouter(
|
return ArchivalRequestRouter(
|
||||||
{
|
{
|
||||||
Route('echo_req', views.DebugEchoView()), # Debug ex: just echo parsed request
|
Route('echo_req', handlers.DebugEchoHandler()), # Debug ex: just echo parsed request
|
||||||
Route('pywb', wb_handler),
|
Route('pywb', wb_handler),
|
||||||
|
Route('cdx', cdx_handler),
|
||||||
},
|
},
|
||||||
# Specify hostnames that pywb will be running on
|
# Specify hostnames that pywb will be running on
|
||||||
# This will help catch occasionally missed rewrites that fall-through to the host
|
# This will help catch occasionally missed rewrites that fall-through to the host
|
||||||
|
@ -5,7 +5,6 @@ import copy
|
|||||||
import itertools
|
import itertools
|
||||||
|
|
||||||
import archiveloader
|
import archiveloader
|
||||||
import views
|
|
||||||
from wbrequestresponse import WbResponse, StatusAndHeaders
|
from wbrequestresponse import WbResponse, StatusAndHeaders
|
||||||
from wbarchivalurl import ArchivalUrl
|
from wbarchivalurl import ArchivalUrl
|
||||||
import utils
|
import utils
|
||||||
@ -17,34 +16,9 @@ import regex_rewriters
|
|||||||
|
|
||||||
import wbexceptions
|
import wbexceptions
|
||||||
|
|
||||||
#=================================================================
|
|
||||||
class WBHandler:
|
|
||||||
def __init__(self, cdx_reader, replay, html_view = None):
|
|
||||||
self.cdx_reader = cdx_reader
|
|
||||||
self.replay = replay
|
|
||||||
self.html_view = html_view
|
|
||||||
self.text_view = views.TextQueryView()
|
|
||||||
|
|
||||||
def __call__(self, wbrequest):
|
|
||||||
with utils.PerfTimer(wbrequest.env.get('X_PERF'), 'query') as t:
|
|
||||||
cdx_lines = self.cdx_reader.load_for_request(wbrequest, parsed_cdx = True)
|
|
||||||
|
|
||||||
# new special modifier to always show cdx index
|
|
||||||
if wbrequest.wb_url.mod == 'cdx_':
|
|
||||||
return self.text_view(wbrequest, cdx_lines)
|
|
||||||
|
|
||||||
if (wbrequest.wb_url.type == wbrequest.wb_url.QUERY) or (wbrequest.wb_url.type == wbrequest.wb_url.URL_QUERY):
|
|
||||||
if not self.html_view:
|
|
||||||
return self.text_view(wbrequest, cdx_lines)
|
|
||||||
else:
|
|
||||||
return self.html_view(wbrequest, cdx_lines)
|
|
||||||
|
|
||||||
with utils.PerfTimer(wbrequest.env.get('X_PERF'), 'replay') as t:
|
|
||||||
return self.replay(wbrequest, cdx_lines, self.cdx_reader)
|
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class ReplayHandler(object):
|
class ReplayView:
|
||||||
def __init__(self, resolvers, archiveloader):
|
def __init__(self, resolvers, archiveloader):
|
||||||
self.resolvers = resolvers
|
self.resolvers = resolvers
|
||||||
self.loader = archiveloader
|
self.loader = archiveloader
|
||||||
@ -238,10 +212,10 @@ class ReplayHandler(object):
|
|||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class RewritingReplayHandler(ReplayHandler):
|
class RewritingReplayView(ReplayView):
|
||||||
|
|
||||||
def __init__(self, resolvers, archiveloader, headInsert = None, headerRewriter = None, redir_to_exact = True, buffer_response = False):
|
def __init__(self, resolvers, archiveloader, headInsert = None, headerRewriter = None, redir_to_exact = True, buffer_response = False):
|
||||||
ReplayHandler.__init__(self, resolvers, archiveloader)
|
ReplayView.__init__(self, resolvers, archiveloader)
|
||||||
self.headInsert = headInsert
|
self.headInsert = headInsert
|
||||||
if not headerRewriter:
|
if not headerRewriter:
|
||||||
headerRewriter = HeaderRewriter()
|
headerRewriter = HeaderRewriter()
|
||||||
@ -264,7 +238,7 @@ class RewritingReplayHandler(ReplayHandler):
|
|||||||
urlrewriter = ArchivalUrlRewriter(wbrequest.wb_url, wbrequest.wb_prefix)
|
urlrewriter = ArchivalUrlRewriter(wbrequest.wb_url, wbrequest.wb_prefix)
|
||||||
wbrequest.urlrewriter = urlrewriter
|
wbrequest.urlrewriter = urlrewriter
|
||||||
|
|
||||||
response = ReplayHandler.__call__(self, wbrequest, index, cdx_reader)
|
response = ReplayView.__call__(self, wbrequest, index, cdx_reader)
|
||||||
|
|
||||||
if response and response.cdx:
|
if response and response.cdx:
|
||||||
self._checkRedir(wbrequest, response.cdx)
|
self._checkRedir(wbrequest, response.cdx)
|
||||||
@ -419,7 +393,7 @@ class RewritingReplayHandler(ReplayHandler):
|
|||||||
|
|
||||||
|
|
||||||
def doReplay(self, cdx, wbrequest, index, failedFiles):
|
def doReplay(self, cdx, wbrequest, index, failedFiles):
|
||||||
wbresponse = ReplayHandler.doReplay(self, cdx, wbrequest, index, failedFiles)
|
wbresponse = ReplayView.doReplay(self, cdx, wbrequest, index, failedFiles)
|
||||||
|
|
||||||
# Check for self redirect
|
# Check for self redirect
|
||||||
if wbresponse.status_headers.statusline.startswith('3'):
|
if wbresponse.status_headers.statusline.startswith('3'):
|
@ -36,14 +36,3 @@ class J2QueryView:
|
|||||||
return wbrequestresponse.WbResponse.text_response(str(response), content_type = 'text/html')
|
return wbrequestresponse.WbResponse.text_response(str(response), content_type = 'text/html')
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
|
||||||
class DebugEchoView:
|
|
||||||
def __call__(self, wbrequest):
|
|
||||||
return wbrequestresponse.WbResponse.text_response(str(wbrequest.env))
|
|
||||||
|
|
||||||
#=================================================================
|
|
||||||
class DebugEchoView:
|
|
||||||
def __call__(self, wbrequest):
|
|
||||||
return wbrequestresponse.WbResponse.text_response(str(wbrequest))
|
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user