1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

Fix #5: restore customParams, the optional params sent to the cdx server

Rename archivalrouter.MatchRegex -> archivalrouter.Route, supporting regex/prefix matching
add redir_to_exact to turn off redirect to exact timestamp in RewritingReplayHandler
update README
This commit is contained in:
Ilya Kreymer 2014-01-20 10:50:06 -08:00
parent 80b2585d22
commit 9ff3fc300b
6 changed files with 20 additions and 53 deletions

View File

@ -65,7 +65,7 @@ one could declare a `createWB()` method as follows:
return ArchivalRequestRouter(
{
MatchPrefix('mycoll': replay.WBHandler(query, replay)),
Route('mycoll', replay.WBHandler(query, replay)),
},
hostpaths = ['http://mywb.example.com:8080/'])
@ -73,7 +73,7 @@ one could declare a `createWB()` method as follows:
Quick File Reference
--------------------
- `archivalrouter.py`- Archival mode routing and referer fallback, include MatchPrefix and MatchRegex
- `archivalrouter.py` - Archival mode routing by regex and fallback based on referrer
- `archiveloader.py` - IO for loading W/ARC data

View File

@ -28,51 +28,12 @@ class ArchivalRequestRouter:
#=================================================================
# Route by matching prefix -- deprecated, since MatchRegex
# supports prefix matching as well
#=================================================================
class MatchPrefix:
    """Route a request whose relative URI starts with a fixed collection prefix.

    Deprecated in favour of regex-based routing, which covers the same
    use case.
    """

    def __init__(self, prefix, handler):
        """Store the collection name, its '/'-delimited prefix and the handler.

        An empty (falsy) ``prefix`` matches from the root, i.e. ``'/'``.
        """
        self.coll = prefix
        self.handler = handler
        if prefix:
            self.prefix = '/' + prefix + '/'
        else:
            self.prefix = '/'

    def __call__(self, env, useAbsPrefix, archivalurl_class):
        """Dispatch to the handler if the request URI matches the prefix.

        Reads ``REL_REQUEST_URI`` and ``SCRIPT_NAME`` from ``env``.
        Returns ``None`` when the URI does not start with this route's
        prefix; otherwise returns whatever the handler returns.
        """
        request_uri = env['REL_REQUEST_URI']
        if not request_uri.startswith(self.prefix):
            return None

        # With a collection, drop the leading '/<coll>' but keep the slash
        # that follows it; without one, the whole URI is passed through.
        wb_url = request_uri[len(self.coll) + 1:] if self.coll else request_uri

        wbrequest = WbRequest(
            env,
            request_uri=request_uri,
            coll=self.coll,
            wb_url=wb_url,
            wb_prefix=env['SCRIPT_NAME'] + self.prefix,
            use_abs_prefix=useAbsPrefix,
            archivalurl_class=archivalurl_class,
        )
        return self._handleRequest(wbrequest)

    def _handleRequest(self, wbrequest):
        """Invoke the configured handler with the constructed request."""
        return self.handler(wbrequest)
#=================================================================
# Route by matching regex of request uri (excluding first '/')
# May be a fixed prefix
# Route by matching regex (or fixed prefix)
# of request uri (excluding first '/')
#=================================================================
class MatchRegex:
class Route:
def __init__(self, regex, handler, coll_group = 0):
self.regex = re.compile(regex)
self.handler = handler

View File

@ -18,10 +18,13 @@ class QueryHandler:
# init standard params
params = self.cdxserver.getQueryParams(wburl)
# add any custom params from the request
# add any custom filter from the request
if wbrequest.queryFilter:
params['filter'] = wbrequest.queryFilter
if wbrequest.customParams:
params.update(wbrequest.customParams)
cdxlines = self.cdxserver.load(wburl.url, params)
cdxlines = utils.peek_iter(cdxlines)

View File

@ -198,12 +198,13 @@ class ReplayHandler(object):
#=================================================================
class RewritingReplayHandler(ReplayHandler):
def __init__(self, resolvers, archiveloader, headInsert = None, headerRewriter = None):
def __init__(self, resolvers, archiveloader, headInsert = None, headerRewriter = None, redir_to_exact = True):
ReplayHandler.__init__(self, resolvers, archiveloader)
self.headInsert = headInsert
if not headerRewriter:
headerRewriter = HeaderRewriter()
self.headerRewriter = headerRewriter
self.redir_to_exact = redir_to_exact
def _textContentType(self, contentType):
@ -333,7 +334,7 @@ class RewritingReplayHandler(ReplayHandler):
return (result['encoding'], buff)
def _checkRedir(self, wbrequest, cdx):
if cdx and (cdx['timestamp'] != wbrequest.wb_url.timestamp):
if self.redir_to_exact and cdx and (cdx['timestamp'] != wbrequest.wb_url.timestamp):
newUrl = wbrequest.urlrewriter.getTimestampUrl(cdx['timestamp'], cdx['original'])
raise wbexceptions.InternalRedirect(newUrl)
#return WbResponse.better_timestamp_response(wbrequest, cdx['timestamp'])

View File

@ -5,7 +5,7 @@ import wbexceptions
import indexreader
from wbrequestresponse import WbResponse, StatusAndHeaders
from archivalrouter import ArchivalRequestRouter, MatchPrefix
from archivalrouter import ArchivalRequestRouter, Route
## ===========
headInsert = """
@ -49,7 +49,7 @@ one could declare a `createWB()` method as follows:
return ArchivalRequestRouter(
{
MatchPrefix('mycoll', WBHandler(query, replay))
Route('mycoll', WBHandler(query, replay))
},
hostpaths = ['http://mywb.example.com:8080/'])
'''
@ -58,10 +58,10 @@ def createDefaultWB(headInsert):
query = QueryHandler(indexreader.RemoteCDXServer('http://web.archive.org/cdx/search/cdx'))
return ArchivalRequestRouter(
{
MatchPrefix('echo', EchoEnv()), # Just echo the env
MatchPrefix('req', EchoRequest()), # Echo the WbRequest
MatchPrefix('cdx', query), # Query the CDX
MatchPrefix('web', query), # Query the CDX
Route('echo', EchoEnv()), # Just echo the env
Route('req', EchoRequest()), # Echo the WbRequest
Route('cdx', query), # Query the CDX
Route('web', query), # Query the CDX
},
hostpaths = ['http://localhost:9090/'])
## ===========

View File

@ -78,6 +78,8 @@ class WbRequest:
self.queryFilter = []
self.customParams = {}
# PERF
env['X_PERF'] = {}