mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
Fix #5, bringing back customParams optional params sent to cdx server
Rename archivalrouter.MatchRegex -> archivalrouter.Route, supporting regex/prefix matching; add redir_to_exact to turn off redirect to exact timestamp in RewritingReplayHandler; update README
This commit is contained in:
parent
80b2585d22
commit
9ff3fc300b
@ -65,7 +65,7 @@ one could declare a `createWB()` method as follows:
|
||||
|
||||
return ArchivalRequestRouter(
|
||||
{
|
||||
MatchPrefix('mycoll': replay.WBHandler(query, replay)),
|
||||
Route('mycoll', replay.WBHandler(query, replay)),
|
||||
},
|
||||
hostpaths = ['http://mywb.example.com:8080/'])
|
||||
|
||||
@ -73,7 +73,7 @@ one could declare a `createWB()` method as follows:
|
||||
Quick File Reference
|
||||
--------------------
|
||||
|
||||
- `archivalrouter.py`- Archival mode routing and referer fallback, include MatchPrefix and MatchRegex
|
||||
- `archivalrouter.py` - Archival mode routing by regex and fallback based on referrer
|
||||
|
||||
- `archiveloader.py` - IO for loading W/ARC data
|
||||
|
||||
|
@ -28,51 +28,12 @@ class ArchivalRequestRouter:
|
||||
|
||||
|
||||
|
||||
#=================================================================
|
||||
# Route by matching prefix -- deprecated, as MatchRegex
|
||||
# also supports the same
|
||||
#=================================================================
|
||||
|
||||
class MatchPrefix:
|
||||
def __init__(self, prefix, handler):
|
||||
self.prefix = '/' + prefix + '/' if prefix else '/'
|
||||
self.coll = prefix
|
||||
self.handler = handler
|
||||
|
||||
|
||||
def __call__(self, env, useAbsPrefix, archivalurl_class):
|
||||
request_uri = env['REL_REQUEST_URI']
|
||||
if not request_uri.startswith(self.prefix):
|
||||
return None
|
||||
|
||||
if self.coll:
|
||||
wb_prefix = env['SCRIPT_NAME'] + self.prefix
|
||||
wb_url = request_uri[len(self.coll) + 1:]
|
||||
else:
|
||||
wb_prefix = env['SCRIPT_NAME'] + self.prefix
|
||||
wb_url = request_uri
|
||||
|
||||
wbrequest = WbRequest(env,
|
||||
request_uri = request_uri,
|
||||
coll = self.coll,
|
||||
wb_url = wb_url,
|
||||
wb_prefix = wb_prefix,
|
||||
use_abs_prefix = useAbsPrefix,
|
||||
archivalurl_class = archivalurl_class)
|
||||
|
||||
return self._handleRequest(wbrequest)
|
||||
|
||||
|
||||
def _handleRequest(self, wbrequest):
|
||||
return self.handler(wbrequest)
|
||||
|
||||
|
||||
|
||||
#=================================================================
|
||||
# Route by matching regex of request uri (excluding first '/')
|
||||
# May be a fixed prefix
|
||||
# Route by matching regex (or fixed prefix)
|
||||
# of request uri (excluding first '/')
|
||||
#=================================================================
|
||||
class MatchRegex:
|
||||
class Route:
|
||||
def __init__(self, regex, handler, coll_group = 0):
|
||||
self.regex = re.compile(regex)
|
||||
self.handler = handler
|
||||
|
@ -18,10 +18,13 @@ class QueryHandler:
|
||||
# init standard params
|
||||
params = self.cdxserver.getQueryParams(wburl)
|
||||
|
||||
# add any custom params from the request
|
||||
# add any custom filter from the request
|
||||
if wbrequest.queryFilter:
|
||||
params['filter'] = wbrequest.queryFilter
|
||||
|
||||
if wbrequest.customParams:
|
||||
params.update(wbrequest.customParams)
|
||||
|
||||
cdxlines = self.cdxserver.load(wburl.url, params)
|
||||
|
||||
cdxlines = utils.peek_iter(cdxlines)
|
||||
|
@ -198,12 +198,13 @@ class ReplayHandler(object):
|
||||
#=================================================================
|
||||
class RewritingReplayHandler(ReplayHandler):
|
||||
|
||||
def __init__(self, resolvers, archiveloader, headInsert = None, headerRewriter = None):
|
||||
def __init__(self, resolvers, archiveloader, headInsert = None, headerRewriter = None, redir_to_exact = True):
|
||||
ReplayHandler.__init__(self, resolvers, archiveloader)
|
||||
self.headInsert = headInsert
|
||||
if not headerRewriter:
|
||||
headerRewriter = HeaderRewriter()
|
||||
self.headerRewriter = headerRewriter
|
||||
self.redir_to_exact = redir_to_exact
|
||||
|
||||
|
||||
def _textContentType(self, contentType):
|
||||
@ -333,7 +334,7 @@ class RewritingReplayHandler(ReplayHandler):
|
||||
return (result['encoding'], buff)
|
||||
|
||||
def _checkRedir(self, wbrequest, cdx):
|
||||
if cdx and (cdx['timestamp'] != wbrequest.wb_url.timestamp):
|
||||
if self.redir_to_exact and cdx and (cdx['timestamp'] != wbrequest.wb_url.timestamp):
|
||||
newUrl = wbrequest.urlrewriter.getTimestampUrl(cdx['timestamp'], cdx['original'])
|
||||
raise wbexceptions.InternalRedirect(newUrl)
|
||||
#return WbResponse.better_timestamp_response(wbrequest, cdx['timestamp'])
|
||||
|
@ -5,7 +5,7 @@ import wbexceptions
|
||||
import indexreader
|
||||
|
||||
from wbrequestresponse import WbResponse, StatusAndHeaders
|
||||
from archivalrouter import ArchivalRequestRouter, MatchPrefix
|
||||
from archivalrouter import ArchivalRequestRouter, Route
|
||||
|
||||
## ===========
|
||||
headInsert = """
|
||||
@ -49,7 +49,7 @@ one could declare a `createWB()` method as follows:
|
||||
|
||||
return ArchivalRequestRouter(
|
||||
{
|
||||
MatchPrefix('mycoll', WBHandler(query, replay))
|
||||
Route('mycoll', WBHandler(query, replay))
|
||||
},
|
||||
hostpaths = ['http://mywb.example.com:8080/'])
|
||||
'''
|
||||
@ -58,10 +58,10 @@ def createDefaultWB(headInsert):
|
||||
query = QueryHandler(indexreader.RemoteCDXServer('http://web.archive.org/cdx/search/cdx'))
|
||||
return ArchivalRequestRouter(
|
||||
{
|
||||
MatchPrefix('echo', EchoEnv()), # Just echo the env
|
||||
MatchPrefix('req', EchoRequest()), # Echo the WbRequest
|
||||
MatchPrefix('cdx', query), # Query the CDX
|
||||
MatchPrefix('web', query), # Query the CDX
|
||||
Route('echo', EchoEnv()), # Just echo the env
|
||||
Route('req', EchoRequest()), # Echo the WbRequest
|
||||
Route('cdx', query), # Query the CDX
|
||||
Route('web', query), # Query the CDX
|
||||
},
|
||||
hostpaths = ['http://localhost:9090/'])
|
||||
## ===========
|
||||
|
@ -78,6 +78,8 @@ class WbRequest:
|
||||
|
||||
self.queryFilter = []
|
||||
|
||||
self.customParams = {}
|
||||
|
||||
# PERF
|
||||
env['X_PERF'] = {}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user