mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-28 00:25:21 +01:00
Fix #5, bringing back customParams optional params sent to cdx server
Rename archivalrouter.MatchRegex -> archivalrouter.Route, which supports both regex and prefix matching. Add redir_to_exact option to turn off the redirect to the exact timestamp in RewritingReplayHandler. Update README.
This commit is contained in:
parent
80b2585d22
commit
9ff3fc300b
@ -65,7 +65,7 @@ one could declare a `createWB()` method as follows:
|
|||||||
|
|
||||||
return ArchivalRequestRouter(
|
return ArchivalRequestRouter(
|
||||||
{
|
{
|
||||||
MatchPrefix('mycoll': replay.WBHandler(query, replay)),
|
Route('mycoll', replay.WBHandler(query, replay)),
|
||||||
},
|
},
|
||||||
hostpaths = ['http://mywb.example.com:8080/'])
|
hostpaths = ['http://mywb.example.com:8080/'])
|
||||||
|
|
||||||
@ -73,7 +73,7 @@ one could declare a `createWB()` method as follows:
|
|||||||
Quick File Reference
|
Quick File Reference
|
||||||
--------------------
|
--------------------
|
||||||
|
|
||||||
- `archivalrouter.py`- Archival mode routing and referer fallback, include MatchPrefix and MatchRegex
|
- `archivalrouter.py` - Archival mode routing by regex (or fixed prefix), with fallback based on the referrer
|
||||||
|
|
||||||
- `archiveloader.py` - IO for loading W/ARC data
|
- `archiveloader.py` - IO for loading W/ARC data
|
||||||
|
|
||||||
|
@ -28,51 +28,12 @@ class ArchivalRequestRouter:
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
|
||||||
# Route by matching prefix -- deprecated, as MatchRegex
|
|
||||||
# also supports the same
|
|
||||||
#=================================================================
|
|
||||||
|
|
||||||
class MatchPrefix:
|
|
||||||
def __init__(self, prefix, handler):
|
|
||||||
self.prefix = '/' + prefix + '/' if prefix else '/'
|
|
||||||
self.coll = prefix
|
|
||||||
self.handler = handler
|
|
||||||
|
|
||||||
|
|
||||||
def __call__(self, env, useAbsPrefix, archivalurl_class):
|
|
||||||
request_uri = env['REL_REQUEST_URI']
|
|
||||||
if not request_uri.startswith(self.prefix):
|
|
||||||
return None
|
|
||||||
|
|
||||||
if self.coll:
|
|
||||||
wb_prefix = env['SCRIPT_NAME'] + self.prefix
|
|
||||||
wb_url = request_uri[len(self.coll) + 1:]
|
|
||||||
else:
|
|
||||||
wb_prefix = env['SCRIPT_NAME'] + self.prefix
|
|
||||||
wb_url = request_uri
|
|
||||||
|
|
||||||
wbrequest = WbRequest(env,
|
|
||||||
request_uri = request_uri,
|
|
||||||
coll = self.coll,
|
|
||||||
wb_url = wb_url,
|
|
||||||
wb_prefix = wb_prefix,
|
|
||||||
use_abs_prefix = useAbsPrefix,
|
|
||||||
archivalurl_class = archivalurl_class)
|
|
||||||
|
|
||||||
return self._handleRequest(wbrequest)
|
|
||||||
|
|
||||||
|
|
||||||
def _handleRequest(self, wbrequest):
|
|
||||||
return self.handler(wbrequest)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
# Route by matching regex of request uri (excluding first '/')
|
# Route by matching regex (or fixed prefix)
|
||||||
# May be a fixed prefix
|
# of request uri (excluding first '/')
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class MatchRegex:
|
class Route:
|
||||||
def __init__(self, regex, handler, coll_group = 0):
|
def __init__(self, regex, handler, coll_group = 0):
|
||||||
self.regex = re.compile(regex)
|
self.regex = re.compile(regex)
|
||||||
self.handler = handler
|
self.handler = handler
|
||||||
|
@ -18,10 +18,13 @@ class QueryHandler:
|
|||||||
# init standard params
|
# init standard params
|
||||||
params = self.cdxserver.getQueryParams(wburl)
|
params = self.cdxserver.getQueryParams(wburl)
|
||||||
|
|
||||||
# add any custom params from the request
|
# add any custom filter from the request
|
||||||
if wbrequest.queryFilter:
|
if wbrequest.queryFilter:
|
||||||
params['filter'] = wbrequest.queryFilter
|
params['filter'] = wbrequest.queryFilter
|
||||||
|
|
||||||
|
if wbrequest.customParams:
|
||||||
|
params.update(wbrequest.customParams)
|
||||||
|
|
||||||
cdxlines = self.cdxserver.load(wburl.url, params)
|
cdxlines = self.cdxserver.load(wburl.url, params)
|
||||||
|
|
||||||
cdxlines = utils.peek_iter(cdxlines)
|
cdxlines = utils.peek_iter(cdxlines)
|
||||||
|
@ -198,12 +198,13 @@ class ReplayHandler(object):
|
|||||||
#=================================================================
|
#=================================================================
|
||||||
class RewritingReplayHandler(ReplayHandler):
|
class RewritingReplayHandler(ReplayHandler):
|
||||||
|
|
||||||
def __init__(self, resolvers, archiveloader, headInsert = None, headerRewriter = None):
|
def __init__(self, resolvers, archiveloader, headInsert = None, headerRewriter = None, redir_to_exact = True):
|
||||||
ReplayHandler.__init__(self, resolvers, archiveloader)
|
ReplayHandler.__init__(self, resolvers, archiveloader)
|
||||||
self.headInsert = headInsert
|
self.headInsert = headInsert
|
||||||
if not headerRewriter:
|
if not headerRewriter:
|
||||||
headerRewriter = HeaderRewriter()
|
headerRewriter = HeaderRewriter()
|
||||||
self.headerRewriter = headerRewriter
|
self.headerRewriter = headerRewriter
|
||||||
|
self.redir_to_exact = redir_to_exact
|
||||||
|
|
||||||
|
|
||||||
def _textContentType(self, contentType):
|
def _textContentType(self, contentType):
|
||||||
@ -333,7 +334,7 @@ class RewritingReplayHandler(ReplayHandler):
|
|||||||
return (result['encoding'], buff)
|
return (result['encoding'], buff)
|
||||||
|
|
||||||
def _checkRedir(self, wbrequest, cdx):
|
def _checkRedir(self, wbrequest, cdx):
|
||||||
if cdx and (cdx['timestamp'] != wbrequest.wb_url.timestamp):
|
if self.redir_to_exact and cdx and (cdx['timestamp'] != wbrequest.wb_url.timestamp):
|
||||||
newUrl = wbrequest.urlrewriter.getTimestampUrl(cdx['timestamp'], cdx['original'])
|
newUrl = wbrequest.urlrewriter.getTimestampUrl(cdx['timestamp'], cdx['original'])
|
||||||
raise wbexceptions.InternalRedirect(newUrl)
|
raise wbexceptions.InternalRedirect(newUrl)
|
||||||
#return WbResponse.better_timestamp_response(wbrequest, cdx['timestamp'])
|
#return WbResponse.better_timestamp_response(wbrequest, cdx['timestamp'])
|
||||||
|
@ -5,7 +5,7 @@ import wbexceptions
|
|||||||
import indexreader
|
import indexreader
|
||||||
|
|
||||||
from wbrequestresponse import WbResponse, StatusAndHeaders
|
from wbrequestresponse import WbResponse, StatusAndHeaders
|
||||||
from archivalrouter import ArchivalRequestRouter, MatchPrefix
|
from archivalrouter import ArchivalRequestRouter, Route
|
||||||
|
|
||||||
## ===========
|
## ===========
|
||||||
headInsert = """
|
headInsert = """
|
||||||
@ -49,7 +49,7 @@ one could declare a `createWB()` method as follows:
|
|||||||
|
|
||||||
return ArchivalRequestRouter(
|
return ArchivalRequestRouter(
|
||||||
{
|
{
|
||||||
MatchPrefix('mycoll', WBHandler(query, replay))
|
Route('mycoll', WBHandler(query, replay))
|
||||||
},
|
},
|
||||||
hostpaths = ['http://mywb.example.com:8080/'])
|
hostpaths = ['http://mywb.example.com:8080/'])
|
||||||
'''
|
'''
|
||||||
@ -58,10 +58,10 @@ def createDefaultWB(headInsert):
|
|||||||
query = QueryHandler(indexreader.RemoteCDXServer('http://web.archive.org/cdx/search/cdx'))
|
query = QueryHandler(indexreader.RemoteCDXServer('http://web.archive.org/cdx/search/cdx'))
|
||||||
return ArchivalRequestRouter(
|
return ArchivalRequestRouter(
|
||||||
{
|
{
|
||||||
MatchPrefix('echo', EchoEnv()), # Just echo the env
|
Route('echo', EchoEnv()), # Just echo the env
|
||||||
MatchPrefix('req', EchoRequest()), # Echo the WbRequest
|
Route('req', EchoRequest()), # Echo the WbRequest
|
||||||
MatchPrefix('cdx', query), # Query the CDX
|
Route('cdx', query), # Query the CDX
|
||||||
MatchPrefix('web', query), # Query the CDX
|
Route('web', query), # Query the CDX
|
||||||
},
|
},
|
||||||
hostpaths = ['http://localhost:9090/'])
|
hostpaths = ['http://localhost:9090/'])
|
||||||
## ===========
|
## ===========
|
||||||
|
@ -78,6 +78,8 @@ class WbRequest:
|
|||||||
|
|
||||||
self.queryFilter = []
|
self.queryFilter = []
|
||||||
|
|
||||||
|
self.customParams = {}
|
||||||
|
|
||||||
# PERF
|
# PERF
|
||||||
env['X_PERF'] = {}
|
env['X_PERF'] = {}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user