mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
Should resolve #4 -- supports pywb running as a non-root app
* Instead of relying on REQUEST_URI, pywb constructs a REL_REQUEST_URI from PATH_INFO + QUERY_STRING. SCRIPT_NAME is automatically added to the prefix * MatchPrefix is now superseded by MatchRegex, which can also match a plain string -- collId defaults to the full match * Added optional archivalurl_class to router to allow for customized ArchivalUrl implementations to be specified * run.sh can test on a non-root mountpoint, e.g. ./run.sh "/approot"
This commit is contained in:
parent
2e4d78d079
commit
80b2585d22
@ -3,51 +3,62 @@ import re
|
||||
|
||||
from wbrequestresponse import WbRequest, WbResponse
|
||||
from url_rewriter import ArchivalUrlRewriter
|
||||
from wbarchivalurl import ArchivalUrl
|
||||
|
||||
#=================================================================
|
||||
# ArchivalRequestRouter -- route WB requests in archival mode
|
||||
#=================================================================
|
||||
class ArchivalRequestRouter:
|
||||
def __init__(self, handlers, hostpaths = None, abs_path = True):
|
||||
def __init__(self, handlers, hostpaths = None, abs_path = True, archivalurl_class = ArchivalUrl):
|
||||
self.handlers = handlers
|
||||
self.fallback = ReferRedirect(hostpaths)
|
||||
self.abs_path = abs_path
|
||||
self.archivalurl_class = archivalurl_class
|
||||
|
||||
def __call__(self, env):
|
||||
for handler in self.handlers:
|
||||
result = handler(env, self.abs_path)
|
||||
result = handler(env, self.abs_path, self.archivalurl_class)
|
||||
if result:
|
||||
return result
|
||||
|
||||
if not self.fallback:
|
||||
return None
|
||||
|
||||
return self.fallback(WbRequest.from_uri(None, env), self.abs_path)
|
||||
return self.fallback(WbRequest.from_uri(None, env))
|
||||
|
||||
|
||||
|
||||
#=================================================================
|
||||
# Route by matching prefix
|
||||
# Route by matching prefix -- deprecated, as MatchRegex
|
||||
# also supports the same
|
||||
#=================================================================
|
||||
|
||||
class MatchPrefix:
|
||||
def __init__(self, prefix, handler):
|
||||
self.prefix = '/' + prefix + '/'
|
||||
self.prefix = '/' + prefix + '/' if prefix else '/'
|
||||
self.coll = prefix
|
||||
self.handler = handler
|
||||
|
||||
|
||||
def __call__(self, env, useAbsPrefix):
|
||||
request_uri = env['REQUEST_URI']
|
||||
def __call__(self, env, useAbsPrefix, archivalurl_class):
|
||||
request_uri = env['REL_REQUEST_URI']
|
||||
if not request_uri.startswith(self.prefix):
|
||||
return None
|
||||
|
||||
if self.coll:
|
||||
wb_prefix = env['SCRIPT_NAME'] + self.prefix
|
||||
wb_url = request_uri[len(self.coll) + 1:]
|
||||
else:
|
||||
wb_prefix = env['SCRIPT_NAME'] + self.prefix
|
||||
wb_url = request_uri
|
||||
|
||||
wbrequest = WbRequest(env,
|
||||
request_uri = request_uri,
|
||||
coll = self.coll,
|
||||
wb_url = request_uri[len(self.coll) + 1:],
|
||||
wb_prefix = self.prefix,
|
||||
use_abs_prefix = useAbsPrefix)
|
||||
wb_url = wb_url,
|
||||
wb_prefix = wb_prefix,
|
||||
use_abs_prefix = useAbsPrefix,
|
||||
archivalurl_class = archivalurl_class)
|
||||
|
||||
return self._handleRequest(wbrequest)
|
||||
|
||||
@ -59,35 +70,53 @@ class MatchPrefix:
|
||||
|
||||
#=================================================================
|
||||
# Route by matching regex of request uri (excluding first '/')
|
||||
# May be a fixed prefix
|
||||
#=================================================================
|
||||
class MatchRegex:
|
||||
def __init__(self, regex, handler):
|
||||
def __init__(self, regex, handler, coll_group = 0):
|
||||
self.regex = re.compile(regex)
|
||||
self.handler = handler
|
||||
# collection id from regex group (default 0)
|
||||
self.coll_group = coll_group
|
||||
|
||||
|
||||
def __call__(self, env, useAbsPrefix):
|
||||
request_uri = env['REQUEST_URI']
|
||||
def __call__(self, env, useAbsPrefix, archivalurl_class):
|
||||
request_uri = env['REL_REQUEST_URI']
|
||||
matcher = self.regex.match(request_uri[1:])
|
||||
if not matcher:
|
||||
return None
|
||||
|
||||
rel_prefix = matcher.group(0)
|
||||
|
||||
if rel_prefix:
|
||||
wb_prefix = env['SCRIPT_NAME'] + '/' + rel_prefix + '/'
|
||||
wb_url = request_uri[len(rel_prefix) + 1:] # remove the '/' + rel_prefix part of uri
|
||||
else:
|
||||
wb_prefix = env['SCRIPT_NAME'] + '/'
|
||||
wb_url = request_uri # the request_uri is the wb_url, since no coll
|
||||
|
||||
coll = matcher.group(self.coll_group)
|
||||
|
||||
wbrequest = WbRequest(env,
|
||||
request_uri = request_uri,
|
||||
coll = matcher.group(1),
|
||||
wb_url = request_uri[len(rel_prefix) + 1:],
|
||||
wb_prefix = '/' + rel_prefix + '/',
|
||||
use_abs_prefix = useAbsPrefix)
|
||||
coll = coll,
|
||||
wb_url = wb_url,
|
||||
wb_prefix = wb_prefix,
|
||||
use_abs_prefix = useAbsPrefix,
|
||||
archivalurl_class = archivalurl_class)
|
||||
|
||||
|
||||
# Allow for setup of additional filters
|
||||
self._addFilters(wbrequest, matcher)
|
||||
|
||||
return self.handler(wbrequest)
|
||||
return self._handleRequest(wbrequest)
|
||||
|
||||
def _addFilters(self, wbrequest, matcher):
|
||||
pass
|
||||
|
||||
def _handleRequest(self, wbrequest):
|
||||
return self.handler(wbrequest)
|
||||
|
||||
|
||||
#=================================================================
|
||||
# ReferRedirect -- redirect urls that have 'fallen through' based on the referrer settings
|
||||
@ -121,7 +150,7 @@ class ReferRedirect:
|
||||
self.matchPrefixs = [matchPrefixs]
|
||||
|
||||
|
||||
def __call__(self, wbrequest, abs_path):
|
||||
def __call__(self, wbrequest):
|
||||
if wbrequest.referrer is None:
|
||||
return None
|
||||
|
||||
@ -152,11 +181,11 @@ if __name__ == "__main__":
|
||||
import doctest
|
||||
|
||||
def test_redir(matchHost, request_uri, referrer):
|
||||
env = {'REQUEST_URI': request_uri, 'HTTP_REFERER': referrer}
|
||||
env = {'REL_REQUEST_URI': request_uri, 'HTTP_REFERER': referrer}
|
||||
|
||||
redir = ReferRedirect(matchHost)
|
||||
req = WbRequest.from_uri(request_uri, env)
|
||||
rep = redir(req, None)
|
||||
rep = redir(req)
|
||||
if not rep:
|
||||
return False
|
||||
|
||||
|
@ -110,21 +110,21 @@ def iso_date_to_timestamp(string):
|
||||
# adapted from wsgiref.util.request_uri, but doesn't include domain name and allows all characters
|
||||
# allowed in the path segment according to: http://tools.ietf.org/html/rfc3986#section-3.3
|
||||
# explained here: http://stackoverflow.com/questions/4669692/valid-characters-for-directory-part-of-a-url-for-short-links
|
||||
def request_uri(environ, include_query=1):
|
||||
def rel_request_uri(environ, include_query=1):
|
||||
"""
|
||||
Return the requested path, optionally including the query string
|
||||
|
||||
# Simple test:
|
||||
>>> request_uri({'PATH_INFO': '/web/example.com'})
|
||||
>>> rel_request_uri({'PATH_INFO': '/web/example.com'})
|
||||
'/web/example.com'
|
||||
|
||||
# Test all unencoded special chars and double-quote
|
||||
# (double-quote must be encoded but not single quote)
|
||||
>>> request_uri({'PATH_INFO': "/web/example.com/0~!+$&'()*+,;=:\\\""})
|
||||
>>> rel_request_uri({'PATH_INFO': "/web/example.com/0~!+$&'()*+,;=:\\\""})
|
||||
"/web/example.com/0~!+$&'()*+,;=:%22"
|
||||
"""
|
||||
from urllib import quote
|
||||
url = quote(environ.get('SCRIPT_NAME', '') + environ.get('PATH_INFO',''), safe='/~!$&\'()*+,;=:@')
|
||||
url = quote(environ.get('PATH_INFO',''), safe='/~!$&\'()*+,;=:@')
|
||||
if include_query and environ.get('QUERY_STRING'):
|
||||
url += '?' + environ['QUERY_STRING']
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
from utils import request_uri
|
||||
from utils import rel_request_uri
|
||||
from query import QueryHandler, EchoEnv, EchoRequest
|
||||
from replay import WBHandler
|
||||
import wbexceptions
|
||||
@ -7,8 +7,6 @@ import indexreader
|
||||
from wbrequestresponse import WbResponse, StatusAndHeaders
|
||||
from archivalrouter import ArchivalRequestRouter, MatchPrefix
|
||||
|
||||
|
||||
|
||||
## ===========
|
||||
headInsert = """
|
||||
|
||||
@ -82,8 +80,11 @@ except:
|
||||
|
||||
|
||||
def application(env, start_response):
|
||||
if not env.get('REQUEST_URI'):
|
||||
env['REQUEST_URI'] = request_uri(env)
|
||||
|
||||
if env.get('SCRIPT_NAME') or not env.get('REQUEST_URI'):
|
||||
env['REL_REQUEST_URI'] = rel_request_uri(env)
|
||||
else:
|
||||
env['REL_REQUEST_URI'] = env['REQUEST_URI']
|
||||
|
||||
response = None
|
||||
|
||||
@ -91,7 +92,7 @@ def application(env, start_response):
|
||||
response = wbparser(env)
|
||||
|
||||
if not response:
|
||||
raise wbexceptions.NotFoundException(env['REQUEST_URI'] + ' was not found')
|
||||
raise wbexceptions.NotFoundException(env['REL_REQUEST_URI'] + ' was not found')
|
||||
|
||||
except wbexceptions.InternalRedirect as ir:
|
||||
response = WbResponse(StatusAndHeaders(ir.status, ir.httpHeaders))
|
||||
@ -117,7 +118,4 @@ def handleException(env, exc):
|
||||
|
||||
return WbResponse.text_response(status + ' Error: ' + str(exc), status = status)
|
||||
|
||||
#def handle_not_found(env):
|
||||
# return WbResponse.text_response('Not Found: ' + env['REQUEST_URI'], status = '404 Not Found')
|
||||
|
||||
|
||||
|
@ -162,7 +162,7 @@ class ArchivalUrl:
|
||||
return "/" + url
|
||||
|
||||
def __str__(self):
|
||||
return ArchivalUrl.to_str(self.type, self.mod, self.timestamp, self.url)
|
||||
return self.to_str(self.type, self.mod, self.timestamp, self.url)
|
||||
|
||||
def __repr__(self):
|
||||
return str((self.type, self.timestamp, self.mod, self.url, str(self)))
|
||||
|
@ -31,7 +31,7 @@ class WbRequest:
|
||||
@staticmethod
|
||||
def from_uri(request_uri, env = {}, use_abs_prefix = False):
|
||||
if not request_uri:
|
||||
request_uri = env.get('REQUEST_URI')
|
||||
request_uri = env.get('REL_REQUEST_URI')
|
||||
|
||||
parts = request_uri.split('/', 2)
|
||||
|
||||
@ -61,14 +61,14 @@ class WbRequest:
|
||||
return rel_prefix
|
||||
|
||||
|
||||
def __init__(self, env, request_uri, wb_prefix, wb_url, coll, use_abs_prefix = False):
|
||||
def __init__(self, env, request_uri, wb_prefix, wb_url, coll, use_abs_prefix = False, archivalurl_class = ArchivalUrl):
|
||||
self.env = env
|
||||
|
||||
self.request_uri = request_uri if request_uri else env.get('REQUEST_URI')
|
||||
self.request_uri = request_uri if request_uri else env.get('REL_REQUEST_URI')
|
||||
|
||||
self.wb_prefix = wb_prefix if not use_abs_prefix else WbRequest.makeAbsPrefix(env, wb_prefix)
|
||||
|
||||
self.wb_url = ArchivalUrl(wb_url)
|
||||
self.wb_url = archivalurl_class(wb_url)
|
||||
|
||||
self.coll = coll
|
||||
|
||||
|
11
run.sh
11
run.sh
@ -2,10 +2,17 @@
|
||||
|
||||
mypath=$(cd `dirname $0` && pwd)
|
||||
|
||||
app=$1
|
||||
app=$2
|
||||
cd $mypath/pywb
|
||||
if [ -z "$app" ]; then
|
||||
app=wbapp.py
|
||||
fi
|
||||
|
||||
uwsgi --static-map /static=$mypath/static --http :8080 --wsgi-file $app
|
||||
if [ -z "$1" ]; then
|
||||
# Standard root config
|
||||
uwsgi --static-map /static=$mypath/static --http-socket :8080 --wsgi-file $app
|
||||
else
|
||||
# Test on non-root mount
|
||||
uwsgi --static-map /static=$mypath/static --http-socket :8080 --mount "$1=$app" --no-default-app --manage-script-name
|
||||
fi
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user