mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
Should resolve #4 -- supports pywb running as a non-root app
* Instead of relying on REQUEST_URI, pywb constructs a REL_REQUEST_URI from PATH_INFO + QUERY_STRING. SCRIPT_NAME is automatically prepended to the prefix (wb_prefix).
* MatchPrefix is now superseded by MatchRegex, which can also match a plain string -- the collection id (collId) defaults to the full match (regex group 0).
* Added an optional archivalurl_class argument to the router so that customized ArchivalUrl implementations can be specified.
* run.sh can now test on a non-root mount point, e.g. ./run.sh "/approot"
This commit is contained in:
parent
2e4d78d079
commit
80b2585d22
@ -3,51 +3,62 @@ import re
|
|||||||
|
|
||||||
from wbrequestresponse import WbRequest, WbResponse
|
from wbrequestresponse import WbRequest, WbResponse
|
||||||
from url_rewriter import ArchivalUrlRewriter
|
from url_rewriter import ArchivalUrlRewriter
|
||||||
|
from wbarchivalurl import ArchivalUrl
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
# ArchivalRequestRouter -- route WB requests in archival mode
|
# ArchivalRequestRouter -- route WB requests in archival mode
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class ArchivalRequestRouter:
|
class ArchivalRequestRouter:
|
||||||
def __init__(self, handlers, hostpaths = None, abs_path = True):
|
def __init__(self, handlers, hostpaths = None, abs_path = True, archivalurl_class = ArchivalUrl):
|
||||||
self.handlers = handlers
|
self.handlers = handlers
|
||||||
self.fallback = ReferRedirect(hostpaths)
|
self.fallback = ReferRedirect(hostpaths)
|
||||||
self.abs_path = abs_path
|
self.abs_path = abs_path
|
||||||
|
self.archivalurl_class = archivalurl_class
|
||||||
|
|
||||||
def __call__(self, env):
|
def __call__(self, env):
|
||||||
for handler in self.handlers:
|
for handler in self.handlers:
|
||||||
result = handler(env, self.abs_path)
|
result = handler(env, self.abs_path, self.archivalurl_class)
|
||||||
if result:
|
if result:
|
||||||
return result
|
return result
|
||||||
|
|
||||||
if not self.fallback:
|
if not self.fallback:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
return self.fallback(WbRequest.from_uri(None, env), self.abs_path)
|
return self.fallback(WbRequest.from_uri(None, env))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
# Route by matching prefix
|
# Route by matching prefix -- deprecated, as MatchRegex
|
||||||
|
# also supports the same
|
||||||
#=================================================================
|
#=================================================================
|
||||||
|
|
||||||
class MatchPrefix:
|
class MatchPrefix:
|
||||||
def __init__(self, prefix, handler):
|
def __init__(self, prefix, handler):
|
||||||
self.prefix = '/' + prefix + '/'
|
self.prefix = '/' + prefix + '/' if prefix else '/'
|
||||||
self.coll = prefix
|
self.coll = prefix
|
||||||
self.handler = handler
|
self.handler = handler
|
||||||
|
|
||||||
|
|
||||||
def __call__(self, env, useAbsPrefix):
|
def __call__(self, env, useAbsPrefix, archivalurl_class):
|
||||||
request_uri = env['REQUEST_URI']
|
request_uri = env['REL_REQUEST_URI']
|
||||||
if not request_uri.startswith(self.prefix):
|
if not request_uri.startswith(self.prefix):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
if self.coll:
|
||||||
|
wb_prefix = env['SCRIPT_NAME'] + self.prefix
|
||||||
|
wb_url = request_uri[len(self.coll) + 1:]
|
||||||
|
else:
|
||||||
|
wb_prefix = env['SCRIPT_NAME'] + self.prefix
|
||||||
|
wb_url = request_uri
|
||||||
|
|
||||||
wbrequest = WbRequest(env,
|
wbrequest = WbRequest(env,
|
||||||
request_uri = request_uri,
|
request_uri = request_uri,
|
||||||
coll = self.coll,
|
coll = self.coll,
|
||||||
wb_url = request_uri[len(self.coll) + 1:],
|
wb_url = wb_url,
|
||||||
wb_prefix = self.prefix,
|
wb_prefix = wb_prefix,
|
||||||
use_abs_prefix = useAbsPrefix)
|
use_abs_prefix = useAbsPrefix,
|
||||||
|
archivalurl_class = archivalurl_class)
|
||||||
|
|
||||||
return self._handleRequest(wbrequest)
|
return self._handleRequest(wbrequest)
|
||||||
|
|
||||||
@ -59,35 +70,53 @@ class MatchPrefix:
|
|||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
# Route by matching regex of request uri (excluding first '/')
|
# Route by matching regex of request uri (excluding first '/')
|
||||||
|
# May be a fixed prefix
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class MatchRegex:
|
class MatchRegex:
|
||||||
def __init__(self, regex, handler):
|
def __init__(self, regex, handler, coll_group = 0):
|
||||||
self.regex = re.compile(regex)
|
self.regex = re.compile(regex)
|
||||||
self.handler = handler
|
self.handler = handler
|
||||||
|
# collection id from regex group (default 0)
|
||||||
|
self.coll_group = coll_group
|
||||||
|
|
||||||
|
|
||||||
def __call__(self, env, useAbsPrefix):
|
def __call__(self, env, useAbsPrefix, archivalurl_class):
|
||||||
request_uri = env['REQUEST_URI']
|
request_uri = env['REL_REQUEST_URI']
|
||||||
matcher = self.regex.match(request_uri[1:])
|
matcher = self.regex.match(request_uri[1:])
|
||||||
if not matcher:
|
if not matcher:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
rel_prefix = matcher.group(0)
|
rel_prefix = matcher.group(0)
|
||||||
|
|
||||||
|
if rel_prefix:
|
||||||
|
wb_prefix = env['SCRIPT_NAME'] + '/' + rel_prefix + '/'
|
||||||
|
wb_url = request_uri[len(rel_prefix) + 1:] # remove the '/' + rel_prefix part of uri
|
||||||
|
else:
|
||||||
|
wb_prefix = env['SCRIPT_NAME'] + '/'
|
||||||
|
wb_url = request_uri # the request_uri is the wb_url, since no coll
|
||||||
|
|
||||||
|
coll = matcher.group(self.coll_group)
|
||||||
|
|
||||||
wbrequest = WbRequest(env,
|
wbrequest = WbRequest(env,
|
||||||
request_uri = request_uri,
|
request_uri = request_uri,
|
||||||
coll = matcher.group(1),
|
coll = coll,
|
||||||
wb_url = request_uri[len(rel_prefix) + 1:],
|
wb_url = wb_url,
|
||||||
wb_prefix = '/' + rel_prefix + '/',
|
wb_prefix = wb_prefix,
|
||||||
use_abs_prefix = useAbsPrefix)
|
use_abs_prefix = useAbsPrefix,
|
||||||
|
archivalurl_class = archivalurl_class)
|
||||||
|
|
||||||
|
|
||||||
# Allow for setup of additional filters
|
# Allow for setup of additional filters
|
||||||
self._addFilters(wbrequest, matcher)
|
self._addFilters(wbrequest, matcher)
|
||||||
|
|
||||||
return self.handler(wbrequest)
|
return self._handleRequest(wbrequest)
|
||||||
|
|
||||||
def _addFilters(self, wbrequest, matcher):
|
def _addFilters(self, wbrequest, matcher):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def _handleRequest(self, wbrequest):
|
||||||
|
return self.handler(wbrequest)
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
# ReferRedirect -- redirect urls that have 'fallen through' based on the referrer settings
|
# ReferRedirect -- redirect urls that have 'fallen through' based on the referrer settings
|
||||||
@ -121,7 +150,7 @@ class ReferRedirect:
|
|||||||
self.matchPrefixs = [matchPrefixs]
|
self.matchPrefixs = [matchPrefixs]
|
||||||
|
|
||||||
|
|
||||||
def __call__(self, wbrequest, abs_path):
|
def __call__(self, wbrequest):
|
||||||
if wbrequest.referrer is None:
|
if wbrequest.referrer is None:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@ -152,11 +181,11 @@ if __name__ == "__main__":
|
|||||||
import doctest
|
import doctest
|
||||||
|
|
||||||
def test_redir(matchHost, request_uri, referrer):
|
def test_redir(matchHost, request_uri, referrer):
|
||||||
env = {'REQUEST_URI': request_uri, 'HTTP_REFERER': referrer}
|
env = {'REL_REQUEST_URI': request_uri, 'HTTP_REFERER': referrer}
|
||||||
|
|
||||||
redir = ReferRedirect(matchHost)
|
redir = ReferRedirect(matchHost)
|
||||||
req = WbRequest.from_uri(request_uri, env)
|
req = WbRequest.from_uri(request_uri, env)
|
||||||
rep = redir(req, None)
|
rep = redir(req)
|
||||||
if not rep:
|
if not rep:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
@ -110,21 +110,21 @@ def iso_date_to_timestamp(string):
|
|||||||
# adapted -from wsgiref.request_uri, but doesn't include domain name and allows all characters
|
# adapted -from wsgiref.request_uri, but doesn't include domain name and allows all characters
|
||||||
# allowed in the path segment according to: http://tools.ietf.org/html/rfc3986#section-3.3
|
# allowed in the path segment according to: http://tools.ietf.org/html/rfc3986#section-3.3
|
||||||
# explained here: http://stackoverflow.com/questions/4669692/valid-characters-for-directory-part-of-a-url-for-short-links
|
# explained here: http://stackoverflow.com/questions/4669692/valid-characters-for-directory-part-of-a-url-for-short-links
|
||||||
def request_uri(environ, include_query=1):
|
def rel_request_uri(environ, include_query=1):
|
||||||
"""
|
"""
|
||||||
Return the requested path, optionally including the query string
|
Return the requested path, optionally including the query string
|
||||||
|
|
||||||
# Simple test:
|
# Simple test:
|
||||||
>>> request_uri({'PATH_INFO': '/web/example.com'})
|
>>> rel_request_uri({'PATH_INFO': '/web/example.com'})
|
||||||
'/web/example.com'
|
'/web/example.com'
|
||||||
|
|
||||||
# Test all unecoded special chars and double-quote
|
# Test all unecoded special chars and double-quote
|
||||||
# (double-quote must be encoded but not single quote)
|
# (double-quote must be encoded but not single quote)
|
||||||
>>> request_uri({'PATH_INFO': "/web/example.com/0~!+$&'()*+,;=:\\\""})
|
>>> rel_request_uri({'PATH_INFO': "/web/example.com/0~!+$&'()*+,;=:\\\""})
|
||||||
"/web/example.com/0~!+$&'()*+,;=:%22"
|
"/web/example.com/0~!+$&'()*+,;=:%22"
|
||||||
"""
|
"""
|
||||||
from urllib import quote
|
from urllib import quote
|
||||||
url = quote(environ.get('SCRIPT_NAME', '') + environ.get('PATH_INFO',''), safe='/~!$&\'()*+,;=:@')
|
url = quote(environ.get('PATH_INFO',''), safe='/~!$&\'()*+,;=:@')
|
||||||
if include_query and environ.get('QUERY_STRING'):
|
if include_query and environ.get('QUERY_STRING'):
|
||||||
url += '?' + environ['QUERY_STRING']
|
url += '?' + environ['QUERY_STRING']
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
from utils import request_uri
|
from utils import rel_request_uri
|
||||||
from query import QueryHandler, EchoEnv, EchoRequest
|
from query import QueryHandler, EchoEnv, EchoRequest
|
||||||
from replay import WBHandler
|
from replay import WBHandler
|
||||||
import wbexceptions
|
import wbexceptions
|
||||||
@ -7,8 +7,6 @@ import indexreader
|
|||||||
from wbrequestresponse import WbResponse, StatusAndHeaders
|
from wbrequestresponse import WbResponse, StatusAndHeaders
|
||||||
from archivalrouter import ArchivalRequestRouter, MatchPrefix
|
from archivalrouter import ArchivalRequestRouter, MatchPrefix
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## ===========
|
## ===========
|
||||||
headInsert = """
|
headInsert = """
|
||||||
|
|
||||||
@ -82,8 +80,11 @@ except:
|
|||||||
|
|
||||||
|
|
||||||
def application(env, start_response):
|
def application(env, start_response):
|
||||||
if not env.get('REQUEST_URI'):
|
|
||||||
env['REQUEST_URI'] = request_uri(env)
|
if env.get('SCRIPT_NAME') or not env.get('REQUEST_URI'):
|
||||||
|
env['REL_REQUEST_URI'] = rel_request_uri(env)
|
||||||
|
else:
|
||||||
|
env['REL_REQUEST_URI'] = env['REQUEST_URI']
|
||||||
|
|
||||||
response = None
|
response = None
|
||||||
|
|
||||||
@ -91,7 +92,7 @@ def application(env, start_response):
|
|||||||
response = wbparser(env)
|
response = wbparser(env)
|
||||||
|
|
||||||
if not response:
|
if not response:
|
||||||
raise wbexceptions.NotFoundException(env['REQUEST_URI'] + ' was not found')
|
raise wbexceptions.NotFoundException(env['REL_REQUEST_URI'] + ' was not found')
|
||||||
|
|
||||||
except wbexceptions.InternalRedirect as ir:
|
except wbexceptions.InternalRedirect as ir:
|
||||||
response = WbResponse(StatusAndHeaders(ir.status, ir.httpHeaders))
|
response = WbResponse(StatusAndHeaders(ir.status, ir.httpHeaders))
|
||||||
@ -117,7 +118,4 @@ def handleException(env, exc):
|
|||||||
|
|
||||||
return WbResponse.text_response(status + ' Error: ' + str(exc), status = status)
|
return WbResponse.text_response(status + ' Error: ' + str(exc), status = status)
|
||||||
|
|
||||||
#def handle_not_found(env):
|
|
||||||
# return WbResponse.text_response('Not Found: ' + env['REQUEST_URI'], status = '404 Not Found')
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -162,7 +162,7 @@ class ArchivalUrl:
|
|||||||
return "/" + url
|
return "/" + url
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return ArchivalUrl.to_str(self.type, self.mod, self.timestamp, self.url)
|
return self.to_str(self.type, self.mod, self.timestamp, self.url)
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return str((self.type, self.timestamp, self.mod, self.url, str(self)))
|
return str((self.type, self.timestamp, self.mod, self.url, str(self)))
|
||||||
|
@ -31,7 +31,7 @@ class WbRequest:
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def from_uri(request_uri, env = {}, use_abs_prefix = False):
|
def from_uri(request_uri, env = {}, use_abs_prefix = False):
|
||||||
if not request_uri:
|
if not request_uri:
|
||||||
request_uri = env.get('REQUEST_URI')
|
request_uri = env.get('REL_REQUEST_URI')
|
||||||
|
|
||||||
parts = request_uri.split('/', 2)
|
parts = request_uri.split('/', 2)
|
||||||
|
|
||||||
@ -61,14 +61,14 @@ class WbRequest:
|
|||||||
return rel_prefix
|
return rel_prefix
|
||||||
|
|
||||||
|
|
||||||
def __init__(self, env, request_uri, wb_prefix, wb_url, coll, use_abs_prefix = False):
|
def __init__(self, env, request_uri, wb_prefix, wb_url, coll, use_abs_prefix = False, archivalurl_class = ArchivalUrl):
|
||||||
self.env = env
|
self.env = env
|
||||||
|
|
||||||
self.request_uri = request_uri if request_uri else env.get('REQUEST_URI')
|
self.request_uri = request_uri if request_uri else env.get('REL_REQUEST_URI')
|
||||||
|
|
||||||
self.wb_prefix = wb_prefix if not use_abs_prefix else WbRequest.makeAbsPrefix(env, wb_prefix)
|
self.wb_prefix = wb_prefix if not use_abs_prefix else WbRequest.makeAbsPrefix(env, wb_prefix)
|
||||||
|
|
||||||
self.wb_url = ArchivalUrl(wb_url)
|
self.wb_url = archivalurl_class(wb_url)
|
||||||
|
|
||||||
self.coll = coll
|
self.coll = coll
|
||||||
|
|
||||||
|
11
run.sh
11
run.sh
@ -2,10 +2,17 @@
|
|||||||
|
|
||||||
mypath=$(cd `dirname $0` && pwd)
|
mypath=$(cd `dirname $0` && pwd)
|
||||||
|
|
||||||
app=$1
|
app=$2
|
||||||
cd $mypath/pywb
|
cd $mypath/pywb
|
||||||
if [ -z "$app" ]; then
|
if [ -z "$app" ]; then
|
||||||
app=wbapp.py
|
app=wbapp.py
|
||||||
fi
|
fi
|
||||||
|
|
||||||
uwsgi --static-map /static=$mypath/static --http :8080 --wsgi-file $app
|
if [ -z "$1" ]; then
|
||||||
|
# Standard root config
|
||||||
|
uwsgi --static-map /static=$mypath/static --http-socket :8080 --wsgi-file $app
|
||||||
|
else
|
||||||
|
# Test on non-root mount
|
||||||
|
uwsgi --static-map /static=$mypath/static --http-socket :8080 --mount "$1=$app" --no-default-app --manage-script-name
|
||||||
|
fi
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user