mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
better exception handling, specific status codes for exceptions,
detect access control and not found exceptions more consistently
This commit is contained in:
parent
ebc76c0791
commit
0a2b16407d
@ -19,10 +19,4 @@ class ArchivalRequestRouter:
|
||||
handler, wbrequest = self.parse_request(env)
|
||||
return handler.run(wbrequest)
|
||||
|
||||
def handle_exception(self, env, exc):
|
||||
return WbResponse.text_response('Error: ' + str(exc), status = '400 Bad Request')
|
||||
|
||||
def handle_not_found(self, env):
|
||||
return WbResponse.text_response('Not Found: ' + env['REQUEST_URI'], status = '404 Not Found')
|
||||
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
import urllib
|
||||
import urllib2
|
||||
import wbexceptions
|
||||
|
||||
class RemoteCDXServer:
|
||||
"""
|
||||
@ -27,17 +28,23 @@ class RemoteCDXServer:
|
||||
params.update(**kwvalues)
|
||||
|
||||
urlparams = urllib.urlencode(params)
|
||||
|
||||
try:
|
||||
request = urllib2.Request(self.serverUrl, urlparams)
|
||||
response = urllib2.urlopen(request)
|
||||
except urllib2.HTTPError, e:
|
||||
if e.code == 403:
|
||||
exc_msg = e.read()
|
||||
msg = 'Blocked By Robots' if 'Blocked By Robots' in exc_msg else 'Excluded'
|
||||
raise wbexceptions.AccessException(msg)
|
||||
else:
|
||||
raise e
|
||||
|
||||
if parse_cdx:
|
||||
return map(CDXCaptureResult, response)
|
||||
else:
|
||||
return response
|
||||
|
||||
class InvalidCDXException(Exception):
|
||||
pass
|
||||
|
||||
class CDXCaptureResult:
|
||||
CDX_FORMATS = [["urlkey","timestamp","original","mimetype","statuscode","digest","redirect","robotflags","length","offset","filename"],
|
||||
["urlkey","timestamp","original","mimetype","statuscode","digest","redirect","offset","filename"]]
|
||||
|
9
pywb/utils.py
Normal file
9
pywb/utils.py
Normal file
@ -0,0 +1,9 @@
|
||||
import itertools
|
||||
|
||||
def peek_iter(iterable):
    """Check whether *iterable* yields anything without losing its first item.

    Returns None when the iterable is empty. Otherwise returns a new iterator
    that yields the already-consumed first item followed by the remaining
    items, so the caller still sees the full sequence.

    Accepts any iterable (list, generator, file-like response, ...): it is
    wrapped with iter() first, because next() only works on iterators and
    would raise TypeError for e.g. a plain list.
    """
    iterator = iter(iterable)

    try:
        first = next(iterator)
    except StopIteration:
        # Nothing to yield: signal "empty" to the caller explicitly.
        return None

    # Re-attach the item we consumed while peeking.
    return itertools.chain([first], iterator)
|
@ -3,11 +3,14 @@ from archiveurl import archiveurl
|
||||
from archivalrouter import ArchivalRequestRouter
|
||||
import indexreader
|
||||
import json
|
||||
import wbexceptions
|
||||
import utils
|
||||
|
||||
class WBHandler:
|
||||
def run(self, wbrequest):
|
||||
wburl = archiveurl(wbrequest.wb_url)
|
||||
return WbResponse.text_response(repr(wburl))
|
||||
wbrequest.parsed_url = wburl
|
||||
return WbResponse.text_stream(str(vars(wburl)))
|
||||
|
||||
class QueryHandler:
|
||||
def __init__(self):
|
||||
@ -15,7 +18,6 @@ class QueryHandler:
|
||||
|
||||
@staticmethod
|
||||
def get_query_params(wburl):
|
||||
print wburl.type
|
||||
return {
|
||||
|
||||
archiveurl.QUERY:
|
||||
@ -37,23 +39,27 @@ class QueryHandler:
|
||||
|
||||
def run(self, wbrequest):
|
||||
wburl = archiveurl(wbrequest.wb_url)
|
||||
#wburl = wbresponse.body.parsed_url
|
||||
|
||||
params = QueryHandler.get_query_params(wburl)
|
||||
|
||||
#parse_cdx = (wburl.mod == 'json')
|
||||
cdxlines = self.cdxserver.load(wburl.url, params)
|
||||
|
||||
cdxlines = utils.peek_iter(cdxlines)
|
||||
|
||||
if cdxlines is not None:
|
||||
return WbResponse.text_stream(cdxlines)
|
||||
|
||||
#if parse_cdx:
|
||||
# text = str("\n".join(map(str, cdxlines)))
|
||||
# text = json.dumps(cdxlines, default=lambda o: o.__dict__)
|
||||
#else:
|
||||
# text = cdxlines
|
||||
raise wbexceptions.NotFoundException('WB Does Not Have Url: ' + wburl.url)
|
||||
|
||||
|
||||
|
||||
## ===========
|
||||
parser = ArchivalRequestRouter({'/web/': QueryHandler()}, hostpaths = ['http://localhost:9090/'])
|
||||
parser = ArchivalRequestRouter(
|
||||
{'/t1/' : WBHandler(),
|
||||
'/t2/' : QueryHandler()
|
||||
},
|
||||
hostpaths = ['http://localhost:9090/'])
|
||||
## ===========
|
||||
|
||||
|
||||
@ -63,13 +69,26 @@ def application(env, start_response):
|
||||
try:
|
||||
response = parser.handle_request(env)
|
||||
|
||||
if not response:
|
||||
raise wbexceptions.NotFoundException(env['REQUEST_URI'] + ' was not found')
|
||||
|
||||
except Exception as e:
|
||||
last_exc = e
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
response = parser.handle_exception(env, e)
|
||||
|
||||
if not response:
|
||||
response = parser.handle_not_found(env)
|
||||
response = handle_exception(env, e)
|
||||
|
||||
return response(env, start_response)
|
||||
|
||||
def handle_exception(env, exc):
    """Build a plain-text error response for an exception.

    If *exc* exposes a ``status`` attribute it is called to obtain the
    response status; any other exception is reported as '400 Bad Request'.
    The response body is the status followed by ' Error: ' and the
    exception's string form. *env* is accepted but not used here.
    """
    status = exc.status() if hasattr(exc, 'status') else '400 Bad Request'

    message = status + ' Error: ' + str(exc)
    return WbResponse.text_response(message, status = status)
|
||||
|
||||
#def handle_not_found(env):
|
||||
# return WbResponse.text_response('Not Found: ' + env['REQUEST_URI'], status = '404 Not Found')
|
||||
|
||||
|
||||
|
@ -1,8 +1,20 @@
|
||||
|
||||
class RequestParseException(Exception):
    """Exception that maps to an HTTP 400 response.

    NOTE(review): presumably raised when a request cannot be parsed;
    no raise site is visible here -- confirm against callers.
    """

    def status(self):
        """Return the full WSGI status line (code plus reason phrase).

        WSGI's start_response() expects e.g. '400 Bad Request', not a
        bare status code.
        """
        return '400 Bad Request'
|
||||
|
||||
class BadUrlException(Exception):
    """Exception that maps to an HTTP 400 response.

    NOTE(review): presumably raised for malformed archival urls;
    no raise site is visible here -- confirm against callers.
    """

    def status(self):
        """Return the full WSGI status line (code plus reason phrase).

        WSGI's start_response() expects e.g. '400 Bad Request', not a
        bare status code.
        """
        return '400 Bad Request'
|
||||
|
||||
class AccessException(Exception):
    """Exception that maps to an HTTP 403 response.

    Raised by the remote CDX loader when the CDX server answers with
    HTTP 403 (message is 'Blocked By Robots' or 'Excluded').
    """

    def status(self):
        """Return the full WSGI status line (code plus reason phrase).

        WSGI's start_response() expects e.g. '403 Forbidden', not a
        bare status code.
        """
        return '403 Forbidden'
|
||||
|
||||
class InvalidCDXException(Exception):
    """Exception that maps to an HTTP 500 response.

    NOTE(review): presumably raised when a CDX line cannot be parsed;
    no raise site is visible here -- confirm against callers.
    """

    def status(self):
        """Return the full WSGI status line (code plus reason phrase).

        WSGI's start_response() expects e.g. '500 Internal Server Error',
        not a bare status code.
        """
        return '500 Internal Server Error'
|
||||
|
||||
class NotFoundException(Exception):
    """Exception that maps to an HTTP 404 response.

    Raised when no handler produces a response for a request, or when
    a query finds no captures for the requested url.
    """

    def status(self):
        """Return the full WSGI status line (code plus reason phrase).

        WSGI's start_response() expects e.g. '404 Not Found', not a
        bare status code.
        """
        return '404 Not Found'
|
||||
|
@ -1,4 +1,3 @@
|
||||
|
||||
#WB Request and Response
|
||||
|
||||
class WbRequest:
|
||||
@ -57,8 +56,8 @@ class WbResponse:
|
||||
return WbResponse(status, value = [text], headersList = [('Content-Type', 'text/plain')])
|
||||
|
||||
@staticmethod
|
||||
def redir_response(location):
|
||||
return WbResponse('302 Redirect', headersList = [('Location', location)])
|
||||
def redir_response(location, status = '302 Redirect'):
|
||||
return WbResponse(status, headersList = [('Location', location)])
|
||||
|
||||
def get_header(self, name):
|
||||
name_upp = name.upper()
|
||||
@ -72,7 +71,12 @@ class WbResponse:
|
||||
# headersList.append((key, value))
|
||||
|
||||
start_response(self.status, self.headersList)
|
||||
|
||||
if hasattr(self.body, '__iter__'):
|
||||
return self.body
|
||||
else:
|
||||
return [str(self.body)]
|
||||
|
||||
|
||||
def __repr__(self):
|
||||
return str(vars(self))
|
||||
|
Loading…
x
Reference in New Issue
Block a user