1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

better exception handling, specific status codes for exceptions,

detect access control and not found exceptions more consistently
This commit is contained in:
Ilya Kreymer 2013-12-19 12:06:47 -08:00
parent ebc76c0791
commit 0a2b16407d
6 changed files with 76 additions and 31 deletions

View File

@ -19,10 +19,4 @@ class ArchivalRequestRouter:
handler, wbrequest = self.parse_request(env)
return handler.run(wbrequest)
def handle_exception(self, env, exc):
    """Report ``exc`` to the client as a plain-text '400 Bad Request'.

    ``env`` is accepted but not used when building the response.
    """
    message = 'Error: ' + str(exc)
    return WbResponse.text_response(message, status = '400 Bad Request')
def handle_not_found(self, env):
    """Return a plain-text '404 Not Found' response naming the request URI.

    The URI is read from ``env['REQUEST_URI']``.
    """
    message = 'Not Found: ' + env['REQUEST_URI']
    return WbResponse.text_response(message, status = '404 Not Found')

View File

@ -1,5 +1,6 @@
import urllib
import urllib2
import wbexceptions
class RemoteCDXServer:
"""
@ -27,17 +28,23 @@ class RemoteCDXServer:
params.update(**kwvalues)
urlparams = urllib.urlencode(params)
request = urllib2.Request(self.serverUrl, urlparams)
response = urllib2.urlopen(request)
try:
request = urllib2.Request(self.serverUrl, urlparams)
response = urllib2.urlopen(request)
except urllib2.HTTPError, e:
if e.code == 403:
exc_msg = e.read()
msg = 'Blocked By Robots' if 'Blocked By Robots' in exc_msg else 'Excluded'
raise wbexceptions.AccessException(msg)
else:
raise e
if parse_cdx:
return map(CDXCaptureResult, response)
else:
return response
class InvalidCDXException(Exception):
pass
class CDXCaptureResult:
CDX_FORMATS = [["urlkey","timestamp","original","mimetype","statuscode","digest","redirect","robotflags","length","offset","filename"],
["urlkey","timestamp","original","mimetype","statuscode","digest","redirect","offset","filename"]]

9
pywb/utils.py Normal file
View File

@ -0,0 +1,9 @@
import itertools
def peek_iter(iterable):
    """Return an iterator over ``iterable``, or None if it yields nothing.

    The first item is pulled eagerly to find out whether there is any
    content, then chained back in front of the remaining items so the
    caller still sees the complete sequence.

    :param iterable: any iterable (list, generator, file-like object, ...).
    :return: an iterator yielding every item of ``iterable`` in order,
             or ``None`` when ``iterable`` is empty.
    """
    # next() only accepts iterators; going through iter() first lets plain
    # containers such as lists work too, and an iterator simply returns
    # itself from iter().
    iterator = iter(iterable)
    try:
        first = next(iterator)
    except StopIteration:
        return None
    return itertools.chain([first], iterator)

View File

@ -3,11 +3,14 @@ from archiveurl import archiveurl
from archivalrouter import ArchivalRequestRouter
import indexreader
import json
import wbexceptions
import utils
class WBHandler:
def run(self, wbrequest):
wburl = archiveurl(wbrequest.wb_url)
return WbResponse.text_response(repr(wburl))
wbrequest.parsed_url = wburl
return WbResponse.text_stream(str(vars(wburl)))
class QueryHandler:
def __init__(self):
@ -15,7 +18,6 @@ class QueryHandler:
@staticmethod
def get_query_params(wburl):
print wburl.type
return {
archiveurl.QUERY:
@ -37,23 +39,27 @@ class QueryHandler:
def run(self, wbrequest):
wburl = archiveurl(wbrequest.wb_url)
#wburl = wbresponse.body.parsed_url
params = QueryHandler.get_query_params(wburl)
#parse_cdx = (wburl.mod == 'json')
cdxlines = self.cdxserver.load(wburl.url, params)
return WbResponse.text_stream(cdxlines)
cdxlines = utils.peek_iter(cdxlines)
if cdxlines is not None:
return WbResponse.text_stream(cdxlines)
raise wbexceptions.NotFoundException('WB Does Not Have Url: ' + wburl.url)
#if parse_cdx:
# text = str("\n".join(map(str, cdxlines)))
# text = json.dumps(cdxlines, default=lambda o: o.__dict__)
#else:
# text = cdxlines
## ===========
parser = ArchivalRequestRouter({'/web/': QueryHandler()}, hostpaths = ['http://localhost:9090/'])
parser = ArchivalRequestRouter(
{'/t1/' : WBHandler(),
'/t2/' : QueryHandler()
},
hostpaths = ['http://localhost:9090/'])
## ===========
@ -63,13 +69,26 @@ def application(env, start_response):
try:
response = parser.handle_request(env)
if not response:
raise wbexceptions.NotFoundException(env['REQUEST_URI'] + ' was not found')
except Exception as e:
last_exc = e
import traceback
traceback.print_exc()
response = parser.handle_exception(env, e)
if not response:
response = parser.handle_not_found(env)
response = handle_exception(env, e)
return response(env, start_response)
def handle_exception(env, exc):
    """Build a plain-text error response for ``exc``.

    If the exception exposes a ``status`` attribute it is called to obtain
    the response status; otherwise '400 Bad Request' is used. The same
    status string is also prefixed to the response text. ``env`` is
    accepted but not used here.
    """
    status = exc.status() if hasattr(exc, 'status') else '400 Bad Request'
    message = status + ' Error: ' + str(exc)
    return WbResponse.text_response(message, status = status)
#def handle_not_found(env):
# return WbResponse.text_response('Not Found: ' + env['REQUEST_URI'], status = '404 Not Found')

View File

@ -1,8 +1,20 @@
class RequestParseException(Exception):
    """Error reported to the client as 400 (presumably an unparsable request, going by the name)."""

    def status(self):
        """Return the HTTP status line to send for this error."""
        # WSGI's start_response expects "<code> <reason>", not a bare code.
        return '400 Bad Request'
class BadUrlException(Exception):
    """Error reported to the client as 400 (presumably a malformed url, going by the name)."""

    def status(self):
        """Return the HTTP status line to send for this error."""
        # WSGI's start_response expects "<code> <reason>", not a bare code.
        return '400 Bad Request'
class AccessException(Exception):
    """Access to the requested content is blocked; reported to the client as 403."""

    def status(self):
        """Return the HTTP status line to send for this error."""
        # WSGI's start_response expects "<code> <reason>", not a bare code.
        return '403 Forbidden'
class InvalidCDXException(Exception):
    """Error reported to the client as 500 (presumably unusable CDX data, going by the name)."""

    def status(self):
        """Return the HTTP status line to send for this error."""
        # WSGI's start_response expects "<code> <reason>", not a bare code.
        return '500 Internal Server Error'
class NotFoundException(Exception):
    """The requested resource could not be found; reported to the client as 404."""

    def status(self):
        """Return the HTTP status line to send for this error."""
        # WSGI's start_response expects "<code> <reason>", not a bare code.
        return '404 Not Found'

View File

@ -1,4 +1,3 @@
#WB Request and Response
class WbRequest:
@ -57,8 +56,8 @@ class WbResponse:
return WbResponse(status, value = [text], headersList = [('Content-Type', 'text/plain')])
@staticmethod
def redir_response(location):
return WbResponse('302 Redirect', headersList = [('Location', location)])
def redir_response(location, status = '302 Redirect'):
return WbResponse(status, headersList = [('Location', location)])
def get_header(self, name):
name_upp = name.upper()
@ -72,7 +71,12 @@ class WbResponse:
# headersList.append((key, value))
start_response(self.status, self.headersList)
return self.body
if hasattr(self.body, '__iter__'):
return self.body
else:
return [str(self.body)]
def __repr__(self):
return str(vars(self))