mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
better exception handling, specific status codes for exceptions,
detect access control and not found exceptions more consistently
This commit is contained in:
parent
ebc76c0791
commit
0a2b16407d
@ -19,10 +19,4 @@ class ArchivalRequestRouter:
|
|||||||
handler, wbrequest = self.parse_request(env)
|
handler, wbrequest = self.parse_request(env)
|
||||||
return handler.run(wbrequest)
|
return handler.run(wbrequest)
|
||||||
|
|
||||||
def handle_exception(self, env, exc):
|
|
||||||
return WbResponse.text_response('Error: ' + str(exc), status = '400 Bad Request')
|
|
||||||
|
|
||||||
def handle_not_found(self, env):
|
|
||||||
return WbResponse.text_response('Not Found: ' + env['REQUEST_URI'], status = '404 Not Found')
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
import urllib
|
import urllib
|
||||||
import urllib2
|
import urllib2
|
||||||
|
import wbexceptions
|
||||||
|
|
||||||
class RemoteCDXServer:
|
class RemoteCDXServer:
|
||||||
"""
|
"""
|
||||||
@ -27,17 +28,23 @@ class RemoteCDXServer:
|
|||||||
params.update(**kwvalues)
|
params.update(**kwvalues)
|
||||||
|
|
||||||
urlparams = urllib.urlencode(params)
|
urlparams = urllib.urlencode(params)
|
||||||
request = urllib2.Request(self.serverUrl, urlparams)
|
|
||||||
response = urllib2.urlopen(request)
|
try:
|
||||||
|
request = urllib2.Request(self.serverUrl, urlparams)
|
||||||
|
response = urllib2.urlopen(request)
|
||||||
|
except urllib2.HTTPError, e:
|
||||||
|
if e.code == 403:
|
||||||
|
exc_msg = e.read()
|
||||||
|
msg = 'Blocked By Robots' if 'Blocked By Robots' in exc_msg else 'Excluded'
|
||||||
|
raise wbexceptions.AccessException(msg)
|
||||||
|
else:
|
||||||
|
raise e
|
||||||
|
|
||||||
if parse_cdx:
|
if parse_cdx:
|
||||||
return map(CDXCaptureResult, response)
|
return map(CDXCaptureResult, response)
|
||||||
else:
|
else:
|
||||||
return response
|
return response
|
||||||
|
|
||||||
class InvalidCDXException(Exception):
|
|
||||||
pass
|
|
||||||
|
|
||||||
class CDXCaptureResult:
|
class CDXCaptureResult:
|
||||||
CDX_FORMATS = [["urlkey","timestamp","original","mimetype","statuscode","digest","redirect","robotflags","length","offset","filename"],
|
CDX_FORMATS = [["urlkey","timestamp","original","mimetype","statuscode","digest","redirect","robotflags","length","offset","filename"],
|
||||||
["urlkey","timestamp","original","mimetype","statuscode","digest","redirect","offset","filename"]]
|
["urlkey","timestamp","original","mimetype","statuscode","digest","redirect","offset","filename"]]
|
||||||
|
9
pywb/utils.py
Normal file
9
pywb/utils.py
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
import itertools
|
||||||
|
|
||||||
|
def peek_iter(iterable):
    """Check whether iterable yields anything without losing its first item.

    iterable: any iterable (list, generator, file-like object, ...).

    Returns None if iterable produces no items at all; otherwise returns an
    iterator that yields every item, including the first one that had to be
    consumed to perform the check.
    """
    # next() only works on iterators; iter() lets plain iterables such as
    # lists be passed in as well (and is a no-op for real iterators)
    iterator = iter(iterable)

    try:
        first = next(iterator)
    except StopIteration:
        return None

    # put the consumed item back in front of the remaining ones
    return itertools.chain([first], iterator)
|
@ -3,11 +3,14 @@ from archiveurl import archiveurl
|
|||||||
from archivalrouter import ArchivalRequestRouter
|
from archivalrouter import ArchivalRequestRouter
|
||||||
import indexreader
|
import indexreader
|
||||||
import json
|
import json
|
||||||
|
import wbexceptions
|
||||||
|
import utils
|
||||||
|
|
||||||
class WBHandler:
    """Handler that parses the request's archive url and echoes its fields."""

    def run(self, wbrequest):
        """Parse wbrequest.wb_url and return its attributes as a text response.

        The parsed url is also stored on the request as ``parsed_url``.
        """
        parsed = archiveurl(wbrequest.wb_url)
        wbrequest.parsed_url = parsed

        # dump of the parsed url's attributes, used as the response body
        description = str(vars(parsed))
        return WbResponse.text_stream(description)
|
||||||
|
|
||||||
class QueryHandler:
|
class QueryHandler:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
@ -15,7 +18,6 @@ class QueryHandler:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_query_params(wburl):
|
def get_query_params(wburl):
|
||||||
print wburl.type
|
|
||||||
return {
|
return {
|
||||||
|
|
||||||
archiveurl.QUERY:
|
archiveurl.QUERY:
|
||||||
@ -37,23 +39,27 @@ class QueryHandler:
|
|||||||
|
|
||||||
def run(self, wbrequest):
|
def run(self, wbrequest):
|
||||||
wburl = archiveurl(wbrequest.wb_url)
|
wburl = archiveurl(wbrequest.wb_url)
|
||||||
|
#wburl = wbresponse.body.parsed_url
|
||||||
|
|
||||||
params = QueryHandler.get_query_params(wburl)
|
params = QueryHandler.get_query_params(wburl)
|
||||||
|
|
||||||
#parse_cdx = (wburl.mod == 'json')
|
|
||||||
cdxlines = self.cdxserver.load(wburl.url, params)
|
cdxlines = self.cdxserver.load(wburl.url, params)
|
||||||
|
|
||||||
return WbResponse.text_stream(cdxlines)
|
cdxlines = utils.peek_iter(cdxlines)
|
||||||
|
|
||||||
|
if cdxlines is not None:
|
||||||
|
return WbResponse.text_stream(cdxlines)
|
||||||
|
|
||||||
|
raise wbexceptions.NotFoundException('WB Does Not Have Url: ' + wburl.url)
|
||||||
|
|
||||||
#if parse_cdx:
|
|
||||||
# text = str("\n".join(map(str, cdxlines)))
|
|
||||||
# text = json.dumps(cdxlines, default=lambda o: o.__dict__)
|
|
||||||
#else:
|
|
||||||
# text = cdxlines
|
|
||||||
|
|
||||||
|
|
||||||
## ===========
|
## ===========
|
||||||
parser = ArchivalRequestRouter({'/web/': QueryHandler()}, hostpaths = ['http://localhost:9090/'])
|
parser = ArchivalRequestRouter(
|
||||||
|
{'/t1/' : WBHandler(),
|
||||||
|
'/t2/' : QueryHandler()
|
||||||
|
},
|
||||||
|
hostpaths = ['http://localhost:9090/'])
|
||||||
## ===========
|
## ===========
|
||||||
|
|
||||||
|
|
||||||
@ -63,13 +69,26 @@ def application(env, start_response):
|
|||||||
try:
|
try:
|
||||||
response = parser.handle_request(env)
|
response = parser.handle_request(env)
|
||||||
|
|
||||||
|
if not response:
|
||||||
|
raise wbexceptions.NotFoundException(env['REQUEST_URI'] + ' was not found')
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
last_exc = e
|
last_exc = e
|
||||||
import traceback
|
import traceback
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
response = parser.handle_exception(env, e)
|
response = handle_exception(env, e)
|
||||||
|
|
||||||
if not response:
|
|
||||||
response = parser.handle_not_found(env)
|
|
||||||
|
|
||||||
return response(env, start_response)
|
return response(env, start_response)
|
||||||
|
|
||||||
|
def handle_exception(env, exc):
    """Build a plain-text error response for an exception raised by a request.

    env: the WSGI environ of the failed request (not used at present).
    exc: the exception that was raised. If it exposes ``status`` (a method
         or a plain attribute), that value selects the HTTP status;
         otherwise '400 Bad Request' is used.

    Returns whatever WbResponse.text_response() builds for the status line
    and the message '<status> Error: <str(exc)>'.
    """
    status = _status_line(exc)
    return WbResponse.text_response(status + ' Error: ' + str(exc), status = status)


def _status_line(exc, default = '400 Bad Request'):
    """Return a complete '<code> <reason>' status line for exc, or default."""
    status = getattr(exc, 'status', None)

    # status may be a method (as on the wbexceptions classes) or a plain value
    if callable(status):
        status = status()

    if status is None:
        return default

    status = str(status).strip()
    if not status:
        return default

    # already of the form '404 Not Found'
    if ' ' in status:
        return status

    # A bare code such as '404' is not a valid WSGI status: start_response()
    # requires the reason phrase as well, so look up the standard one.
    try:
        from http.client import responses   # Python 3
    except ImportError:
        from httplib import responses       # Python 2

    try:
        code = int(status)
    except ValueError:
        return default

    return status + ' ' + responses.get(code, 'Error')
|
||||||
|
|
||||||
|
#def handle_not_found(env):
|
||||||
|
# return WbResponse.text_response('Not Found: ' + env['REQUEST_URI'], status = '404 Not Found')
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,8 +1,20 @@
|
|||||||
|
|
||||||
class RequestParseException(Exception):
    """Exception reported to the client with an HTTP 400 status.

    NOTE(review): presumably raised when a request cannot be parsed --
    confirm against the raising code.
    """

    def status(self):
        """Return the full WSGI status line ('<code> <reason>') for this error."""
        # start_response() needs the reason phrase, not only the numeric code
        return '400 Bad Request'
|
||||||
|
|
||||||
class BadUrlException(Exception):
    """Exception reported to the client with an HTTP 400 status.

    NOTE(review): presumably raised for malformed archive urls -- confirm
    against the raising code.
    """

    def status(self):
        """Return the full WSGI status line ('<code> <reason>') for this error."""
        # start_response() needs the reason phrase, not only the numeric code
        return '400 Bad Request'
|
||||||
|
|
||||||
|
class AccessException(Exception):
    """Exception reported to the client with an HTTP 403 status."""

    def status(self):
        """Return the full WSGI status line ('<code> <reason>') for this error."""
        # start_response() needs the reason phrase, not only the numeric code
        return '403 Forbidden'
|
||||||
|
|
||||||
|
class InvalidCDXException(Exception):
    """Exception reported to the client with an HTTP 500 status.

    NOTE(review): presumably raised for unparseable CDX index lines --
    confirm against the raising code.
    """

    def status(self):
        """Return the full WSGI status line ('<code> <reason>') for this error."""
        # start_response() needs the reason phrase, not only the numeric code
        return '500 Internal Server Error'
|
||||||
|
|
||||||
|
class NotFoundException(Exception):
    """Exception reported to the client with an HTTP 404 status."""

    def status(self):
        """Return the full WSGI status line ('<code> <reason>') for this error."""
        # start_response() needs the reason phrase, not only the numeric code
        return '404 Not Found'
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
|
|
||||||
#WB Request and Response
|
#WB Request and Response
|
||||||
|
|
||||||
class WbRequest:
|
class WbRequest:
|
||||||
@ -57,8 +56,8 @@ class WbResponse:
|
|||||||
return WbResponse(status, value = [text], headersList = [('Content-Type', 'text/plain')])
|
return WbResponse(status, value = [text], headersList = [('Content-Type', 'text/plain')])
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def redir_response(location):
|
def redir_response(location, status = '302 Redirect'):
|
||||||
return WbResponse('302 Redirect', headersList = [('Location', location)])
|
return WbResponse(status, headersList = [('Location', location)])
|
||||||
|
|
||||||
def get_header(self, name):
|
def get_header(self, name):
|
||||||
name_upp = name.upper()
|
name_upp = name.upper()
|
||||||
@ -72,7 +71,12 @@ class WbResponse:
|
|||||||
# headersList.append((key, value))
|
# headersList.append((key, value))
|
||||||
|
|
||||||
start_response(self.status, self.headersList)
|
start_response(self.status, self.headersList)
|
||||||
return self.body
|
|
||||||
|
if hasattr(self.body, '__iter__'):
|
||||||
|
return self.body
|
||||||
|
else:
|
||||||
|
return [str(self.body)]
|
||||||
|
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return str(vars(self))
|
return str(vars(self))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user