1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

seq handler: add an option (filter_errors) to filter out bad responses (status not 1xx, 2xx or 3xx) and proceed to the next handler. If no handler returns an acceptable response, the last filtered error response is returned instead

dockerfile: comment out the VOLUME and EXPOSE (port 8080) directives in the base image
error page: add a CORS header (Access-Control-Allow-Origin: *) to the 404 not-found page
This commit is contained in:
Ilya Kreymer 2018-05-09 12:00:03 -07:00
parent 9acad27801
commit be3c3c8778
6 changed files with 42 additions and 10 deletions

View File

@ -19,11 +19,11 @@ RUN python setup.py install
RUN mkdir /webarchive
COPY config.yaml /webarchive/
VOLUME /webarchive
#VOLUME /webarchive
WORKDIR /webarchive
EXPOSE 8080
#EXPOSE 8080
CMD ["uwsgi", "/uwsgi/uwsgi.ini"]

View File

@ -467,7 +467,9 @@ class RewriterApp(object):
def _not_found_response(self, environ, url):
resp = self.not_found_view.render_to_string(environ, url=url)
return WbResponse.text_response(resp, status='404 Not Found', content_type='text/html')
return WbResponse.text_response(resp, status='404 Not Found',
content_type='text/html',
headers=[('Access-Control-Allow-Origin', '*')])
def _error_response(self, environ, msg='', details='', status='404 Not Found'):
resp = self.error_view.render_to_string(environ,

View File

@ -43,11 +43,16 @@ class WbResponse(object):
return WbResponse(status_headers, value=stream)
@staticmethod
def text_response(text, status='200 OK', content_type='text/plain; charset=utf-8'):
def text_response(text, status='200 OK', content_type='text/plain; charset=utf-8', headers=None):
encoded_text = text.encode('utf-8')
status_headers = StatusAndHeaders(status,
[('Content-Type', content_type),
('Content-Length', str(len(encoded_text)))])
def_headers = [('Content-Type', content_type),
('Content-Length', str(len(encoded_text)))]
if headers:
def_headers += headers
status_headers = StatusAndHeaders(status, def_headers)
return WbResponse(status_headers, value=[encoded_text])

View File

@ -7,11 +7,13 @@ from werkzeug.exceptions import HTTPException
import requests
import traceback
import json
import logging
import six
JSON_CT = 'application/json; charset=utf-8'
logger = logging.getLogger('warcserver')
#=============================================================================
class BaseWarcServer(object):
@ -19,6 +21,9 @@ class BaseWarcServer(object):
self.route_dict = {}
self.debug = kwargs.get('debug', False)
if self.debug:
logger.setLevel(logging.DEBUG)
self.url_map = Map()
def list_routes(environ):

View File

@ -84,7 +84,7 @@ class IndexHandler(object):
return None, None, errs
cdx_iter, errs = self._load_index_source(params)
if not cdx_iter:
if not cdx_iter or errs:
return None, None, errs
content_type, res = handler(cdx_iter, fields)
@ -152,8 +152,9 @@ class DefaultResourceHandler(ResourceHandler):
#=============================================================================
class HandlerSeq(object):
def __init__(self, handlers):
def __init__(self, handlers, filter_errors=True):
self.handlers = handlers
self.filter_errors = filter_errors
def get_supported_modes(self):
if self.handlers:
@ -163,12 +164,26 @@ class HandlerSeq(object):
# Call each handler in self.handlers in order with the same params and return
# the first response that is accepted, as a 3-tuple (out_headers, res, all_errs).
# all_errs collects the errs dict of every handler that was tried.
# NOTE(review): indentation is not preserved in this view, so the nesting
# described in the comments below is inferred from statement order -- confirm
# against the real file.
def __call__(self, params):
all_errs = {}
# Most recent response rejected by the status filter, kept as a fallback.
err_res = None
err_out_headers = None
for handler in self.handlers:
out_headers, res, errs = handler(params)
# With filter_errors enabled, a response whose 'Warcserver-Status' header
# does not start with 1, 2 or 3 is treated as a failure, not returned.
if out_headers and self.filter_errors:
status = out_headers.get('Warcserver-Status')
if status and not status.startswith(('1', '2', '3')):
# The handler's own errs are replaced by a single status_error entry.
errs = {'status_error': status}
err_res = res
err_out_headers = out_headers
# Clearing res makes the check below fall through to the next handler.
res = None
all_errs.update(errs)
if res is not None:
return out_headers, res, all_errs
# No handler produced an accepted response: return the saved error
# response if one was recorded, otherwise report only the errors.
if err_res and err_out_headers:
return err_out_headers, err_res, all_errs
return None, None, all_errs

View File

@ -56,6 +56,9 @@ class BaseLoader(object):
out_headers['Warcserver-Cdx'] = to_native_str(cdx.to_cdxj().rstrip())
out_headers['Warcserver-Source-Coll'] = to_native_str(source)
status = cdx.get('status')
if status:
out_headers['Warcserver-Status'] = str(status)
if not warc_headers:
if other_headers:
@ -318,6 +321,8 @@ class LiveWebLoader(BaseLoader):
status=upstream_res.status,
reason=upstream_res.reason)
cdx['status'] = upstream_res.status
http_headers_buff = status
orig_resp = upstream_res._original_response