1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-24 06:59:52 +01:00

seq handler: add an option (filter_errors) to filter out bad responses (status not 1xx, 2xx or 3xx) and proceed to the next handler. If no handler succeeds, return the last error response.

dockerfile: remove the VOLUME and EXPOSE (port 8080) instructions from the base image
error page: add a CORS header (Access-Control-Allow-Origin: *) to the 404 not-found page
This commit is contained in:
Ilya Kreymer 2018-05-09 12:00:03 -07:00
parent 9acad27801
commit be3c3c8778
6 changed files with 42 additions and 10 deletions

View File

@ -19,11 +19,11 @@ RUN python setup.py install
RUN mkdir /webarchive RUN mkdir /webarchive
COPY config.yaml /webarchive/ COPY config.yaml /webarchive/
VOLUME /webarchive #VOLUME /webarchive
WORKDIR /webarchive WORKDIR /webarchive
EXPOSE 8080 #EXPOSE 8080
CMD ["uwsgi", "/uwsgi/uwsgi.ini"] CMD ["uwsgi", "/uwsgi/uwsgi.ini"]

View File

@ -467,7 +467,9 @@ class RewriterApp(object):
def _not_found_response(self, environ, url): def _not_found_response(self, environ, url):
resp = self.not_found_view.render_to_string(environ, url=url) resp = self.not_found_view.render_to_string(environ, url=url)
return WbResponse.text_response(resp, status='404 Not Found', content_type='text/html') return WbResponse.text_response(resp, status='404 Not Found',
content_type='text/html',
headers=[('Access-Control-Allow-Origin', '*')])
def _error_response(self, environ, msg='', details='', status='404 Not Found'): def _error_response(self, environ, msg='', details='', status='404 Not Found'):
resp = self.error_view.render_to_string(environ, resp = self.error_view.render_to_string(environ,

View File

@ -43,11 +43,16 @@ class WbResponse(object):
return WbResponse(status_headers, value=stream) return WbResponse(status_headers, value=stream)
@staticmethod @staticmethod
def text_response(text, status='200 OK', content_type='text/plain; charset=utf-8'): def text_response(text, status='200 OK', content_type='text/plain; charset=utf-8', headers=None):
encoded_text = text.encode('utf-8') encoded_text = text.encode('utf-8')
status_headers = StatusAndHeaders(status,
[('Content-Type', content_type), def_headers = [('Content-Type', content_type),
('Content-Length', str(len(encoded_text)))]) ('Content-Length', str(len(encoded_text)))]
if headers:
def_headers += headers
status_headers = StatusAndHeaders(status, def_headers)
return WbResponse(status_headers, value=[encoded_text]) return WbResponse(status_headers, value=[encoded_text])

View File

@ -7,11 +7,13 @@ from werkzeug.exceptions import HTTPException
import requests import requests
import traceback import traceback
import json import json
import logging
import six import six
JSON_CT = 'application/json; charset=utf-8' JSON_CT = 'application/json; charset=utf-8'
logger = logging.getLogger('warcserver')
#============================================================================= #=============================================================================
class BaseWarcServer(object): class BaseWarcServer(object):
@ -19,6 +21,9 @@ class BaseWarcServer(object):
self.route_dict = {} self.route_dict = {}
self.debug = kwargs.get('debug', False) self.debug = kwargs.get('debug', False)
if self.debug:
logger.setLevel(logging.DEBUG)
self.url_map = Map() self.url_map = Map()
def list_routes(environ): def list_routes(environ):

View File

@ -84,7 +84,7 @@ class IndexHandler(object):
return None, None, errs return None, None, errs
cdx_iter, errs = self._load_index_source(params) cdx_iter, errs = self._load_index_source(params)
if not cdx_iter: if not cdx_iter or errs:
return None, None, errs return None, None, errs
content_type, res = handler(cdx_iter, fields) content_type, res = handler(cdx_iter, fields)
@ -152,8 +152,9 @@ class DefaultResourceHandler(ResourceHandler):
#============================================================================= #=============================================================================
class HandlerSeq(object): class HandlerSeq(object):
def __init__(self, handlers): def __init__(self, handlers, filter_errors=True):
self.handlers = handlers self.handlers = handlers
self.filter_errors = filter_errors
def get_supported_modes(self): def get_supported_modes(self):
if self.handlers: if self.handlers:
@ -163,12 +164,26 @@ class HandlerSeq(object):
def __call__(self, params): def __call__(self, params):
all_errs = {} all_errs = {}
err_res = None
err_out_headers = None
for handler in self.handlers: for handler in self.handlers:
out_headers, res, errs = handler(params) out_headers, res, errs = handler(params)
if out_headers and self.filter_errors:
status = out_headers.get('Warcserver-Status')
if status and not status.startswith(('1', '2', '3')):
errs = {'status_error': status}
err_res = res
err_out_headers = out_headers
res = None
all_errs.update(errs) all_errs.update(errs)
if res is not None: if res is not None:
return out_headers, res, all_errs return out_headers, res, all_errs
if err_res and err_out_headers:
return err_out_headers, err_res, all_errs
return None, None, all_errs return None, None, all_errs

View File

@ -56,6 +56,9 @@ class BaseLoader(object):
out_headers['Warcserver-Cdx'] = to_native_str(cdx.to_cdxj().rstrip()) out_headers['Warcserver-Cdx'] = to_native_str(cdx.to_cdxj().rstrip())
out_headers['Warcserver-Source-Coll'] = to_native_str(source) out_headers['Warcserver-Source-Coll'] = to_native_str(source)
status = cdx.get('status')
if status:
out_headers['Warcserver-Status'] = str(status)
if not warc_headers: if not warc_headers:
if other_headers: if other_headers:
@ -318,6 +321,8 @@ class LiveWebLoader(BaseLoader):
status=upstream_res.status, status=upstream_res.status,
reason=upstream_res.reason) reason=upstream_res.reason)
cdx['status'] = upstream_res.status
http_headers_buff = status http_headers_buff = status
orig_resp = upstream_res._original_response orig_resp = upstream_res._original_response