Summary of this patch (text before the first "diff --git" header is
ignored by patch tools):

  * recorder/warcwriter.py: size the warcinfo record with
    len(warcinfo.getvalue()) instead of len(warcinfo.getbuffer()).
  * setup.py: comment out the pywb develop-branch dependency link.
  * webagg/app.py: add a json_encode() helper shared by the success and
    error paths, drop the out_headers parameter from send_error(), and
    branch on six.PY3 when building the ResErrors header and status line.
  * webagg/handlers.py: check_str() encodes six.text_type values to UTF-8
    instead of testing against str.

NOTE(review): the line structure of this patch was reconstructed from a
whitespace-collapsed copy. Indentation and blank context lines were
restored to match the hunk headers' line counts; confirm against the
target tree (e.g. "git apply --check") before applying.

diff --git a/recorder/warcwriter.py b/recorder/warcwriter.py
index 7b82d6a0..896ff626 100644
--- a/recorder/warcwriter.py
+++ b/recorder/warcwriter.py
@@ -128,7 +128,7 @@ class BaseWARCWriter(object):
         warcinfo.seek(0)
 
         record = ArcWarcRecord('warc', 'warcinfo', headers, warcinfo,
-                               None, '', len(warcinfo.getbuffer()))
+                               None, '', len(warcinfo.getvalue()))
 
         return record
 
diff --git a/setup.py b/setup.py
index 75d69c2e..6bc77d7a 100755
--- a/setup.py
+++ b/setup.py
@@ -38,7 +38,7 @@ setup(
         'pywb>=0.30.0',
         ],
     dependency_links=[
-        'git+https://github.com/ikreymer/pywb.git@develop#egg=pywb-0.30.0-develop',
+        #'git+https://github.com/ikreymer/pywb.git@develop#egg=pywb-0.30.0-develop',
     ],
     zip_safe=True,
     entry_points="""
diff --git a/webagg/app.py b/webagg/app.py
index c640b654..cb5e8bb0 100644
--- a/webagg/app.py
+++ b/webagg/app.py
@@ -6,6 +6,7 @@ import traceback
 import json
 
 from six.moves.urllib.parse import parse_qsl
+import six
 
 JSON_CT = 'application/json; charset=utf-8'
 
@@ -66,31 +67,32 @@ class ResAggApp(object):
 
             out_headers, res, errs = result
 
-            if res:
-                if isinstance(res, dict):
-                    res = json.dumps(res).encode('utf-8')
-                    out_headers['Content-Type'] = JSON_CT
-                    out_headers['Content-Length'] = str(len(res))
-                    res = [res]
+            if not res:
+                return self.send_error(errs, start_response)
 
-                if errs:
-                    out_headers['ResErrors'] = json.dumps(errs)
+            if isinstance(res, dict):
+                res = self.json_encode(res, out_headers)
 
-                start_response('200 OK', list(out_headers.items()))
-                return res
+            if errs:
+                out_headers['ResErrors'] = json.dumps(errs)
 
-            else:
-                return self.send_error(out_headers, errs, start_response)
+            start_response('200 OK', list(out_headers.items()))
+            return res
 
         except Exception as e:
             message = 'Internal Error: ' + str(e)
             status = 500
-            return self.send_error({}, {}, start_response,
+            return self.send_error({}, start_response,
                                    message=message,
                                    status=status)
 
+    def json_encode(self, res, out_headers):
+        res = json.dumps(res).encode('utf-8')
+        out_headers['Content-Type'] = JSON_CT
+        out_headers['Content-Length'] = str(len(res))
+        return [res]
 
-    def send_error(self, out_headers, errs, start_response,
+    def send_error(self, errs, start_response,
                    message='No Resource Found', status=404):
         last_exc = errs.pop('last_exc', None)
         if last_exc:
@@ -104,12 +106,15 @@ class ResAggApp(object):
         if errs:
             res['errors'] = errs
 
-        err_msg = json.dumps(res)
+        out_headers = {}
+        res = self.json_encode(res, out_headers)
 
-        headers = [('Content-Type', JSON_CT),
-                   ('Content-Length', str(len(err_msg))),
-                   ('ResErrors', err_msg)
-                  ]
+        if six.PY3:
+            out_headers['ResErrors'] = res[0].decode('utf-8')
+        else:
+            out_headers['ResErrors'] = res[0]
+            message = message.encode('utf-8')
 
-        start_response(str(status) + ' ' + message, headers)
-        return [err_msg.encode('utf-8')]
+        message = str(status) + ' ' + message
+        start_response(message, list(out_headers.items()))
+        return res
diff --git a/webagg/handlers.py b/webagg/handlers.py
index 9385e21c..e7e1acf0 100644
--- a/webagg/handlers.py
+++ b/webagg/handlers.py
@@ -6,6 +6,8 @@ from pywb.utils.wbexception import NotFoundException
 from pywb.cdx.query import CDXQuery
 from pywb.cdx.cdxdomainspecific import load_domain_specific_cdx_rules
 
+import six
+
 
 #=============================================================================
 def to_cdxj(cdx_iter, fields):
@@ -112,11 +114,11 @@ class IndexHandler(object):
         content_type, res = handler(cdx_iter, fields)
         out_headers = {'Content-Type': content_type}
 
-        def check_str(res):
-            for x in res:
-                if isinstance(x, str):
-                    x = x.encode('utf-8')
-                yield x
+        def check_str(lines):
+            for line in lines:
+                if isinstance(line, six.text_type):
+                    line = line.encode('utf-8')
+                yield line
 
         return out_headers, check_str(res), errs
 