From 7a828017d1e0a84bc79b6459f2c1478cdf1f40b8 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Sat, 12 Mar 2016 22:18:01 -0800 Subject: [PATCH] recorder: clean up logging, ReadFullyStream moves to utils, get_request_uri to inputreq --- recorder/recorderapp.py | 26 +++++++------------------- recorder/warcrecorder.py | 2 -- 2 files changed, 7 insertions(+), 21 deletions(-) diff --git a/recorder/recorderapp.py b/recorder/recorderapp.py index b7d91251..3d968edd 100644 --- a/recorder/recorderapp.py +++ b/recorder/recorderapp.py @@ -1,8 +1,8 @@ #from gevent import monkey; monkey.patch_all() -from requests import request as remote_request from requests.structures import CaseInsensitiveDict +import requests -from webagg.liverec import ReadFullyStream +from webagg.utils import ReadFullyStream from webagg.responseloader import StreamIter from webagg.inputrequest import DirectWSGIInputRequest @@ -13,7 +13,7 @@ from pywb.warc.recordloader import ArcWarcRecordLoader from recorder.warcrecorder import SingleFileWARCRecorder, PerRecordWARCRecorder from recorder.redisindexer import WritableRedisIndexer -from six.moves.urllib.parse import parse_qsl, quote +from six.moves.urllib.parse import parse_qsl import json import tempfile @@ -51,7 +51,6 @@ class RecorderApp(object): req_head, req_pay, resp_head, resp_pay, params = result if not self.rx_accept_colls.match(resp_head.get('WebAgg-Source-Coll', '')): - print('COLL', resp_head) return req = self._create_req_record(req_head, req_pay, 'request') @@ -104,24 +103,11 @@ class RecorderApp(object): start_response('400 Bad Request', headers) return [message.encode('utf-8')] - def _get_request_uri(self, env): - req_uri = env.get('REQUEST_URI') - if req_uri: - return req_uri - - req_uri = quote(env.get('PATH_INFO', ''), safe='/~!$&\'()*+,;=:@') - query = env.get('QUERY_STRING') - if query: - req_uri += '?' + query - - return req_uri - def __call__(self, environ, start_response): - request_uri = self._get_request_uri(environ) - input_req = DirectWSGIInputRequest(environ) headers = input_req.get_req_headers() method = input_req.get_req_method() + request_uri = input_req.get_full_request_uri() input_buff = input_req.get_req_body() @@ -130,7 +116,7 @@ class RecorderApp(object): req_stream = ReqWrapper(input_buff, headers) try: - res = remote_request(url=self.upstream_host + request_uri, + res = requests.request(url=self.upstream_host + request_uri, method=method, data=req_stream, headers=headers, @@ -205,3 +191,5 @@ class ReqWrapper(Wrapper): for n in req_headers.keys(): if not n.upper().startswith('WARC-'): del self.headers[n] + + diff --git a/recorder/warcrecorder.py b/recorder/warcrecorder.py index c24e9f63..e75bff05 100644 --- a/recorder/warcrecorder.py +++ b/recorder/warcrecorder.py @@ -258,8 +258,6 @@ class PerRecordWARCRecorder(BaseWARCRecorder): req_uuid = req.rec_headers['WARC-Record-ID'].split(':')[-1].strip('<> ') formatter = ParamFormatter(params, name=self.rec_source_name) - print(params) - print(formatter.name) full_dir = formatter.format(self.warcdir) try: