1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-24 06:59:52 +01:00

recorder: clean up logging, ReadFullyStream moves to utils, get_request_uri to inputreq

This commit is contained in:
Ilya Kreymer 2016-03-12 22:18:01 -08:00
parent 49b6ae78a8
commit 7a828017d1
2 changed files with 7 additions and 21 deletions

View File

@ -1,8 +1,8 @@
#from gevent import monkey; monkey.patch_all() #from gevent import monkey; monkey.patch_all()
from requests import request as remote_request
from requests.structures import CaseInsensitiveDict from requests.structures import CaseInsensitiveDict
import requests
from webagg.liverec import ReadFullyStream from webagg.utils import ReadFullyStream
from webagg.responseloader import StreamIter from webagg.responseloader import StreamIter
from webagg.inputrequest import DirectWSGIInputRequest from webagg.inputrequest import DirectWSGIInputRequest
@ -13,7 +13,7 @@ from pywb.warc.recordloader import ArcWarcRecordLoader
from recorder.warcrecorder import SingleFileWARCRecorder, PerRecordWARCRecorder from recorder.warcrecorder import SingleFileWARCRecorder, PerRecordWARCRecorder
from recorder.redisindexer import WritableRedisIndexer from recorder.redisindexer import WritableRedisIndexer
from six.moves.urllib.parse import parse_qsl, quote from six.moves.urllib.parse import parse_qsl
import json import json
import tempfile import tempfile
@ -51,7 +51,6 @@ class RecorderApp(object):
req_head, req_pay, resp_head, resp_pay, params = result req_head, req_pay, resp_head, resp_pay, params = result
if not self.rx_accept_colls.match(resp_head.get('WebAgg-Source-Coll', '')): if not self.rx_accept_colls.match(resp_head.get('WebAgg-Source-Coll', '')):
print('COLL', resp_head)
return return
req = self._create_req_record(req_head, req_pay, 'request') req = self._create_req_record(req_head, req_pay, 'request')
@ -104,24 +103,11 @@ class RecorderApp(object):
start_response('400 Bad Request', headers) start_response('400 Bad Request', headers)
return [message.encode('utf-8')] return [message.encode('utf-8')]
def _get_request_uri(self, env):
    """Return the request URI for the WSGI environ mapping ``env``.

    A non-empty ``REQUEST_URI`` entry is returned unchanged. Otherwise the
    URI is rebuilt from ``PATH_INFO`` (percent-quoted, defaulting to an
    empty string when absent), followed by ``'?'`` and ``QUERY_STRING``
    when the query string is non-empty.
    """
    supplied_uri = env.get('REQUEST_URI')
    if supplied_uri:
        return supplied_uri

    # No ready-made URI in the environ: assemble it from path and query.
    quoted_path = quote(env.get('PATH_INFO', ''), safe='/~!$&\'()*+,;=:@')
    query_string = env.get('QUERY_STRING')
    if not query_string:
        return quoted_path
    return quoted_path + '?' + query_string
def __call__(self, environ, start_response): def __call__(self, environ, start_response):
request_uri = self._get_request_uri(environ)
input_req = DirectWSGIInputRequest(environ) input_req = DirectWSGIInputRequest(environ)
headers = input_req.get_req_headers() headers = input_req.get_req_headers()
method = input_req.get_req_method() method = input_req.get_req_method()
request_uri = input_req.get_full_request_uri()
input_buff = input_req.get_req_body() input_buff = input_req.get_req_body()
@ -130,7 +116,7 @@ class RecorderApp(object):
req_stream = ReqWrapper(input_buff, headers) req_stream = ReqWrapper(input_buff, headers)
try: try:
res = remote_request(url=self.upstream_host + request_uri, res = requests.request(url=self.upstream_host + request_uri,
method=method, method=method,
data=req_stream, data=req_stream,
headers=headers, headers=headers,
@ -205,3 +191,5 @@ class ReqWrapper(Wrapper):
for n in req_headers.keys(): for n in req_headers.keys():
if not n.upper().startswith('WARC-'): if not n.upper().startswith('WARC-'):
del self.headers[n] del self.headers[n]

View File

@ -258,8 +258,6 @@ class PerRecordWARCRecorder(BaseWARCRecorder):
req_uuid = req.rec_headers['WARC-Record-ID'].split(':')[-1].strip('<> ') req_uuid = req.rec_headers['WARC-Record-ID'].split(':')[-1].strip('<> ')
formatter = ParamFormatter(params, name=self.rec_source_name) formatter = ParamFormatter(params, name=self.rec_source_name)
print(params)
print(formatter.name)
full_dir = formatter.format(self.warcdir) full_dir = formatter.format(self.warcdir)
try: try: