mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
recorder: clean up logging, ReadFullyStream moves to utils, get_request_uri to inputreq
This commit is contained in:
parent
49b6ae78a8
commit
7a828017d1
@ -1,8 +1,8 @@
|
|||||||
#from gevent import monkey; monkey.patch_all()
|
#from gevent import monkey; monkey.patch_all()
|
||||||
from requests import request as remote_request
|
|
||||||
from requests.structures import CaseInsensitiveDict
|
from requests.structures import CaseInsensitiveDict
|
||||||
|
import requests
|
||||||
|
|
||||||
from webagg.liverec import ReadFullyStream
|
from webagg.utils import ReadFullyStream
|
||||||
from webagg.responseloader import StreamIter
|
from webagg.responseloader import StreamIter
|
||||||
from webagg.inputrequest import DirectWSGIInputRequest
|
from webagg.inputrequest import DirectWSGIInputRequest
|
||||||
|
|
||||||
@ -13,7 +13,7 @@ from pywb.warc.recordloader import ArcWarcRecordLoader
|
|||||||
from recorder.warcrecorder import SingleFileWARCRecorder, PerRecordWARCRecorder
|
from recorder.warcrecorder import SingleFileWARCRecorder, PerRecordWARCRecorder
|
||||||
from recorder.redisindexer import WritableRedisIndexer
|
from recorder.redisindexer import WritableRedisIndexer
|
||||||
|
|
||||||
from six.moves.urllib.parse import parse_qsl, quote
|
from six.moves.urllib.parse import parse_qsl
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import tempfile
|
import tempfile
|
||||||
@ -51,7 +51,6 @@ class RecorderApp(object):
|
|||||||
req_head, req_pay, resp_head, resp_pay, params = result
|
req_head, req_pay, resp_head, resp_pay, params = result
|
||||||
|
|
||||||
if not self.rx_accept_colls.match(resp_head.get('WebAgg-Source-Coll', '')):
|
if not self.rx_accept_colls.match(resp_head.get('WebAgg-Source-Coll', '')):
|
||||||
print('COLL', resp_head)
|
|
||||||
return
|
return
|
||||||
|
|
||||||
req = self._create_req_record(req_head, req_pay, 'request')
|
req = self._create_req_record(req_head, req_pay, 'request')
|
||||||
@ -104,24 +103,11 @@ class RecorderApp(object):
|
|||||||
start_response('400 Bad Request', headers)
|
start_response('400 Bad Request', headers)
|
||||||
return [message.encode('utf-8')]
|
return [message.encode('utf-8')]
|
||||||
|
|
||||||
def _get_request_uri(self, env):
|
|
||||||
req_uri = env.get('REQUEST_URI')
|
|
||||||
if req_uri:
|
|
||||||
return req_uri
|
|
||||||
|
|
||||||
req_uri = quote(env.get('PATH_INFO', ''), safe='/~!$&\'()*+,;=:@')
|
|
||||||
query = env.get('QUERY_STRING')
|
|
||||||
if query:
|
|
||||||
req_uri += '?' + query
|
|
||||||
|
|
||||||
return req_uri
|
|
||||||
|
|
||||||
def __call__(self, environ, start_response):
|
def __call__(self, environ, start_response):
|
||||||
request_uri = self._get_request_uri(environ)
|
|
||||||
|
|
||||||
input_req = DirectWSGIInputRequest(environ)
|
input_req = DirectWSGIInputRequest(environ)
|
||||||
headers = input_req.get_req_headers()
|
headers = input_req.get_req_headers()
|
||||||
method = input_req.get_req_method()
|
method = input_req.get_req_method()
|
||||||
|
request_uri = input_req.get_full_request_uri()
|
||||||
|
|
||||||
input_buff = input_req.get_req_body()
|
input_buff = input_req.get_req_body()
|
||||||
|
|
||||||
@ -130,7 +116,7 @@ class RecorderApp(object):
|
|||||||
req_stream = ReqWrapper(input_buff, headers)
|
req_stream = ReqWrapper(input_buff, headers)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
res = remote_request(url=self.upstream_host + request_uri,
|
res = requests.request(url=self.upstream_host + request_uri,
|
||||||
method=method,
|
method=method,
|
||||||
data=req_stream,
|
data=req_stream,
|
||||||
headers=headers,
|
headers=headers,
|
||||||
@ -205,3 +191,5 @@ class ReqWrapper(Wrapper):
|
|||||||
for n in req_headers.keys():
|
for n in req_headers.keys():
|
||||||
if not n.upper().startswith('WARC-'):
|
if not n.upper().startswith('WARC-'):
|
||||||
del self.headers[n]
|
del self.headers[n]
|
||||||
|
|
||||||
|
|
||||||
|
@ -258,8 +258,6 @@ class PerRecordWARCRecorder(BaseWARCRecorder):
|
|||||||
req_uuid = req.rec_headers['WARC-Record-ID'].split(':')[-1].strip('<> ')
|
req_uuid = req.rec_headers['WARC-Record-ID'].split(':')[-1].strip('<> ')
|
||||||
|
|
||||||
formatter = ParamFormatter(params, name=self.rec_source_name)
|
formatter = ParamFormatter(params, name=self.rec_source_name)
|
||||||
print(params)
|
|
||||||
print(formatter.name)
|
|
||||||
full_dir = formatter.format(self.warcdir)
|
full_dir = formatter.format(self.warcdir)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user