mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
Merge pull request #45 from vbanos/return-capture-timestamp
Return capture timestamp
This commit is contained in:
commit
57d7795ced
@ -555,6 +555,22 @@ def test_limits(http_daemon, warcprox_, archiving_proxies):
|
||||
assert response.headers["content-type"] == "text/plain;charset=utf-8"
|
||||
assert response.raw.data == b"request rejected by warcprox: reached limit test_limits_bucket/total/urls=10\n"
|
||||
|
||||
def test_return_capture_timestamp(http_daemon, warcprox_, archiving_proxies):
    """Check that the proxy returns a capture timestamp when asked for one.

    Sends a request through the archiving proxy with a ``Warcprox-Meta``
    request header whose ``accept`` list contains ``capture-metadata``, then
    verifies that the response carries a ``Warcprox-Meta`` header whose JSON
    payload has a ``capture-metadata.timestamp`` value formatted as
    ``%Y-%m-%dT%H:%M:%SZ``.
    """
    url = 'http://localhost:{}/i/j'.format(http_daemon.server_port)
    request_meta = {"accept": ["capture-metadata"]}
    headers = {"Warcprox-Meta": json.dumps(request_meta)}
    response = requests.get(
            url, proxies=archiving_proxies, headers=headers, stream=True)
    assert response.status_code == 200
    assert response.headers['Warcprox-Meta']
    data = json.loads(response.headers['Warcprox-Meta'])
    assert data['capture-metadata']

    # Read the value once so the failure message below reports exactly the
    # string that failed to parse.
    timestamp = data['capture-metadata']['timestamp']
    try:
        # Only the parse is guarded: ValueError here means a bad format.
        dt = datetime.datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%SZ')
    except ValueError:
        # pytest.fail() does not do %-interpolation and its second positional
        # argument is `pytrace`, so the message is formatted explicitly.
        pytest.fail('Invalid capture-timestamp format %s' % timestamp)
    assert dt
|
||||
|
||||
def test_dedup_buckets(https_daemon, http_daemon, warcprox_, archiving_proxies, playback_proxies):
|
||||
url1 = 'http://localhost:{}/k/l'.format(http_daemon.server_port)
|
||||
url2 = 'https://localhost:{}/k/l'.format(https_daemon.server_port)
|
||||
|
@ -45,6 +45,7 @@ try:
|
||||
http_client._MAXLINE = 4194304 # 4 MiB
|
||||
except ImportError:
|
||||
import httplib as http_client
|
||||
import json
|
||||
import socket
|
||||
import logging
|
||||
import ssl
|
||||
@ -163,13 +164,17 @@ class ProxyingRecordingHTTPResponse(http_client.HTTPResponse):
|
||||
self.fp, proxy_client, digest_algorithm, url=url)
|
||||
self.fp = self.recorder
|
||||
|
||||
def begin(self):
|
||||
def begin(self, extra_response_headers={}):
|
||||
http_client.HTTPResponse.begin(self) # reads status line, headers
|
||||
|
||||
status_and_headers = 'HTTP/1.1 {} {}\r\n'.format(
|
||||
self.status, self.reason)
|
||||
self.msg['Via'] = via_header_value(
|
||||
self.msg.get('Via'), '%0.1f' % (self.version / 10.0))
|
||||
if extra_response_headers:
|
||||
for header, value in extra_response_headers.items():
|
||||
self.msg[header] = value
|
||||
|
||||
for k,v in self.msg.items():
|
||||
if k.lower() not in (
|
||||
'connection', 'proxy-connection', 'keep-alive',
|
||||
@ -361,12 +366,16 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
||||
self.logger.error("exception proxying request", exc_info=True)
|
||||
raise
|
||||
|
||||
def _proxy_request(self):
|
||||
def _proxy_request(self, extra_response_headers={}):
|
||||
'''
|
||||
Sends the request to the remote server, then uses a ProxyingRecorder to
|
||||
read the response and send it to the proxy client, while recording the
|
||||
bytes in transit. Returns a tuple (request, response) where request is
|
||||
the raw request bytes, and response is a ProxyingRecorder.
|
||||
|
||||
:param extra_response_headers: generated on warcprox._proxy_request.
|
||||
It may contain extra HTTP headers such as ``Warcprox-Meta`` which
|
||||
are written in the WARC record for this request.
|
||||
'''
|
||||
# Build request
|
||||
req_str = '{} {} {}\r\n'.format(
|
||||
@ -407,7 +416,7 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
||||
self._remote_server_sock, proxy_client=self.connection,
|
||||
digest_algorithm=self.server.digest_algorithm,
|
||||
url=self.url, method=self.command)
|
||||
prox_rec_res.begin()
|
||||
prox_rec_res.begin(extra_response_headers=extra_response_headers)
|
||||
|
||||
buf = prox_rec_res.read(8192)
|
||||
while buf != b'':
|
||||
|
@ -179,9 +179,14 @@ class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler):
|
||||
|
||||
remote_ip = self._remote_server_sock.getpeername()[0]
|
||||
timestamp = datetime.datetime.utcnow()
|
||||
extra_response_headers = {}
|
||||
if warcprox_meta and 'accept' in warcprox_meta and \
|
||||
'capture-metadata' in warcprox_meta['accept']:
|
||||
rmeta = {'capture-metadata': {'timestamp': timestamp.strftime('%Y-%m-%dT%H:%M:%SZ')}}
|
||||
extra_response_headers['Warcprox-Meta'] = json.dumps(rmeta, separators=',:')
|
||||
|
||||
req, prox_rec_res = warcprox.mitmproxy.MitmProxyHandler._proxy_request(
|
||||
self)
|
||||
self, extra_response_headers=extra_response_headers)
|
||||
|
||||
content_type = None
|
||||
try:
|
||||
|
Loading…
x
Reference in New Issue
Block a user