mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
Merge pull request #124 from nlevitt/incomplete-read
IncompleteRead fix with test
This commit is contained in:
commit
10327d28c9
@ -68,7 +68,6 @@ import certauth.certauth
|
||||
import warcprox
|
||||
import warcprox.main
|
||||
|
||||
|
||||
try:
|
||||
import http.client as http_client
|
||||
except ImportError:
|
||||
@ -282,6 +281,15 @@ class _TestHttpRequestHandler(http_server.BaseHTTPRequestHandler):
|
||||
payload = b'Test.'
|
||||
actual_headers = (b'Content-Type: text/plain\r\n'
|
||||
+ b'Content-Length: ' + str(len(payload)).encode('ascii') + b'\r\n')
|
||||
elif self.path == '/incomplete-read':
|
||||
headers = (b'HTTP/1.1 200 OK\r\n'
|
||||
+ b'Content-Type: text/plain\r\n'
|
||||
+ b'Transfer-Encoding: chunked\r\n'
|
||||
+ b'\r\n')
|
||||
# payload = b'''1\r\na'''
|
||||
payload = chunkify(
|
||||
b'Server closes connection when client expects next chunk')
|
||||
payload = payload[:-7]
|
||||
else:
|
||||
payload = b'404 Not Found\n'
|
||||
headers = (b'HTTP/1.1 404 Not Found\r\n'
|
||||
@ -295,7 +303,9 @@ class _TestHttpRequestHandler(http_server.BaseHTTPRequestHandler):
|
||||
headers, payload = self.build_response()
|
||||
self.connection.sendall(headers)
|
||||
self.connection.sendall(payload)
|
||||
if self.path in ('/missing-content-length', '/empty-response'):
|
||||
if self.path in (
|
||||
'/missing-content-length', '/empty-response',
|
||||
'/incomplete-read'):
|
||||
# server must close the connection, else client has no idea if
|
||||
# there is more data coming
|
||||
self.connection.shutdown(socket.SHUT_RDWR)
|
||||
@ -1614,13 +1624,11 @@ def test_controller_with_defaults():
|
||||
assert not wwp.writer_pool.default_warc_writer.record_builder.base32
|
||||
assert wwp.writer_pool.default_warc_writer.record_builder.digest_algorithm == 'sha1'
|
||||
|
||||
|
||||
class EarlyPlugin(warcprox.BaseStandardPostfetchProcessor):
    """Postfetch processor that does no work for any url.

    The only thing it declares beyond its base class is
    ``CHAIN_POSITION = 'early'``.
    """

    # NOTE(review): presumably read by the plugin loader to decide where in
    # the postfetch chain this processor is inserted -- confirm against the
    # loader code, which is not visible here.
    CHAIN_POSITION = 'early'

    def _process_url(self):
        """Intentionally a no-op."""
|
||||
|
||||
|
||||
def test_load_plugin():
|
||||
options = warcprox.Options(port=0, plugins=[
|
||||
'warcprox.stats.RunningStats',
|
||||
@ -2226,6 +2234,18 @@ def test_dedup_min_binary_size(http_daemon, warcprox_, archiving_proxies):
|
||||
with pytest.raises(StopIteration):
|
||||
next(rec_iter)
|
||||
|
||||
def test_incomplete_read(http_daemon, warcprox_, archiving_proxies):
    """Fetch a truncated chunked response through the archiving proxy.

    The test http server's ``/incomplete-read`` path sends a chunked body
    with its last bytes cut off and then shuts the connection down. The
    client request is expected to fail with ``ChunkedEncodingError``, and
    the proxy's running url count is expected to go up by exactly one.

    See https://github.com/internetarchive/warcprox/pull/123
    """
    urls_before = warcprox_.proxy.running_stats.urls

    url = 'http://localhost:%s/incomplete-read' % http_daemon.server_port
    with pytest.raises(requests.exceptions.ChunkedEncodingError):
        # The call is expected to raise, so its return value is not bound.
        requests.get(
                url, proxies=archiving_proxies, verify=False, timeout=10)

    # wait for postfetch chain
    wait(lambda: warcprox_.proxy.running_stats.urls - urls_before == 1)
|
||||
|
||||
# Allow this test module to be executed directly as a script.
if __name__ == '__main__':
    pytest.main()
|
||||
|
||||
|
@ -487,9 +487,14 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
||||
tmp_file_max_memory_size=self._tmp_file_max_memory_size)
|
||||
prox_rec_res.begin(extra_response_headers=extra_response_headers)
|
||||
|
||||
buf = prox_rec_res.read(65536)
|
||||
buf = None
|
||||
while buf != b'':
|
||||
buf = prox_rec_res.read(65536)
|
||||
try:
|
||||
buf = prox_rec_res.read(65536)
|
||||
except http_client.IncompleteRead as e:
|
||||
self.logger.warn('%s from %s', e, self.url)
|
||||
buf = b''
|
||||
|
||||
if (self._max_resource_size and
|
||||
prox_rec_res.recorder.len > self._max_resource_size):
|
||||
prox_rec_res.truncated = b'length'
|
||||
|
Loading…
x
Reference in New Issue
Block a user