Merge pull request #124 from nlevitt/incomplete-read

IncompleteRead fix with test
This commit is contained in:
Noah Levitt 2019-04-13 18:10:14 -07:00 committed by GitHub
commit 10327d28c9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 31 additions and 6 deletions

View File

@ -68,7 +68,6 @@ import certauth.certauth
import warcprox
import warcprox.main
try:
import http.client as http_client
except ImportError:
@ -282,6 +281,15 @@ class _TestHttpRequestHandler(http_server.BaseHTTPRequestHandler):
payload = b'Test.'
actual_headers = (b'Content-Type: text/plain\r\n'
+ b'Content-Length: ' + str(len(payload)).encode('ascii') + b'\r\n')
elif self.path == '/incomplete-read':
headers = (b'HTTP/1.1 200 OK\r\n'
+ b'Content-Type: text/plain\r\n'
+ b'Transfer-Encoding: chunked\r\n'
+ b'\r\n')
# payload = b'''1\r\na'''
payload = chunkify(
b'Server closes connection when client expects next chunk')
payload = payload[:-7]
else:
payload = b'404 Not Found\n'
headers = (b'HTTP/1.1 404 Not Found\r\n'
@ -295,7 +303,9 @@ class _TestHttpRequestHandler(http_server.BaseHTTPRequestHandler):
headers, payload = self.build_response()
self.connection.sendall(headers)
self.connection.sendall(payload)
if self.path in ('/missing-content-length', '/empty-response'):
if self.path in (
'/missing-content-length', '/empty-response',
'/incomplete-read'):
# server must close the connection, else client has no idea if
# there is more data coming
self.connection.shutdown(socket.SHUT_RDWR)
@ -1614,13 +1624,11 @@ def test_controller_with_defaults():
assert not wwp.writer_pool.default_warc_writer.record_builder.base32
assert wwp.writer_pool.default_warc_writer.record_builder.digest_algorithm == 'sha1'
class EarlyPlugin(warcprox.BaseStandardPostfetchProcessor):
    '''
    Subclass of `warcprox.BaseStandardPostfetchProcessor` whose only
    customizations are a `CHAIN_POSITION` of 'early' and a `_process_url`
    that does nothing.
    '''
    CHAIN_POSITION = 'early'

    def _process_url(self):
        '''Deliberate no-op.'''
        return None
def test_load_plugin():
options = warcprox.Options(port=0, plugins=[
'warcprox.stats.RunningStats',
@ -2226,6 +2234,18 @@ def test_dedup_min_binary_size(http_daemon, warcprox_, archiving_proxies):
with pytest.raises(StopIteration):
next(rec_iter)
def test_incomplete_read(http_daemon, warcprox_, archiving_proxies):
    '''
    Request /incomplete-read through the archiving proxy, expect the client
    to get a ChunkedEncodingError, then wait until warcprox's running stats
    show exactly one more url than before the request.

    See https://github.com/internetarchive/warcprox/pull/123
    '''
    url_count_at_start = warcprox_.proxy.running_stats.urls

    def one_more_url_counted():
        # re-read the counter on every poll; it is updated asynchronously
        current = warcprox_.proxy.running_stats.urls
        return current - url_count_at_start == 1

    target = 'http://localhost:%s/incomplete-read' % (
            http_daemon.server_port,)

    # the fetch itself is expected to fail on the client side, so there is
    # no response object worth keeping
    with pytest.raises(requests.exceptions.ChunkedEncodingError):
        requests.get(
                target, proxies=archiving_proxies, verify=False, timeout=10)

    # wait for postfetch chain
    wait(one_more_url_counted)
if __name__ == '__main__':
    # executing this file directly hands control to pytest
    pytest.main()

View File

@ -487,9 +487,14 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
tmp_file_max_memory_size=self._tmp_file_max_memory_size)
prox_rec_res.begin(extra_response_headers=extra_response_headers)
buf = prox_rec_res.read(65536)
buf = None
while buf != b'':
buf = prox_rec_res.read(65536)
try:
buf = prox_rec_res.read(65536)
except http_client.IncompleteRead as e:
self.logger.warn('%s from %s', e, self.url)
buf = b''
if (self._max_resource_size and
prox_rec_res.recorder.len > self._max_resource_size):
prox_rec_res.truncated = b'length'