mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
Merge pull request #124 from nlevitt/incomplete-read
IncompleteRead fix with test
This commit is contained in:
commit
10327d28c9
@ -68,7 +68,6 @@ import certauth.certauth
|
|||||||
import warcprox
|
import warcprox
|
||||||
import warcprox.main
|
import warcprox.main
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import http.client as http_client
|
import http.client as http_client
|
||||||
except ImportError:
|
except ImportError:
|
||||||
@ -282,6 +281,15 @@ class _TestHttpRequestHandler(http_server.BaseHTTPRequestHandler):
|
|||||||
payload = b'Test.'
|
payload = b'Test.'
|
||||||
actual_headers = (b'Content-Type: text/plain\r\n'
|
actual_headers = (b'Content-Type: text/plain\r\n'
|
||||||
+ b'Content-Length: ' + str(len(payload)).encode('ascii') + b'\r\n')
|
+ b'Content-Length: ' + str(len(payload)).encode('ascii') + b'\r\n')
|
||||||
|
elif self.path == '/incomplete-read':
|
||||||
|
headers = (b'HTTP/1.1 200 OK\r\n'
|
||||||
|
+ b'Content-Type: text/plain\r\n'
|
||||||
|
+ b'Transfer-Encoding: chunked\r\n'
|
||||||
|
+ b'\r\n')
|
||||||
|
# payload = b'''1\r\na'''
|
||||||
|
payload = chunkify(
|
||||||
|
b'Server closes connection when client expects next chunk')
|
||||||
|
payload = payload[:-7]
|
||||||
else:
|
else:
|
||||||
payload = b'404 Not Found\n'
|
payload = b'404 Not Found\n'
|
||||||
headers = (b'HTTP/1.1 404 Not Found\r\n'
|
headers = (b'HTTP/1.1 404 Not Found\r\n'
|
||||||
@ -295,7 +303,9 @@ class _TestHttpRequestHandler(http_server.BaseHTTPRequestHandler):
|
|||||||
headers, payload = self.build_response()
|
headers, payload = self.build_response()
|
||||||
self.connection.sendall(headers)
|
self.connection.sendall(headers)
|
||||||
self.connection.sendall(payload)
|
self.connection.sendall(payload)
|
||||||
if self.path in ('/missing-content-length', '/empty-response'):
|
if self.path in (
|
||||||
|
'/missing-content-length', '/empty-response',
|
||||||
|
'/incomplete-read'):
|
||||||
# server must close the connection, else client has no idea if
|
# server must close the connection, else client has no idea if
|
||||||
# there is more data coming
|
# there is more data coming
|
||||||
self.connection.shutdown(socket.SHUT_RDWR)
|
self.connection.shutdown(socket.SHUT_RDWR)
|
||||||
@ -1614,13 +1624,11 @@ def test_controller_with_defaults():
|
|||||||
assert not wwp.writer_pool.default_warc_writer.record_builder.base32
|
assert not wwp.writer_pool.default_warc_writer.record_builder.base32
|
||||||
assert wwp.writer_pool.default_warc_writer.record_builder.digest_algorithm == 'sha1'
|
assert wwp.writer_pool.default_warc_writer.record_builder.digest_algorithm == 'sha1'
|
||||||
|
|
||||||
|
|
||||||
class EarlyPlugin(warcprox.BaseStandardPostfetchProcessor):
|
class EarlyPlugin(warcprox.BaseStandardPostfetchProcessor):
|
||||||
CHAIN_POSITION = 'early'
|
CHAIN_POSITION = 'early'
|
||||||
def _process_url(self):
|
def _process_url(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
def test_load_plugin():
|
def test_load_plugin():
|
||||||
options = warcprox.Options(port=0, plugins=[
|
options = warcprox.Options(port=0, plugins=[
|
||||||
'warcprox.stats.RunningStats',
|
'warcprox.stats.RunningStats',
|
||||||
@ -2226,6 +2234,18 @@ def test_dedup_min_binary_size(http_daemon, warcprox_, archiving_proxies):
|
|||||||
with pytest.raises(StopIteration):
|
with pytest.raises(StopIteration):
|
||||||
next(rec_iter)
|
next(rec_iter)
|
||||||
|
|
||||||
|
def test_incomplete_read(http_daemon, warcprox_, archiving_proxies):
|
||||||
|
urls_before = warcprox_.proxy.running_stats.urls
|
||||||
|
|
||||||
|
# see https://github.com/internetarchive/warcprox/pull/123
|
||||||
|
url = 'http://localhost:%s/incomplete-read' % http_daemon.server_port
|
||||||
|
with pytest.raises(requests.exceptions.ChunkedEncodingError):
|
||||||
|
response = requests.get(
|
||||||
|
url, proxies=archiving_proxies, verify=False, timeout=10)
|
||||||
|
|
||||||
|
# wait for postfetch chain
|
||||||
|
wait(lambda: warcprox_.proxy.running_stats.urls - urls_before == 1)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
pytest.main()
|
pytest.main()
|
||||||
|
|
||||||
|
@ -487,9 +487,14 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
|||||||
tmp_file_max_memory_size=self._tmp_file_max_memory_size)
|
tmp_file_max_memory_size=self._tmp_file_max_memory_size)
|
||||||
prox_rec_res.begin(extra_response_headers=extra_response_headers)
|
prox_rec_res.begin(extra_response_headers=extra_response_headers)
|
||||||
|
|
||||||
buf = prox_rec_res.read(65536)
|
buf = None
|
||||||
while buf != b'':
|
while buf != b'':
|
||||||
buf = prox_rec_res.read(65536)
|
try:
|
||||||
|
buf = prox_rec_res.read(65536)
|
||||||
|
except http_client.IncompleteRead as e:
|
||||||
|
self.logger.warn('%s from %s', e, self.url)
|
||||||
|
buf = b''
|
||||||
|
|
||||||
if (self._max_resource_size and
|
if (self._max_resource_size and
|
||||||
prox_rec_res.recorder.len > self._max_resource_size):
|
prox_rec_res.recorder.len > self._max_resource_size):
|
||||||
prox_rec_res.truncated = b'length'
|
prox_rec_res.truncated = b'length'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user