Mirror of https://github.com/internetarchive/warcprox.git, synced 2025-01-18 13:22:09 +01:00
hopefully fix test failing occasionally, apparently due to a race condition, by checking that the file we're waiting for has some content
This commit is contained in:
parent 3a0f6e0947
commit ffc8a268ab
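The race this commit guards against: a crawl-log file can be created on disk before the writer has flushed any bytes into it, so a test that polls only for the file's existence may proceed and read an empty file. Below is a minimal standalone sketch of the wait-until-nonempty pattern the diff adopts; the helper name wait_for_nonempty is illustrative only, not part of the commit.

import os
import time

def wait_for_nonempty(path, timeout=10, poll=0.5):
    # Poll until `path` exists and holds at least one byte; return True on
    # success, False if the timeout elapses first (mirrors the 10s / 0.5s
    # loop used in the tests below).
    start = time.time()
    while time.time() - start < timeout:
        if os.path.exists(path) and os.stat(path).st_size > 0:
            return True
        time.sleep(poll)
    return False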
setup.py
@@ -51,7 +51,7 @@ except:
 setuptools.setup(
         name='warcprox',
-        version='2.2.1b2.dev114',
+        version='2.2.1b2.dev115',
         description='WARC writing MITM HTTP/S proxy',
         url='https://github.com/internetarchive/warcprox',
         author='Noah Levitt',
tests/test_warcprox.py
@@ -1441,11 +1441,14 @@ def test_crawl_log(warcprox_, http_daemon, archiving_proxies):
     assert response.status_code == 200
 
     start = time.time()
+    file = os.path.join(warcprox_.options.crawl_log_dir, 'test_crawl_log_1.log')
     while time.time() - start < 10:
-        if os.path.exists(os.path.join(
-                warcprox_.options.crawl_log_dir, 'test_crawl_log_1.log')):
+        if os.path.exists(file) and os.stat(file).st_size > 0:
             break
         time.sleep(0.5)
+    assert os.path.exists(file)
+    assert os.path.exists(os.path.join(
+            warcprox_.options.crawl_log_dir, 'crawl.log'))
 
     crawl_log = open(os.path.join(
             warcprox_.options.crawl_log_dir, 'crawl.log'), 'rb').read()
@@ -1499,14 +1502,14 @@ def test_crawl_log(warcprox_, http_daemon, archiving_proxies):
     assert response.status_code == 200
 
     start = time.time()
+    file = os.path.join(warcprox_.options.crawl_log_dir, 'test_crawl_log_2.log')
     while time.time() - start < 10:
-        if os.path.exists(os.path.join(
-                warcprox_.options.crawl_log_dir, 'test_crawl_log_2.log')):
+        if os.path.exists(file) and os.stat(file).st_size > 0:
             break
         time.sleep(0.5)
+    assert os.path.exists(file)
 
-    crawl_log_2 = open(os.path.join(
-            warcprox_.options.crawl_log_dir, 'test_crawl_log_2.log'), 'rb').read()
+    crawl_log_2 = open(file, 'rb').read()
 
     assert re.match(b'\A2[^\n]+\n\Z', crawl_log_2)
     assert crawl_log_2[24:31] == b' 200 '
@@ -1533,16 +1536,15 @@ def test_crawl_log(warcprox_, http_daemon, archiving_proxies):
     headers = {'Warcprox-Meta': json.dumps({'warc-prefix': 'test_crawl_log_3'})}
     response = requests.head(url, proxies=archiving_proxies, headers=headers)
-
+    file = os.path.join(warcprox_.options.crawl_log_dir, 'test_crawl_log_3.log')
     start = time.time()
     while time.time() - start < 10:
-        if os.path.exists(os.path.join(
-                warcprox_.options.crawl_log_dir, 'test_crawl_log_3.log')):
+        if os.path.exists(file) and os.stat(file).st_size > 0:
             break
         time.sleep(0.5)
 
-    crawl_log_3 = open(os.path.join(
-            warcprox_.options.crawl_log_dir, 'test_crawl_log_3.log'), 'rb').read()
+    assert os.path.exists(file)
+    crawl_log_3 = open(file, 'rb').read()
 
     assert re.match(b'\A2[^\n]+\n\Z', crawl_log_3)
     assert crawl_log_3[24:31] == b' 200 '
     assert crawl_log_3[31:42] == b' 0 '
@@ -1575,14 +1577,14 @@ def test_crawl_log(warcprox_, http_daemon, archiving_proxies):
     assert response.status_code == 204
 
     start = time.time()
+    file = os.path.join(warcprox_.options.crawl_log_dir, 'test_crawl_log_4.log')
     while time.time() - start < 10:
-        if os.path.exists(os.path.join(
-                warcprox_.options.crawl_log_dir, 'test_crawl_log_4.log')):
+        if os.path.exists(file) and os.stat(file).st_size > 0:
             break
         time.sleep(0.5)
 
-    crawl_log_4 = open(os.path.join(
-            warcprox_.options.crawl_log_dir, 'test_crawl_log_4.log'), 'rb').read()
+    assert os.path.exists(file)
+    crawl_log_4 = open(file, 'rb').read()
 
     assert re.match(b'\A2[^\n]+\n\Z', crawl_log_4)
     assert crawl_log_4[24:31] == b' 204 '
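The same wait loop now appears four times in test_crawl_log, so it could be consolidated with a helper along the lines of the wait_for_nonempty sketch above. Hypothetical usage, not part of the commit:

file = os.path.join(warcprox_.options.crawl_log_dir, 'test_crawl_log_4.log')
assert wait_for_nonempty(file)
crawl_log_4 = open(file, 'rb').read()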