2017-12-21 14:31:54 -08:00
|
|
|
'''
|
|
|
|
tests/test_writer.py - warcprox warc writing tests
|
|
|
|
|
2019-01-07 15:54:35 -08:00
|
|
|
Copyright (C) 2017-2019 Internet Archive
|
2017-12-21 14:31:54 -08:00
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
|
|
modify it under the terms of the GNU General Public License
|
|
|
|
as published by the Free Software Foundation; either version 2
|
|
|
|
of the License, or (at your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
|
|
|
USA.
|
|
|
|
'''
|
|
|
|
|
2017-10-28 14:36:16 +03:00
|
|
|
import os
|
|
|
|
import fcntl
|
|
|
|
from multiprocessing import Process, Queue
|
2018-07-21 11:20:49 +00:00
|
|
|
from datetime import datetime, timedelta
|
2017-10-28 14:36:16 +03:00
|
|
|
import pytest
|
2018-01-10 20:38:06 +00:00
|
|
|
import re
|
2017-10-28 14:36:16 +03:00
|
|
|
from warcprox.mitmproxy import ProxyingRecorder
|
|
|
|
from warcprox.warcproxy import RecordedUrl
|
|
|
|
from warcprox.writer import WarcWriter
|
|
|
|
from warcprox import Options
|
2017-12-21 14:31:54 -08:00
|
|
|
import time
|
|
|
|
import warcprox
|
|
|
|
import io
|
|
|
|
import tempfile
|
|
|
|
import logging
|
2018-08-16 11:06:58 -07:00
|
|
|
import hashlib
|
2018-10-30 13:05:45 -07:00
|
|
|
import queue
|
2019-01-08 11:27:11 -08:00
|
|
|
import sys
|
|
|
|
|
|
|
|
# Route log output to stdout at TRACE level; each line carries the timestamp,
# process id, level, thread name and the logger/function/file/line that
# emitted it.
# NOTE(review): logging.TRACE is not a standard-library level; presumably it
# is registered as a side effect of importing warcprox -- confirm.
logging.basicConfig(
        stream=sys.stdout, level=logging.TRACE,
        format='%(asctime)s %(process)d %(levelname)s %(threadName)s '
        '%(name)s.%(funcName)s(%(filename)s:%(lineno)d) %(message)s')
|
2017-10-28 14:36:16 +03:00
|
|
|
|
2018-10-30 13:05:45 -07:00
|
|
|
def lock_file(q, filename):
    """Try to take an exclusive, non-blocking lock on ``filename``.

    The outcome is reported by putting a status string on ``q``:
    ``'OBTAINED LOCK'`` if the file could be opened and locked, or
    ``'FAILED TO OBTAIN LOCK'`` if opening or locking raised ``IOError``.
    Nothing is returned.

    It is necessary to run this function in a different process to test
    locking.

    :param q: queue-like object with a ``put`` method that receives the
        status string.
    :param filename: path of the file to open (append, binary) and lock.
    """
    try:
        # The context manager closes the file on the failure path as well,
        # so no descriptor is leaked when lockf() raises.
        with open(filename, 'ab') as fi:
            fcntl.lockf(fi, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except IOError:
        q.put('FAILED TO OBTAIN LOCK')
    else:
        q.put('OBTAINED LOCK')
|
2017-10-28 14:36:16 +03:00
|
|
|
|
2017-11-01 17:50:46 +00:00
|
|
|
def test_warc_writer_locking(tmpdir):
    """Test if WarcWriter is locking WARC files.

    The writer is created with ``no_warc_open_suffix=True``. While it has
    the WARC open, ``lock_file`` run in a separate process is expected to
    report ``'FAILED TO OBTAIN LOCK'``; once the writer has been closed the
    same attempt is expected to report ``'OBTAINED LOCK'``.
    """
    recorder = ProxyingRecorder(None, None, 'sha1', url='http://example.com')
    recorded_url = RecordedUrl(
            url='http://example.com', content_type='text/plain', status=200,
            client_ip='127.0.0.2', request_data=b'abc',
            response_recorder=recorder, remote_ip='127.0.0.3',
            timestamp=datetime.utcnow(), payload_digest=hashlib.sha1())

    dirname = os.path.dirname(str(tmpdir.mkdir('test-warc-writer')))
    wwriter = WarcWriter(Options(
        directory=dirname, no_warc_open_suffix=True))
    q = Queue()
    try:
        wwriter.write_records(recorded_url)
        warcs = [fn for fn in os.listdir(dirname) if fn.endswith('.warc')]
        assert warcs
        target_warc = os.path.join(dirname, warcs[0])

        # launch another process and try to lock WARC file
        p = Process(target=lock_file, args=(q, target_warc))
        p.start()
        p.join()
        # bounded wait: if the child died without reporting, fail instead of
        # blocking the test run forever
        assert q.get(timeout=10) == 'FAILED TO OBTAIN LOCK'
    finally:
        # close even when a check above fails, so the WARC is not left open
        wwriter.close()

    # locking must succeed after writer has closed the WARC file.
    p = Process(target=lock_file, args=(q, target_warc))
    p.start()
    p.join()
    assert q.get(timeout=10) == 'OBTAINED LOCK'
|
2017-12-21 14:31:54 -08:00
|
|
|
|
|
|
|
def wait(callback, timeout, interval=0.5):
    """Poll ``callback`` until it returns a truthy value or time runs out.

    ``callback`` is always invoked at least once (even when ``timeout`` is
    zero or negative) and once more when the deadline is reached, so a
    condition that becomes true during the last sleep is not missed.

    :param callback: zero-argument callable to poll.
    :param timeout: maximum number of seconds to keep polling.
    :param interval: seconds to sleep between polls (default 0.5); the last
        sleep is shortened so the deadline is not overshot.
    :raises Exception: if ``callback`` never returned a truthy value.
    """
    # monotonic clock: unaffected by system clock adjustments
    deadline = time.monotonic() + timeout
    while True:
        if callback():
            return
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        time.sleep(min(interval, remaining))
    raise Exception('timed out waiting for %s to return truthy' % callback)
|
|
|
|
|
|
|
|
def test_special_dont_write_prefix():
    """Exercise the special ``'-'`` warc prefix and the ``blackout_period``
    option of ``WarcWriterProcessor``.

    Three scenarios are run from inside a temporary working directory:

    1. Processor configured with ``prefix='-'``: a url without
       ``warcprox_meta`` must come out with no ``warc_records``, a url whose
       ``warcprox_meta`` sets another ``warc-prefix`` must have some.
    2. Processor configured with ``prefix='foo'``: the reverse -- a plain url
       gets ``warc_records``, a url with ``'warc-prefix': '-'`` gets none.
    3. Same processor with ``blackout_period=60``: a url whose ``dedup_info``
       date is an hour old gets ``warc_records``, one whose date is five
       seconds old gets none.

    The original working directory is restored afterwards so the process is
    not left sitting in the deleted temporary directory.
    """
    def make_recorded_url(url, payload=b'some payload', **kwargs):
        # RecordedUrl whose recorder has already consumed `payload`
        recorder = ProxyingRecorder(io.BytesIO(payload), None)
        recorder.read()
        return RecordedUrl(
                url=url, content_type='text/plain',
                status=200, client_ip='127.0.0.2', request_data=b'abc',
                response_recorder=recorder, remote_ip='127.0.0.3',
                timestamp=datetime.utcnow(),
                payload_digest=recorder.block_digest, **kwargs)

    def dedup_info_for(id_, when):
        # dedup_info dict pointing at an earlier capture of /dup at `when`
        return dict(
                id=id_, url=b'http://example.com/dup',
                date=when.strftime('%Y-%m-%dT%H:%M:%SZ').encode('utf-8'))

    try:
        orig_cwd = os.getcwd()
    except OSError:
        # cwd may already be gone (e.g. removed by an earlier test)
        orig_cwd = None

    with tempfile.TemporaryDirectory() as tmpdir:
        logging.debug('cd %s', tmpdir)
        os.chdir(tmpdir)
        try:
            wwt = warcprox.writerthread.WarcWriterProcessor(
                    Options(prefix='-'))
            wwt.inq = queue.Queue(maxsize=1)
            wwt.outq = queue.Queue(maxsize=1)
            try:
                wwt.start()
                # not to be written due to default prefix
                wwt.inq.put(make_recorded_url('http://example.com/no'))
                # to be written due to warcprox-meta prefix
                wwt.inq.put(make_recorded_url(
                    'http://example.com/yes',
                    warcprox_meta={'warc-prefix': 'normal-warc-prefix'}))
                recorded_url = wwt.outq.get(timeout=10)
                assert not recorded_url.warc_records
                recorded_url = wwt.outq.get(timeout=10)
                assert recorded_url.warc_records
                assert wwt.outq.empty()
            finally:
                wwt.stop.set()
                wwt.join()

            wwt = warcprox.writerthread.WarcWriterProcessor(
                    Options(blackout_period=60, prefix='foo'))
            wwt.inq = queue.Queue(maxsize=1)
            wwt.outq = queue.Queue(maxsize=1)
            try:
                wwt.start()
                # to be written due to default prefix
                wwt.inq.put(make_recorded_url('http://example.com/yes'))
                # not to be written due to warcprox-meta prefix
                wwt.inq.put(make_recorded_url(
                    'http://example.com/no',
                    warcprox_meta={'warc-prefix': '-'}))
                recorded_url = wwt.outq.get(timeout=10)
                assert recorded_url.warc_records
                recorded_url = wwt.outq.get(timeout=10)
                assert not recorded_url.warc_records
                assert wwt.outq.empty()

                # test blackout_period option. Write first revisit record
                # because it's outside the blackout_period (60). Do not write
                # the second because it's inside the blackout_period.
                old = datetime.utcnow() - timedelta(0, 3600)
                ru = make_recorded_url('http://example.com/dup', b'test1')
                ru.dedup_info = dedup_info_for(b'1', old)
                wwt.inq.put(ru)
                recorded_url = wwt.outq.get(timeout=10)
                assert recorded_url.warc_records

                recent = datetime.utcnow() - timedelta(0, 5)
                ru = make_recorded_url('http://example.com/dup', b'test2')
                ru.dedup_info = dedup_info_for(b'2', recent)
                wwt.inq.put(ru)
                recorded_url = wwt.outq.get(timeout=10)
                assert not recorded_url.warc_records
                assert wwt.outq.empty()
            finally:
                wwt.stop.set()
                wwt.join()
        finally:
            # leave the temp dir before it is deleted
            os.chdir(orig_cwd or tempfile.gettempdir())
|
|
|
|
|
|
|
|
def test_do_not_archive():
    """Check that ``do_not_archive=True`` on a ``RecordedUrl`` suppresses
    writing.

    A default ``WarcWriterProcessor`` is fed two urls from inside a temporary
    working directory: the first (``do_not_archive`` left at its default)
    must come out with ``warc_records``, the second (``do_not_archive=True``)
    must come out without any.

    The original working directory is restored afterwards so the process is
    not left sitting in the deleted temporary directory.
    """
    def make_recorded_url(url, **kwargs):
        # RecordedUrl whose recorder has already consumed its payload
        recorder = ProxyingRecorder(io.BytesIO(b'some payload'), None)
        recorder.read()
        return RecordedUrl(
                url=url, content_type='text/plain',
                status=200, client_ip='127.0.0.2', request_data=b'abc',
                response_recorder=recorder, remote_ip='127.0.0.3',
                timestamp=datetime.utcnow(),
                payload_digest=recorder.block_digest, **kwargs)

    try:
        orig_cwd = os.getcwd()
    except OSError:
        # cwd may already be gone (e.g. removed by an earlier test)
        orig_cwd = None

    with tempfile.TemporaryDirectory() as tmpdir:
        logging.debug('cd %s', tmpdir)
        os.chdir(tmpdir)
        try:
            wwt = warcprox.writerthread.WarcWriterProcessor()
            wwt.inq = queue.Queue(maxsize=1)
            wwt.outq = queue.Queue(maxsize=1)
            try:
                wwt.start()
                # to be written -- default do_not_archive False
                wwt.inq.put(make_recorded_url('http://example.com/yes'))
                # not to be written -- do_not_archive set True
                wwt.inq.put(make_recorded_url(
                    'http://example.com/no',
                    warcprox_meta={'warc-prefix': '-'},
                    do_not_archive=True))
                recorded_url = wwt.outq.get(timeout=10)
                assert recorded_url.warc_records
                recorded_url = wwt.outq.get(timeout=10)
                assert not recorded_url.warc_records
                assert wwt.outq.empty()
            finally:
                wwt.stop.set()
                wwt.join()
        finally:
            # leave the temp dir before it is deleted
            os.chdir(orig_cwd or tempfile.gettempdir())
|
|
|
|
|
2018-01-09 12:54:42 +00:00
|
|
|
def test_warc_writer_filename(tmpdir):
    """Test if WarcWriter is writing WARC files with custom filenames.

    The writer is configured with ``prefix='foo'`` and the template
    ``'{timestamp17}_{prefix}_{timestamp14}_{serialno}'``; after one record
    is written, ``wwriter.path`` must match
    ``<17 digits>_foo_<14 digits>_00000.warc.open``.
    """
    recorder = ProxyingRecorder(None, None, 'sha1', url='http://example.com')
    recorded_url = RecordedUrl(
            url='http://example.com', content_type='text/plain', status=200,
            client_ip='127.0.0.2', request_data=b'abc',
            response_recorder=recorder, remote_ip='127.0.0.3',
            timestamp=datetime.utcnow(), payload_digest=hashlib.sha1())

    dirname = os.path.dirname(str(tmpdir.mkdir('test-warc-writer')))
    wwriter = WarcWriter(Options(
        directory=dirname, prefix='foo',
        warc_filename='{timestamp17}_{prefix}_{timestamp14}_{serialno}'))
    try:
        wwriter.write_records(recorded_url)
        assert os.listdir(dirname)
        # dots escaped so they only match the literal filename separators
        assert re.search(
                r'\d{17}_foo_\d{14}_00000\.warc\.open', wwriter.path)
    finally:
        # don't leave the WARC open once the test is over
        wwriter.close()
|
2019-01-08 11:27:11 -08:00
|
|
|
|
|
|
|
def test_close_for_prefix(tmpdir):
    """Exercise ``WarcWriterProcessor.close_for_prefix``.

    Records are written alternately to the default prefix and to
    ``'some-prefix'``, with a close requested in between, and after each
    write the file names in ``tmpdir`` are checked: a closed warc loses its
    ``.open`` suffix, the next default-prefix warc gets serial ``00001``, and
    the next ``some-prefix`` warc starts again at serial ``00000`` under a
    different name.
    """
    wwp = warcprox.writerthread.WarcWriterProcessor(
            Options(directory=str(tmpdir)))
    wwp.inq = queue.Queue(maxsize=1)
    wwp.outq = queue.Queue(maxsize=1)

    def write_record(url, **kwargs):
        # queue one record and hand back what the processor emits for it
        recorder = ProxyingRecorder(io.BytesIO(b'some payload'), None)
        recorder.read()
        wwp.inq.put(RecordedUrl(
            url=url, content_type='text/plain',
            status=200, client_ip='127.0.0.2', request_data=b'abc',
            response_recorder=recorder, remote_ip='127.0.0.3',
            timestamp=datetime.utcnow(),
            payload_digest=recorder.block_digest, **kwargs))
        # NOTE(review): sleep kept from the original; presumably it gives the
        # writer thread time to act on a pending close request -- confirm
        # before removing.
        time.sleep(0.5)
        # wait for it to finish; bounded like the other tests in this file
        # so a stuck writer thread fails the test instead of hanging it
        return wwp.outq.get(timeout=10)

    def sorted_basenames():
        return sorted(f.basename for f in tmpdir.listdir())

    try:
        wwp.start()

        # write a record to the default prefix
        rurl = write_record('http://example.com/1')

        assert rurl.url == b'http://example.com/1'
        entries = tmpdir.listdir()
        assert len(entries) == 1
        only = entries[0].basename
        assert only.startswith('warcprox-')
        assert only.endswith('-00000.warc.open')
        assert only == (
                wwp.writer_pool.default_warc_writer.finalname + '.open')

        # request close of default warc
        wwp.close_for_prefix()

        # write a record to some other prefix
        rurl = write_record(
                'http://example.com/2',
                warcprox_meta={'warc-prefix': 'some-prefix'})

        assert rurl.url == b'http://example.com/2'
        basenames = sorted_basenames()
        assert len(basenames) == 2
        assert basenames[0].startswith('some-prefix-')
        assert basenames[0].endswith('-00000.warc.open')
        assert basenames[1].startswith('warcprox-')
        assert basenames[1].endswith('-00000.warc')

        # request close of warc with prefix
        wwp.close_for_prefix('some-prefix')

        # write another record to the default prefix
        rurl = write_record('http://example.com/3')

        assert rurl.url == b'http://example.com/3'
        # now some-prefix warc is closed and a new default prefix warc is open
        basenames = sorted_basenames()
        assert len(basenames) == 3
        assert basenames[0].startswith('some-prefix-')
        assert basenames[0].endswith('-00000.warc')
        assert basenames[1].startswith('warcprox-')
        assert basenames[1].endswith('-00000.warc')
        assert basenames[2].startswith('warcprox-')
        assert basenames[2].endswith('-00001.warc.open')

        # write another record with prefix "some-prefix"
        rurl = write_record(
                'http://example.com/4',
                warcprox_meta={'warc-prefix': 'some-prefix'})

        assert rurl.url == b'http://example.com/4'
        # new some-prefix warc will have a new random token and start over at
        # serial 00000
        basenames = sorted_basenames()
        assert len(basenames) == 4
        assert basenames[0].startswith('some-prefix-')
        assert basenames[1].startswith('some-prefix-')
        # order of these two warcs depends on random token so we don't know
        # which is which
        assert basenames[0][-5:] != basenames[1][-5:]
        assert '-00000.' in basenames[0]
        assert '-00000.' in basenames[1]

        assert basenames[2].startswith('warcprox-')
        assert basenames[2].endswith('-00000.warc')
        assert basenames[3].startswith('warcprox-')
        assert basenames[3].endswith('-00001.warc.open')

    finally:
        wwp.stop.set()
        wwp.join()
|