mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
test for special warc prefix "-" which means "do not archive"
This commit is contained in:
parent
399853dea0
commit
9784c91459
2
setup.py
2
setup.py
@ -52,7 +52,7 @@ except:
|
||||
|
||||
setuptools.setup(
|
||||
name='warcprox',
|
||||
version='2.3.1b4.dev128',
|
||||
version='2.3.1b4.dev129',
|
||||
description='WARC writing MITM HTTP/S proxy',
|
||||
url='https://github.com/internetarchive/warcprox',
|
||||
author='Noah Levitt',
|
||||
|
@ -1,3 +1,24 @@
|
||||
'''
|
||||
tests/test_writer.py - warcprox warc writing tests
|
||||
|
||||
Copyright (C) 2017 Internet Archive
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
||||
USA.
|
||||
'''
|
||||
|
||||
import os
|
||||
import fcntl
|
||||
from multiprocessing import Process, Queue
|
||||
@ -7,16 +28,11 @@ from warcprox.mitmproxy import ProxyingRecorder
|
||||
from warcprox.warcproxy import RecordedUrl
|
||||
from warcprox.writer import WarcWriter
|
||||
from warcprox import Options
|
||||
|
||||
recorder = ProxyingRecorder(None, None, 'sha1', url='http://example.com')
|
||||
|
||||
recorded_url = RecordedUrl(url='http://example.com', content_type='text/plain',
|
||||
status=200, client_ip='127.0.0.2',
|
||||
request_data=b'abc',
|
||||
response_recorder=recorder,
|
||||
remote_ip='127.0.0.3',
|
||||
timestamp=datetime.utcnow())
|
||||
|
||||
import time
|
||||
import warcprox
|
||||
import io
|
||||
import tempfile
|
||||
import logging
|
||||
|
||||
def lock_file(queue, filename):
|
||||
"""Try to lock file and return 1 if successful, else return 0.
|
||||
@ -36,6 +52,13 @@ def test_warc_writer_locking(tmpdir):
|
||||
When we don't have the .open suffix, WarcWriter locks the file and the
|
||||
external process trying to ``lock_file`` fails (result=0).
|
||||
"""
|
||||
recorder = ProxyingRecorder(None, None, 'sha1', url='http://example.com')
|
||||
recorded_url = RecordedUrl(
|
||||
url='http://example.com', content_type='text/plain', status=200,
|
||||
client_ip='127.0.0.2', request_data=b'abc',
|
||||
response_recorder=recorder, remote_ip='127.0.0.3',
|
||||
timestamp=datetime.utcnow())
|
||||
|
||||
dirname = os.path.dirname(str(tmpdir.mkdir('test-warc-writer')))
|
||||
wwriter = WarcWriter(Options(directory=dirname, no_warc_open_suffix=True))
|
||||
wwriter.write_records(recorded_url)
|
||||
@ -55,3 +78,88 @@ def test_warc_writer_locking(tmpdir):
|
||||
p.start()
|
||||
p.join()
|
||||
assert queue.get() == 'OBTAINED LOCK'
|
||||
|
||||
def wait(callback, timeout):
|
||||
start = time.time()
|
||||
while time.time() - start < timeout:
|
||||
if callback():
|
||||
return
|
||||
time.sleep(0.5)
|
||||
raise Exception('timed out waiting for %s to return truthy' % callback)
|
||||
|
||||
def test_special_dont_write_prefix():
|
||||
class NotifyMe:
|
||||
def __init__(self):
|
||||
self.the_list = []
|
||||
def notify(self, recorded_url, records):
|
||||
self.the_list.append((recorded_url, records))
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
logging.debug('cd %s', tmpdir)
|
||||
os.chdir(tmpdir)
|
||||
|
||||
q = warcprox.TimestampedQueue(maxsize=1)
|
||||
listener = NotifyMe()
|
||||
wwt = warcprox.writerthread.WarcWriterThread(
|
||||
recorded_url_q=q, options=Options(prefix='-'),
|
||||
listeners=[listener])
|
||||
try:
|
||||
wwt.start()
|
||||
# not to be written due to default prefix
|
||||
recorder = ProxyingRecorder(io.BytesIO(b'some payload'), None)
|
||||
recorder.read()
|
||||
q.put(RecordedUrl(
|
||||
url='http://example.com/no', content_type='text/plain',
|
||||
status=200, client_ip='127.0.0.2', request_data=b'abc',
|
||||
response_recorder=recorder, remote_ip='127.0.0.3',
|
||||
timestamp=datetime.utcnow(),
|
||||
payload_digest=recorder.block_digest))
|
||||
# to be written due to warcprox-meta prefix
|
||||
recorder = ProxyingRecorder(io.BytesIO(b'some payload'), None)
|
||||
recorder.read()
|
||||
q.put(RecordedUrl(
|
||||
url='http://example.com/yes', content_type='text/plain',
|
||||
status=200, client_ip='127.0.0.2', request_data=b'abc',
|
||||
response_recorder=recorder, remote_ip='127.0.0.3',
|
||||
timestamp=datetime.utcnow(),
|
||||
payload_digest=recorder.block_digest,
|
||||
warcprox_meta={'warc-prefix': 'normal-warc-prefix'}))
|
||||
wait(lambda: len(listener.the_list) == 2, 10.0)
|
||||
assert not listener.the_list[0][1]
|
||||
assert listener.the_list[1][1]
|
||||
finally:
|
||||
wwt.stop.set()
|
||||
wwt.join()
|
||||
|
||||
q = warcprox.TimestampedQueue(maxsize=1)
|
||||
listener = NotifyMe()
|
||||
wwt = warcprox.writerthread.WarcWriterThread(
|
||||
recorded_url_q=q, listeners=[listener])
|
||||
try:
|
||||
wwt.start()
|
||||
# to be written due to default prefix
|
||||
recorder = ProxyingRecorder(io.BytesIO(b'some payload'), None)
|
||||
recorder.read()
|
||||
q.put(RecordedUrl(
|
||||
url='http://example.com/yes', content_type='text/plain',
|
||||
status=200, client_ip='127.0.0.2', request_data=b'abc',
|
||||
response_recorder=recorder, remote_ip='127.0.0.3',
|
||||
timestamp=datetime.utcnow(),
|
||||
payload_digest=recorder.block_digest))
|
||||
# not to be written due to warcprox-meta prefix
|
||||
recorder = ProxyingRecorder(io.BytesIO(b'some payload'), None)
|
||||
recorder.read()
|
||||
q.put(RecordedUrl(
|
||||
url='http://example.com/no', content_type='text/plain',
|
||||
status=200, client_ip='127.0.0.2', request_data=b'abc',
|
||||
response_recorder=recorder, remote_ip='127.0.0.3',
|
||||
timestamp=datetime.utcnow(),
|
||||
payload_digest=recorder.block_digest,
|
||||
warcprox_meta={'warc-prefix': '-'}))
|
||||
wait(lambda: len(listener.the_list) == 2, 10.0)
|
||||
assert listener.the_list[0][1]
|
||||
assert not listener.the_list[1][1]
|
||||
finally:
|
||||
wwt.stop.set()
|
||||
wwt.join()
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user