mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
Merge branch 'do_not_archive' into qa
This commit is contained in:
commit
1edab7a0ca
@ -1426,11 +1426,19 @@ def test_controller_with_defaults():
|
||||
assert not wwp.writer_pool.default_warc_writer.record_builder.base32
|
||||
assert wwp.writer_pool.default_warc_writer.record_builder.digest_algorithm == 'sha1'
|
||||
|
||||
|
||||
class MyEarlyPlugin(warcprox.BaseStandardPostfetchProcessor):
    """Do-nothing postfetch processor that declares an 'early' chain position.

    Exists only so the plugin-loading test has a plugin class carrying
    ``CHAIN_POSITION = 'early'`` to load by dotted name.
    """

    CHAIN_POSITION = 'early'

    def _process_put(self):
        """Intentionally a no-op."""
        return None
|
||||
|
||||
|
||||
def test_load_plugin():
|
||||
options = warcprox.Options(port=0, plugins=[
|
||||
'warcprox.stats.RunningStats',
|
||||
'warcprox.BaseStandardPostfetchProcessor',
|
||||
'warcprox.BaseBatchPostfetchProcessor',])
|
||||
'warcprox.BaseBatchPostfetchProcessor',
|
||||
'%s.%s' % (__name__, MyEarlyPlugin.__name__),])
|
||||
controller = warcprox.controller.WarcproxController(options)
|
||||
assert isinstance(
|
||||
controller._postfetch_chain[-1],
|
||||
@ -1451,6 +1459,10 @@ def test_load_plugin():
|
||||
assert isinstance(
|
||||
controller._postfetch_chain[-4].listener,
|
||||
warcprox.stats.RunningStats)
|
||||
# MyEarlyPlugin
|
||||
assert isinstance(
|
||||
controller._postfetch_chain[0],
|
||||
warcprox.BaseStandardPostfetchProcessor)
|
||||
|
||||
def test_choose_a_port_for_me(warcprox_):
|
||||
options = warcprox.Options()
|
||||
|
@ -163,6 +163,47 @@ def test_special_dont_write_prefix():
|
||||
wwt.join()
|
||||
|
||||
|
||||
def test_do_not_archive():
    """Check that the writer processor skips urls flagged ``do_not_archive``.

    Two urls are pushed through a ``WarcWriterProcessor`` running with one
    writer thread:

    * the first leaves ``do_not_archive`` at its default and must come out
      of the processor with ``warc_records`` populated;
    * the second sets ``do_not_archive=True`` and must come out with no
      ``warc_records``.

    The test changes the process working directory to a temporary directory
    for the duration of the run.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        logging.debug('cd %s', tmpdir)
        os.chdir(tmpdir)

        wwt = warcprox.writerthread.WarcWriterProcessor(
                Options(writer_threads=1))
        # maxsize=1 queues: each put below blocks until the processor has
        # taken the previous item.
        wwt.inq = warcprox.TimestampedQueue(maxsize=1)
        wwt.outq = warcprox.TimestampedQueue(maxsize=1)
        try:
            wwt.start()

            # to be written -- default do_not_archive False
            recorder = ProxyingRecorder(io.BytesIO(b'some payload'), None)
            recorder.read()
            wwt.inq.put(RecordedUrl(
                url='http://example.com/yes', content_type='text/plain',
                status=200, client_ip='127.0.0.2', request_data=b'abc',
                response_recorder=recorder, remote_ip='127.0.0.3',
                timestamp=datetime.utcnow(),
                payload_digest=recorder.block_digest))

            # not to be written -- do_not_archive set True
            #
            # Deliberately no warcprox_meta 'warc-prefix' override here.
            # NOTE(review): a '-' prefix is presumably the special
            # "don't write" marker covered by the dont-write-prefix test;
            # passing it would let this test succeed even if do_not_archive
            # were ignored, so do_not_archive must be the only reason this
            # url is skipped.
            recorder = ProxyingRecorder(io.BytesIO(b'some payload'), None)
            recorder.read()
            wwt.inq.put(RecordedUrl(
                url='http://example.com/no', content_type='text/plain',
                status=200, client_ip='127.0.0.2', request_data=b'abc',
                response_recorder=recorder, remote_ip='127.0.0.3',
                timestamp=datetime.utcnow(),
                payload_digest=recorder.block_digest,
                do_not_archive=True))

            # Results are read back in the order the urls were queued.
            recorded_url = wwt.outq.get(timeout=10)
            assert recorded_url.warc_records
            recorded_url = wwt.outq.get(timeout=10)
            assert not recorded_url.warc_records
            assert wwt.outq.empty()
        finally:
            # Always shut the processor down, even when an assertion fails.
            wwt.stop.set()
            wwt.join()
|
||||
|
||||
|
||||
def test_warc_writer_filename(tmpdir):
|
||||
"""Test if WarcWriter is writing WARC files with custom filenames.
|
||||
"""
|
||||
|
@ -213,7 +213,7 @@ class WarcproxController(object):
|
||||
warcprox.ListenerPostfetchProcessor(
|
||||
plugin, self.options))
|
||||
elif hasattr(plugin, 'CHAIN_POSITION') and plugin.CHAIN_POSITION == 'early':
|
||||
self._postfetch_chain.insert(0, plugin) # or insert early but later than 0?
|
||||
self._postfetch_chain.insert(0, plugin)
|
||||
else:
|
||||
self._postfetch_chain.append(plugin)
|
||||
|
||||
|
@ -1 +0,0 @@
|
||||
1.4-20160105052702-f79e744
|
Loading…
x
Reference in New Issue
Block a user