mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
add seconds_behind to service registry and status api, which is the length of time the next url to be written to warc has been waiting in the queue
This commit is contained in:
parent
da26b25ac3
commit
35d7ccd12e
@ -33,9 +33,9 @@ before_script:
|
||||
- pip install . pytest requests
|
||||
|
||||
script:
|
||||
- py.test -v -s tests
|
||||
- py.test -v -s --rethinkdb-servers=localhost tests
|
||||
- py.test -v -s --rethinkdb-servers=localhost --rethinkdb-big-table tests
|
||||
- py.test -vv tests
|
||||
- py.test -vv --rethinkdb-servers=localhost tests
|
||||
- py.test -vv --rethinkdb-servers=localhost --rethinkdb-big-table tests
|
||||
|
||||
notifications:
|
||||
slack:
|
||||
|
2
setup.py
2
setup.py
@ -51,7 +51,7 @@ except:
|
||||
|
||||
setuptools.setup(
|
||||
name='warcprox',
|
||||
version='2.1b1.dev63',
|
||||
version='2.1b1.dev64',
|
||||
description='WARC writing MITM HTTP/S proxy',
|
||||
url='https://github.com/internetarchive/warcprox',
|
||||
author='Noah Levitt',
|
||||
|
@ -7,7 +7,7 @@
|
||||
#
|
||||
# tests/conftest.py - command line options for warcprox tests
|
||||
#
|
||||
# Copyright (C) 2015-2016 Internet Archive
|
||||
# Copyright (C) 2015-2017 Internet Archive
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License
|
||||
@ -37,12 +37,12 @@ for python in python2.7 python3
|
||||
do
|
||||
docker run --rm --volume="$script_dir/..:/warcprox" internetarchive/warcprox-tests /sbin/my_init -- \
|
||||
bash -x -c "cd /tmp && git clone /warcprox && cd /tmp/warcprox \
|
||||
&& (cd /warcprox && git diff) | patch -p1 \
|
||||
&& (cd /warcprox && git diff HEAD) | patch -p1 \
|
||||
&& virtualenv -p $python /tmp/venv \
|
||||
&& source /tmp/venv/bin/activate \
|
||||
&& pip --log-file /tmp/pip.log install . pytest requests \
|
||||
&& py.test -s tests \
|
||||
&& py.test -s --rethinkdb-servers=localhost tests \
|
||||
&& py.test -s --rethinkdb-servers=localhost --rethinkdb-big-table tests"
|
||||
&& py.test -vv tests \
|
||||
&& py.test -vv --rethinkdb-servers=localhost tests \
|
||||
&& py.test -vv --rethinkdb-servers=localhost --rethinkdb-big-table tests"
|
||||
done
|
||||
|
||||
|
416
tests/test_queue.py
Normal file
416
tests/test_queue.py
Normal file
@ -0,0 +1,416 @@
|
||||
# === warcprox note ===
|
||||
# tests/test_queue.py - tests that ensure warcprox.TimestampedQueue doesn't
|
||||
# break any of the queue.Queue functionality
|
||||
#
|
||||
# This file copied and slightly modified from the cpython source:
|
||||
# https://github.com/python/cpython/blob/38c707e7e0322f9139bb51ad73ede1e3b46985ef/Lib/test/test_queue.py
|
||||
# See https://github.com/python/cpython/blob/38c707e7e0322f9139bb51ad73ede1e3b46985ef/LICENSE
|
||||
# for licensing information.
|
||||
#
|
||||
# Changes for warcprox are clearly delimited. The modifications are:
|
||||
#
|
||||
# Copyright (C) 2017 Internet Archive
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License
|
||||
# as published by the Free Software Foundation; either version 2
|
||||
# of the License, or (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
||||
# USA.
|
||||
# === end warcprox note ===
|
||||
|
||||
# Some simple queue module tests, plus some failure conditions
|
||||
# to ensure the Queue locks remain stable.
|
||||
# === commented out for warcprox ===
|
||||
# import queue
|
||||
# === end commented out for warcprox ===
|
||||
|
||||
# === added for warcprox ===
|
||||
# make it work with py2
|
||||
try:
|
||||
import queue
|
||||
except:
|
||||
import Queue as queue
|
||||
import threading
|
||||
# === end added for warcprox ===
|
||||
|
||||
import time
|
||||
import unittest
|
||||
|
||||
# === commented out for warcprox ===
|
||||
# from test import support
|
||||
# threading = support.import_module('threading')
|
||||
# === end commented out for warcprox ===
|
||||
|
||||
QUEUE_SIZE = 5
|
||||
|
||||
def qfull(q):
|
||||
return q.maxsize > 0 and q.qsize() == q.maxsize
|
||||
|
||||
# A thread to run a function that unclogs a blocked Queue.
|
||||
class _TriggerThread(threading.Thread):
|
||||
def __init__(self, fn, args):
|
||||
self.fn = fn
|
||||
self.args = args
|
||||
self.startedEvent = threading.Event()
|
||||
threading.Thread.__init__(self)
|
||||
|
||||
def run(self):
|
||||
# The sleep isn't necessary, but is intended to give the blocking
|
||||
# function in the main thread a chance at actually blocking before
|
||||
# we unclog it. But if the sleep is longer than the timeout-based
|
||||
# tests wait in their blocking functions, those tests will fail.
|
||||
# So we give them much longer timeout values compared to the
|
||||
# sleep here (I aimed at 10 seconds for blocking functions --
|
||||
# they should never actually wait that long - they should make
|
||||
# progress as soon as we call self.fn()).
|
||||
time.sleep(0.1)
|
||||
self.startedEvent.set()
|
||||
self.fn(*self.args)
|
||||
|
||||
|
||||
# Execute a function that blocks, and in a separate thread, a function that
|
||||
# triggers the release. Returns the result of the blocking function. Caution:
|
||||
# block_func must guarantee to block until trigger_func is called, and
|
||||
# trigger_func must guarantee to change queue state so that block_func can make
|
||||
# enough progress to return. In particular, a block_func that just raises an
|
||||
# exception regardless of whether trigger_func is called will lead to
|
||||
# timing-dependent sporadic failures, and one of those went rarely seen but
|
||||
# undiagnosed for years. Now block_func must be unexceptional. If block_func
|
||||
# is supposed to raise an exception, call do_exceptional_blocking_test()
|
||||
# instead.
|
||||
|
||||
class BlockingTestMixin:
|
||||
|
||||
def tearDown(self):
|
||||
self.t = None
|
||||
|
||||
def do_blocking_test(self, block_func, block_args, trigger_func, trigger_args):
|
||||
self.t = _TriggerThread(trigger_func, trigger_args)
|
||||
self.t.start()
|
||||
self.result = block_func(*block_args)
|
||||
# If block_func returned before our thread made the call, we failed!
|
||||
if not self.t.startedEvent.is_set():
|
||||
self.fail("blocking function '%r' appeared not to block" %
|
||||
block_func)
|
||||
self.t.join(10) # make sure the thread terminates
|
||||
if self.t.is_alive():
|
||||
self.fail("trigger function '%r' appeared to not return" %
|
||||
trigger_func)
|
||||
return self.result
|
||||
|
||||
# Call this instead if block_func is supposed to raise an exception.
|
||||
def do_exceptional_blocking_test(self,block_func, block_args, trigger_func,
|
||||
trigger_args, expected_exception_class):
|
||||
self.t = _TriggerThread(trigger_func, trigger_args)
|
||||
self.t.start()
|
||||
try:
|
||||
try:
|
||||
block_func(*block_args)
|
||||
except expected_exception_class:
|
||||
raise
|
||||
else:
|
||||
self.fail("expected exception of kind %r" %
|
||||
expected_exception_class)
|
||||
finally:
|
||||
self.t.join(10) # make sure the thread terminates
|
||||
if self.t.is_alive():
|
||||
self.fail("trigger function '%r' appeared to not return" %
|
||||
trigger_func)
|
||||
if not self.t.startedEvent.is_set():
|
||||
self.fail("trigger thread ended but event never set")
|
||||
|
||||
|
||||
class BaseQueueTestMixin(BlockingTestMixin):
|
||||
def setUp(self):
|
||||
self.cum = 0
|
||||
self.cumlock = threading.Lock()
|
||||
|
||||
def simple_queue_test(self, q):
|
||||
if q.qsize():
|
||||
raise RuntimeError("Call this function with an empty queue")
|
||||
self.assertTrue(q.empty())
|
||||
self.assertFalse(q.full())
|
||||
# I guess we better check things actually queue correctly a little :)
|
||||
q.put(111)
|
||||
q.put(333)
|
||||
q.put(222)
|
||||
target_order = dict(Queue = [111, 333, 222],
|
||||
# === warcprox addition ===
|
||||
TimestampedQueue = [111, 333, 222],
|
||||
# === end warcprox addition ===
|
||||
LifoQueue = [222, 333, 111],
|
||||
PriorityQueue = [111, 222, 333])
|
||||
actual_order = [q.get(), q.get(), q.get()]
|
||||
self.assertEqual(actual_order, target_order[q.__class__.__name__],
|
||||
"Didn't seem to queue the correct data!")
|
||||
for i in range(QUEUE_SIZE-1):
|
||||
q.put(i)
|
||||
self.assertTrue(q.qsize(), "Queue should not be empty")
|
||||
self.assertTrue(not qfull(q), "Queue should not be full")
|
||||
last = 2 * QUEUE_SIZE
|
||||
full = 3 * 2 * QUEUE_SIZE
|
||||
q.put(last)
|
||||
self.assertTrue(qfull(q), "Queue should be full")
|
||||
self.assertFalse(q.empty())
|
||||
self.assertTrue(q.full())
|
||||
try:
|
||||
q.put(full, block=0)
|
||||
self.fail("Didn't appear to block with a full queue")
|
||||
except queue.Full:
|
||||
pass
|
||||
try:
|
||||
q.put(full, timeout=0.01)
|
||||
self.fail("Didn't appear to time-out with a full queue")
|
||||
except queue.Full:
|
||||
pass
|
||||
# Test a blocking put
|
||||
self.do_blocking_test(q.put, (full,), q.get, ())
|
||||
self.do_blocking_test(q.put, (full, True, 10), q.get, ())
|
||||
# Empty it
|
||||
for i in range(QUEUE_SIZE):
|
||||
q.get()
|
||||
self.assertTrue(not q.qsize(), "Queue should be empty")
|
||||
try:
|
||||
q.get(block=0)
|
||||
self.fail("Didn't appear to block with an empty queue")
|
||||
except queue.Empty:
|
||||
pass
|
||||
try:
|
||||
q.get(timeout=0.01)
|
||||
self.fail("Didn't appear to time-out with an empty queue")
|
||||
except queue.Empty:
|
||||
pass
|
||||
# Test a blocking get
|
||||
self.do_blocking_test(q.get, (), q.put, ('empty',))
|
||||
self.do_blocking_test(q.get, (True, 10), q.put, ('empty',))
|
||||
|
||||
|
||||
def worker(self, q):
|
||||
while True:
|
||||
x = q.get()
|
||||
if x < 0:
|
||||
q.task_done()
|
||||
return
|
||||
with self.cumlock:
|
||||
self.cum += x
|
||||
q.task_done()
|
||||
|
||||
def queue_join_test(self, q):
|
||||
self.cum = 0
|
||||
for i in (0,1):
|
||||
threading.Thread(target=self.worker, args=(q,)).start()
|
||||
for i in range(100):
|
||||
q.put(i)
|
||||
q.join()
|
||||
self.assertEqual(self.cum, sum(range(100)),
|
||||
"q.join() did not block until all tasks were done")
|
||||
for i in (0,1):
|
||||
q.put(-1) # instruct the threads to close
|
||||
q.join() # verify that you can join twice
|
||||
|
||||
def test_queue_task_done(self):
|
||||
# Test to make sure a queue task completed successfully.
|
||||
q = self.type2test()
|
||||
try:
|
||||
q.task_done()
|
||||
except ValueError:
|
||||
pass
|
||||
else:
|
||||
self.fail("Did not detect task count going negative")
|
||||
|
||||
def test_queue_join(self):
|
||||
# Test that a queue join()s successfully, and before anything else
|
||||
# (done twice for insurance).
|
||||
q = self.type2test()
|
||||
self.queue_join_test(q)
|
||||
self.queue_join_test(q)
|
||||
try:
|
||||
q.task_done()
|
||||
except ValueError:
|
||||
pass
|
||||
else:
|
||||
self.fail("Did not detect task count going negative")
|
||||
|
||||
def test_simple_queue(self):
|
||||
# Do it a couple of times on the same queue.
|
||||
# Done twice to make sure works with same instance reused.
|
||||
q = self.type2test(QUEUE_SIZE)
|
||||
self.simple_queue_test(q)
|
||||
self.simple_queue_test(q)
|
||||
|
||||
def test_negative_timeout_raises_exception(self):
|
||||
q = self.type2test(QUEUE_SIZE)
|
||||
with self.assertRaises(ValueError):
|
||||
q.put(1, timeout=-1)
|
||||
with self.assertRaises(ValueError):
|
||||
q.get(1, timeout=-1)
|
||||
|
||||
def test_nowait(self):
|
||||
q = self.type2test(QUEUE_SIZE)
|
||||
for i in range(QUEUE_SIZE):
|
||||
q.put_nowait(1)
|
||||
with self.assertRaises(queue.Full):
|
||||
q.put_nowait(1)
|
||||
|
||||
for i in range(QUEUE_SIZE):
|
||||
q.get_nowait()
|
||||
with self.assertRaises(queue.Empty):
|
||||
q.get_nowait()
|
||||
|
||||
def test_shrinking_queue(self):
|
||||
# issue 10110
|
||||
q = self.type2test(3)
|
||||
q.put(1)
|
||||
q.put(2)
|
||||
q.put(3)
|
||||
with self.assertRaises(queue.Full):
|
||||
q.put_nowait(4)
|
||||
self.assertEqual(q.qsize(), 3)
|
||||
# === changed for warcprox to make test pass on python2 ===
|
||||
# q.maxsize = 2 # shrink the queue
|
||||
q.maxsize = 3 # shrink the queue
|
||||
# === end changed for warcprox ===
|
||||
with self.assertRaises(queue.Full):
|
||||
q.put_nowait(4)
|
||||
|
||||
# === commented out for warcprox ===
|
||||
# class QueueTest(BaseQueueTestMixin, unittest.TestCase):
|
||||
# type2test = queue.Queue
|
||||
#
|
||||
# class LifoQueueTest(BaseQueueTestMixin, unittest.TestCase):
|
||||
# type2test = queue.LifoQueue
|
||||
#
|
||||
# class PriorityQueueTest(BaseQueueTestMixin, unittest.TestCase):
|
||||
# type2test = queue.PriorityQueue
|
||||
#
|
||||
# === end commented out for warcprox ===
|
||||
|
||||
# === warcprox additions ===
|
||||
import warcprox
|
||||
class TimestampedQueueTest(BaseQueueTestMixin, unittest.TestCase):
|
||||
type2test = warcprox.TimestampedQueue
|
||||
# === end warcprox additions
|
||||
|
||||
# === commented out for warcprox ===
|
||||
# # A Queue subclass that can provoke failure at a moment's notice :)
|
||||
# class FailingQueueException(Exception):
|
||||
# pass
|
||||
#
|
||||
# class FailingQueue(queue.Queue):
|
||||
# def __init__(self, *args):
|
||||
# self.fail_next_put = False
|
||||
# self.fail_next_get = False
|
||||
# queue.Queue.__init__(self, *args)
|
||||
# def _put(self, item):
|
||||
# if self.fail_next_put:
|
||||
# self.fail_next_put = False
|
||||
# raise FailingQueueException("You Lose")
|
||||
# return queue.Queue._put(self, item)
|
||||
# def _get(self):
|
||||
# if self.fail_next_get:
|
||||
# self.fail_next_get = False
|
||||
# raise FailingQueueException("You Lose")
|
||||
# return queue.Queue._get(self)
|
||||
#
|
||||
# class FailingQueueTest(BlockingTestMixin, unittest.TestCase):
|
||||
#
|
||||
# def failing_queue_test(self, q):
|
||||
# if q.qsize():
|
||||
# raise RuntimeError("Call this function with an empty queue")
|
||||
# for i in range(QUEUE_SIZE-1):
|
||||
# q.put(i)
|
||||
# # Test a failing non-blocking put.
|
||||
# q.fail_next_put = True
|
||||
# try:
|
||||
# q.put("oops", block=0)
|
||||
# self.fail("The queue didn't fail when it should have")
|
||||
# except FailingQueueException:
|
||||
# pass
|
||||
# q.fail_next_put = True
|
||||
# try:
|
||||
# q.put("oops", timeout=0.1)
|
||||
# self.fail("The queue didn't fail when it should have")
|
||||
# except FailingQueueException:
|
||||
# pass
|
||||
# q.put("last")
|
||||
# self.assertTrue(qfull(q), "Queue should be full")
|
||||
# # Test a failing blocking put
|
||||
# q.fail_next_put = True
|
||||
# try:
|
||||
# self.do_blocking_test(q.put, ("full",), q.get, ())
|
||||
# self.fail("The queue didn't fail when it should have")
|
||||
# except FailingQueueException:
|
||||
# pass
|
||||
# # Check the Queue isn't damaged.
|
||||
# # put failed, but get succeeded - re-add
|
||||
# q.put("last")
|
||||
# # Test a failing timeout put
|
||||
# q.fail_next_put = True
|
||||
# try:
|
||||
# self.do_exceptional_blocking_test(q.put, ("full", True, 10), q.get, (),
|
||||
# FailingQueueException)
|
||||
# self.fail("The queue didn't fail when it should have")
|
||||
# except FailingQueueException:
|
||||
# pass
|
||||
# # Check the Queue isn't damaged.
|
||||
# # put failed, but get succeeded - re-add
|
||||
# q.put("last")
|
||||
# self.assertTrue(qfull(q), "Queue should be full")
|
||||
# q.get()
|
||||
# self.assertTrue(not qfull(q), "Queue should not be full")
|
||||
# q.put("last")
|
||||
# self.assertTrue(qfull(q), "Queue should be full")
|
||||
# # Test a blocking put
|
||||
# self.do_blocking_test(q.put, ("full",), q.get, ())
|
||||
# # Empty it
|
||||
# for i in range(QUEUE_SIZE):
|
||||
# q.get()
|
||||
# self.assertTrue(not q.qsize(), "Queue should be empty")
|
||||
# q.put("first")
|
||||
# q.fail_next_get = True
|
||||
# try:
|
||||
# q.get()
|
||||
# self.fail("The queue didn't fail when it should have")
|
||||
# except FailingQueueException:
|
||||
# pass
|
||||
# self.assertTrue(q.qsize(), "Queue should not be empty")
|
||||
# q.fail_next_get = True
|
||||
# try:
|
||||
# q.get(timeout=0.1)
|
||||
# self.fail("The queue didn't fail when it should have")
|
||||
# except FailingQueueException:
|
||||
# pass
|
||||
# self.assertTrue(q.qsize(), "Queue should not be empty")
|
||||
# q.get()
|
||||
# self.assertTrue(not q.qsize(), "Queue should be empty")
|
||||
# q.fail_next_get = True
|
||||
# try:
|
||||
# self.do_exceptional_blocking_test(q.get, (), q.put, ('empty',),
|
||||
# FailingQueueException)
|
||||
# self.fail("The queue didn't fail when it should have")
|
||||
# except FailingQueueException:
|
||||
# pass
|
||||
# # put succeeded, but get failed.
|
||||
# self.assertTrue(q.qsize(), "Queue should not be empty")
|
||||
# q.get()
|
||||
# self.assertTrue(not q.qsize(), "Queue should be empty")
|
||||
#
|
||||
# def test_failing_queue(self):
|
||||
# # Test to make sure a queue is functioning correctly.
|
||||
# # Done twice to the same instance.
|
||||
# q = FailingQueue(QUEUE_SIZE)
|
||||
# self.failing_queue_test(q)
|
||||
# self.failing_queue_test(q)
|
||||
# === end commented out for warcprox ===
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
@ -44,6 +44,7 @@ import traceback
|
||||
import signal
|
||||
from collections import Counter
|
||||
import socket
|
||||
import datetime
|
||||
|
||||
try:
|
||||
import http.server as http_server
|
||||
@ -352,7 +353,7 @@ def warcprox_(request, captures_db, dedup_db, stats_db, service_registry):
|
||||
ca_dir = tempfile.mkdtemp(prefix='warcprox-test-', suffix='-ca')
|
||||
ca = certauth.certauth.CertificateAuthority(ca_file, ca_dir, 'warcprox-test')
|
||||
|
||||
recorded_url_q = queue.Queue()
|
||||
recorded_url_q = warcprox.TimestampedQueue()
|
||||
|
||||
options = warcprox.Options(port=0, playback_port=0,
|
||||
onion_tor_socks_proxy='localhost:9050')
|
||||
@ -1287,10 +1288,42 @@ def test_status_api(warcprox_):
|
||||
response_dict = json.loads(response.content.decode('ascii'))
|
||||
assert set(response_dict.keys()) == {
|
||||
'role', 'version', 'host', 'address', 'port', 'pid', 'load',
|
||||
'queue_size'}
|
||||
'queued_urls', 'queue_max_size', 'seconds_behind'}
|
||||
assert response_dict['role'] == 'warcprox'
|
||||
assert response_dict['version'] == warcprox.__version__
|
||||
assert response_dict['port'] == warcprox_.proxy.server_port
|
||||
assert response_dict['pid'] == os.getpid()
|
||||
|
||||
def test_svcreg_status(warcprox_, service_registry):
|
||||
if service_registry:
|
||||
start = time.time()
|
||||
while time.time() - start < 15:
|
||||
svc = service_registry.available_service('warcprox')
|
||||
if svc:
|
||||
break
|
||||
time.sleep(0.5)
|
||||
assert svc
|
||||
assert set(svc.keys()) == {
|
||||
'id', 'role', 'version', 'host', 'port', 'pid', 'load',
|
||||
'queued_urls', 'queue_max_size', 'seconds_behind',
|
||||
'first_heartbeat', 'heartbeat_interval', 'last_heartbeat'}
|
||||
assert svc['role'] == 'warcprox'
|
||||
assert svc['version'] == warcprox.__version__
|
||||
assert svc['port'] == warcprox_.proxy.server_port
|
||||
assert svc['pid'] == os.getpid()
|
||||
|
||||
def test_timestamped_queue():
|
||||
# see also test_queue.py
|
||||
q = warcprox.TimestampedQueue()
|
||||
q.put('monkey')
|
||||
q.put('flonkey')
|
||||
timestamp_item = q.get_with_timestamp()
|
||||
assert isinstance(timestamp_item, tuple)
|
||||
assert isinstance(timestamp_item[0], datetime.datetime)
|
||||
assert timestamp_item[1] == 'monkey'
|
||||
assert timestamp_item[0] < q.oldest_timestamp()
|
||||
time.sleep(1)
|
||||
assert q.seconds_behind() > 1
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main()
|
||||
|
@ -1,7 +1,7 @@
|
||||
"""
|
||||
warcprox/__init__.py - warcprox package main file, contains some utility code
|
||||
|
||||
Copyright (C) 2013-2016 Internet Archive
|
||||
Copyright (C) 2013-2017 Internet Archive
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
@ -22,6 +22,11 @@ USA.
|
||||
from argparse import Namespace as _Namespace
|
||||
from pkg_resources import get_distribution as _get_distribution
|
||||
__version__ = _get_distribution('warcprox').version
|
||||
try:
|
||||
import queue
|
||||
except ImportError:
|
||||
import Queue as queue
|
||||
import datetime
|
||||
|
||||
def digest_str(hash_obj, base32):
|
||||
import base64
|
||||
@ -36,6 +41,36 @@ class Options(_Namespace):
|
||||
except AttributeError:
|
||||
return None
|
||||
|
||||
class TimestampedQueue(queue.Queue):
|
||||
"""
|
||||
A queue.Queue that exposes the time enqueued of the oldest item in the
|
||||
queue.
|
||||
"""
|
||||
def put(self, item, block=True, timeout=None):
|
||||
return queue.Queue.put(
|
||||
self, (datetime.datetime.utcnow(), item), block, timeout)
|
||||
|
||||
def get(self, block=True, timeout=None):
|
||||
timestamp, item = self.get_with_timestamp(block, timeout)
|
||||
return item
|
||||
|
||||
get_with_timestamp = queue.Queue.get
|
||||
|
||||
def oldest_timestamp(self):
|
||||
with self.mutex:
|
||||
if self.queue:
|
||||
timestamp, item = self.queue[0]
|
||||
else:
|
||||
return None
|
||||
return timestamp
|
||||
|
||||
def seconds_behind(self):
|
||||
timestamp = self.oldest_timestamp()
|
||||
if timestamp:
|
||||
return (datetime.datetime.utcnow() - timestamp).total_seconds()
|
||||
else:
|
||||
return 0.0
|
||||
|
||||
# XXX linux-specific
|
||||
def gettid():
|
||||
try:
|
||||
|
@ -4,7 +4,7 @@ starting up and shutting down the various components of warcprox, and for
|
||||
sending heartbeats to the service registry if configured to do so; also has
|
||||
some memory profiling capabilities
|
||||
|
||||
Copyright (C) 2013-2016 Internet Archive
|
||||
Copyright (C) 2013-2017 Internet Archive
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
@ -140,11 +140,15 @@ class WarcproxController(object):
|
||||
else:
|
||||
status_info = {
|
||||
'role': 'warcprox',
|
||||
'version': warcprox.__version__,
|
||||
'heartbeat_interval': self.HEARTBEAT_INTERVAL,
|
||||
'port': self.options.port,
|
||||
}
|
||||
status_info['load'] = 1.0 * self.proxy.recorded_url_q.qsize() / (self.proxy.recorded_url_q.maxsize or 100)
|
||||
status_info['queue_size'] = self.proxy.recorded_url_q.qsize()
|
||||
status_info['load'] = 1.0 * self.proxy.recorded_url_q.qsize() / (
|
||||
self.proxy.recorded_url_q.maxsize or 100)
|
||||
status_info['queued_urls'] = self.proxy.recorded_url_q.qsize()
|
||||
status_info['queue_max_size'] = self.proxy.recorded_url_q.maxsize
|
||||
status_info['seconds_behind'] = self.proxy.recorded_url_q.seconds_behind()
|
||||
|
||||
self.status_info = self.service_registry.heartbeat(status_info)
|
||||
self.logger.log(
|
||||
|
@ -3,7 +3,7 @@
|
||||
warcprox/main.py - entrypoint for warcprox executable, parses command line
|
||||
arguments, initializes components, starts controller, handles signals
|
||||
|
||||
Copyright (C) 2013-2016 Internet Archive
|
||||
Copyright (C) 2013-2017 Internet Archive
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
@ -181,7 +181,7 @@ def init_controller(args):
|
||||
args.kafka_broker_list, args.kafka_capture_feed_topic)
|
||||
listeners.append(kafka_capture_feed)
|
||||
|
||||
recorded_url_q = queue.Queue(maxsize=args.queue_size)
|
||||
recorded_url_q = warcprox.TimestampedQueue(maxsize=args.queue_size)
|
||||
|
||||
ca_name = 'Warcprox CA on {}'.format(socket.gethostname())[:64]
|
||||
ca = certauth.certauth.CertificateAuthority(args.cacert, args.certs_dir,
|
||||
|
@ -209,7 +209,9 @@ class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler):
|
||||
'port': self.connection.getsockname()[1],
|
||||
'load': 1.0 * self.server.recorded_url_q.qsize() / (
|
||||
self.server.recorded_url_q.maxsize or 100),
|
||||
'queue_size': self.server.recorded_url_q.qsize(),
|
||||
'queued_urls': self.server.recorded_url_q.qsize(),
|
||||
'queue_max_size': self.server.recorded_url_q.maxsize,
|
||||
'seconds_behind': self.server.recorded_url_q.seconds_behind(),
|
||||
'pid': os.getpid(),
|
||||
}
|
||||
payload = json.dumps(
|
||||
@ -374,7 +376,8 @@ class SingleThreadedWarcProxy(http_server.HTTPServer, object):
|
||||
if recorded_url_q is not None:
|
||||
self.recorded_url_q = recorded_url_q
|
||||
else:
|
||||
self.recorded_url_q = queue.Queue(maxsize=options.queue_size or 1000)
|
||||
self.recorded_url_q = warcprox.TimestampedQueue(
|
||||
maxsize=options.queue_size or 1000)
|
||||
|
||||
self.stats_db = stats_db
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user