minimize impact of down server

The last approach was not good, timeout of 0.1 seconds was too short. A
bunch of stuff has to happen in the timeout period inside of
rethinkdb.connect(). It doesn't offer a way to set only the socket
timeout. Even a timeout of 0.5 seconds results in a noticeable error
rate.

The new approach is to put a server in the penalty box for 5 minutes
when it errors. While the server is in the penalty box, we don't try to
connect to it, unless all the servers are in the penalty box, in which
case we try the server that errored least recently.
This commit is contained in:
Noah Levitt 2018-11-02 18:05:18 +00:00
parent 72d6e4d39b
commit 71221dbe54

View File

@ -113,21 +113,22 @@ class Rethinker(object):
else:
self.servers = servers
self.dbname = db
self.last_error = {} # {server: time}
# https://github.com/rethinkdb/rethinkdb-example-webpy-blog/blob/master/model.py
# "Best practices: Managing connections: a connection per request"
def _random_server_connection(self):
retry_wait = 0.01
while True:
server = random.choice(self.servers)
server = random.choice(self._server_whitelist())
try:
try:
host, port = server.split(':')
return r.connect(
host=host, port=port, timeout=max(0.1, retry_wait))
return r.connect(host=host, port=port)
except ValueError:
return r.connect(host=server, timeout=max(0.1, retry_wait))
return r.connect(host=server)
except Exception as e:
self.last_error[server] = time.time()
self.logger.warn(
'will keep trying after failure connecting to '
'rethinkdb server at %s: %s (sleeping for %s sec)',
@ -135,6 +136,24 @@ class Rethinker(object):
time.sleep(retry_wait)
retry_wait = min(retry_wait * 2, 10.0)
# https://en.wikipedia.org/wiki/Penalty_(ice_hockey)#Major_penalty
PENALTY_BOX_TIME = 300
def _server_whitelist(self):
'''
Returns list of servers that have not errored in the last five minutes.
If all servers have errored in the last five minutes, returns list with
one item, the server that errored least recently.
'''
whitelist = []
for server in self.servers:
if (server not in self.last_error
or self.last_error[server] < time.time() - self.PENALTY_BOX_TIME):
whitelist.append(server)
if not whitelist:
whitelist.append(sorted(
self.last_error.items(), key=lambda kv: kv[1])[0][0])
return whitelist
def wrap(self, delegate):
if isinstance(delegate, (types.FunctionType, types.MethodType)):
def wrapper(*args, **kwargs):