mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
minimize impact of down server
The last approach was not good: a timeout of 0.1 seconds was too short. A bunch of stuff has to happen within the timeout period inside rethinkdb.connect(), which doesn't offer a way to set only the socket timeout. Even a timeout of 0.5 seconds results in a noticeable error rate. The new approach is to put a server in the penalty box for 5 minutes when it errors. While a server is in the penalty box, we don't try to connect to it unless all the servers are in the penalty box, in which case we try the server that errored least recently.
This commit is contained in:
parent
72d6e4d39b
commit
71221dbe54
@ -113,21 +113,22 @@ class Rethinker(object):
|
||||
else:
|
||||
self.servers = servers
|
||||
self.dbname = db
|
||||
self.last_error = {} # {server: time}
|
||||
|
||||
# https://github.com/rethinkdb/rethinkdb-example-webpy-blog/blob/master/model.py
|
||||
# "Best practices: Managing connections: a connection per request"
|
||||
def _random_server_connection(self):
|
||||
retry_wait = 0.01
|
||||
while True:
|
||||
server = random.choice(self.servers)
|
||||
server = random.choice(self._server_whitelist())
|
||||
try:
|
||||
try:
|
||||
host, port = server.split(':')
|
||||
return r.connect(
|
||||
host=host, port=port, timeout=max(0.1, retry_wait))
|
||||
return r.connect(host=host, port=port)
|
||||
except ValueError:
|
||||
return r.connect(host=server, timeout=max(0.1, retry_wait))
|
||||
return r.connect(host=server)
|
||||
except Exception as e:
|
||||
self.last_error[server] = time.time()
|
||||
self.logger.warn(
|
||||
'will keep trying after failure connecting to '
|
||||
'rethinkdb server at %s: %s (sleeping for %s sec)',
|
||||
@ -135,6 +136,24 @@ class Rethinker(object):
|
||||
time.sleep(retry_wait)
|
||||
retry_wait = min(retry_wait * 2, 10.0)
|
||||
|
||||
# https://en.wikipedia.org/wiki/Penalty_(ice_hockey)#Major_penalty
|
||||
PENALTY_BOX_TIME = 300
|
||||
def _server_whitelist(self):
|
||||
'''
|
||||
Returns list of servers that have not errored in the last five minutes.
|
||||
If all servers have errored in the last five minutes, returns list with
|
||||
one item, the server that errored least recently.
|
||||
'''
|
||||
whitelist = []
|
||||
for server in self.servers:
|
||||
if (server not in self.last_error
|
||||
or self.last_error[server] < time.time() - self.PENALTY_BOX_TIME):
|
||||
whitelist.append(server)
|
||||
if not whitelist:
|
||||
whitelist.append(sorted(
|
||||
self.last_error.items(), key=lambda kv: kv[1])[0][0])
|
||||
return whitelist
|
||||
|
||||
def wrap(self, delegate):
|
||||
if isinstance(delegate, (types.FunctionType, types.MethodType)):
|
||||
def wrapper(*args, **kwargs):
|
||||
|
Loading…
x
Reference in New Issue
Block a user