1
0
mirror of https://github.com/internetarchive/warcprox.git synced 2025-01-18 13:22:09 +01:00

better smarter ORM with more tests

This commit is contained in:
Noah Levitt 2017-02-23 16:07:14 -08:00
parent d76e219e7b
commit abdecc46b8
4 changed files with 242 additions and 73 deletions

@ -1,43 +1,69 @@
.. image:: https://travis-ci.org/nlevitt/rethinkstuff.svg?branch=master
:target: https://travis-ci.org/nlevitt/rethinkstuff
rethinkstuff
============
Rudimentary rethinkdb python library with some smarts (and maybe some
dumbs)
RethinkDB python library. Provides connection manager and ORM framework
(object-relational mapping, sometimes called ODM or OM for nosql databases).
What? Why?
----------
Connection Manager
------------------
As of now there is a very small amount of code here. I had three
projects using the Rethinker class, and had enough code churn inside the
class that it became too painful to keep the three copies in sync. Thus,
a library shared among them.
Three main purposes:
Three main purposes:
- round-robin connections among database servers
- make sure connections close at proper time
- round-robin connections among database servers
- make sure connections close at proper time
- retry retry-able queries on failure
Not really a connection pool, because it doesn't keep any connections
open, but it does take care of connection management.
Not currently a connection pool, because it doesn't keep any connections open.
Should be possible to implement connection pooling without changing the API.
Service Registry
~~~~~~~~~~~~~~~~
Now also has a ServiceRegistry class, a lightweight solution for service
discovery for distributed services. Maintains service info and status in
a rethinkdb table called “services”.
Usage
-----
Usage Example
~~~~~~~~~~~~~
::
import rethinkstuff
r = rethinkstuff.Rethinker(['db0.foo.com', 'db0.foo.com:38015', 'db1.foo.com'], 'my_db')
r.table('my_table').insert({'foo':'bar','baz':2}).run()
for result in r.table('my_table'):
r.table('mytable').insert({'foo':'bar','baz':2}).run()
for result in r.table('mytable'):
print("result={}".format(result))
ORM
---
Simple yet powerful ORM system. *Does not enforce a schema.*
Usage Example
~~~~~~~~~~~~~
::
import rethinkstuff
r = rethinkstuff.Rethinker(['db0.foo.com', 'db0.foo.com:38015', 'db1.foo.com'], 'my_db')
class MyTable(rethinkstuff.Document):
pass
MyTable.table_create(r)
doc1 = MyTable(r, {'animal': 'elephant', 'size': 'large'})
doc1.save()
doc1_copy = MyTable.get(r, doc1.id)
doc1_copy.food = 'bread'
doc1_copy.save()
doc1.first_name = 'Frankworth'
doc1.save()
doc1.refresh()
Service Registry
----------------
Now also has a ServiceRegistry class, a lightweight solution for service
discovery for distributed services. Maintains service info and status in
a rethinkdb table called “services”.

@ -18,6 +18,7 @@ limitations under the License.
import rethinkdb as r
import logging
import rethinkstuff
class WatchedDict(dict):
def __init__(self, d, callback, field):
@ -119,6 +120,13 @@ def watch(obj, callback, field):
else:
return obj
class classproperty(object):
    '''
    Read-only descriptor whose value is computed from the owning class.

    Use as a decorator on a function taking the class as its only argument.
    The value is available from both the class and its instances, and a
    subclass can override it with a plain class attribute.
    '''

    def __init__(self, fget):
        # fget: callable invoked as fget(cls) on every attribute access
        self.fget = fget

    def __get__(self, owner_self, owner_cls=None):
        # The descriptor protocol allows the owner class to be omitted when
        # __get__ is called directly with only an instance; derive it from
        # the instance in that case.
        if owner_cls is None:
            owner_cls = type(owner_self)
        return self.fget(owner_cls)
class Document(dict, object):
'''
Base class for ORM.
@ -134,16 +142,47 @@ class Document(dict, object):
field. For example, if your document starts as {'a': {'b': 'c'}}, then
you run d['a']['x'] = 'y', then the update will replace the whole 'a'
field. Nested field updates get too complicated any other way.
The primary key must be `id`, the rethinkdb default. (XXX we could find out
what the primary key is from the "table_config" system table.)
'''
@classproperty
def table(cls):
'''
Returns default table name, which is the class name, lowercased.
Subclasses can override this default more simply:
class Something(rethinkstuff.Document):
table = 'my_table_name'
'''
return cls.__name__.lower()
@classmethod
def get(cls, rethinker, pk):
'''
Retrieve an instance from the database.
'''
doc = cls(rethinker)
doc[doc.pk_field] = pk
doc.refresh()
return doc
@classmethod
def table_create(cls, rethinker):
'''
Creates the table.
Can be run on an instance of the class: `my_doc.table_create(rethinker)`
Subclasses may want to override this method to do more things, such as
creating indexes.
'''
rethinker.table_create(cls.table).run()
def __init__(self, rethinker, d={}):
dict.__setattr__(self, '_r', rethinker)
for k in d:
dict.__setitem__(
self, k, watch(d[k], callback=self._updated, field=k))
dict.__setattr__(self, '_pk', None)
self._clear_updates()
for k in d:
self[k] = watch(d[k], callback=self._updated, field=k)
def _clear_updates(self):
dict.__setattr__(self, '_updates', {})
@ -163,7 +202,7 @@ class Document(dict, object):
if key in self._updates:
del self._updates[key]
# XXX do we need the other stuff like in WatchedDict?
# XXX probably need the other stuff like in WatchedDict
def _updated(self, field):
# callback for all updates
@ -172,47 +211,84 @@ class Document(dict, object):
self._deletes.remove(field)
@property
def pk_field(self):
    '''
    Name of the primary key field as retrieved from rethinkdb table
    metadata, 'id' by default. Should not be overridden. Override
    `table_create` if you want to use a nonstandard field as the primary
    key.

    The name is looked up in the `table_config` table of the `rethinkdb`
    database on first access and cached on the instance in `_pk`.

    Raises:
        Exception: if the lookup fails for any reason, for example
            because the table does not exist.
    '''
    if not self._pk:
        try:
            pk = self._r.db('rethinkdb').table('table_config').filter({
                'db': self._r.dbname, 'name': self.table}).get_field(
                    'primary_key')[0].run()
            # bypass the document's own attribute handling for internal state
            dict.__setattr__(self, '_pk', pk)
        except Exception as e:
            # interpolate explicitly; passing the values as extra
            # Exception arguments would leave the message unformatted
            raise Exception(
                'problem determining primary key for table %s.%s: %s' % (
                    self._r.dbname, self.table, e))
    return self._pk
def table_create(self):
@property
def pk_value(self):
'''
Creates the table.
Subclasses may want to override this method to do more things, such as
creating indexes.
Value of primary key field.
'''
self._r.table_create(self.table).run()
return getattr(self, self.pk_field)
def insert(self):
result = self._r.table(self.table).insert(self).run()
if 'generated_keys' in result:
dict.__setitem__(self, 'id', result['generated_keys'][0])
self._clear_updates()
def save(self):
    '''
    Persist pending changes to the database.

    If the document has a primary key value, changed fields are sent with
    an update query and deleted fields are removed with a replace query,
    both addressed to this document's row. If the document has no primary
    key value, or no row with that key exists, the whole document is
    inserted instead, and a key generated by the database is stored under
    `pk_field`. Pending updates and deletes are cleared afterwards.

    Raises:
        Exception: if rethinkdb reports an unexpected result for any of
            the queries.
    '''
    should_insert = False
    try:
        pk = self.pk_value  # raises KeyError if unset
    except KeyError:
        # no primary key yet, so there is nothing to update or replace
        should_insert = True

    if not should_insert and self._updates:
        # r.literal() to replace, not merge with, nested fields
        updates = {field: r.literal(self._updates[field])
                   for field in self._updates}
        query = self._r.table(self.table).get(pk).update(updates)
        result = query.run()
        if result['skipped']:  # primary key not found
            should_insert = True
        elif result['errors'] or result['deleted']:
            raise Exception(
                'unexpected result %s from rethinkdb query %s' % (
                    result, query))

    if not should_insert and self._deletes:
        # address only this document's row, never the whole table
        query = self._r.table(self.table).get(pk).replace(
            r.row.without(self._deletes))
        result = query.run()
        if result['errors']:  # primary key not found
            should_insert = True
        elif result['replaced'] + result.get('unchanged', 0) != 1:
            # exactly one row must have been rewritten, or left unchanged
            # because the deleted fields were never stored
            raise Exception(
                'unexpected result %s from rethinkdb query %s' % (
                    result, query))

    if should_insert:
        query = self._r.table(self.table).insert(self)
        result = query.run()
        if result['inserted'] != 1:
            raise Exception(
                'unexpected result %s from rethinkdb query %s' % (
                    result, query))
        if 'generated_keys' in result:
            # store the key without recording it as a pending update
            dict.__setitem__(
                self, self.pk_field, result['generated_keys'][0])

    self._clear_updates()
def update(self):
# hmm, masks dict.update()
if self._updates:
# r.literal() to replace, not merge with, nested fields
updates = {
field: r.literal(
self._updates[field]) for field in self._updates}
self._r.table(self.table).get(self.id).update(updates).run()
if self._deletes:
self._r.table(self.table).replace(
r.row.without(self._deletes)).run()
self._clear_updates()
def refresh(self):
'''
Refresh from the database.
'''
d = self._r.table(self.table).get(self.id).run()
d = self._r.table(self.table).get(self.pk_value).run()
if d is None:
raise KeyError
for k in d:
dict.__setitem__(
self, k, watch(d[k], callback=self._updated, field=k))

@ -3,7 +3,7 @@ import codecs
setuptools.setup(
name='rethinkstuff',
version='0.2.0.dev59',
version='0.2.0.dev60',
packages=['rethinkstuff'],
classifiers=[
'Programming Language :: Python :: 2.7',

@ -43,10 +43,14 @@ class RethinkerForTesting(rethinkstuff.Rethinker):
@pytest.fixture(scope="module")
def r():
r = RethinkerForTesting()
result = r.db_create("my_db").run()
try:
r.db_drop("rethinkstuff_test_db").run()
except rethinkdb.errors.ReqlOpFailedError:
pass
result = r.db_create("rethinkstuff_test_db").run()
assert not r.last_conn.is_open()
assert result["dbs_created"] == 1
return RethinkerForTesting(db="my_db")
return RethinkerForTesting(db="rethinkstuff_test_db")
@pytest.fixture(scope="module")
def my_table(r):
@ -275,18 +279,24 @@ def test_utcnow():
## XXX what else can we test without jumping through hoops?
class SomeDoc(rethinkstuff.Document):
pass
def test_orm(r):
class SomeDoc(rethinkstuff.Document):
table = 'some_doc'
SomeDoc.table_create(r)
with pytest.raises(Exception):
SomeDoc.table_create(r)
# test that overriding Document.table works
assert 'some_doc' in r.table_list().run()
assert not 'somedoc' in r.table_list().run()
d = SomeDoc(rethinker=r, d={
'a': 'b',
'c': {'d': 'e'},
'f': ['g', 'h'],
'i': ['j', {'k': 'l'}]})
d.table_create()
d.insert()
d.save()
assert d._updates == {}
d.m = 'n'
@ -355,9 +365,66 @@ def test_orm(r):
'f': ['u', 'v', {'w': 'x', 'y': 'z'}], 'i': 't'}
expected = dict(d)
d.update()
d.save()
assert d._updates == {}
assert d._deletes == set()
d.refresh()
assert d == expected
d_copy = SomeDoc.get(r, d.id)
assert d == d_copy
d['zuh'] = 'toot'
d.save()
assert d != d_copy
d_copy.refresh()
assert d == d_copy
def test_orm_pk(r):
class NonstandardPrimaryKey(rethinkstuff.Document):
@classmethod
def table_create(cls, rethinker):
rethinker.table_create(cls.table, primary_key='not_id').run()
with pytest.raises(Exception):
NonstandardPrimaryKey.get(r, 'no_such_thing')
NonstandardPrimaryKey.table_create(r)
# new empty doc
f = NonstandardPrimaryKey(r, {})
f.save()
assert f.pk_value
assert 'not_id' in f
assert f.not_id == f.pk_value
assert len(f.keys()) == 1
with pytest.raises(KeyError):
NonstandardPrimaryKey.get(r, 'no_such_thing')
# new doc with (only) primary key
d = NonstandardPrimaryKey(r, {'not_id': 1})
assert d.not_id == 1
assert d.pk_value == 1
d.save()
d_copy = NonstandardPrimaryKey.get(r, 1)
assert d == d_copy
# new doc with something in it
e = NonstandardPrimaryKey(r, {'some_field': 'something'})
with pytest.raises(KeyError):
e.not_id
with pytest.raises(KeyError):
e['not_id']
e.save()
assert e.not_id
e_copy = NonstandardPrimaryKey.get(r, e.not_id)
assert e == e_copy
e_copy['blah'] = 'toot'
e_copy.save()
e.refresh()
assert e['blah'] == 'toot'
assert e == e_copy