Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How to integrate SQLAlchemy and a subclassed Numpy.ndarray smoothly and in a pythonic way?

I would like to store NumPy arrays with annotations (like name) via SQLAlchemy within a relational database. To do so,

  • I separate the NumPy array from its data via a data transfer object (DTONumpy as part of MyNumpy).
  • NumPy objects are collected with Container.

What would be a nice and pythonic way to modify Container (from the example below) in a way that it provides as a list directly MyNumpy objects instead of DTONumpy which is provided by SQLAlchemy?

Here is an illustration of the problem:

import numpy as np
import zlib

import sqlalchemy as sa
from sqlalchemy.orm import relationship, scoped_session, sessionmaker
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.types import TypeDecorator, CHAR

DBSession = scoped_session(sessionmaker())
Base = declarative_base()

#### New SQLAlchemy-Type #####################
class NumpyType (sa.types.TypeDecorator):
    impl = sa.types.LargeBinary

    def process_bind_param(self, value, dialect):
        return zlib.compress(value.dumps(), 9)

    def process_result_value(self, value, dialect):
        return np.loads(zlib.decompress(value))
##############################################


class DTONumpy(Base):
    __tablename__ = 'dtos_numpy'
    id = sa.Column(sa.Integer, primary_key=True)
    amount = sa.Column('amount', NumpyType)
    name = sa.Column('name', sa.String, default='')
    container_id = sa.Column(sa.ForeignKey('containers.id'))

    container_object = relationship(
        "Container",
        uselist=False,
        backref='dto_numpy_objects'
        )

    def __init__(self, amount, name=None):
        self.amount = np.array(amount)
        self.name = name


class Container(Base):
    __tablename__ = 'containers'
    id = sa.Column(sa.Integer, primary_key=True)
    name = sa.Column(sa.String, unique=True)

    # HERE: how to access DTONumpy BUT as MyNumpy objects in a way that MyNumpy
    # is smoothly integrated into SQLAlchemy?


class MyNumpy(np.ndarray):
    _DTO = DTONumpy
    def __new__(cls, amount, name=''):
        dto = cls._DTO(amount=amount, name=name)
        return cls.newByDTO(dto)

    @classmethod
    def newByDTO(cls, dto):
        obj = np.array(dto.amount).view(cls)
        obj.setflags(write=False) # Immutable
        obj._dto = dto
        return obj

    @property
    def name(self):
        return self._dto.name


if __name__ == '__main__':
    engine = sa.create_engine('sqlite:///:memory:', echo=True)
    DBSession.configure(bind=engine)
    Base.metadata.create_all(engine)
    session = DBSession()

    mn1 = MyNumpy ([1,2,3], "good data")
    mn2 = MyNumpy ([2,3,4], "bad data")

    # Save MyNumpy objects
    c1 = Container()
    c1.name = "Test-Container"
    c1.dto_numpy_objects += [mn1._dto, mn2._dto] # not a good ui
    session.add(c1)
    session.commit()

    # Load MyNumpy objects
    c2 = session.query(Container).filter_by(name="Test-Container").first()
    # Ugly UI:
    mn3 = MyNumpy.newByDTO(c2.dto_numpy_objects[0])
    mn4 = MyNumpy.newByDTO(c2.dto_numpy_objects[1])
    name3 = mn3._dto.name
    name4 = mn4._dto.name

Container should now provide a list of MyNumpy objects and MyNumpy a reference to the according Container object (the list and the reference would have to take the SQLAlchemy mapping into account):

type (c2.my_numpy_objects[0]) == MyNumpy
>>> True
c2.my_numpy_objects.append(MyNumpy ([7,2,5,6], "new data")
print c2.dto_numpy_objects[-1].name
>>> "new data"
like image 776
Philipp der Rautenberg Avatar asked Jan 20 '12 11:01

Philipp der Rautenberg


1 Answers

Using the ListView-answer from that question, I came up with the following solution:

First, modify Container by adding a ListView-property on top of the SQLAlchemy-property dto_numpy_objects:

  def __init__(self, name):
    self.name = name
    """
    At this point, the following code doesn't work:
    ---------------------
    self.my_numpies = ListView(
        self.dto_numpy_objects, # see `DTO_Numpy.container_object`
        MyNumpy.newByDTO,
        MyNumpy.getDTO)
    ---------------------
    SQLAlchemy seems to change the `dto_numypy_object`-object after the
    init-call. Thus, `my_numpies._data` doesn't reference `dto_numpy_objects`
    anymore. One solution is to implement a property that initalizes `ListView`
    on first access. See below, property `Container.my_numpies`.
    """

  @property
  def my_numpies(self):
    if not hasattr(self, '_my_numpies'):
      # The following part can not be exe
      self._my_numpies = ListView(
          self.dto_numpy_objects, # see `DTO_Numpy.container_object`
          MyNumpy.newByDTO,
          MyNumpy.getDTO)

    return self._my_numpies

Second, add method getDTO which can be used as new2raw-converter MyNumpy:

  def getDTO(self):
    return self._dto

In order to use the backref container_object also from MyNumpy implement it as a wrapper by adding the following method:

  def __getattr__(self, attr):
    return getattr(self._dto, attr)

All together, the code looks like this:

import numpy as np
import zlib

import sqlalchemy as sa
from sqlalchemy.orm import relationship, scoped_session, sessionmaker
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.types import TypeDecorator, CHAR

DBSession = scoped_session(sessionmaker())
Base = declarative_base()


class ListView(list):
  def __init__(self, raw_list, raw2new, new2raw):
    self._data = raw_list
    self.converters = {'raw2new': raw2new,
        'new2raw': new2raw}

  def __repr__(self):
    repr_list = [self.converters['raw2new'](item) for item in self._data]
    repr_str = "["
    for element in repr_list:
      repr_str += element.__repr__() + ",\n "
    repr_str = repr_str[:-3] + "]"
    return repr_str

  def append(self, item):
    self._data.append(self.converters['new2raw'](item))

  def pop(self, index):
    self._data.pop(index)

  def __getitem__(self, index):
    return self.converters['raw2new'](self._data[index])

  def __setitem__(self, key, value):
    self._data.__setitem__(key, self.converters['new2raw'](value))

  def __delitem__(self, key):
    return self._data.__delitem__(key)

  def __getslice__(self, i, j):
    return ListView(self._data.__getslice__(i,j), **self.converters)

  def __contains__(self, item):
    return self._data.__contains__(self.converters['new2raw'](item))

  def __add__(self, other_list_view):
    assert self.converters == other_list_view.converters
    return ListView(
        self._data + other_list_view._data,
        **self.converters)

  def __len__(self):
    return len(self._data)

  def __iter__(self):
    return iter([self.converters['raw2new'](item) for item in self._data])

  def __eq__(self, other):
    return self._data == other._data


#### New SQLAlchemy-Type #####################
class NumpyType (sa.types.TypeDecorator):
  impl = sa.types.LargeBinary

  def process_bind_param(self, value, dialect):
    return zlib.compress(value.dumps(), 9)

  def process_result_value(self, value, dialect):
    return np.loads(zlib.decompress(value))
##############################################


class DTONumpy(Base):
  __tablename__ = 'dtos_numpy'
  id = sa.Column(sa.Integer, primary_key=True)
  amount = sa.Column('amount', NumpyType)
  name = sa.Column('name', sa.String, default='')
  container_id = sa.Column(sa.ForeignKey('containers.id'))

  container_object = relationship(
      "Container",
      uselist=False,
      backref='dto_numpy_objects'
      )

  def __init__(self, amount, name=None):
    self.amount = np.array(amount)
    self.name = name

  def reprInitParams(self):
    return "(%r, %r)" %(self.amount, self.name)

  def __repr__(self):
    return "%s%s" %(
        self.__class__.__name__,
        self.reprInitParams())


class Container(Base):
  __tablename__ = 'containers'
  id = sa.Column(sa.Integer, primary_key=True)
  name = sa.Column(sa.String, unique=True)

  def __init__(self, name):
    self.name = name
    super(Container, self).__init__()

  @property
  def my_numpies(self):
    if not hasattr(self, '_my_numpies'):
      # The following part can not be exe
      self._my_numpies = ListView(
          self.dto_numpy_objects, # see `DTO_Numpy.container_object`
          MyNumpy.newByDTO,
          MyNumpy.getDTO)

    return self._my_numpies


class MyNumpy(np.ndarray):
  _DTO = DTONumpy
  def __new__(cls, amount, name=''):
    dto = cls._DTO(amount=amount, name=name)
    return cls.newByDTO(dto)

  @classmethod
  def newByDTO(cls, dto):
    obj = np.array(dto.amount).view(cls)
    obj.setflags(write=False) # Immutable
    obj._dto = dto
    return obj

  @property
  def name(self):
    return self._dto.name

  def getDTO(self):
    return self._dto

  def __getattr__(self, attr):
    return getattr(self._dto, attr)

  def __repr__(self):
    return "%s%s" %(
        self.__class__.__name__,
        self._dto.reprInitParams())


if __name__ == '__main__':
  engine = sa.create_engine('sqlite:///:memory:', echo=True)
  DBSession.configure(bind=engine)
  Base.metadata.create_all(engine)
  session = DBSession()

  mn1 = MyNumpy ([1,2,3], "good data")
  mn2 = MyNumpy ([2,3,4], "bad data")

  # Save MyNumpy-Objects
  c1 = Container("Test-Container")
  c1.my_numpies.append(mn1)
  c1.my_numpies.append(mn2)
  session.add(c1)
  session.commit()

  # Load MyNumpy-Objects
  c2 = session.query(Container).filter_by(name="Test-Container").first()
  mn3 = c1.my_numpies[0]
  mn4 = c1.my_numpies[1]

For better representation I added

  • DTONumpy.reprInitParams
  • DTONumpy.__repr__
  • MyNumpy.__repr__

One thing that still doesn't work:

  c1.my_numpies += [mn1, mn2.dto]
like image 122
Philipp der Rautenberg Avatar answered Oct 16 '22 15:10

Philipp der Rautenberg