Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Make parent object not appearing within session.dirty of before_flush event listener

I have been playing around with SQLAlchemy and found out that I cannot track reliably what is being changed within database.

I have created an example that explains what my concern is:

import re
import datetime

from sqlalchemy import create_engine

from sqlalchemy.ext.declarative import (
    declarative_base,
    declared_attr,
    )

from sqlalchemy import (
    create_engine,
    event,
    Column,
    Boolean,
    Integer,
    String,
    Unicode,
    DateTime,
    Index,
    ForeignKey,
    CheckConstraint,
    )

from sqlalchemy.orm import (
    scoped_session,
    sessionmaker,
    Session,
    relationship,
    backref,
    )

import transaction

from zope.sqlalchemy import ZopeTransactionExtension

class ExtendedSession(Session):
    my_var = None

DBSession = scoped_session(
    sessionmaker(extension=ZopeTransactionExtension(),
        class_=ExtendedSession
        )
    )

class BaseModel(object):
    query = DBSession.query_property()

    id = Column(
        Integer,
        primary_key=True,
        )

    @declared_attr
    def __tablename__(cls):
        class_name = re.sub(r"([A-Z])", r"_\1", cls.__name__).lower()[1:]
        return "{0}".format(
            class_name,
            )

Base = declarative_base(cls=BaseModel)

def initialize_sql(engine):
    DBSession.configure(bind=engine)
    Base.metadata.bind = engine

engine = create_engine("sqlite://")
initialize_sql(engine)

class Parent(Base):
    # *** Columns
    col1 = Column (
        String,
        nullable=False,
        )
    # *** Relationships
    # *** Methods
    def __repr__(self):
        return "<Parent(id: '{0}', col1: '{1}')>".format(
            self.id,\
            self.col1,\
            )

class Child(Base):
    # *** Columns
    col1 = Column (
        String,
        nullable=False,
        )
    parent_id = Column (
        Integer,
        ForeignKey (
            Parent.id,
            ondelete="CASCADE",
            ),
        nullable=False,
        )
    # *** Relationships
    parent = relationship (
        Parent,
        backref=backref(
            "child_elements",
            uselist=True,
            cascade="save-update, delete",
            lazy="dynamic",
            ),
        # If below is uncommented then instance of Parent won't appear in session.dirty
        # However this relationship will never be loaded (even if needed)
        #lazy="noload",
        )
    # *** Methods
    def __repr__(self):
        return "<Child(id: '{0}', col1: '{1}', parent_id: '{2}')>".format(
            self.id,\
            self.col1,\
            self.parent_id,\
            )

@event.listens_for(DBSession, 'before_flush')
def before_flush(session, flush_context, instances):
    time_stamp = datetime.datetime.utcnow()

    if session.new:
        for elem in session.new:
            print(" ### NEW {0}".format(repr(elem)))

    if session.dirty:
        for elem in session.dirty:
            print(" ### DIRTY {0}".format(repr(elem)))

    if session.deleted:
        for elem in session.deleted:
            print(" ### DELETED {0}".format(repr(elem)))

Base.metadata.drop_all(engine)
Base.metadata.create_all(engine)

with transaction.manager:
    parent = Parent(col1="parent")
    DBSession.add(parent)
    DBSession.flush()

    # Below loop is to demonstrate that
    # each time child object is created and linked to parent
    # parent is also marked as modified
    # how to avoid that?
    # or optionally is it possible to detect this in before_flush event
    # without issuing additional SQL query?
    for i in range(0, 10):
        parent=Parent.query.filter(Parent.col1 == "parent").first()
        child = Child(col1="{0}".format(i))
        child.parent = parent
        DBSession.add(child)
        DBSession.flush()

    # Below update will not cause associated instance of Parent appearing in session.dirty
    child = Child.query.filter(Child.col1=="3").first()
    child.col1="updated"
    DBSession.add(child)
    DBSession.flush()

In short - there are two objects:

  • Parent
  • Child - linked to Parent

Each time I add new instance of Child and link it with instance of Parent that instance of Parent also appears within session.dirty of before_flush event.

SQLAlchemy community adviced this behavior is expected (although I think there must be an option to change default behavior - I could not find it within doco)

So here is my question: is it possible to configure relationship such way that when I add a new instance of Child and link it to instance of Parent then that instance of Parent won't appear within session.dirty?

I have tried setting relationship as lazy="noload" and it is not an option since I may need to use that relationship (so I may need to load it)

I would also accept a solution that would allow me to detect that Parent have not been changed within before_load event handler - however I do not want to trigger additional query to achieve this.

I would appreciate your help,

Greg

like image 739
Greg0ry Avatar asked Sep 28 '22 00:09

Greg0ry


1 Answers

After hours of research and a hint from SQLAlchemy community I found solution that seems to work the way I need (notice additional condition within session.dirty block).

@event.listens_for(DBSession, 'before_flush')
def before_flush(session, flush_context, instances):
    time_stamp = datetime.datetime.utcnow()

    if session.new:
        for elem in session.new:
            print(" ### NEW {0}".format(repr(elem)))

    if session.dirty:
        for elem in session.dirty:
            # Below check was added to solve the problem
            if ( session.is_modified(elem, include_collections=False) ):
                print(" ### DIRTY {0}".format(repr(elem)))

    if session.deleted:
        for elem in session.deleted:
            print(" ### DELETED {0}".format(repr(elem)))

The documentation related to my solution can be found here: http://docs.sqlalchemy.org/en/latest/orm/session_api.html#sqlalchemy.orm.session.Session.is_modified

In short - specifying include_collections=False within session.is_modified makes SQLAlchemy to ignore situations where multivalued collections have been changed (in my case if child was changed then parent would be filtered out by that additional check).

like image 92
Greg0ry Avatar answered Oct 07 '22 21:10

Greg0ry