I have been playing around with SQLAlchemy and found out that I cannot track reliably what is being changed within database.
I have created an example that explains what my concern is:
import re
import datetime
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import (
declarative_base,
declared_attr,
)
from sqlalchemy import (
create_engine,
event,
Column,
Boolean,
Integer,
String,
Unicode,
DateTime,
Index,
ForeignKey,
CheckConstraint,
)
from sqlalchemy.orm import (
scoped_session,
sessionmaker,
Session,
relationship,
backref,
)
import transaction
from zope.sqlalchemy import ZopeTransactionExtension
class ExtendedSession(Session):
my_var = None
DBSession = scoped_session(
sessionmaker(extension=ZopeTransactionExtension(),
class_=ExtendedSession
)
)
class BaseModel(object):
query = DBSession.query_property()
id = Column(
Integer,
primary_key=True,
)
@declared_attr
def __tablename__(cls):
class_name = re.sub(r"([A-Z])", r"_\1", cls.__name__).lower()[1:]
return "{0}".format(
class_name,
)
Base = declarative_base(cls=BaseModel)
def initialize_sql(engine):
DBSession.configure(bind=engine)
Base.metadata.bind = engine
engine = create_engine("sqlite://")
initialize_sql(engine)
class Parent(Base):
# *** Columns
col1 = Column (
String,
nullable=False,
)
# *** Relationships
# *** Methods
def __repr__(self):
return "<Parent(id: '{0}', col1: '{1}')>".format(
self.id,\
self.col1,\
)
class Child(Base):
# *** Columns
col1 = Column (
String,
nullable=False,
)
parent_id = Column (
Integer,
ForeignKey (
Parent.id,
ondelete="CASCADE",
),
nullable=False,
)
# *** Relationships
parent = relationship (
Parent,
backref=backref(
"child_elements",
uselist=True,
cascade="save-update, delete",
lazy="dynamic",
),
# If below is uncommented then instance of Parent won't appear in session.dirty
# However this relationship will never be loaded (even if needed)
#lazy="noload",
)
# *** Methods
def __repr__(self):
return "<Child(id: '{0}', col1: '{1}', parent_id: '{2}')>".format(
self.id,\
self.col1,\
self.parent_id,\
)
@event.listens_for(DBSession, 'before_flush')
def before_flush(session, flush_context, instances):
time_stamp = datetime.datetime.utcnow()
if session.new:
for elem in session.new:
print(" ### NEW {0}".format(repr(elem)))
if session.dirty:
for elem in session.dirty:
print(" ### DIRTY {0}".format(repr(elem)))
if session.deleted:
for elem in session.deleted:
print(" ### DELETED {0}".format(repr(elem)))
Base.metadata.drop_all(engine)
Base.metadata.create_all(engine)
with transaction.manager:
parent = Parent(col1="parent")
DBSession.add(parent)
DBSession.flush()
# Below loop is to demonstrate that
# each time child object is created and linked to parent
# parent is also marked as modified
# how to avoid that?
# or optionally is it possible to detect this in before_flush event
# without issuing additional SQL query?
for i in range(0, 10):
parent=Parent.query.filter(Parent.col1 == "parent").first()
child = Child(col1="{0}".format(i))
child.parent = parent
DBSession.add(child)
DBSession.flush()
# Below update will not cause associated instance of Parent appearing in session.dirty
child = Child.query.filter(Child.col1=="3").first()
child.col1="updated"
DBSession.add(child)
DBSession.flush()
In short - there are two objects:
Each time I add new instance of Child and link it with instance of Parent that instance of Parent also appears within session.dirty of before_flush event.
SQLAlchemy community adviced this behavior is expected (although I think there must be an option to change default behavior - I could not find it within doco)
So here is my question: is it possible to configure relationship such way that when I add a new instance of Child and link it to instance of Parent then that instance of Parent won't appear within session.dirty?
I have tried setting relationship as lazy="noload"
and it is not an option since I may need to use that relationship (so I may need to load it)
I would also accept a solution that would allow me to detect that Parent have not been changed within before_load event handler - however I do not want to trigger additional query to achieve this.
I would appreciate your help,
Greg
After hours of research and a hint from SQLAlchemy community I found solution that seems to work the way I need (notice additional condition within session.dirty
block).
@event.listens_for(DBSession, 'before_flush')
def before_flush(session, flush_context, instances):
time_stamp = datetime.datetime.utcnow()
if session.new:
for elem in session.new:
print(" ### NEW {0}".format(repr(elem)))
if session.dirty:
for elem in session.dirty:
# Below check was added to solve the problem
if ( session.is_modified(elem, include_collections=False) ):
print(" ### DIRTY {0}".format(repr(elem)))
if session.deleted:
for elem in session.deleted:
print(" ### DELETED {0}".format(repr(elem)))
The documentation related to my solution can be found here: http://docs.sqlalchemy.org/en/latest/orm/session_api.html#sqlalchemy.orm.session.Session.is_modified
In short - specifying include_collections=False
within session.is_modified
makes SQLAlchemy to ignore situations where multivalued collections have been changed (in my case if child was changed then parent would be filtered out by that additional check).
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With