Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

SQLAlchemy - Adding a ForeignKeyConstraint to a many-to-many table that is based on another relationship

Forgive me if this has been answered elsewhere. I've been searching SO and haven't been able to translate the seemingly relevant Q&As to my scenerio.

I'm working on a fun personal project where I have 4 main schemas (barring relationships for now):

  • Persona (name, bio)
  • Episode (title, plot)
  • Clip (url, timestamp)
  • Image (url)

Restrictions (Basis of Relationships):

  1. A Persona can show up in multiple episodes, as well as multiple clips and images from those episodes (but might not be in all clips/images related to an episode).
  2. An Episode can contain multiple personas, clips, and images.
  3. An Image/Clip can only be related to a single Episode, but can be related to multiple personas.
  4. If a Persona is already assigned to episode(s), then any clip/image assigned to the persona can only be from one of those episodes or (if new) must only be capable of having one of the episodes that the persona appeared in associated to the clip/image.
  5. If an Episode is already assigned persona(s), then any clip/image assigned to the episode must be related to aleast one of those personas or (if new) must only be capable of having one or more of the personas from the episode associated to the clip/image.

I've designed the database structure like so: DB Schema

This generates the following sql:

DROP TABLE IF EXISTS episodes;
DROP TABLE IF EXISTS personas;
DROP TABLE IF EXISTS personas_episodes;
DROP TABLE IF EXISTS clips;
DROP TABLE IF EXISTS personas_clips;
DROP TABLE IF EXISTS images;
DROP TABLE IF EXISTS personas_images;


CREATE TABLE episodes (
id INT NOT NULL PRIMARY KEY,
title VARCHAR(120) NOT NULL UNIQUE,
plot TEXT,
tmdb_id VARCHAR(10) NOT NULL,
tvdb_id VARCHAR(10) NOT NULL,
imdb_id VARCHAR(10) NOT NULL);

CREATE TABLE personas (
id INT NOT NULL PRIMARY KEY,
name VARCHAR(30) NOT NULL,
bio TEXT NOT NULL);

CREATE TABLE personas_episodes (
persona_id INT NOT NULL,
episode_id INT NOT NULL,
PRIMARY KEY (persona_id,episode_id),
FOREIGN KEY(persona_id) REFERENCES personas(id),
FOREIGN KEY(episode_id) REFERENCES episodes(id));

CREATE TABLE clips (
id INT NOT NULL PRIMARY KEY,
title VARCHAR(100) NOT NULL,
timestamp VARCHAR(7) NOT NULL,
link VARCHAR(100) NOT NULL,
episode_id INT NOT NULL,
FOREIGN KEY(episode_id) REFERENCES episodes(id));

CREATE TABLE personas_clips (
clip_id INT NOT NULL,
persona_id INT NOT NULL,
PRIMARY KEY (clip_id,persona_id),
FOREIGN KEY(clip_id) REFERENCES clips(id),
FOREIGN KEY(persona_id) REFERENCES personas(id));

CREATE TABLE images (
id INT NOT NULL PRIMARY KEY,
link VARCHAR(120) NOT NULL UNIQUE,
path VARCHAR(120) NOT NULL UNIQUE,
episode_id INT NOT NULL,
FOREIGN KEY(episode_id) REFERENCES episodes(id));

CREATE TABLE personas_images (
persona_id INT NOT NULL,
image_id INT NOT NULL,
PRIMARY KEY (persona_id,image_id),
FOREIGN KEY(persona_id) REFERENCES personas(id),
FOREIGN KEY(image_id) REFERENCES images(id));

And I've attempted to create the same schema in SQLAchemy models (keeping in mind SQLite for testing, PostgreSQL for production) like so:

# db is a configured Flask-SQLAlchemy instance
from app import db
# Alias common SQLAlchemy names
Column = db.Column
relationship = db.relationship


class PkModel(Model):
    """Base model class that adds a 'primary key' column named ``id``."""
 
    __abstract__ = True
    id = Column(db.Integer, primary_key=True)
 
 
def reference_col(
    tablename, nullable=False, pk_name="id", foreign_key_kwargs=None, column_kwargs=None
):
    """Column that adds primary key foreign key reference.
 
    Usage: ::
 
        category_id = reference_col('category')
        category = relationship('Category', backref='categories')
    """
    foreign_key_kwargs = foreign_key_kwargs or {}
    column_kwargs = column_kwargs or {}
 
    return Column(
        db.ForeignKey(f"{tablename}.{pk_name}", **foreign_key_kwargs),
        nullable=nullable,
        **column_kwargs,
    )

personas_episodes = db.Table(
    "personas_episodes",
    db.Column("persona_id", db.ForeignKey("personas.id"), primary_key=True),
    db.Column("episode_id", db.ForeignKey("episodes.id"), primary_key=True),
)
 
personas_clips = db.Table(
    "personas_clips",
    db.Column("persona_id", db.ForeignKey("personas.id"), primary_key=True),
    db.Column("clip_id", db.ForeignKey("clips.id"), primary_key=True),
)
 
personas_images = db.Table(
    "personas_images",
    db.Column("persona_id", db.ForeignKey("personas.id"), primary_key=True),
    db.Column("image_id", db.ForeignKey("images.id"), primary_key=True),
)
 
 
class Persona(PkModel):
    """One of Roger's personas."""
 
    __tablename__ = "personas"
    name = Column(db.String(80), unique=True, nullable=False)
    bio = Column(db.Text)
    # relationships
    episodes = relationship("Episode", secondary=personas_episodes, back_populates="personas")
    clips = relationship("Clip", secondary=personas_clips, back_populates="personas")
    images = relationship("Image", secondary=personas_images, back_populates="personas")
 
    def __repr__(self):
        """Represent instance as a unique string."""
        return f"<Persona({self.name!r})>"
 
 
class Image(PkModel):
    """An image of one of Roger's personas from an episode of American Dad."""
    
    __tablename__ = "images"
    link = Column(db.String(120), unique=True)
    path = Column(db.String(120), unique=True)
    episode_id = reference_col("episodes")
    # relationships
    personas = relationship("Persona", secondary=personas_images, back_populates="images")
    
 
 
class Episode(PkModel):
    """An episode of American Dad."""
    
    # FIXME: We can add Clips and Images linked to Personas that are not assigned to this episode
 
    __tablename__ = "episodes"
    title = Column(db.String(120), unique=True, nullable=False)
    plot = Column(db.Text)
    tmdb_id = Column(db.String(10))
    tvdb_id = Column(db.String(10))
    imdb_id = Column(db.String(10))
    # relationships
    personas = relationship("Persona", secondary=personas_episodes, back_populates="episodes")
    images = relationship("Image", backref="episode")
    clips = relationship("Clip", backref="episode")
 
    def __repr__(self):
        """Represent instance as a unique string."""
        return f"<Episode({self.title!r})>"
 
 
class Clip(PkModel):
    """A clip from an episode of American Dad that contains one or more of Roger's personas."""
 
    __tablename__ = "clips"
    title = Column(db.String(80), unique=True, nullable=False)
    timestamp = Column(db.String(7), nullable=True)  # 00M:00S
    link = Column(db.String(7), nullable=True)
    episode_id = reference_col("episodes")
    # relationships
    personas = relationship("Persona", secondary=personas_clips, back_populates="clips")

However, notice the FIXME comment. I'm having trouble figuring out how to constrain the many-to-many relationships on personas+images, personas+clips, and personas+episodes in a way that they all look at each other before adding a new entry to restrict the possible additions to the subset of items that meet the criteria of those other many-to-many relationships.

Can someone please provide a solution to ensure the many-to-many relationships respect the episode_id relationship in the parent tables?

Edit to add pseudo model example of expected behavior

# omitting some detail fields for brevity
e1 = Episode(title="Some Episode")
e2 = Episode(title="Another Episode")
p1 = Persona(name="Raider Dave", episodes=[e1])
p2 = Persona(name="Ricky Spanish", episodes=[e2])
c1 = Clip(title="A clip", episode=e1, personas=[p2])  # should fail
i1 = Image(title="An image", episode=e2, personas=[p1]) # should fail
c2 = Clip(title="Another clip", episode=e1, personas=[p1])  # should succeed
i2 = Image(title="Another image", episode=e2, personas=[p2]) # should succeed
like image 954
CaffeinatedMike Avatar asked Oct 02 '21 17:10

CaffeinatedMike


People also ask

How do you create a many-to-many relationship in SQLAlchemy?

You add a tags class variable to the Post model. You use the db. relationship() method, passing it the name of the tags model ( Tag in this case). You pass the post_tag association table to the secondary parameter to establish a many-to-many relationship between posts and tags.

How do you create a one to many relationship in flask-SQLAlchemy?

Flask Flask-SQLAlchemy Relationships: One to Many In order to achieve that we place a Foreign key on the child referencing the parent that is from our example we place a foreign key on Post class to reference the User class. We then use relationship() on the parent which we access via our SQLAlchemy object db .

What is the use of foreign key constraint in MySQL?

SQL FOREIGN KEY Constraint. The FOREIGN KEY constraint is used to prevent actions that would destroy links between tables. A FOREIGN KEY is a field (or collection of fields) in one table, that refers to the PRIMARY KEY in another table. The table with the foreign key is called the child table, and the table with the primary key is called the ...

What is a foreign key in SQL?

A FOREIGN KEY is a field (or collection of fields) in one table that refers to the PRIMARY KEY in another table. ... The table containing the foreign key is called the child table, and the table containing the candidate key is called the referenced or parent table.

How do I add a foreign key to an existing table?

SQL FOREIGN KEY on ALTER TABLE. To create a FOREIGN KEY constraint on the "PersonID" column when the "Orders" table is already created, use the following SQL: MySQL / SQL Server / Oracle / MS Access: ALTER TABLE Orders. ADD FOREIGN KEY (PersonID) REFERENCES Persons(PersonID);

What is SQLAlchemy noforeignkeyserror?

sqlalchemy.exc.NoForeignKeysError: Could not determine join condition between parent/child tables on relationship Author.articles - there are no foreign keys linking these tables via secondary table ‘authorarticle’.


1 Answers

Add:

  • a non-nullable column episode_id,
  • a composite foreign key referencing personas_episode, and
  • a trigger to autofill episode_id.

The non-nullable column and the composite foreign key are sufficient to produce the correct constraints on a database-level as well as ensure that only proper data can be added outside of the SQLAlchemy models.

The trigger is proposed as a result of the lack of support within SQLAlchemy models to intercept before_insert event for Table referenced in relationship.secondary.

Implementation

SQLite doesn't support modifying NEW.episode_id in a BEFORE INSERT trigger, which means we have to autofill in an AFTER INSERT trigger. So, we allow the column to be nullable and add 2 more triggers to check episode_id constraint later.

episode_id_nullable = db.engine.dialect.name == "sqlite"                # Add this

personas_clips = db.Table(
    "personas_clips",
    db.Column("persona_id", db.ForeignKey("personas.id"), primary_key=True),
    db.Column("episode_id", db.Integer, nullable=episode_id_nullable),  # Add this
    db.Column("clip_id", db.ForeignKey("clips.id"), primary_key=True),
    db.ForeignKeyConstraint(["persona_id", "episode_id"], ["personas_episodes.persona_id", "personas_episodes.episode_id"]),  # Add this
)

personas_images = db.Table(
    "personas_images",
    db.Column("persona_id", db.ForeignKey("personas.id"), primary_key=True),
    db.Column("episode_id", db.Integer, nullable=episode_id_nullable),  # Add this
    db.Column("image_id", db.ForeignKey("images.id"), primary_key=True),
    db.ForeignKeyConstraint(["persona_id", "episode_id"], ["personas_episodes.persona_id", "personas_episodes.episode_id"]),  # Add this
)

SQLite triggers:

  1. Before insert, check that the clip_id/image_id references a clip/image in an episode where persona is in (based on persona_episodes).
  2. Before update, check that the episode_id is not set to NULL.
  3. After insert, autofill the episode_id.
SQLITE_CHECK_EPISODE_ID_BEFORE_INSERT = """
CREATE TRIGGER {table_name}_check_episode_id_before_insert BEFORE INSERT ON {table_name}
  FOR EACH ROW
  WHEN NEW.episode_id IS NULL
  BEGIN
    SELECT RAISE(ABORT, 'NOT NULL constraint failed: {table_name}.episode_id') WHERE NOT EXISTS (
        SELECT 1
        FROM {fk_target_table_name}
        JOIN personas_episodes ON {fk_target_table_name}.episode_id = personas_episodes.episode_id
        WHERE {fk_target_table_name}.{fk_target_name} = NEW.{fk_name}
          AND personas_episodes.persona_id = NEW.persona_id
    );
  END;
"""

SQLITE_CHECK_EPISODE_ID_BEFORE_UPDATE = """
CREATE TRIGGER {table_name}_check_episode_id_before_update BEFORE UPDATE ON {table_name}
  FOR EACH ROW
  WHEN NEW.episode_id IS NULL
  BEGIN
    SELECT RAISE(ABORT, 'NOT NULL constraint failed: {table_name}.episode_id');
  END;
"""

SQLITE_AUTOFILL_EPISODE_ID = """
CREATE TRIGGER {table_name}_autofill_episode_id AFTER INSERT ON {table_name}
  FOR EACH ROW
  WHEN NEW.episode_id IS NULL
  BEGIN
    UPDATE {table_name}
    SET episode_id = (SELECT {fk_target_table_name}.episode_id
        FROM {fk_target_table_name}
        JOIN personas_episodes ON {fk_target_table_name}.episode_id = personas_episodes.episode_id
        WHERE {fk_target_table_name}.{fk_target_name} = NEW.{fk_name}
          AND personas_episodes.persona_id = NEW.persona_id)
    WHERE {fk_name} = NEW.{fk_name}
      AND persona_id = NEW.persona_id;
  END;
"""

PostgreSQL trigger:

  1. Before insert, autofill the episode_id.
POSTGRESQL_AUTOFILL_EPISODE_ID = """
CREATE OR REPLACE FUNCTION {table_name}_autofill_episode_id() RETURNS TRIGGER AS ${table_name}_autofill_episode_id$
  DECLARE
    _episode_id INT;
    in_episode BOOL;
  BEGIN
    IF NEW.episode_id IS NULL THEN
        SELECT episode_id INTO _episode_id FROM {fk_target_table_name} WHERE {fk_target_name} = NEW.{fk_name};
        SELECT TRUE INTO in_episode FROM personas_episodes WHERE persona_id = NEW.persona_id AND episode_id = _episode_id;
        IF in_episode IS NOT NULL THEN
            NEW.episode_id = _episode_id;
        END IF;
    END IF;
    RETURN NEW;
  END;
${table_name}_autofill_episode_id$ LANGUAGE plpgsql;

CREATE TRIGGER {table_name}_autofill_episode_id BEFORE INSERT OR UPDATE ON {table_name}
  FOR EACH ROW EXECUTE PROCEDURE {table_name}_autofill_episode_id();
"""

Adding the triggers after_create the tables personas_clips and personas_images:

from sqlalchemy import event, text


def after_create_trigger_autofill_episode_id(target, connection, **kw):
    fk = next(fk for fk in target.foreign_keys if "personas" not in fk.column.table.name)
    if connection.dialect.name == "sqlite":
        connection.execute(text(SQLITE_CHECK_EPISODE_ID_BEFORE_INSERT.format(table_name=target.name, fk_target_table_name=fk.column.table.name, fk_target_name=fk.column.name,fk_name=fk.parent.name)))
        connection.execute(text(SQLITE_CHECK_EPISODE_ID_BEFORE_UPDATE.format(table_name=target.name, fk_target_table_name=fk.column.table.name, fk_target_name=fk.column.name, fk_name=fk.parent.name)))
        connection.execute(text(SQLITE_AUTOFILL_EPISODE_ID.format(table_name=target.name, fk_target_table_name=fk.column.table.name, fk_target_name=fk.column.name, fk_name=fk.parent.name)))
    elif connection.dialect.name == "postgresql":
        connection.execute(text(POSTGRESQL_AUTOFILL_EPISODE_ID.format(table_name=target.name, fk_target_table_name=fk.column.table.name, fk_target_name=fk.column.name, fk_name=fk.parent.name)))


event.listen(personas_clips, "after_create", after_create_trigger_autofill_episode_id)
event.listen(personas_images, "after_create", after_create_trigger_autofill_episode_id)

Test cases

Here's what I have at the moment based on the expected behaviour in the question.

from sqlalchemy.exc import IntegrityError
from sqlalchemy.sql import select

from models import *

if db.engine.dialect.name == "sqlite":
    db.session.execute("pragma foreign_keys=on")
else:
    db.session.execute("""
DROP TABLE IF EXISTS episodes CASCADE;
DROP TABLE IF EXISTS personas CASCADE;
DROP TABLE IF EXISTS personas_episodes CASCADE;
DROP TABLE IF EXISTS clips CASCADE;
DROP TABLE IF EXISTS personas_clips;
DROP TABLE IF EXISTS images CASCADE;
DROP TABLE IF EXISTS personas_images;
""")
    db.session.commit()

db.create_all()

e1 = Episode(title="Some Episode")
e2 = Episode(title="Another Episode")
db.session.add(e1)
db.session.add(e2)
db.session.commit()

p1 = Persona(name="Raider Dave", episodes=[e1])
p2 = Persona(name="Ricky Spanish", episodes=[e2])
db.session.add(p1)
db.session.add(p2)
db.session.commit()

c1 = Clip(title="A clip", episode=e1, personas=[p2])         # should fail
db.session.add(c1)
try:
    db.session.commit()
    assert False
except IntegrityError:
    db.session.rollback()
assert Clip.query.first() is None, list(db.session.execute(select(personas_clips)))

i1 = Image(link="An image", episode=e2, personas=[p1])       # should fail
db.session.add(i1)
try:
    db.session.commit()
    assert False
except IntegrityError:
    db.session.rollback()
assert Image.query.first() is None, list(db.session.execute(select(personas_images)))

c2 = Clip(title="Another clip", episode=e1, personas=[p1])   # should succeed
db.session.add(c2)
db.session.commit()
assert Clip.query.first() is not None

i2 = Image(link="Another image", episode=e2, personas=[p2])  # should succeed
db.session.add(i2)
db.session.commit()
assert Image.query.first() is not None

Alternatives that didn't work out

SQLAlchemy doesn't appear to support before_insert event for Table, only Model.
https://docs.sqlalchemy.org/en/14/orm/events.html#sqlalchemy.orm.MapperEvents.before_insert

I tried using Association Proxy, but could not support c2.personas.remove(p1) cleanly. https://docs.sqlalchemy.org/en/14/orm/extensions/associationproxy.html

like image 107
aaron Avatar answered Nov 15 '22 07:11

aaron