I have implemented full-text search in my Rails app using PostgreSQL's built-in mechanism. I've created the following migration in order to ensure that my index is updated after each insert/update:
class AddFulltextIndexToTracks < ActiveRecord::Migration
def up
add_column :tracks, :search_vector, 'tsvector'
execute <<-SQL
CREATE INDEX tracks_search_idx
ON tracks
USING gin(search_vector);
SQL
execute <<-SQL
DROP TRIGGER IF EXISTS tracks_search_vector_update ON tracks;
SQL
execute <<-SQL
CREATE OR REPLACE FUNCTION tracks_search_vector_update_callback() RETURNS TRIGGER AS $$
DECLARE
new_tsvector tsvector;
BEGIN
IF TG_OP = 'INSERT' THEN
SELECT INTO new_tsvector
p_search.document
FROM (SELECT
NEW.id AS track_id,
to_tsvector('simple', coalesce(unaccent(NEW.title), '')) ||
to_tsvector('simple', coalesce(unaccent(NEW.original_file), '')) ||
to_tsvector('simple', coalesce(unaccent(NEW.metadata->'title'), '')) ||
to_tsvector('simple', coalesce(unaccent(NEW.metadata->'artist'), '')) ||
to_tsvector('simple', coalesce(unaccent(NEW.metadata->'album'), '')) ||
to_tsvector('simple', coalesce(string_agg(unaccent(track_tags.name), ' '), '')) as document
FROM tracks
LEFT OUTER JOIN track_tag_associations ON NEW.id = track_tag_associations.track_id
LEFT OUTER JOIN track_tags ON track_tag_associations.track_tag_id = track_tags.id
GROUP BY NEW.id) p_search
WHERE track_id = NEW.id;
new.search_vector = new_tsvector;
END IF;
IF TG_OP = 'UPDATE' THEN
SELECT INTO new_tsvector
p_search.document
FROM (SELECT
NEW.id AS track_id,
to_tsvector('simple', coalesce(unaccent(NEW.title), '')) ||
to_tsvector('simple', coalesce(unaccent(NEW.original_file), '')) ||
to_tsvector('simple', coalesce(unaccent(NEW.metadata->'title'), '')) ||
to_tsvector('simple', coalesce(unaccent(NEW.metadata->'artist'), '')) ||
to_tsvector('simple', coalesce(unaccent(NEW.metadata->'album'), '')) ||
to_tsvector('simple', coalesce(string_agg(unaccent(track_tags.name), ' '), '')) as document
FROM tracks
LEFT OUTER JOIN track_tag_associations ON NEW.id = track_tag_associations.track_id
LEFT OUTER JOIN track_tags ON track_tag_associations.track_tag_id = track_tags.id
GROUP BY NEW.id) p_search
WHERE track_id = NEW.id;
new.search_vector = new_tsvector;
END IF;
RETURN NEW;
END
$$ LANGUAGE 'plpgsql';
SQL
execute <<-SQL
CREATE TRIGGER tracks_search_vector_update
BEFORE INSERT OR UPDATE
ON tracks
FOR EACH ROW EXECUTE PROCEDURE
tracks_search_vector_update_callback();
SQL
Track.find_each { |r| r.touch }
end
def down
execute <<-SQL
DROP TRIGGER IF EXISTS tracks_search_vector_update on tracks;
SQL
execute <<-SQL
DROP FUNCTION IF EXISTS tracks_search_vector_update_callback();
SQL
remove_column :tracks, :search_vector, 'tsvector'
end
end
It works well in development/production.
The problem is that the trigger is not called when I am running specs (RSpec-based, Database Cleaner is used to cleanup the DB).
This is a full log of spec that makes an attempt to use full-text search index:
ActiveRecord::SchemaMigration Load (0.5ms) SELECT "schema_migrations".* FROM "schema_migrations"
(3.2ms) ALTER TABLE "schema_migrations" DISABLE TRIGGER ALL;ALTER TABLE "editors" DISABLE TRIGGER ALL;ALTER TABLE "editor_authentications" DISABLE TRIGGER ALL;ALTER TABLE "dashboard_wallpapers" DISABLE TRIGGER ALL;ALTER TABLE "plans" DISABLE TRIGGER ALL;ALTER TABLE "editor_channel_roles" DISABLE TRIGGER ALL;ALTER TABLE "track_tags_hierarchies" DISABLE TRIGGER ALL;ALTER TABLE "track_tags" DISABLE TRIGGER ALL;ALTER TABLE "channels" DISABLE TRIGGER ALL;ALTER TABLE "servers" DISABLE TRIGGER ALL;ALTER TABLE "track_tag_associations" DISABLE TRIGGER ALL;ALTER TABLE "track_broadcasts" DISABLE TRIGGER ALL;ALTER TABLE "track_uploads" DISABLE TRIGGER ALL;ALTER TABLE "tracks" DISABLE TRIGGER ALL
(2.0ms) select table_name from information_schema.views where table_schema = 'radiokitwebapp_test'
(28.2ms) TRUNCATE TABLE "editors", "editor_authentications", "dashboard_wallpapers", "plans", "editor_channel_roles", "track_tags_hierarchies", "track_tags", "channels", "servers", "track_tag_associations", "track_broadcasts", "track_uploads", "tracks" RESTART IDENTITY CASCADE;
(2.9ms) ALTER TABLE "schema_migrations" ENABLE TRIGGER ALL;ALTER TABLE "editors" ENABLE TRIGGER ALL;ALTER TABLE "editor_authentications" ENABLE TRIGGER ALL;ALTER TABLE "dashboard_wallpapers" ENABLE TRIGGER ALL;ALTER TABLE "plans" ENABLE TRIGGER ALL;ALTER TABLE "editor_channel_roles" ENABLE TRIGGER ALL;ALTER TABLE "track_tags_hierarchies" ENABLE TRIGGER ALL;ALTER TABLE "track_tags" ENABLE TRIGGER ALL;ALTER TABLE "channels" ENABLE TRIGGER ALL;ALTER TABLE "servers" ENABLE TRIGGER ALL;ALTER TABLE "track_tag_associations" ENABLE TRIGGER ALL;ALTER TABLE "track_broadcasts" ENABLE TRIGGER ALL;ALTER TABLE "track_uploads" ENABLE TRIGGER ALL;ALTER TABLE "tracks" ENABLE TRIGGER ALL
(0.1ms) BEGIN
Plan Exists (0.7ms) SELECT 1 AS one FROM "plans" WHERE "plans"."identifier" = 'small' LIMIT 1
SQL (0.6ms) INSERT INTO "plans" ("archive_capacity", "default", "drive_capacity", "identifier") VALUES ($1, $2, $3, $4) RETURNING "id" [["archive_capacity", 26843545600], ["default", "t"], ["drive_capacity", 10737418240], ["identifier", "small"]]
(0.1ms) COMMIT
(0.1ms) BEGIN
SQL (0.6ms) INSERT INTO "servers" ("address", "created_at", "name", "updated_at") VALUES ($1, $2, $3, $4) RETURNING "id" [["address", "254.199.64.7"], ["created_at", "2014-05-08 23:14:21.580465"], ["name", "server1"], ["updated_at", "2014-05-08 23:14:21.580465"]]
(0.1ms) COMMIT
(0.1ms) BEGIN
Plan Load (0.3ms) SELECT "plans".* FROM "plans" WHERE "plans"."id" = $1 LIMIT 1 [["id", 0]]
Plan Load (0.6ms) SELECT "plans".* FROM "plans" WHERE "plans"."default" = 't' ORDER BY "plans"."id" ASC LIMIT 1
SQL (1.0ms) INSERT INTO "channels" ("created_at", "name", "plan_id", "server_id", "updated_at") VALUES ($1, $2, $3, $4, $5) RETURNING "id" [["created_at", "2014-05-08 23:14:21.602073"], ["name", "eum"], ["plan_id", 1], ["server_id", 1], ["updated_at", "2014-05-08 23:14:21.602073"]]
SQL (0.3ms) UPDATE "servers" SET "channels_count" = COALESCE("channels_count", 0) + 1 WHERE "servers"."id" = 1
SQL (1.1ms) INSERT INTO "track_tags" ("channel_id", "created_at", "updated_at") VALUES ($1, $2, $3) RETURNING "id" [["channel_id", 1], ["created_at", "2014-05-08 23:14:21.641002"], ["updated_at", "2014-05-08 23:14:21.641002"]]
(0.3ms) SELECT pg_try_advisory_lock(2085799232), 1399590861.6440396
SQL (0.5ms) INSERT INTO "track_tags_hierarchies" ("ancestor_id", "descendant_id", "generations") VALUES ($1, $2, $3) [["ancestor_id", 1], ["descendant_id", 1], ["generations", 0]]
TrackTag Load (0.5ms) SELECT "track_tags".* FROM "track_tags" WHERE "track_tags"."parent_id" = $1 ORDER BY name [["parent_id", 1]]
(0.4ms) SELECT pg_advisory_unlock(2085799232), 1399590861.6652837
(0.3ms) COMMIT
TrackTag Load (0.4ms) SELECT "track_tags".* FROM "track_tags" WHERE "track_tags"."channel_id" = $1 ORDER BY "track_tags"."id" ASC LIMIT 1 [["channel_id", 1]]
(0.1ms) BEGIN
Channel Load (0.4ms) SELECT "channels".* FROM "channels" WHERE "channels"."id" = $1 LIMIT 1 [["id", 1]]
TrackTag Exists (0.3ms) SELECT 1 AS one FROM "track_tags" WHERE ("track_tags"."name" = 'taggy' AND "track_tags"."parent_id" = 1) LIMIT 1
SQL (0.3ms) INSERT INTO "track_tags" ("channel_id", "created_at", "name", "parent_id", "updated_at") VALUES ($1, $2, $3, $4, $5) RETURNING "id" [["channel_id", 1], ["created_at", "2014-05-08 23:14:21.678220"], ["name", "taggy"], ["parent_id", 1], ["updated_at", "2014-05-08 23:14:21.678220"]]
(0.2ms) SELECT pg_try_advisory_lock(2085799232), 1399590861.6800814
SQL (0.2ms) INSERT INTO "track_tags_hierarchies" ("ancestor_id", "descendant_id", "generations") VALUES ($1, $2, $3) [["ancestor_id", 2], ["descendant_id", 2], ["generations", 0]]
(0.4ms) INSERT INTO "track_tags_hierarchies"
(ancestor_id, descendant_id, generations)
SELECT x.ancestor_id, 2, x.generations + 1
FROM "track_tags_hierarchies" x
WHERE x.descendant_id = 1
TrackTag Load (0.3ms) SELECT "track_tags".* FROM "track_tags" WHERE "track_tags"."parent_id" = $1 ORDER BY name [["parent_id", 2]]
(0.1ms) SELECT pg_advisory_unlock(2085799232), 1399590861.6850104
(0.1ms) COMMIT
(0.2ms) BEGIN
SQL (4.8ms) INSERT INTO "tracks" ("channel_id", "created_at", "metadata", "original_file", "original_file_mime_type", "original_file_size", "title", "updated_at") VALUES ($1, $2, $3, $4, $5, $6, $7, $8) RETURNING "id" [["channel_id", 1], ["created_at", "2014-05-08 23:14:21.710895"], ["metadata", ""], ["original_file", "sine-stereo-440hz-0point8-1sec-128kbit-cbr.mp3"], ["original_file_mime_type", "audio/mpeg"], ["original_file_size", 17135], ["title", "sine-stereo-440hz-0point8-1sec-128kbit-cbr"], ["updated_at", "2014-05-08 23:14:21.710895"]]
SQL (0.8ms) UPDATE "channels" SET drive_usage = drive_usage + 17135 WHERE "channels"."id" = 1
(0.2ms) COMMIT
(0.1ms) BEGIN
SQL (1.1ms) INSERT INTO "track_tag_associations" ("created_at", "track_id", "track_tag_id", "updated_at") VALUES ($1, $2, $3, $4) RETURNING "id" [["created_at", "2014-05-08 23:14:21.733305"], ["track_id", 1], ["track_tag_id", 2], ["updated_at", "2014-05-08 23:14:21.733305"]]
SQL (2.3ms) UPDATE "tracks" SET "updated_at" = '2014-05-08 23:14:21.739704' WHERE "tracks"."id" = 1
(0.1ms) COMMIT
SQL (1.1ms) UPDATE "tracks" SET "duration" = NULL, "metadata" = '', "analysis_state" = 0 WHERE "tracks"."id" = 1
(0.6ms) SELECT COUNT(*) FROM "tracks" WHERE (tracks.search_vector @@ to_tsquery('simple', 'abc+def') )
(1.8ms) ALTER TABLE "schema_migrations" DISABLE TRIGGER ALL;ALTER TABLE "editors" DISABLE TRIGGER ALL;ALTER TABLE "editor_authentications" DISABLE TRIGGER ALL;ALTER TABLE "dashboard_wallpapers" DISABLE TRIGGER ALL;ALTER TABLE "plans" DISABLE TRIGGER ALL;ALTER TABLE "editor_channel_roles" DISABLE TRIGGER ALL;ALTER TABLE "track_tags_hierarchies" DISABLE TRIGGER ALL;ALTER TABLE "track_tags" DISABLE TRIGGER ALL;ALTER TABLE "channels" DISABLE TRIGGER ALL;ALTER TABLE "servers" DISABLE TRIGGER ALL;ALTER TABLE "track_tag_associations" DISABLE TRIGGER ALL;ALTER TABLE "track_broadcasts" DISABLE TRIGGER ALL;ALTER TABLE "track_uploads" DISABLE TRIGGER ALL;ALTER TABLE "tracks" DISABLE TRIGGER ALL
(0.6ms) DELETE FROM "editors";
(0.3ms) DELETE FROM "editor_authentications";
(0.2ms) DELETE FROM "dashboard_wallpapers";
(0.2ms) DELETE FROM "plans";
(0.3ms) DELETE FROM "editor_channel_roles";
(0.1ms) DELETE FROM "track_tags_hierarchies";
(0.2ms) DELETE FROM "track_tags";
(0.2ms) DELETE FROM "channels";
(0.2ms) DELETE FROM "servers";
(0.2ms) DELETE FROM "track_tag_associations";
(0.5ms) DELETE FROM "track_broadcasts";
(0.4ms) DELETE FROM "track_uploads";
(0.2ms) DELETE FROM "tracks";
(1.5ms) ALTER TABLE "schema_migrations" ENABLE TRIGGER ALL;ALTER TABLE "editors" ENABLE TRIGGER ALL;ALTER TABLE "editor_authentications" ENABLE TRIGGER ALL;ALTER TABLE "dashboard_wallpapers" ENABLE TRIGGER ALL;ALTER TABLE "plans" ENABLE TRIGGER ALL;ALTER TABLE "editor_channel_roles" ENABLE TRIGGER ALL;ALTER TABLE "track_tags_hierarchies" ENABLE TRIGGER ALL;ALTER TABLE "track_tags" ENABLE TRIGGER ALL;ALTER TABLE "channels" ENABLE TRIGGER ALL;ALTER TABLE "servers" ENABLE TRIGGER ALL;ALTER TABLE "track_tag_associations" ENABLE TRIGGER ALL;ALTER TABLE "track_broadcasts" ENABLE TRIGGER ALL;ALTER TABLE "track_uploads" ENABLE TRIGGER ALL;ALTER TABLE "tracks" ENABLE TRIGGER ALL
While triggers are initially disabled, due to Database Cleaner activity, they are enabled again before the actual test runs.
In development/production after creating/updating a record, Track#search_vector contains data. In test it is always nil.
What am I doing wrong?
Be sure to use a :deletion
strategy (DatabaseCleaner.clean_with(:deletion)
and DatabaseCleaner.strategy = :deletion
). If you use :truncate
, my guess is that it will basically recreate the table, and in doing so won't re-build the table trigger.
See Postgresql Truncation speed
A TRUNCATE gives you a completely new table and indexes as if they were just CREATEed. It's like you deleted all the records, reindexed the table and did a vacuum full.
I hope this works for you - works for me.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With