From fda21992dac8dde144e8280c53e9c9843a76b375 Mon Sep 17 00:00:00 2001 From: joschne Date: Thu, 20 Jun 2024 17:18:56 +0200 Subject: [PATCH] add logic to update community statements with cron job --- ...13085447-pgwar-community-statements-up.sql | 243 +++++++++++++++++- database/postgres/light.Dockerfile | 2 +- ...ted-full-texts-in-subjects-of-stmt-del.sql | 6 +- ...utdated-full-texts-in-subjects-of-stmt.sql | 6 +- ...date-community-statements-from-deletes.sql | 77 ++++++ ...date-community-statements-from-upserts.sql | 55 ++++ 6 files changed, 378 insertions(+), 11 deletions(-) create mode 100644 database/test/units/functions/pgwar-update-community-statements-from-deletes.sql create mode 100644 database/test/units/functions/pgwar-update-community-statements-from-upserts.sql diff --git a/database/migrations/sqls/20240613085447-pgwar-community-statements-up.sql b/database/migrations/sqls/20240613085447-pgwar-community-statements-up.sql index 0c7930f55..4257d2455 100644 --- a/database/migrations/sqls/20240613085447-pgwar-community-statements-up.sql +++ b/database/migrations/sqls/20240613085447-pgwar-community-statements-up.sql @@ -31,8 +31,8 @@ CREATE TABLE IF NOT EXISTS pgwar.community_statements_deleted( CREATE OR REPLACE FUNCTION pgwar.handle_community_statements_delete() RETURNS TRIGGER AS $$ BEGIN - -- Insert or update the deleted row in pgwar.community_statements - INSERT INTO pgwar.community_statements (pk_entity, fk_subject_info, fk_property, fk_object_info, object_value, tmsp_deletion) + -- Insert or update the deleted row in pgwar.community_statements_deleted + INSERT INTO pgwar.community_statements_deleted (pk_entity, fk_subject_info, fk_property, fk_object_info, object_value, tmsp_deletion) VALUES (OLD.pk_entity, OLD.fk_subject_info, OLD.fk_property, OLD.fk_object_info, OLD.object_value, CURRENT_TIMESTAMP) ON CONFLICT (pk_entity) DO UPDATE SET @@ -40,7 +40,7 @@ BEGIN fk_property = EXCLUDED.fk_property, fk_object_info = EXCLUDED.fk_object_info, object_value = EXCLUDED.object_value, - tmsp_deletion = EXCLUDED.tmsp_deletion; + tmsp_deletion = CURRENT_TIMESTAMP; RETURN OLD; END; @@ -104,4 +104,239 @@ SELECT fk_object_info, object_value, tmsp_deletion -FROM pgwar.project_statements_deleted; \ No newline at end of file +FROM pgwar.project_statements_deleted; + + +------ Table pgwar.offsets ------------------------------------------------------------------ +--------------------------------------------------------------------------------------------- +CREATE TABLE IF NOT EXISTS pgwar.offsets( + job_name text, + offset_tmsp timestamp with time zone, + PRIMARY KEY (job_name) +); + +CREATE INDEX IF NOT EXISTS project_statements_tmsp_last_modification_idx +ON pgwar.project_statements USING btree +(tmsp_last_modification ASC NULLS LAST); + +------ Function to update community statements from inserted or updated project statements -- +--------------------------------------------------------------------------------------------- +CREATE OR REPLACE FUNCTION pgwar.update_community_statements_from_upserts() +RETURNS void AS $$ +DECLARE + _job_name text; +BEGIN + _job_name := 'update-community-statements-from-upserts'; + + -- initialize offset, if needed + IF NOT EXISTS( + SELECT offset_tmsp + FROM pgwar.offsets + WHERE job_name = _job_name + ) THEN + INSERT INTO pgwar.offsets (job_name, offset_tmsp) + VALUES (_job_name, '2000-01-01 00:00:00.000000+00'); + END IF; + + -- get current offset + WITH _offset AS ( + SELECT offset_tmsp + FROM pgwar.offsets + WHERE job_name = _job_name + ), + -- identify updated project statements + upserted_p_stmts AS ( + SELECT pk_entity, + max(tmsp_last_modification) new_offset_tmsp + FROM ( + SELECT pk_entity, tmsp_last_modification + FROM + pgwar.project_statements, + _offset + WHERE tmsp_last_modification > _offset.offset_tmsp + ORDER BY tmsp_last_modification ASC + ) AS modified + GROUP BY pk_entity + ), + insert_community_statements AS ( + -- insert or update community statements + INSERT INTO pgwar.community_statements ( + pk_entity, + fk_subject_info, + fk_property, + fk_object_info, + fk_object_tables_cell, + ord_num_of_domain, + ord_num_of_range, + object_label, + object_value, + tmsp_last_modification + ) + SELECT + p_stmt.pk_entity, + p_stmt.fk_subject_info, + p_stmt.fk_property, + p_stmt.fk_object_info, + p_stmt.fk_object_tables_cell, + avg(p_stmt.ord_num_of_domain) AS ord_num_of_domain, + avg(p_stmt.ord_num_of_range) AS ord_num_of_range, + p_stmt.object_label, + p_stmt.object_value, + upserted_p_stmts.new_offset_tmsp AS tmsp_last_modification + FROM pgwar.project_statements p_stmt, + upserted_p_stmts + WHERE p_stmt.pk_entity = upserted_p_stmts.pk_entity + GROUP BY + p_stmt.pk_entity, + p_stmt.fk_subject_info, + p_stmt.fk_property, + p_stmt.fk_object_info, + p_stmt.fk_object_tables_cell, + p_stmt.object_label, + p_stmt.object_value, + upserted_p_stmts.new_offset_tmsp + ON CONFLICT (pk_entity) + DO UPDATE SET + fk_subject_info = EXCLUDED.fk_subject_info, + fk_property = EXCLUDED.fk_property, + fk_object_info = EXCLUDED.fk_object_info, + fk_object_tables_cell = EXCLUDED.fk_object_tables_cell, + ord_num_of_domain = EXCLUDED.ord_num_of_domain, + ord_num_of_range = EXCLUDED.ord_num_of_range, + object_label = EXCLUDED.object_label, + object_value = EXCLUDED.object_value, + tmsp_last_modification = EXCLUDED.tmsp_last_modification + -- return the tmsp_last_modification which equals new_offset_tmsp + RETURNING tmsp_last_modification + ) + -- set the offset + UPDATE pgwar.offsets + SET offset_tmsp = new_offset.tmsp_last_modification + FROM ( + SELECT tmsp_last_modification + FROM insert_community_statements + LIMIT 1 + ) new_offset + WHERE job_name = _job_name; + +END; +$$ LANGUAGE plpgsql; + + +CREATE INDEX IF NOT EXISTS project_statements_deleted_tmsp_deletion_idx +ON pgwar.project_statements_deleted USING btree +(tmsp_deletion ASC NULLS LAST); + +------ Function to update community statements from deleted project statements -- +--------------------------------------------------------------------------------------------- +CREATE OR REPLACE FUNCTION pgwar.update_community_statements_from_deletes() +RETURNS void AS $$ +DECLARE + _job_name text; +BEGIN + _job_name := 'update-community-statements-from-deletes'; + + -- initialize offset, if needed + IF NOT EXISTS( + SELECT offset_tmsp + FROM pgwar.offsets + WHERE job_name = _job_name + ) THEN + INSERT INTO pgwar.offsets (job_name, offset_tmsp) + VALUES (_job_name, '2000-01-01 00:00:00.000000+00'); + END IF; + + -- get current offset + WITH _offset AS ( + SELECT offset_tmsp + FROM pgwar.offsets + WHERE job_name = _job_name + ), + -- identify updated project statements + deleted_p_stmts AS ( + SELECT + pk_entity, + max(tmsp_deletion) new_offset_tmsp + FROM ( + SELECT pk_entity, tmsp_deletion + FROM + pgwar.project_statements_deleted, + _offset + WHERE tmsp_deletion > _offset.offset_tmsp + ORDER BY tmsp_deletion ASC + ) AS modified + GROUP BY pk_entity + ), + insert_community_statements AS ( + -- insert or update community statements + INSERT INTO pgwar.community_statements ( + pk_entity, + fk_subject_info, + fk_property, + fk_object_info, + fk_object_tables_cell, + ord_num_of_domain, + ord_num_of_range, + object_label, + object_value, + tmsp_last_modification + ) + SELECT + p_stmt.pk_entity, + p_stmt.fk_subject_info, + p_stmt.fk_property, + p_stmt.fk_object_info, + p_stmt.fk_object_tables_cell, + avg(p_stmt.ord_num_of_domain) AS ord_num_of_domain, + avg(p_stmt.ord_num_of_range) AS ord_num_of_range, + p_stmt.object_label, + p_stmt.object_value, + deleted_p_stmts.new_offset_tmsp AS tmsp_last_modification + FROM pgwar.project_statements p_stmt, + deleted_p_stmts + WHERE p_stmt.pk_entity = deleted_p_stmts.pk_entity + GROUP BY + p_stmt.pk_entity, + p_stmt.fk_subject_info, + p_stmt.fk_property, + p_stmt.fk_object_info, + p_stmt.fk_object_tables_cell, + p_stmt.object_label, + p_stmt.object_value, + deleted_p_stmts.new_offset_tmsp + ON CONFLICT (pk_entity) + DO UPDATE SET + fk_subject_info = EXCLUDED.fk_subject_info, + fk_property = EXCLUDED.fk_property, + fk_object_info = EXCLUDED.fk_object_info, + fk_object_tables_cell = EXCLUDED.fk_object_tables_cell, + ord_num_of_domain = EXCLUDED.ord_num_of_domain, + ord_num_of_range = EXCLUDED.ord_num_of_range, + object_label = EXCLUDED.object_label, + object_value = EXCLUDED.object_value, + tmsp_last_modification = EXCLUDED.tmsp_last_modification + -- return the tmsp_last_modification which equals new_offset_tmsp + RETURNING tmsp_last_modification + ), + delete_community_statements AS ( + DELETE FROM pgwar.community_statements + WHERE pk_entity IN (SELECT pk_entity FROM deleted_p_stmts) + AND pk_entity NOT IN ( + SELECT DISTINCT ps.pk_entity + FROM pgwar.project_statements ps, + deleted_p_stmts d + WHERE ps.pk_entity = d.pk_entity + ) + ) + -- set the offset + UPDATE pgwar.offsets + SET offset_tmsp = new_offset.tmsp_last_modification + FROM ( + SELECT tmsp_last_modification + FROM insert_community_statements + LIMIT 1 + ) new_offset + WHERE job_name = _job_name; + +END; +$$ LANGUAGE plpgsql; \ No newline at end of file diff --git a/database/postgres/light.Dockerfile b/database/postgres/light.Dockerfile index a5a9246d1..9658ec470 100644 --- a/database/postgres/light.Dockerfile +++ b/database/postgres/light.Dockerfile @@ -1,4 +1,4 @@ -FROM ghcr.io/geovistory/toolbox-dev-db:s-2 +FROM ghcr.io/geovistory/toolbox-dev-db:s-3 RUN rm -Rf /logs RUN mkdir /logs diff --git a/database/test/units/functions/pgwar-get-outdated-full-texts-in-subjects-of-stmt-del.sql b/database/test/units/functions/pgwar-get-outdated-full-texts-in-subjects-of-stmt-del.sql index 5ad25b6ff..299e3ade2 100644 --- a/database/test/units/functions/pgwar-get-outdated-full-texts-in-subjects-of-stmt-del.sql +++ b/database/test/units/functions/pgwar-get-outdated-full-texts-in-subjects-of-stmt-del.sql @@ -71,10 +71,10 @@ VALUES (4, 44, 31, 522, 11, NULL), SELECT is( count(*)::int, - 3, - 'Assert the limit of 3 is respected' + 1, + 'Assert the limit of 1 is respected' ) -FROM pgwar.get_outdated_full_texts_in_subjects_of_stmt_del(3); +FROM pgwar.get_outdated_full_texts_in_subjects_of_stmt_del(1); SELECT * FROM finish(); diff --git a/database/test/units/functions/pgwar-get-outdated-full-texts-in-subjects-of-stmt.sql b/database/test/units/functions/pgwar-get-outdated-full-texts-in-subjects-of-stmt.sql index e72d0a318..19489f9ec 100644 --- a/database/test/units/functions/pgwar-get-outdated-full-texts-in-subjects-of-stmt.sql +++ b/database/test/units/functions/pgwar-get-outdated-full-texts-in-subjects-of-stmt.sql @@ -71,10 +71,10 @@ VALUES (4, 44, 31, 522, 11, NULL), SELECT is( count(*)::int, - 3, - 'Assert the limit of 3 is respected' + 1, + 'Assert the limit of 1 is respected' ) -FROM pgwar.get_outdated_full_texts_in_subjects_of_stmt(3); +FROM pgwar.get_outdated_full_texts_in_subjects_of_stmt(1); SELECT * FROM finish(); diff --git a/database/test/units/functions/pgwar-update-community-statements-from-deletes.sql b/database/test/units/functions/pgwar-update-community-statements-from-deletes.sql new file mode 100644 index 000000000..59d89aead --- /dev/null +++ b/database/test/units/functions/pgwar-update-community-statements-from-deletes.sql @@ -0,0 +1,77 @@ +BEGIN; + +SELECT plan(3); + +INSERT INTO pgwar.community_statements ( + pk_entity, + fk_subject_info, + fk_property, + fk_object_info, + ord_num_of_domain, + ord_num_of_range, + object_label + ) +VALUES + (1, 99, 22, 11, 0, 0, 'LABEL'), + (2, 66, 22, 11, 4, 3, 'LABEL'), + (3, 44, 22, 11, 4, 3, 'LABEL'); + + +INSERT INTO pgwar.project_statements ( + pk_entity, + fk_project, + fk_subject_info, + fk_property, + fk_object_info, + ord_num_of_domain, + ord_num_of_range, + object_label + ) +VALUES + (1, 31, 99, 22, 11, 4, 3, 'LABEL'), + (1, 32, 99, 22, 11, 8, 1, 'LABEL'), + (2, 31, 66, 22, 11, 4, 3, 'LABEL'), + (2, 32, 66, 22, 11, 4, 3, 'LABEL'); + + +INSERT INTO pgwar.project_statements_deleted ( + pk_entity, + fk_project, + fk_subject_info, + fk_property, + fk_object_info, + tmsp_deletion + ) +VALUES + (1, 8765, 99, 22, 11, CURRENT_TIMESTAMP), + (3, 8765, 44, 22, 11, CURRENT_TIMESTAMP); + + +-- run the update task +SELECT pgwar.update_community_statements_from_deletes(); + +SELECT is( + ord_num_of_domain, + 6::numeric, + 'Assert the ord nums are recalculated for community statement with pk_entity 1' +) +FROM pgwar.community_statements +WHERE pk_entity = 1; + +SELECT is_empty( + 'SELECT * FROM pgwar.community_statements WHERE pk_entity = 3;', + 'Assert community statement with pk_entity 3 is deleted' +); + +SELECT is( + offset_tmsp, + CURRENT_TIMESTAMP, + 'Assert that offset is updated' +) +FROM pgwar.offsets +WHERE job_name = 'update-community-statements-from-deletes'; + +SELECT * +FROM finish(); + +ROLLBACK; \ No newline at end of file diff --git a/database/test/units/functions/pgwar-update-community-statements-from-upserts.sql b/database/test/units/functions/pgwar-update-community-statements-from-upserts.sql new file mode 100644 index 000000000..a34299168 --- /dev/null +++ b/database/test/units/functions/pgwar-update-community-statements-from-upserts.sql @@ -0,0 +1,55 @@ +BEGIN; + +SELECT plan(3); + + +INSERT INTO pgwar.project_statements ( + pk_entity, + fk_project, + fk_subject_info, + fk_property, + fk_object_info, + ord_num_of_domain, + ord_num_of_range, + object_label + ) +VALUES + (1, 31, 99, 22, 11, 4, 3, 'LABEL'), + (1, 32, 99, 22, 11, 8, 1, 'LABEL'), + (2, 31, 66, 22, 11, 4, 3, 'LABEL'), + (2, 32, 66, 22, 11, 4, 3, 'LABEL'), + (3, 31, 44, 22, 11, 4, 3, 'LABEL'), + (3, 32, 44, 22, 11, 4, 3, 'LABEL'), + (3, 33, 44, 22, 11, 4, 3, 'LABEL'); + +-- run the update task +SELECT pgwar.update_community_statements_from_upserts(); + +SELECT is( + count(*), + 3::bigint, + 'Assert that 3 community statments were created' +) +FROM pgwar.community_statements; + +SELECT is( + ord_num_of_domain, + 6::numeric, + 'Assert that ord_num_of_domain is correctly calulated' +) +FROM pgwar.community_statements +WHERE pk_entity = 1; + + +SELECT is( + offset_tmsp, + CURRENT_TIMESTAMP, + 'Assert that offset is updated' +) +FROM pgwar.offsets +WHERE job_name = 'update-community-statements-from-upserts'; + +SELECT * +FROM finish(); + +ROLLBACK; \ No newline at end of file