使用分区表,提升全文搜索的效率。
落絮的所有查询请求都是按时间降序排序的,然而全文索引并不包含时间,使得数据库需要将所有匹配关键字的数据进行排序再返回。
使用分区表按年索引数据,对于近期有大量匹配的查询,将只需要查询最近一年的数据,会快很多。对于匹配很少的查询,我们逐年查询,会比较慢。
在 database
节指定最早支持的年份(必须拥有对应的分区表),默认为2016年。。
建立新的分区数据表(可以按需求多建立几年的分表):
create table messages_p (
group_id bigint not null references tg_groups (group_id),
msgid bigint not null,
from_user bigint,
from_user_name text not null,
text text not null,
created_at timestamp with time zone not null,
updated_at timestamp with time zone
) PARTITION BY RANGE (created_at);
CREATE TABLE messages_y2016 PARTITION OF messages_p
FOR VALUES FROM ('2016-01-01') TO ('2017-01-01');
CREATE TABLE messages_y2017 PARTITION OF messages_p
FOR VALUES FROM ('2017-01-01') TO ('2018-01-01');
CREATE TABLE messages_y2018 PARTITION OF messages_p
FOR VALUES FROM ('2018-01-01') TO ('2019-01-01');
CREATE TABLE messages_y2019 PARTITION OF messages_p
FOR VALUES FROM ('2019-01-01') TO ('2020-01-01');
CREATE TABLE messages_y2020 PARTITION OF messages_p
FOR VALUES FROM ('2020-01-01') TO ('2021-01-01');
CREATE TABLE messages_y2021 PARTITION OF messages_p
FOR VALUES FROM ('2021-01-01') TO ('2022-01-01');
CREATE TABLE messages_y2022 PARTITION OF messages_p
FOR VALUES FROM ('2022-01-01') TO ('2023-01-01');
CREATE TABLE messages_y2023 PARTITION OF messages_p
FOR VALUES FROM ('2023-01-01') TO ('2024-01-01');
导入数据:
insert into messages_p (group_id, msgid, from_user, from_user_name, text, created_at, updated_at)
select group_id, msgid, from_user, from_user_name, text, created_at, updated_at from messages;
删除旧表并重命名新表:
DROP TABLE messages;
ALTER TABLE messages_p RENAME TO messages;
创建索引:
create unique index messages_msgid_idx on messages (group_id, msgid, created_at DESC);
CREATE INDEX message_idx ON messages USING pgroonga (text) WITH (tokenizer='TokenNgram("report_source_location", true, "loose_blank", true)');
创建触发器:
CREATE TRIGGER table_updated AFTER INSERT
ON messages FOR EACH ROW EXECUTE PROCEDURE update_usernames();