From 148b5f366d3425290a084cf055a51e8922fb8a4c Mon Sep 17 00:00:00 2001 From: brianheineman Date: Sun, 4 Aug 2024 12:50:32 -0600 Subject: [PATCH] docs: add PortalCorp example for pgvector --- Cargo.lock | 52 +++++-- Cargo.toml | 1 + examples/portal_corp_extension/Cargo.toml | 17 +++ examples/portal_corp_extension/src/main.rs | 130 ++++++++++++++++++ .../Cargo.toml | 2 +- .../src/main.rs | 10 +- 6 files changed, 192 insertions(+), 20 deletions(-) create mode 100644 examples/portal_corp_extension/Cargo.toml create mode 100644 examples/portal_corp_extension/src/main.rs rename examples/{vector_extension => tensor_chord_extension}/Cargo.toml (93%) rename examples/{vector_extension => tensor_chord_extension}/src/main.rs (96%) diff --git a/Cargo.lock b/Cargo.lock index fe255b3..4ddf5c1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1511,6 +1511,15 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +[[package]] +name = "pgvector" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0e8871b6d7ca78348c6cd29b911b94851f3429f0cd403130ca17f26c1fb91a6" +dependencies = [ + "sqlx", +] + [[package]] name = "phf" version = "0.11.2" @@ -1595,6 +1604,21 @@ dependencies = [ "plotters-backend", ] +[[package]] +name = "portal_corp_extension" +version = "0.15.0" +dependencies = [ + "anyhow", + "indoc", + "pgvector", + "postgresql_embedded", + "postgresql_extensions", + "sqlx", + "tokio", + "tracing", + "tracing-subscriber", +] + [[package]] name = "postgres" version = "0.19.8" @@ -2600,6 +2624,20 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "tensor_chord_extension" +version = "0.15.0" +dependencies = [ + "anyhow", + "indoc", + "postgresql_embedded", + "postgresql_extensions", + "sqlx", + "tokio", + "tracing", + "tracing-subscriber", +] + [[package]] name = "test-log" version = "0.2.16" @@ -2965,20 +3003,6 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" -[[package]] -name = "vector_extension" -version = "0.15.0" -dependencies = [ - "anyhow", - "indoc", - "postgresql_embedded", - "postgresql_extensions", - "sqlx", - "tokio", - "tracing", - "tracing-subscriber", -] - [[package]] name = "version_check" version = "0.9.5" diff --git a/Cargo.toml b/Cargo.toml index 92217f4..0fe44da 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,6 +36,7 @@ human_bytes = { version = "0.4.3", default-features = false } indoc = "2.0.5" md-5 = "0.10.6" num-format = "0.4.4" +pgvector = "0.4.0" quick-xml = "0.36.1" rand = "0.8.5" regex = "1.10.6" diff --git a/examples/portal_corp_extension/Cargo.toml b/examples/portal_corp_extension/Cargo.toml new file mode 100644 index 0000000..d3f920c --- /dev/null +++ b/examples/portal_corp_extension/Cargo.toml @@ -0,0 +1,17 @@ +[package] +edition.workspace = true +name = "portal_corp_extension" +publish = false +license.workspace = true +version.workspace = true + +[dependencies] +anyhow = { workspace = true } +indoc = { workspace = true } +pgvector = { workspace = true, features = ["sqlx"] } +postgresql_embedded = { path = "../../postgresql_embedded" } +postgresql_extensions = { path = "../../postgresql_extensions" } +sqlx = { workspace = true, features = ["runtime-tokio"] } +tracing = { workspace = true } +tracing-subscriber = { workspace = true } +tokio = { workspace = true, features = ["full"] } diff --git a/examples/portal_corp_extension/src/main.rs b/examples/portal_corp_extension/src/main.rs new file mode 100644 index 0000000..0599f55 --- /dev/null +++ b/examples/portal_corp_extension/src/main.rs @@ -0,0 +1,130 @@ +#![forbid(unsafe_code)] +#![deny(clippy::pedantic)] + +use anyhow::Result; +use indoc::indoc; +use pgvector::Vector; +use sqlx::{PgPool, Row}; +use tracing::info; + +use postgresql_embedded::{PostgreSQL, Settings, VersionReq}; + +/// Example of how to install and configure the portal corp pgvector extension. +/// +/// See: +#[tokio::main] +async fn main() -> Result<()> { + tracing_subscriber::fmt().compact().init(); + + info!("Installing PostgreSQL"); + let settings = Settings { + version: VersionReq::parse("=16.3.0")?, + ..Default::default() + }; + let mut postgresql = PostgreSQL::new(settings); + postgresql.setup().await?; + + info!("Installing the vector extension from PortalCorp"); + postgresql_extensions::install( + postgresql.settings(), + "portal-corp", + "pgvector_compiled", + &VersionReq::parse("=0.16.12")?, + ) + .await?; + + info!("Starting PostgreSQL"); + postgresql.start().await?; + + let database_name = "vector-example"; + info!("Creating database {database_name}"); + postgresql.create_database(database_name).await?; + + info!("Configuring extension"); + let settings = postgresql.settings(); + let database_url = settings.url(database_name); + let pool = PgPool::connect(database_url.as_str()).await?; + // configure_extension(&pool).await?; + pool.close().await; + + info!("Restarting database"); + postgresql.stop().await?; + postgresql.start().await?; + + info!("Enabling extension"); + let pool = PgPool::connect(database_url.as_str()).await?; + enable_extension(&pool).await?; + + info!("Creating table"); + create_table(&pool).await?; + + info!("Creating data"); + create_data(&pool).await?; + + info!("Get the nearest neighbors by L2 distance"); + execute_query( + &pool, + "SELECT * FROM items ORDER BY embedding <-> '[3,1,2]' LIMIT 5", + ) + .await?; + + info!("Stopping database"); + postgresql.stop().await?; + Ok(()) +} + +async fn enable_extension(pool: &PgPool) -> Result<()> { + sqlx::query("DROP EXTENSION IF EXISTS vector") + .execute(pool) + .await?; + sqlx::query("CREATE EXTENSION IF NOT EXISTS vector") + .execute(pool) + .await?; + Ok(()) +} + +async fn create_table(pool: &PgPool) -> Result<()> { + sqlx::query(indoc! {" + CREATE TABLE IF NOT EXISTS items ( + id bigserial PRIMARY KEY, + embedding vector(3) NOT NULL + ) + "}) + .execute(pool) + .await?; + Ok(()) +} + +async fn create_data(pool: &PgPool) -> Result<()> { + sqlx::query(indoc! {" + INSERT INTO items (embedding) + VALUES + ('[1,2,3]'), + ('[4,5,6]') + "}) + .execute(pool) + .await?; + Ok(()) +} + +async fn execute_query(pool: &PgPool, query: &str) -> Result<()> { + info!("Query: {query}"); + let rows = sqlx::query(query).fetch_all(pool).await?; + for row in rows { + let id: i64 = row.try_get("id")?; + let embedding: Vector = row.try_get("embedding")?; + info!("ID: {id}, Embedding: {embedding:?}"); + } + Ok(()) +} + +#[cfg(target_os = "linux")] +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_main() -> Result<()> { + main() + } +} diff --git a/examples/vector_extension/Cargo.toml b/examples/tensor_chord_extension/Cargo.toml similarity index 93% rename from examples/vector_extension/Cargo.toml rename to examples/tensor_chord_extension/Cargo.toml index 3af7994..b22cde8 100644 --- a/examples/vector_extension/Cargo.toml +++ b/examples/tensor_chord_extension/Cargo.toml @@ -1,6 +1,6 @@ [package] edition.workspace = true -name = "vector_extension" +name = "tensor_chord_extension" publish = false license.workspace = true version.workspace = true diff --git a/examples/vector_extension/src/main.rs b/examples/tensor_chord_extension/src/main.rs similarity index 96% rename from examples/vector_extension/src/main.rs rename to examples/tensor_chord_extension/src/main.rs index acf261e..6153e70 100644 --- a/examples/vector_extension/src/main.rs +++ b/examples/tensor_chord_extension/src/main.rs @@ -8,7 +8,7 @@ use tracing::info; use postgresql_embedded::{PostgreSQL, Settings, VersionReq}; -/// Example of how to install and configure the vector extension. +/// Example of how to install and configure the TensorChord vector extension. /// /// See: #[tokio::main] @@ -63,21 +63,21 @@ async fn main() -> Result<()> { info!("Squared Euclidean Distance"); execute_query( &pool, - "SELECT '[1, 2, 3]'::vector <-> '[3, 2, 1]'::vector AS value;", + "SELECT '[1, 2, 3]'::vector <-> '[3, 2, 1]'::vector AS value", ) .await?; info!("Negative Dot Product"); execute_query( &pool, - "SELECT '[1, 2, 3]'::vector <#> '[3, 2, 1]'::vector AS value;", + "SELECT '[1, 2, 3]'::vector <#> '[3, 2, 1]'::vector AS value", ) .await?; info!("Cosine Distance"); execute_query( &pool, - "SELECT '[1, 2, 3]'::vector <=> '[3, 2, 1]'::vector AS value;", + "SELECT '[1, 2, 3]'::vector <=> '[3, 2, 1]'::vector AS value", ) .await?; @@ -100,7 +100,7 @@ async fn enable_extension(pool: &PgPool) -> Result<()> { sqlx::query("DROP EXTENSION IF EXISTS vectors") .execute(pool) .await?; - sqlx::query("CREATE EXTENSION vectors") + sqlx::query("CREATE EXTENSION IF NOT EXISTS vectors") .execute(pool) .await?; Ok(())