From 238d1c713de3ca983e8f6066af6b9080c9b0e088 Mon Sep 17 00:00:00 2001 From: avi tal Date: Thu, 24 Oct 2024 09:09:51 +0300 Subject: [PATCH] Fixes #974 - returning score driven by cosine similarity by (1 - distance) instead of distance (#1048) Fixing the returned value of cosine similarity by (1 - distance) Co-authored-by: avi.tal --- vectorstores/pgvector/pgvector.go | 2 +- vectorstores/pgvector/pgvector_test.go | 45 ++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/vectorstores/pgvector/pgvector.go b/vectorstores/pgvector/pgvector.go index f8910de3a..7c8becb54 100644 --- a/vectorstores/pgvector/pgvector.go +++ b/vectorstores/pgvector/pgvector.go @@ -318,7 +318,7 @@ func (s Store) SimilaritySearch( SELECT data.document, data.cmetadata, - data.distance + (1 - data.distance) AS score FROM ( SELECT filtered_embedding_dims.*, diff --git a/vectorstores/pgvector/pgvector_test.go b/vectorstores/pgvector/pgvector_test.go index 112e45226..7e50f283b 100644 --- a/vectorstores/pgvector/pgvector_test.go +++ b/vectorstores/pgvector/pgvector_test.go @@ -179,6 +179,51 @@ func TestPgvectorStoreRestWithScoreThreshold(t *testing.T) { require.Len(t, docs, 10) } +func TestPgvectorStoreSimilarityScore(t *testing.T) { + t.Parallel() + pgvectorURL := preCheckEnvSetting(t) + ctx := context.Background() + + llm, err := openai.New( + openai.WithEmbeddingModel("text-embedding-ada-002"), + ) + require.NoError(t, err) + e, err := embeddings.NewEmbedder(llm) + require.NoError(t, err) + + conn, err := pgx.Connect(ctx, pgvectorURL) + require.NoError(t, err) + + store, err := pgvector.New( + ctx, + pgvector.WithConn(conn), + pgvector.WithEmbedder(e), + pgvector.WithPreDeleteCollection(true), + pgvector.WithCollectionName(makeNewCollectionName()), + ) + require.NoError(t, err) + + defer cleanupTestArtifacts(ctx, t, store, pgvectorURL) + + _, err = store.AddDocuments(context.Background(), []schema.Document{ + {PageContent: "Tokyo is the capital city of Japan."}, 
+ {PageContent: "Paris is the city of love."}, + {PageContent: "I like to visit London."}, + }) + require.NoError(t, err) + + // test with a score threshold of 0.8, expected 1 document + docs, err := store.SimilaritySearch( + ctx, + "What is the capital city of Japan?", + 3, + vectorstores.WithScoreThreshold(0.8), + ) + require.NoError(t, err) + require.Len(t, docs, 1) + require.True(t, docs[0].Score > 0.9) +} + func TestSimilaritySearchWithInvalidScoreThreshold(t *testing.T) { t.Parallel() pgvectorURL := preCheckEnvSetting(t)