-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathschema.sql
53 lines (50 loc) · 1.2 KB
/
schema.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
-- RUN 1st: create vector extension if not exists
CREATE EXTENSION IF NOT EXISTS vector;
-- RUN 2nd: create mp table
CREATE TABLE IF NOT EXISTS mp (
id BIGSERIAL PRIMARY KEY,
content_title TEXT,
content_url TEXT,
content_date TEXT,
content TEXT,
content_length BIGINT,
content_tokens BIGINT,
embedding VECTOR(1536)
);
-- RUN 3rd: create mp_search function
CREATE OR REPLACE FUNCTION mp_search (
query_embedding VECTOR(1536),
similarity_threshold FLOAT,
match_count INT
)
RETURNS TABLE (
id BIGINT,
content_title TEXT,
content_url TEXT,
content_date TEXT,
content TEXT,
content_length BIGINT,
content_tokens BIGINT,
similarity FLOAT
) LANGUAGE plpgsql AS $$
BEGIN
RETURN QUERY SELECT
mp.id,
mp.content_title,
mp.content_url,
mp.content_date,
mp.content,
mp.content_length,
mp.content_tokens,
1 - (mp.embedding <=> query_embedding) AS similarity
FROM mp
WHERE 1 - (mp.embedding <=> query_embedding) > similarity_threshold
ORDER BY mp.embedding <=> query_embedding DESC
LIMIT match_count;
END;
$$;
-- RUN 4th: create index for mp table
CREATE INDEX IF NOT EXISTS mp_embedding_index
ON mp
USING ivfflat (embedding vector_cosine_ops)
WITH (lists = 100);