Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ft embedder #9

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,6 @@ main
.DS_Store
*.local.*
tmp
scripts
input.txt
ollama/
46 changes: 46 additions & 0 deletions ai/ai.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
package ai

import (
"bytes"
"context"
"fmt"
"github.com/tmc/langchaingo/documentloaders"
"github.com/tmc/langchaingo/embeddings"
"github.com/tmc/langchaingo/llms/ollama"
"github.com/tmc/langchaingo/schema"
"github.com/tmc/langchaingo/textsplitter"
)

// GenerateEmbeddings returns the embedding vector for content.PageContent,
// computed with the "llama3.2" model through an Ollama client.
//
// A new Ollama client and embedder are constructed on every call. Failures are
// returned wrapped with the step that failed; nothing is printed here, so the
// caller decides how (and whether) to log the error.
func GenerateEmbeddings(ctx context.Context, content schema.Document) ([]float32, error) {
	llm, err := ollama.New(ollama.WithModel("llama3.2"))
	if err != nil {
		return nil, fmt.Errorf("creating ollama client: %w", err)
	}

	embedder, err := embeddings.NewEmbedder(llm)
	if err != nil {
		return nil, fmt.Errorf("creating embedder: %w", err)
	}

	vector, err := embedder.EmbedQuery(ctx, content.PageContent)
	if err != nil {
		return nil, fmt.Errorf("embedding document content: %w", err)
	}
	return vector, nil
}

// CreateChunks splits data into documents using a recursive character text
// splitter configured with a chunk size of maxChunkSize and a fixed overlap
// of 30 between consecutive chunks.
//
// ctx is forwarded to the loader so that cancellation and deadlines set by the
// caller are honored. On failure the error is returned wrapped and no
// documents are returned.
func CreateChunks(ctx context.Context, data string, maxChunkSize int) ([]schema.Document, error) {
	// Overlap shared by neighbouring chunks, in the splitter's size units.
	const chunkOverlap = 30

	loader := documentloaders.NewText(bytes.NewReader([]byte(data)))
	splitter := textsplitter.NewRecursiveCharacter(
		textsplitter.WithChunkSize(maxChunkSize),
		textsplitter.WithChunkOverlap(chunkOverlap),
	)

	docs, err := loader.LoadAndSplit(ctx, splitter)
	if err != nil {
		return nil, fmt.Errorf("loading and splitting text: %w", err)
	}
	return docs, nil
}
1 change: 1 addition & 0 deletions db/ai.sql.go
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
package db
123 changes: 123 additions & 0 deletions db/embeddings.sql.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

38 changes: 36 additions & 2 deletions db/migrations/20250118135633_init.sql
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,8 @@ CREATE INDEX IF NOT EXISTS idx_collection_workspace_favorite
CREATE TABLE IF NOT EXISTS collection_notes
(
id BIGSERIAL PRIMARY KEY,
collection_id BIGINT NOT NULL, -- Foreign key to collection table
note_id BIGINT NOT NULL, -- Foreign key to note table
collection_id BIGINT NOT NULL, -- Foreign key to collection table
note_id BIGINT NOT NULL, -- Foreign key to note table
trashed BOOLEAN DEFAULT FALSE, -- Trash flag

CONSTRAINT unique_collection_note_pair UNIQUE (collection_id, note_id)
Expand All @@ -144,6 +144,38 @@ CREATE INDEX IF NOT EXISTS idx_collection_notes_collection
CREATE INDEX IF NOT EXISTS idx_collection_notes_note
ON collection_notes (note_id);

-- Pending embedding work: one row per note whose content should be embedded.
-- Created with IF NOT EXISTS to match the other tables in this migration.
CREATE TABLE IF NOT EXISTS embedding_job
(
    id           BIGSERIAL PRIMARY KEY,
    note_id      INT    NOT NULL,
    workspace_id INT    NOT NULL,
    content      TEXT   NOT NULL,
    status       INT             DEFAULT 1, -- NOTE(review): meaning of the status codes is defined by the embedder; document them here
    attempts     INT             DEFAULT 0, -- Track retry attempts
    created      BIGINT NOT NULL DEFAULT 0,
    updated      BIGINT NOT NULL DEFAULT 0
);

-- Jobs are selected by status (see the GetEmbeddingJobsByStatus query), so index that column.
CREATE INDEX IF NOT EXISTS idx_embedding_job_status
ON embedding_job (status);

-- Extension that provides the VECTOR column type used by the embedding table below.
CREATE EXTENSION IF NOT EXISTS vector;

-- Embedding vectors for note chunks: one row per chunk.
-- Created with IF NOT EXISTS to match the other tables in this migration.
CREATE TABLE IF NOT EXISTS embedding
(
    id               BIGSERIAL PRIMARY KEY,
    note_id          INT    NOT NULL,
    workspace_id     INT    NOT NULL,
    chunk            TEXT,                             -- presumably the text the vector was computed from; verify against the embedder
    chunk_id         INT GENERATED ALWAYS AS IDENTITY, -- assigned by the database, never supplied on insert
    embedding_vector VECTOR(3072),                     -- dimension must equal the output length of the embedding model in use
    created          BIGINT NOT NULL DEFAULT 0,
    updated          BIGINT NOT NULL DEFAULT 0
);

-- Look up the embeddings that belong to a note.
-- (Replaces a verbatim duplicate of idx_embedding_job_status, which was a no-op.)
CREATE INDEX IF NOT EXISTS idx_embedding_note
    ON embedding (note_id);

-- Optional: Create a partial index to optimize queries that filter by trashed
CREATE INDEX IF NOT EXISTS idx_collection_notes_not_trashed
ON collection_notes (collection_id, note_id)
Expand All @@ -159,4 +191,6 @@ DROP TABLE IF EXISTS session;
DROP TABLE IF EXISTS user_workspaces;
DROP TABLE IF EXISTS "user";
DROP TABLE IF EXISTS workspace;
DROP TABLE IF EXISTS embedding;
DROP TABLE IF EXISTS embedding_job;
-- +goose StatementEnd
23 changes: 23 additions & 0 deletions db/models.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

26 changes: 26 additions & 0 deletions db/queries/embeddings.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
-- name: GetEmbeddingJobsByStatus :many
-- Returns all columns of every embedding_job row whose status equals $1.
SELECT id,
note_id,
workspace_id,
content,
status,
attempts,
created,
updated
FROM embedding_job
WHERE status = $1;

-- name: InsertEmbeddingJobs :one
-- Inserts one embedding job and returns the id of the new row.
-- status and attempts are not supplied, so they take their column defaults.
INSERT INTO embedding_job (note_id, workspace_id, content, created, updated)
VALUES ($1, $2, $3, $4, $5)
RETURNING id;

-- name: UpdateEmbeddingJobStatus :exec
-- Sets the status of the job with id $1 to $2.
-- NOTE(review): updated and attempts are left untouched by this statement; confirm that is intended.
UPDATE embedding_job
SET status = $2
WHERE id = $1;

-- name: InsertEmbeddings :one
-- Stores one chunk together with its vector and returns the id of the new row.
-- created and updated are not supplied, so they take their column default of 0.
INSERT INTO embedding (note_id, workspace_id, chunk, embedding_vector)
VALUES ($1, $2, $3, $4)
RETURNING id;
13 changes: 10 additions & 3 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ services:
volumes:
- pg_data:/var/lib/postgresql/data
healthcheck:
test: ["CMD-SHELL", "pg_isready -U maek"]
test: [ "CMD-SHELL", "pg_isready -U maek" ]
interval: 5s
timeout: 5s
retries: 5
Expand All @@ -24,10 +24,17 @@ services:
- POSTGRES_USER=maek
- POSTGRES_PASSWORD=passwd
healthcheck:
test: ["CMD-SHELL", "pg_isready -U maek"]
test: [ "CMD-SHELL", "pg_isready -U maek" ]
interval: 5s
timeout: 5s
retries: 5

# Ollama server; the ai package talks to Ollama to generate embeddings.
# NOTE(review): no volume is mounted, so downloaded models are presumably lost
# when the container is recreated; confirm whether a volume is wanted.
ollama:
image: ollama/ollama:0.5.12
ports:
# Publish container port 11434 on the same host port.
- "11434:11434"
restart: unless-stopped

volumes:
pg_data:
pg_data:

10 changes: 9 additions & 1 deletion domains/notes/upsert.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package notes
import (
"context"
"errors"
"github.com/karngyan/maek/embedder"

"github.com/jackc/pgx/v5"

Expand Down Expand Up @@ -114,7 +115,7 @@ func UpsertNote(ctx context.Context, req *UpsertNoteRequest) (*Note, error) {

note.ID = id
// exists; time to do an update
return q.UpdateNote(ctx, db.UpdateNoteParams{
err = q.UpdateNote(ctx, db.UpdateNoteParams{
UUID: nuuid,
MdContent: req.MdContent,
Content: req.Content,
Expand All @@ -134,7 +135,14 @@ func UpsertNote(ctx context.Context, req *UpsertNoteRequest) (*Note, error) {
WorkspaceID: note.WorkspaceID,
Updated: note.Updated,
})

if err != nil {
return err
}

return embedder.AddEmbeddingJobs(ctx, q, note.ID, note.WorkspaceID, req.Content)
})

if err != nil {
return nil, err
}
Expand Down
Loading
Loading