From 7bbf4454fab079dc77e690a9d9db77c1f2f1cfd9 Mon Sep 17 00:00:00 2001 From: Wang Zuo <wzuoadjusted@gmail.com> Date: Wed, 19 Jul 2023 16:10:41 +0800 Subject: [PATCH 1/4] add vector type --- Makefile | 6 +- .../client/golang/templates/queryx/vector.go | 92 +++++++++++++++++++ .../queryx/vector_column.postgresql.go | 13 +++ .../golang/templates/queryx/vector_test.go | 39 ++++++++ inflect/golang.go | 23 ++--- internal/integration/client_test.go | 2 +- internal/integration/postgresql.hcl | 10 ++ .../integration/postgresql_client_test.go | 32 +++++++ schema/postgresql.go | 5 + 9 files changed, 207 insertions(+), 15 deletions(-) create mode 100644 generator/client/golang/templates/queryx/vector.go create mode 100644 generator/client/golang/templates/queryx/vector_column.postgresql.go create mode 100644 generator/client/golang/templates/queryx/vector_test.go create mode 100644 internal/integration/postgresql_client_test.go diff --git a/Makefile b/Makefile index 335e5c38..916b6275 100644 --- a/Makefile +++ b/Makefile @@ -16,9 +16,9 @@ clean: test-postgresql: install rm -rf internal/integration/db - cd internal/integration && QUERYX_ENV=test queryx db:drop --schema postgresql.hcl - cd internal/integration && QUERYX_ENV=test queryx db:create --schema postgresql.hcl - cd internal/integration && QUERYX_ENV=test queryx db:migrate --schema postgresql.hcl + # cd internal/integration && QUERYX_ENV=test queryx db:drop --schema postgresql.hcl + # cd internal/integration && QUERYX_ENV=test queryx db:create --schema postgresql.hcl + # cd internal/integration && QUERYX_ENV=test queryx db:migrate --schema postgresql.hcl cd internal/integration && QUERYX_ENV=test queryx generate --schema postgresql.hcl cd internal/integration && go test ./... # cd internal/integration && QUERYX_ENV=test queryx db:drop --schema postgresql.hcl diff --git a/generator/client/golang/templates/queryx/vector.go b/generator/client/golang/templates/queryx/vector.go new file mode 100644 index 00000000..e3596f2a --- /dev/null +++ b/generator/client/golang/templates/queryx/vector.go @@ -0,0 +1,92 @@ +package queryx + +import ( + "database/sql/driver" + "encoding/json" + "fmt" + "strconv" + "strings" +) + +type Vector struct { + Val []float32 + Null bool + Set bool +} + +func NewVector(v []float32) Vector { + return Vector{Val: v, Set: true} +} + +func NewNullableVector(v *[]float32) Vector { + if v != nil { + return Vector{Val: *v, Set: true} + } + return Vector{Null: true, Set: true} +} + +func (v Vector) String() string { + var buf strings.Builder + buf.WriteString("[") + + for i := 0; i < len(v.Val); i++ { + if i > 0 { + buf.WriteString(",") + } + buf.WriteString(strconv.FormatFloat(float64(v.Val[i]), 'f', -1, 32)) + } + + buf.WriteString("]") + return buf.String() +} + +func (v *Vector) Parse(s string) error { + v.Val = make([]float32, 0) + sp := strings.Split(s[1:len(s)-1], ",") + for i := 0; i < len(sp); i++ { + n, err := strconv.ParseFloat(sp[i], 32) + if err != nil { + return err + } + v.Val = append(v.Val, float32(n)) + } + return nil +} + +// Scan implements the Scanner interface. +func (v *Vector) Scan(src interface{}) (err error) { + switch src := src.(type) { + case []byte: + return v.Parse(string(src)) + case string: + return v.Parse(src) + default: + return fmt.Errorf("unsupported data type: %T", src) + } +} + +// Value implements the driver Valuer interface. +func (v Vector) Value() (driver.Value, error) { + return v.String(), nil +} + +// MarshalJSON implements the json.Marshaler interface. +func (v Vector) MarshalJSON() ([]byte, error) { + if v.Null { + return json.Marshal(nil) + } + return json.Marshal(v.Val) +} + +// UnmarshalJSON implements the json.Unmarshaler interface. +func (v *Vector) UnmarshalJSON(data []byte) error { + v.Set = true + if string(data) == "null" { + v.Null = true + return nil + } + if err := json.Unmarshal(data, &v.Val); err != nil { + return err + } + return nil +} diff --git a/generator/client/golang/templates/queryx/vector_column.postgresql.go b/generator/client/golang/templates/queryx/vector_column.postgresql.go new file mode 100644 index 00000000..a3e10435 --- /dev/null +++ b/generator/client/golang/templates/queryx/vector_column.postgresql.go @@ -0,0 +1,13 @@ +package queryx + +type VectorColumn struct { + Name string + Table *Table +} + +func (t *Table) NewVectorColumn(name string) *VectorColumn { + return &VectorColumn{ + Table: t, + Name: name, + } +} diff --git a/generator/client/golang/templates/queryx/vector_test.go b/generator/client/golang/templates/queryx/vector_test.go new file mode 100644 index 00000000..415207c3 --- /dev/null +++ b/generator/client/golang/templates/queryx/vector_test.go @@ -0,0 +1,39 @@ +package queryx + +import ( + "encoding/json" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestNewVector(t *testing.T) { + v1 := NewVector([]float32{1, 2, 3}) + require.Equal(t, []float32{1, 2, 3}, v1.Val) + require.Equal(t, false, v1.Null) + + v2 := NewNullableVector(nil) + require.Equal(t, true, v2.Null) +} + +func TestVectorJSON(t *testing.T) { + type Foo struct { + X Vector `json:"x"` + Y Vector `json:"y"` + } + x := NewVector([]float32{1, 2, 3}) + y := NewNullableVector(nil) + s := `{"x":[1,2,3],"y":null}` + + f1 := Foo{X: x, Y: y} + b, err := json.Marshal(f1) + require.NoError(t, err) + require.Equal(t, s, string(b)) + + var f2 Foo + err = json.Unmarshal([]byte(s), &f2) + require.NoError(t, err) + require.Equal(t, x, f2.X) + require.Equal(t, y, f2.Y) + +} diff --git a/inflect/golang.go b/inflect/golang.go index a91374df..a13339de 100644 --- a/inflect/golang.go +++ b/inflect/golang.go @@ -1,9 +1,5 @@ package inflect -import ( - "log" -) - // avoid go keyword with syntax error func goKeywordFix(s string) string { switch s { @@ -39,9 +35,10 @@ func goModelType(t string, null bool) string { return "queryx.Float" case "json", "jsonb": return "queryx.JSON" + case "vector(3)": + return "queryx.Vector" default: - log.Fatal("not found", t) - return "" + return t } } else { switch t { @@ -65,9 +62,10 @@ func goModelType(t string, null bool) string { return "float" case "json", "jsonb": return "queryx.JSON" + case "vector(3)": + return "queryx.Vector" default: - log.Fatal("not found", t) - return "" + return t } } } @@ -95,8 +93,10 @@ func goType(t string) string { return "Float" case "json", "jsonb": return "JSON" + case "vector(3)": + return "Vector" default: - return "" // TODO: raise error + return t } } @@ -125,8 +125,9 @@ func goChangeSetType(t string) string { return "float64" case "json", "jsonb": return "map[string]interface{}" + case "vector(3)": + return "[]float32" default: - log.Fatal("unknown type in goChangeSetType", t) // TODO: error handling - return "" + return t } } diff --git a/internal/integration/client_test.go b/internal/integration/client_test.go index c3f1c3c3..de29d20e 100644 --- a/internal/integration/client_test.go +++ b/internal/integration/client_test.go @@ -1,4 +1,4 @@ -package main +package integration import ( "database/sql" diff --git a/internal/integration/postgresql.hcl b/internal/integration/postgresql.hcl index 9a32eaa4..b3b67fc6 100644 --- a/internal/integration/postgresql.hcl +++ b/internal/integration/postgresql.hcl @@ -139,4 +139,14 @@ database "db" { columns = ["id"] } } + + model "Item" { + column "embedding" { + type = "vector(3)" + } + // column "embedding" { + // type = vector + // dimension = 3 + // } + } } diff --git a/internal/integration/postgresql_client_test.go b/internal/integration/postgresql_client_test.go new file mode 100644 index 00000000..9d8ebe0e --- /dev/null +++ b/internal/integration/postgresql_client_test.go @@ -0,0 +1,32 @@ +package integration + +import ( + "testing" + + "github.com/stretchr/testify/require" + "github.com/swiftcarrot/queryx/internal/integration/db/queryx" +) + +func TestVector(t *testing.T) { + _, err := c.QueryItem().DeleteAll() + require.NoError(t, err) + + item1, err := c.QueryItem().Create(c.ChangeItem().SetEmbedding([]float32{1, 2, 3})) + require.NoError(t, err) + require.Equal(t, item1.Embedding.Val, []float32{1, 2, 3}) + + item2, err := c.QueryItem().Create(c.ChangeItem().SetEmbedding([]float32{4, 5, 6})) + require.NoError(t, err) + require.Equal(t, item2.Embedding.Val, []float32{4, 5, 6}) + + type Foo struct { + embedding queryx.Vector `db:"embedding"` + } + var rows []Foo + err = c.Query("SELECT embedding FROM items ORDER BY embedding <-> '[3,1,2]'").Scan(&rows) + require.NoError(t, err) + require.Equal(t, []Foo{ + {queryx.NewVector([]float32{1, 2, 3})}, + {queryx.NewVector([]float32{4, 5, 6})}, + }, rows) +} diff --git a/schema/postgresql.go b/schema/postgresql.go index 44756bf8..c122d005 100644 --- a/schema/postgresql.go +++ b/schema/postgresql.go @@ -2,6 +2,7 @@ package schema import ( "fmt" + "log" "strconv" "strings" @@ -20,6 +21,8 @@ func (d *Database) CreatePostgreSQLSchema(dbName string) *schema.Schema { for _, c := range model.Columns { col := schema.NewColumn(c.Name) + log.Println(c.Type) + switch c.Type { case "bigint": if c.AutoIncrement { @@ -101,6 +104,8 @@ func (d *Database) CreatePostgreSQLSchema(dbName string) *schema.Schema { col.SetType(&postgres.UUIDType{T: postgres.TypeUUID}).SetDefault(&schema.RawExpr{X: d}) } } + default: + col.SetType(&postgres.UserDefinedType{T: c.Type}) } col.SetNull(c.Null) From ddc2643fe2ff543edb7227f696378c2d0a24fceb Mon Sep 17 00:00:00 2001 From: Wang Zuo <wzuoadjusted@gmail.com> Date: Wed, 19 Jul 2023 16:20:32 +0800 Subject: [PATCH 2/4] vector type with dimension in column --- Makefile | 6 +++--- inflect/golang.go | 8 ++++---- internal/integration/postgresql.hcl | 7 ++----- schema/hcl.go | 4 ++++ schema/model.go | 2 ++ schema/postgresql.go | 2 ++ 6 files changed, 17 insertions(+), 12 deletions(-) diff --git a/Makefile b/Makefile index 916b6275..335e5c38 100644 --- a/Makefile +++ b/Makefile @@ -16,9 +16,9 @@ clean: test-postgresql: install rm -rf internal/integration/db - # cd internal/integration && QUERYX_ENV=test queryx db:drop --schema postgresql.hcl - # cd internal/integration && QUERYX_ENV=test queryx db:create --schema postgresql.hcl - # cd internal/integration && QUERYX_ENV=test queryx db:migrate --schema postgresql.hcl + cd internal/integration && QUERYX_ENV=test queryx db:drop --schema postgresql.hcl + cd internal/integration && QUERYX_ENV=test queryx db:create --schema postgresql.hcl + cd internal/integration && QUERYX_ENV=test queryx db:migrate --schema postgresql.hcl cd internal/integration && QUERYX_ENV=test queryx generate --schema postgresql.hcl cd internal/integration && go test ./... # cd internal/integration && QUERYX_ENV=test queryx db:drop --schema postgresql.hcl diff --git a/inflect/golang.go b/inflect/golang.go index a13339de..d26005ab 100644 --- a/inflect/golang.go +++ b/inflect/golang.go @@ -35,7 +35,7 @@ func goModelType(t string, null bool) string { return "queryx.Float" case "json", "jsonb": return "queryx.JSON" - case "vector(3)": + case "vector": return "queryx.Vector" default: return t @@ -62,7 +62,7 @@ func goModelType(t string, null bool) string { return "float" case "json", "jsonb": return "queryx.JSON" - case "vector(3)": + case "vector": return "queryx.Vector" default: return t @@ -93,7 +93,7 @@ func goType(t string) string { return "Float" case "json", "jsonb": return "JSON" - case "vector(3)": + case "vector": return "Vector" default: return t @@ -125,7 +125,7 @@ func goChangeSetType(t string) string { return "float64" case "json", "jsonb": return "map[string]interface{}" - case "vector(3)": + case "vector": return "[]float32" default: return t diff --git a/internal/integration/postgresql.hcl b/internal/integration/postgresql.hcl index b3b67fc6..93b1b2fc 100644 --- a/internal/integration/postgresql.hcl +++ b/internal/integration/postgresql.hcl @@ -142,11 +142,8 @@ database "db" { model "Item" { column "embedding" { - type = "vector(3)" + type = vector + dimension = 1536 } - // column "embedding" { - // type = vector - // dimension = 3 - // } } } diff --git a/schema/hcl.go b/schema/hcl.go index e90d1744..8e0bf704 100644 --- a/schema/hcl.go +++ b/schema/hcl.go @@ -67,6 +67,7 @@ var hclColumn = &hcl.BodySchema{ {Name: "null"}, {Name: "default"}, {Name: "unique"}, + {Name: "dimension"}, }, Blocks: []hcl.BlockHeaderSchema{}, } @@ -476,6 +477,8 @@ func columnFromBlock(block *hcl.Block, ctx *hcl.EvalContext) (*Column, error) { column.Array = valueAsBool(value) case "null": column.Null = valueAsBool(value) + case "dimension": + column.Dimension = valueAsInt(value) } } for name, attr := range content.Attributes { @@ -560,6 +563,7 @@ func Parse(body hcl.Body) (*Schema, error) { "json": cty.StringVal("json"), "jsonb": cty.StringVal("jsonb"), "uuid": cty.StringVal("uuid"), + "vector": cty.StringVal("vector"), }, Functions: map[string]function.Function{ "env": env, diff --git a/schema/model.go b/schema/model.go index dac50a08..e7cf3196 100644 --- a/schema/model.go +++ b/schema/model.go @@ -51,6 +51,8 @@ type Column struct { // sql auto_increment AutoIncrement bool Default interface{} // TODO: support default + // vector dimension + Dimension int } type Type struct { diff --git a/schema/postgresql.go b/schema/postgresql.go index c122d005..7bf58cdc 100644 --- a/schema/postgresql.go +++ b/schema/postgresql.go @@ -104,6 +104,8 @@ func (d *Database) CreatePostgreSQLSchema(dbName string) *schema.Schema { col.SetType(&postgres.UUIDType{T: postgres.TypeUUID}).SetDefault(&schema.RawExpr{X: d}) } } + case "vector": + col.SetType(&postgres.UserDefinedType{T: fmt.Sprintf("vector(%d)", c.Dimension)}) default: col.SetType(&postgres.UserDefinedType{T: c.Type}) } From 39b13a61ba7df9cce3ecada70b012f96fcdaa85a Mon Sep 17 00:00:00 2001 From: Wang Zuo <wzuoadjusted@gmail.com> Date: Thu, 20 Jul 2023 11:08:22 +0800 Subject: [PATCH 3/4] postgres service in test with pgvector image --- .github/workflows/integration.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 575f0ed2..39d55ce3 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -5,7 +5,7 @@ jobs: runs-on: ubuntu-latest services: postgres: - image: postgres:14.2 + image: ankane/pgvector:0.4.4 env: POSTGRES_PASSWORD: postgres POSTGRES_USER: postgres From 45d2c8a87bbc4d3e640b570697d6afbd741ec26f Mon Sep 17 00:00:00 2001 From: Wang Zuo <wzuoadjusted@gmail.com> Date: Thu, 20 Jul 2023 11:09:46 +0800 Subject: [PATCH 4/4] fix image tag --- .github/workflows/integration.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 39d55ce3..5d2bd707 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -5,7 +5,7 @@ jobs: runs-on: ubuntu-latest services: postgres: - image: ankane/pgvector:0.4.4 + image: ankane/pgvector:v0.4.4 env: POSTGRES_PASSWORD: postgres POSTGRES_USER: postgres