diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 8ac9501..13660a1 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -52,3 +52,14 @@ The fourth character of the block is a `_` filler. `argv[i]` is the value of the rowid or id to match against for the point query. The remaining 3 characters of the block are `_` fillers. + +#### `VEC0_IDXSTR_KIND_METADATA_CONSTRAINT` (`'&'`) + +`argv[i]` is the value of the `WHERE` constraint for a metadata column in a KNN query. + +The second character of the block denotes which metadata column the constraint belongs to, using `A` to denote the first metadata column, `B` for the second, etc. It is encoded with `'A' + metadata_idx` and can be decoded with `c - 'A'`. + +The third character of the block is the constraint operator. It will be one of `enum vec0_metadata_operator`, as only a subset of operators are supported on metadata column KNN filters. + +The fourth character of the block is a `_` filler. + diff --git a/TODO b/TODO index 9351d32..a46b9bc 100644 --- a/TODO +++ b/TODO @@ -18,6 +18,7 @@ - DELETE and UPDATE support - large strings - date/datetime -- `v in (...)` handling -- remaining TODO items -- dictionary encoding? +- later + - `v in (...)` handling + - remaining TODO items + - dictionary encoding? 
diff --git a/sqlite-vec.c b/sqlite-vec.c index 0a2f08d..f39bcc5 100644 --- a/sqlite-vec.c +++ b/sqlite-vec.c @@ -4074,7 +4074,12 @@ int vec0_result_metadata_value_for_rowid(vec0_vtab *p, i64 rowid, int metadata_i break; } case VEC0_METADATA_COLUMN_KIND_INT64: { - // TODO handle int64 values + i64 value; + rc = sqlite3_blob_read(blobValue, &value, sizeof(value), chunk_offset * sizeof(i64)); + if(rc != SQLITE_OK) { + goto done; + } + sqlite3_result_int64(context, value); break; } case VEC0_METADATA_COLUMN_KIND_FLOAT: { @@ -4087,7 +4092,13 @@ int vec0_result_metadata_value_for_rowid(vec0_vtab *p, i64 rowid, int metadata_i break; } case VEC0_METADATA_COLUMN_KIND_DOUBLE: { - // TODO handle double values + double value; + rc = sqlite3_blob_read(blobValue, &value, sizeof(value), chunk_offset * sizeof(double)); + if(rc != SQLITE_OK) { + goto done; + } + sqlite3_result_double(context, value); + break; break; } case VEC0_METADATA_COLUMN_KIND_TEXT: { @@ -5483,6 +5494,15 @@ static int vec0BestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pIdxInfo) { value = VEC0_METADATA_OPERATOR_NE; break; } + default: { + // IMP: V16511_00582 + rc = SQLITE_ERROR; + vtab_set_error(pVTab, + "An illegal WHERE constraint was provided on a vec0 metadata column in a KNN query. " + "Only one of EQUALS, GREATER_THAN, LESS_THAN_OR_EQUAL, LESS_THAN, GREATER_THAN_OR_EQUAL, NOT_EQUALS is allowed." 
+ ); + goto done; + } } if(value) { diff --git a/tests/__snapshots__/test-metadata.ambr b/tests/__snapshots__/test-metadata.ambr new file mode 100644 index 0000000..428e40b --- /dev/null +++ b/tests/__snapshots__/test-metadata.ambr @@ -0,0 +1,330 @@ +# serializer version: 1 +# name: test_constructor_limit[max 16 metadata columns] + dict({ + 'error': 'OperationalError', + 'message': 'vec0 constructor error: More than 16 metadata columns were provided', + }) +# --- +# name: test_knn.1 + dict({ + 'error': 'OperationalError', + 'message': 'An illegal WHERE constraint was provided on a vec0 metadata column in a KNN query. Only one of EQUALS, GREATER_THAN, LESS_THAN_OR_EQUAL, LESS_THAN, GREATER_THAN_OR_EQUAL, NOT_EQUALS is allowed.', + }) +# --- +# name: test_knn[sqlite_master] + OrderedDict({ + 'sql': "select * from sqlite_master where type = 'table' order by name", + 'rows': list([ + OrderedDict({ + 'type': 'table', + 'name': 'sqlite_sequence', + 'tbl_name': 'sqlite_sequence', + 'rootpage': 3, + 'sql': 'CREATE TABLE sqlite_sequence(name,seq)', + }), + OrderedDict({ + 'type': 'table', + 'name': 'v', + 'tbl_name': 'v', + 'rootpage': 0, + 'sql': 'CREATE VIRTUAL TABLE v using vec0(vector float[1], name text, chunk_size=8)', + }), + OrderedDict({ + 'type': 'table', + 'name': 'v_chunks', + 'tbl_name': 'v_chunks', + 'rootpage': 2, + 'sql': 'CREATE TABLE "v_chunks"(chunk_id INTEGER PRIMARY KEY AUTOINCREMENT,size INTEGER NOT NULL,validity BLOB NOT NULL,rowids BLOB NOT NULL)', + }), + OrderedDict({ + 'type': 'table', + 'name': 'v_metadata_chunks00', + 'tbl_name': 'v_metadata_chunks00', + 'rootpage': 7, + 'sql': 'CREATE TABLE "v_metadata_chunks00"(rowid PRIMARY KEY, data BLOB NOT NULL)', + }), + OrderedDict({ + 'type': 'table', + 'name': 'v_rowids', + 'tbl_name': 'v_rowids', + 'rootpage': 4, + 'sql': 'CREATE TABLE "v_rowids"(rowid INTEGER PRIMARY KEY AUTOINCREMENT,id,chunk_id INTEGER,chunk_offset INTEGER)', + }), + OrderedDict({ + 'type': 'table', + 'name': 'v_vector_chunks00', + 
'tbl_name': 'v_vector_chunks00', + 'rootpage': 5, + 'sql': 'CREATE TABLE "v_vector_chunks00"(rowid PRIMARY KEY,vectors BLOB NOT NULL)', + }), + ]), + }) +# --- +# name: test_normal.1 + dict({ + 'v_chunks': OrderedDict({ + 'sql': 'select * from v_chunks', + 'rows': list([ + ]), + }), + 'v_metadata_chunks00': OrderedDict({ + 'sql': 'select * from v_metadata_chunks00', + 'rows': list([ + ]), + }), + 'v_metadata_chunks01': OrderedDict({ + 'sql': 'select * from v_metadata_chunks01', + 'rows': list([ + ]), + }), + 'v_metadata_chunks02': OrderedDict({ + 'sql': 'select * from v_metadata_chunks02', + 'rows': list([ + ]), + }), + 'v_metadata_chunks03': OrderedDict({ + 'sql': 'select * from v_metadata_chunks03', + 'rows': list([ + ]), + }), + 'v_metadata_chunks04': OrderedDict({ + 'sql': 'select * from v_metadata_chunks04', + 'rows': list([ + ]), + }), + 'v_rowids': OrderedDict({ + 'sql': 'select * from v_rowids', + 'rows': list([ + ]), + }), + 'v_vector_chunks00': OrderedDict({ + 'sql': 'select * from v_vector_chunks00', + 'rows': list([ + ]), + }), + }) +# --- +# name: test_normal.2 + OrderedDict({ + 'sql': 'insert into v(vector, n1, n2, f, d, t) values (?, ?, ?, ?, ?, ?)', + 'rows': list([ + ]), + }) +# --- +# name: test_normal.3 + OrderedDict({ + 'sql': 'insert into v(vector, n1, n2, f, d, t) values (?, ?, ?, ?, ?, ?)', + 'rows': list([ + ]), + }) +# --- +# name: test_normal.4 + OrderedDict({ + 'sql': 'insert into v(vector, n1, n2, f, d, t) values (?, ?, ?, ?, ?, ?)', + 'rows': list([ + ]), + }) +# --- +# name: test_normal.5 + OrderedDict({ + 'sql': 'select * from v', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'vector': b'\x11\x11\x11\x11', + 'n1': 1, + 'n2': 1, + 'f': 1.100000023841858, + 'd': 1.1, + 't': 'one', + }), + OrderedDict({ + 'rowid': 2, + 'vector': b'""""', + 'n1': 2, + 'n2': 2, + 'f': 2.200000047683716, + 'd': 2.2, + 't': 'two', + }), + OrderedDict({ + 'rowid': 3, + 'vector': b'3333', + 'n1': 3, + 'n2': 3, + 'f': 3.299999952316284, + 'd': 3.3, + 't': 
'three', + }), + ]), + }) +# --- +# name: test_normal.6 + dict({ + 'v_chunks': OrderedDict({ + 'sql': 'select * from v_chunks', + 'rows': list([ + OrderedDict({ + 'chunk_id': 1, + 'size': 8, + 'validity': b'\x07', + 'rowids': b'\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadata_chunks00': OrderedDict({ + 'sql': 'select * from v_metadata_chunks00', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\x01\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadata_chunks01': OrderedDict({ + 'sql': 'select * from v_metadata_chunks01', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadata_chunks02': OrderedDict({ + 'sql': 'select * from v_metadata_chunks02', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\xcd\xcc\x8c?\xcd\xcc\x0c@33S@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadata_chunks03': OrderedDict({ + 'sql': 'select * from v_metadata_chunks03', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'data': b'\x9a\x99\x99\x99\x99\x99\xf1?\x9a\x99\x99\x99\x99\x99\x01@ffffff\n@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_metadata_chunks04': OrderedDict({ + 'sql': 'select * from v_metadata_chunks04', + 'rows': list([ + 
OrderedDict({ + 'rowid': 1, + 'data': b'\x03\x00\x00\x00one?\x01\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00two@\x01\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00three\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + 'v_rowids': OrderedDict({ + 'sql': 'select * from v_rowids', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 0, + }), + OrderedDict({ + 'rowid': 2, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 1, + }), + OrderedDict({ + 'rowid': 3, + 'id': None, + 'chunk_id': 1, + 'chunk_offset': 2, + }), + ]), + }), + 'v_vector_chunks00': OrderedDict({ + 'sql': 'select * from v_vector_chunks00', + 'rows': list([ + OrderedDict({ + 'rowid': 1, + 'vectors': b'\x11\x11\x11\x11""""3333\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + }), + ]), + }), + }) +# --- +# name: test_normal[sqlite_master] + OrderedDict({ + 'sql': "select * from sqlite_master where type = 'table' order by name", + 'rows': list([ + OrderedDict({ + 'type': 'table', + 'name': 'sqlite_sequence', + 'tbl_name': 'sqlite_sequence', + 'rootpage': 3, + 'sql': 'CREATE TABLE sqlite_sequence(name,seq)', + }), + OrderedDict({ + 'type': 'table', + 'name': 'v', + 'tbl_name': 'v', + 'rootpage': 0, + 'sql': 'CREATE VIRTUAL TABLE v using vec0(vector float[1], n1 int, n2 int64, f float, d double, t text, chunk_size=8)', + }), + OrderedDict({ + 'type': 'table', + 'name': 'v_chunks', + 'tbl_name': 'v_chunks', + 'rootpage': 2, + 'sql': 'CREATE TABLE "v_chunks"(chunk_id INTEGER PRIMARY KEY AUTOINCREMENT,size INTEGER NOT NULL,validity BLOB NOT NULL,rowids BLOB NOT NULL)', + }), + OrderedDict({ + 'type': 'table', + 'name': 
'v_metadata_chunks00', + 'tbl_name': 'v_metadata_chunks00', + 'rootpage': 7, + 'sql': 'CREATE TABLE "v_metadata_chunks00"(rowid PRIMARY KEY, data BLOB NOT NULL)', + }), + OrderedDict({ + 'type': 'table', + 'name': 'v_metadata_chunks01', + 'tbl_name': 'v_metadata_chunks01', + 'rootpage': 9, + 'sql': 'CREATE TABLE "v_metadata_chunks01"(rowid PRIMARY KEY, data BLOB NOT NULL)', + }), + OrderedDict({ + 'type': 'table', + 'name': 'v_metadata_chunks02', + 'tbl_name': 'v_metadata_chunks02', + 'rootpage': 11, + 'sql': 'CREATE TABLE "v_metadata_chunks02"(rowid PRIMARY KEY, data BLOB NOT NULL)', + }), + OrderedDict({ + 'type': 'table', + 'name': 'v_metadata_chunks03', + 'tbl_name': 'v_metadata_chunks03', + 'rootpage': 13, + 'sql': 'CREATE TABLE "v_metadata_chunks03"(rowid PRIMARY KEY, data BLOB NOT NULL)', + }), + OrderedDict({ + 'type': 'table', + 'name': 'v_metadata_chunks04', + 'tbl_name': 'v_metadata_chunks04', + 'rootpage': 15, + 'sql': 'CREATE TABLE "v_metadata_chunks04"(rowid PRIMARY KEY, data BLOB NOT NULL)', + }), + OrderedDict({ + 'type': 'table', + 'name': 'v_rowids', + 'tbl_name': 'v_rowids', + 'rootpage': 4, + 'sql': 'CREATE TABLE "v_rowids"(rowid INTEGER PRIMARY KEY AUTOINCREMENT,id,chunk_id INTEGER,chunk_offset INTEGER)', + }), + OrderedDict({ + 'type': 'table', + 'name': 'v_vector_chunks00', + 'tbl_name': 'v_vector_chunks00', + 'rootpage': 5, + 'sql': 'CREATE TABLE "v_vector_chunks00"(rowid PRIMARY KEY,vectors BLOB NOT NULL)', + }), + ]), + }) +# --- diff --git a/tests/test-metadata.py b/tests/test-metadata.py new file mode 100644 index 0000000..a967473 --- /dev/null +++ b/tests/test-metadata.py @@ -0,0 +1,113 @@ +import sqlite3 +from collections import OrderedDict + + +def test_constructor_limit(db, snapshot): + pass + assert exec( + db, + f""" + create virtual table v using vec0( + {",".join([f"metadata{x} integer" for x in range(17)])} + v float[1] + ) + """, + ) == snapshot(name="max 16 metadata columns") + + +def test_normal(db, snapshot): + db.execute( + 
"create virtual table v using vec0(vector float[1], n1 int, n2 int64, f float, d double, t text, chunk_size=8)" + ) + assert exec( + db, "select * from sqlite_master where type = 'table' order by name" + ) == snapshot(name="sqlite_master") + + assert vec0_shadow_table_contents(db, "v") == snapshot() + + INSERT = "insert into v(vector, n1, n2, f, d, t) values (?, ?, ?, ?, ?, ?)" + assert exec(db, INSERT, [b"\x11\x11\x11\x11", 1, 1, 1.1, 1.1, "one"]) == snapshot() + assert exec(db, INSERT, [b"\x22\x22\x22\x22", 2, 2, 2.2, 2.2, "two"]) == snapshot() + assert ( + exec(db, INSERT, [b"\x33\x33\x33\x33", 3, 3, 3.3, 3.3, "three"]) == snapshot() + ) + + assert exec(db, "select * from v") == snapshot() + assert vec0_shadow_table_contents(db, "v") == snapshot() + + +# +# assert exec(db, "select * from v") == snapshot() +# assert vec0_shadow_table_contents(db, "v") == snapshot() +# +# db.execute("drop table v;") +# assert exec(db, "select * from sqlite_master order by name") == snapshot( +# name="sqlite_master post drop" +# ) + + +def test_types(db, snapshot): + pass + + +def test_updates(db, snapshot): + pass + + +def test_deletes(db, snapshot): + pass + + +def test_knn(db, snapshot): + db.execute( + "create virtual table v using vec0(vector float[1], name text, chunk_size=8)" + ) + assert exec( + db, "select * from sqlite_master where type = 'table' order by name" + ) == snapshot(name="sqlite_master") + db.executemany( + "insert into v(vector, name) values (?, ?)", + [("[1]", "alex"), ("[2]", "brian"), ("[3]", "craig")], + ) + + # EVIDENCE-OF: V16511_00582 catches "illegal" constraints on metadata columns + assert ( + exec( + db, + "select *, distance from v where vector match '[5]' and k = 3 and name like 'illegal'", + ) + == snapshot() + ) + + +def exec(db, sql, parameters=[]): + try: + rows = db.execute(sql, parameters).fetchall() + except (sqlite3.OperationalError, sqlite3.DatabaseError) as e: + return { + "error": e.__class__.__name__, + "message": str(e), + } + a = [] 
+ for row in rows: + o = OrderedDict() + for k in row.keys(): + o[k] = row[k] + a.append(o) + result = OrderedDict() + result["sql"] = sql + result["rows"] = a + return result + + +def vec0_shadow_table_contents(db, v): + shadow_tables = [ + row[0] + for row in db.execute( + "select name from sqlite_master where name like ? order by 1", [f"{v}_%"] + ).fetchall() + ] + o = {} + for shadow_table in shadow_tables: + o[shadow_table] = exec(db, f"select * from {shadow_table}") + return o