diff --git a/astro.config.mjs b/astro.config.mjs index cde86425..f17cb1f9 100644 --- a/astro.config.mjs +++ b/astro.config.mjs @@ -65,6 +65,7 @@ export default defineConfig({ { label: 'Create your first graph', link: '/get-started' }, { label: 'Query & visualize your graph', link: '/get-started/cypher-intro' }, { label: 'Run prepared Cypher statements', link: '/get-started/prepared-statements' }, + { label: 'Scan data from various sources', link: '/get-started/scan'}, { label: 'Run graph algorithms', link: '/get-started/graph-algorithms' }, ] }, @@ -148,6 +149,7 @@ export default defineConfig({ { label: 'Go', link: '/client-apis/go' }, { label: 'C++', link: '/client-apis/cpp' }, { label: 'C', link: '/client-apis/c' }, + { label: 'WebAssembly', link: '/client-apis/wasm' }, { label: '.NET', link: '/client-apis/net', badge: { text: 'Community', variant: 'caution'}}, { label: 'Elixir', link: '/client-apis/elixir', badge: { text: 'Community', variant: 'caution'}} ], @@ -197,8 +199,9 @@ export default defineConfig({ ] }, { label: 'JSON', link: '/extensions/json' }, - { label: 'Iceberg', link: '/extensions/iceberg', badge: { text: 'New' }}, - { label: 'Delta Lake', link: '/extensions/delta', badge: { text: 'New' }}, + { label: 'Iceberg', link: '/extensions/iceberg' }, + { label: 'Delta Lake', link: '/extensions/delta' }, + { label: 'Full-text search', link: '/extensions/full-text-search', badge: { text: 'New' }}, ], autogenerate: { directory: 'reference' }, }, diff --git a/src/content/docs/client-apis/c.mdx b/src/content/docs/client-apis/c.mdx index fb4167c6..892dd0f8 100644 --- a/src/content/docs/client-apis/c.mdx +++ b/src/content/docs/client-apis/c.mdx @@ -73,4 +73,62 @@ And then link against `/libkuzu.so` (or `libkuzu.dylib`/`libkuzu.l The static library is more complicated (as noted above, it's recommended that you use CMake to handle the details) and is not installed by default, but all static libraries will be available in the build directory. 
-You need to define `KUZU_STATIC_DEFINE`, and link against the static kuzu library in `build/src`, as well as `antlr4_cypher`, `antlr4_runtime`, `brotlidec`, `brotlicommon`, `utf8proc`, `re2`, `serd`, `fastpfor`, `miniparquet`, `zstd`, `miniz`, `mbedtls`, `lz4` (all of which can be found in the third_party subdirectory of the CMake build directory. E.g. `build/third_party/zstd/libzstd.a`) and whichever standard library you're using. \ No newline at end of file +You need to define `KUZU_STATIC_DEFINE`, and link against the static Kùzu library in `build/src`, as well as `antlr4_cypher`, `antlr4_runtime`, `brotlidec`, `brotlicommon`, `utf8proc`, `re2`, `serd`, `fastpfor`, `miniparquet`, `zstd`, `miniz`, `mbedtls`, `lz4` (all of which can be found in the third_party subdirectory of the CMake build directory. E.g. `build/third_party/zstd/libzstd.a`) and whichever standard library you're using. + +## Handling Kùzu output using `kuzu_query_result_get_next()` + +For the examples in this section we will be using the following schema: +```cypher +CREATE NODE TABLE person(id INT64 PRIMARY KEY); +``` + +The `kuzu_query_result_get_next()` function returns a reference to the resulting flat tuple. Additionally, to reduce resource allocation all calls to `kuzu_query_result_get_next()` reuse the same +flat tuple object. This means that for a query result, each call to `kuzu_query_result_get_next()` actually overwrites the flat tuple previously returned by the previous call. 
+ +Thus, we recommend processing each tuple immediately before making the next call to `kuzu_query_result_get_next()`: + +```c +kuzu_query_result result; +kuzu_connection_query(conn, "MATCH (p:person) RETURN p.*", &result); +while (kuzu_query_result_has_next(&result)) { + kuzu_flat_tuple tuple; + kuzu_query_result_get_next(&result, &tuple); + do_something(tuple); +} +``` + +If you wish to process the tuples later, you must explicitly make a copy of each tuple: +```cpp +static kuzu_value* copy_flat_tuple(kuzu_flat_tuple* tuple, uint32_t tupleLen) { + kuzu_value* ret = malloc(sizeof(kuzu_value) * tupleLen); + for (uint32_t i = 0; i < tupleLen; i++) { + kuzu_flat_tuple_get_value(tuple, i, &ret[i]); + } + return ret; +} + +void mainFunction() { + kuzu_query_result result; + kuzu_connection_query(conn, "MATCH (p:person) RETURN p.*", &result); + + uint64_t num_tuples = kuzu_query_result_get_num_tuples(&result); + kuzu_value** tuples = (kuzu_value**)malloc(sizeof(kuzu_value*) * num_tuples); + for (uint64_t i = 0; i < num_tuples; ++i) { + kuzu_flat_tuple tuple; + kuzu_query_result_get_next(&result, &tuple); + tuples[i] = copy_flat_tuple(&tuple, kuzu_query_result_get_num_columns(&result)); + kuzu_flat_tuple_destroy(&tuple); + } + + for (uint64_t i = 0; i < num_tuples; ++i) { + for (uint64_t j = 0; j < kuzu_query_result_get_num_columns(&result); ++j) { + doSomething(tuples[i][j]); + kuzu_value_destroy(&tuples[i][j]); + } + free(tuples[i]); + } + + free((void*)tuples); + kuzu_query_result_destroy(&result); +} +``` diff --git a/src/content/docs/client-apis/cpp.mdx b/src/content/docs/client-apis/cpp.mdx index 0a60502f..4f65ff47 100644 --- a/src/content/docs/client-apis/cpp.mdx +++ b/src/content/docs/client-apis/cpp.mdx @@ -11,10 +11,71 @@ See the following link for the full documentation of the C++ API.
href="https://kuzudb.com/api-docs/cpp/annotated.html" /> +## Handling Kùzu output using `getNext()` + +For the examples in this section we will be using the following schema: +```cypher +CREATE NODE TABLE person(id INT64 PRIMARY KEY); +``` + +The `getNext()` function in a `QueryResult` returns a reference to the resulting `FlatTuple`. Additionally, to reduce resource allocation all calls to `getNext()` reuse the same +FlatTuple object. This means that for a `QueryResult`, each call to `getNext()` actually overwrites the `FlatTuple` previously returned by the previous call to `getNext()`. + +Thus, we don't recommend using `QueryResult` like this: + +```cpp +std::unique_ptr result = conn.query("MATCH (p:person) RETURN p.*"); +std::vector> tuples; +while (result->hasNext()) { + // Each call to getNext() actually returns a pointer to the same tuple object + tuples.emplace_back(result->getNext()); +} + +// This is wrong! +// The vector stores a bunch of pointers to the same underlying tuple object +for (const auto& resultTuple: tuples) { + doSomething(resultTuple); +} +``` + +Instead, we recommend processing each tuple immediately before making the next call to `getNext`: +```cpp +std::unique_ptr result = conn.query("MATCH (p:person) RETURN p.*"); +std::vector> tuples; +while (result->hasNext()) { + auto tuple = result->getNext(); + doSomething(tuple); +} +``` + +If you wish to process the tuples later, you must explicitly make a copy of each tuple: +```cpp +static decltype(auto) copyFlatTuple(kuzu::processor::FlatTuple* tuple) { + std::vector> ret; + for (uint32_t i = 0; i < tuple->len(); i++) { + ret.emplace_back(tuple->getValue(i)->copy()); + } + return ret; +} + +void mainFunction() { + std::unique_ptr result = conn->query("MATCH (p:person) RETURN p.*"); + std::vector>> tuples; + while (result->hasNext()) { + auto tuple = result->getNext(); + tuples.emplace_back(copyFlatTuple(tuple.get())); + } + for (const auto& tuple : tuples) { + doSomething(tuple); + } +} +``` + +## 
UDF API + In addition to interfacing with the database, the C++ API offers users the ability to define custom functions via User Defined Functions (UDFs), described below. -## UDF API Kùzu provides two interfaces that enable you to define your own custom scalar and vectorized functions. ### Scalar functions @@ -211,7 +272,7 @@ conn->createVectorizedFunction("addFour", &addFour); conn->query("MATCH (p:person) return addFour(p.age)"); ``` -#### Option 2. Vectorized function with input and return type in Cypher +#### Option 2. Vectorized function with input and return type in Cypher Create a vectorized function with input and return type in Cypher. ```cpp @@ -263,4 +324,4 @@ conn->query("MATCH (p:person) return addDate(p.birthdate, p.age)"); ## Linking -See the [C API Documentation](/client-apis/c#linking) for details as linking to the C++ API is more or less identical. \ No newline at end of file +See the [C API Documentation](/client-apis/c#linking) for details as linking to the C++ API is more or less identical. diff --git a/src/content/docs/client-apis/java.mdx b/src/content/docs/client-apis/java.mdx index a8dd9011..ac4c120f 100644 --- a/src/content/docs/client-apis/java.mdx +++ b/src/content/docs/client-apis/java.mdx @@ -10,3 +10,63 @@ See the following link for the full documentation of the Java API. title="Java API documentation" href="https://kuzudb.com/api-docs/java" /> + +## Handling Kùzu output using `getNext()` + +For the examples in this section we will be using the following schema: +```cypher +CREATE NODE TABLE person(id INT64 PRIMARY KEY); +``` + +The `getNext()` function in a `QueryResult` returns a reference to the resulting `FlatTuple`. Additionally, to reduce resource allocation all calls to `getNext()` reuse the same +FlatTuple object. This means that for a `QueryResult`, each call to `getNext()` actually overwrites the `FlatTuple` previously returned by the previous call to `getNext()`. 
+ +Thus, we don't recommend using `QueryResult` like this: + +```java +QueryResult result = conn.query("MATCH (p:person) RETURN p.*"); +List tuples = new ArrayList(); +while (result.hasNext()) { + // Each call to getNext() actually returns a reference to the same tuple object + tuples.add(result.getNext()); +} + +// This is wrong! +// The list stores a bunch of references to the same underlying tuple object +for (FlatTuple resultTuple: tuples) { + doSomething(resultTuple); +} +``` + +Instead, we recommend processing each tuple immediately before making the next call to `getNext`: +```java +QueryResult result = conn.query("MATCH (p:person) RETURN p.*"); +while (result.hasNext()) { + FlatTuple tuple = result.getNext(); + doSomething(tuple); +} +``` + +If you wish to process the tuples later, you must explicitly make a copy of each tuple: +```java +List copyFlatTuple(FlatTuple tuple, long tupleLen) throws ObjectRefDestroyedException { + List ret = new ArrayList(); + for (int i = 0; i < tupleLen; i++) { + ret.add(tuple.getValue(i).clone()); + } + return ret; +} + +void mainFunction() throws ObjectRefDestroyedException { + QueryResult result = conn.query("MATCH (p:person) RETURN p.*"); + List> tuples = new ArrayList>(); + while (result.hasNext()) { + FlatTuple tuple = result.getNext(); + tuples.add(copyFlatTuple(tuple, result.getNumColumns())); + } + + for (List tuple: tuples) { + doSomething(tuple); + } +} +``` diff --git a/src/content/docs/client-apis/wasm.mdx b/src/content/docs/client-apis/wasm.mdx new file mode 100644 index 00000000..990ca666 --- /dev/null +++ b/src/content/docs/client-apis/wasm.mdx @@ -0,0 +1,84 @@ +--- +title: WebAssembly (Wasm) +--- + +[WebAssembly](https://webassembly.org/), a.k.a. _Wasm_, is a standard defining any suitable low-level +programming language as a compilation target, enabling deployment of software within web browsers on a variety +of devices.
This page describes Kùzu's Wasm API, enabling Kùzu databases to run inside Wasm-capable +browsers. + +## Benefits of WASM + +Several benefits of Kùzu-Wasm are the following: + +- Fast, in-browser graph analysis without ever sending data to a server. +- Strong data privacy guarantees, as the data never leaves the browser. +- Real-time interactive in-browser graph analytics and visualization. + +## Installation + +```bash +npm i kuzu-wasm +``` + +## Example usage + +We provide a simple example to demonstrate how to use Kùzu-Wasm. In this example, we will create a simple graph and run a few simple queries. + +We provide three versions of this example: +- `browser_in_memory`: This example demonstrates how to use Kùzu-Wasm in a web browser with an in-memory filesystem. +- `browser_persistent`: This example demonstrates how to use Kùzu-Wasm in a web browser with a persistent IDBFS filesystem. +- `nodejs`: This example demonstrates how to use Kùzu-Wasm in Node.js. + +The example can be found in [the examples directory](https://github.com/kuzudb/kuzu/tree/master/tools/wasm/examples). + +## Understanding the package + +In this package, three different variants of WebAssembly modules are provided: +- **Default**: This is the default build of the WebAssembly module. It does not support multi-threading and uses Emscripten's default filesystem. This build has the smallest size and works in both Node.js and browser environments. It has the best compatibility and does not require cross-origin isolation. However, the performance may be limited due to the lack of multithreading support. This build is located at the root level of the package. +- **Multi-threaded**: This build supports multi-threading and uses Emscripten's default filesystem. This build has a larger size compared to the default build and only requires [cross-origin isolation](https://web.dev/articles/cross-origin-isolation-guide) in the browser environment. This build is located in the `multithreaded` directory. 
+- **Node.js**: This build is optimized for Node.js and uses Node.js's filesystem instead of Emscripten's default filesystem (`NODEFS` flag is enabled). This build also supports multi-threading. It is distributed as a CommonJS module rather than an ES module to maximize compatibility. This build is located in the `nodejs` directory. Note that this build only works in Node.js and does not work in the browser environment. + +In each variant, there are two different versions of the WebAssembly module: +- **Async**: This version of the module is the default version and each function call returns a Promise. This version dispatches all the function calls to the WebAssembly module to a Web Worker or Node.js worker thread to prevent blocking the main thread. However, this version may have a slight overhead due to the serialization and deserialization of the data required by the worker threads. This version is located at the root level of each variant (e.g., `kuzu-wasm`, `kuzu-wasm/multithreaded`, `kuzu-wasm/nodejs`). +- **Sync**: This version of the module is synchronous and does not require any callbacks (other than the module initialization). This version is good for scripting / CLI / prototyping purposes but is not recommended to be used in GUI applications or web servers because it may block the main thread and cause unexpected freezes. This alternative version is located in the `sync` directory of each variant (e.g., `kuzu-wasm/sync`, `kuzu-wasm/multithreaded/sync`, `kuzu-wasm/nodejs/sync`). + +Note that you cannot mix and match the variants and versions. For example, a `Database` object created with the default variant cannot be passed to a function in the multithreaded variant. Similarly, a `Database` object created with the async version cannot be passed to a function in the sync version. + +### Loading the Worker script (for async versions) +In each variant, the main module is bundled as one script file. However, the worker script is located in a separate file. 
The worker script is required to run the WebAssembly module in a Web Worker or Node.js worker thread. If you are using a build tool like Webpack, the worker script needs to be copied to the output directory. For example, in Webpack, you can use the `copy-webpack-plugin` to copy the worker script to the output directory. + +By default, the worker script is resolved under the same directory / URL prefix as the main module. If you want to change the location of the worker script, you can use pass the optional worker path parameter to the `setWorkerPath` function. For example: +```javascript +import kuzu from "kuzu-wasm"; +kuzu.setWorkerPath('path/to/worker.js'); +``` + +Note that this function must be called before any other function calls to the WebAssembly module. After the initialization is started, the worker script path cannot be changed and not finding the worker script will cause an error. + +For the Node.js variant, the worker script can be resolved automatically and you do not need to set the worker path. + +## API documentation +The API documentation can be found here: + +**Synchronous** version: [API documentation](https://kuzudb.com/api-docs/wasm/sync/) + +**Asynchronous** version: [API documentation](https://kuzudb.com/api-docs/wasm/async/) + +## Local development + +This section is relevant if you are interested in contributing to Kùzu's Wasm API. + +First, build the WebAssembly module: + +```bash +npm run build +``` + +This will build the WebAssembly module in the `release` directory and create a tarball ready for publishing under the current directory. + +You can run the tests as follows: + +```bash +npm test +``` diff --git a/src/content/docs/cypher/configuration.md b/src/content/docs/cypher/configuration.md index a970cb53..e52a7964 100644 --- a/src/content/docs/cypher/configuration.md +++ b/src/content/docs/cypher/configuration.md @@ -17,7 +17,6 @@ configuration **cannot** be used with other query clauses, such as `RETURN`. 
| `HOME_DIRECTORY`| system home directory | user home directory | | `FILE_SEARCH_PATH`| file search path | N/A | | `PROGRESS_BAR` | enable progress bar in CLI | false | -| `PROGRESS_BAR_TIME` | show progress bar after time in ms | 1000 | | `CHECKPOINT_THRESHOLD` | the WAL size threshold in bytes at which to automatically trigger a checkpoint | 16777216 (16MB) | | `WARNING_LIMIT` | maximum number of [warnings](/import#warnings-table-inspect-skipped-rows) that can be stored in a single connection. | 8192 | | `SPILL_TO_DISK` | spill data disk if there is not enough memory when running `COPY FROM (cannot be set to TRUE under in-memory or read-only mode) | true | diff --git a/src/content/docs/cypher/data-definition/create-table.md b/src/content/docs/cypher/data-definition/create-table.md index 4eaa533f..7607903e 100644 --- a/src/content/docs/cypher/data-definition/create-table.md +++ b/src/content/docs/cypher/data-definition/create-table.md @@ -75,32 +75,60 @@ Once you create node tables, you can define relationships between them using the The following statement adds to the catalog a `Follows` relationship table between `User` and `User` with one `date` property on the relationship. ```sql -CREATE REL TABLE Follows(FROM User TO User, since DATE) +CREATE REL TABLE Follows(FROM User TO User, since DATE); +``` + +Defining a rel table with multiple node table pairs is also possible. The following statement adds a `Knows` relationship table between two node table pairs: +1. `User` and `User` +2. `User` and `City` + +```sql +CREATE REL TABLE Knows(FROM User TO User, FROM User TO City); ``` :::caution[Notes] -- **Syntax**: There is no comma between the `FROM` and `TO` clauses. +- **Syntax**: There is no comma between the `FROM` and `TO` clauses, however a comma is needed between two node table pairs. - **Directionality**: Each relationship has a direction following the property graph model. 
So when `Follows` relationship records are added, each one has a specific source (FROM) node and a specific destination (TO) node. - **Primary keys**: You cannot define a primary key for relationship records. Each relationship gets a unique system-level edge ID, which are internally generated. You can check if two edges are the same, i.e., have the same edge ID, using the `=` and `!=` operator between the `ID()` function on two variables that bind to relationships. For example, you can query `MATCH (n1:User)-[r1:Follows]->(n2:User)<-[r2:Follows]-(n3:User) WHERE ID(r1) != ID(r2) RETURN *` to ensure that the same relationship does not bind to both r1 and r2. -- **Pairing**: A relationship can only be defined as being from one node table/label to one node table/label. - ::: +::: -### Relationship Multiplicities + +### Bulk insert to relationship table with multiple from-to pairs + +Internally, a relationship table with multiple from-to pairs creates a child table per from-to pair. In the example above, the following two child +tables are created internally +``` +Knows_User_User +Knows_User_City +``` + +When bulk inserting into a relationship table with multiple from-to pairs, users need to specify which child table to insert through `from, to` options. For example, the following two statements will bulk insert into the `Knows` relationship table. +``` +Copy Knows FROM 'knows_user_user.csv' (from='User', to='User'); +Copy Knows FROM 'knows_user_city.csv' (from='User', to='City'); +``` -For any relationship label E, e.g., , by default there can be multiple relationships from any node v both in the forward and backward direction. In database terminology, relationships are by default many-to-many. For example in the first Follows example above: (i) any User node v can follow multiple User nodes; and (ii) be followed by multiple User nodes.
You can also constrain the multiplicity to _at most 1_ (we don't yet support exactly 1 semantics as in foreign key constraints in relational systems) in either direction. You can restrict the multiplicities for two reasons: +Alternatively, you can also directly copy into a child table. Though this approach is not recommended and will be deprecated in the future release. +``` +Copy Knows_User_User FROM 'knows_user_user.csv'; +Copy Knows_User_City FROM 'knows_user_city.csv'; +``` + +### Relationship Multiplicities -1. Constraint: Multiplicities can serve as constraints you would like to enforce (e..g, you want Kùzu to error if an application tries to add a second relationship of a particular label to some node) -2. Performance: Kùzu can store 1-to-1, many-to-1, or 1-to-many relationships (explained momentarily) in more efficient/compressed format, which is also faster to scan. +For any relationship label E, e.g., , by default there can be multiple relationships from any node v both in the forward and backward direction. In database terminology, relationships are by default many-to-many. For example in the first `Follows` example above: (i) any `User` node `v` can follow multiple `User` nodes; and (ii) be followed by multiple `User` nodes. You can also constrain the multiplicity to _at most 1_ (we don't yet support "exactly 1" semantics as you may be used to via foreign key constraints in relational systems) in either direction. +:::note[Note] You can optionally declare the multiplicity of relationships by adding `MANY_MANY`, `ONE_MANY`, `MANY_ONE`, or `ONE_ONE` clauses to the end of the `CREATE REL TABLE` command. +::: + Below are a few examples: ```sql CREATE REL TABLE LivesIn(FROM User TO City, MANY_ONE) ``` -The DDL shown above indicates that `LivesIn` has n-1 multiplicity. This command enforces an additional constraint that each `User` node `v` might live in at most one `City` node (assuming our database has `City` nodes). 
It does not put any constraint in the "backward" direction, i.e., there can be multiple `User`s living in the same `City`. As another example to explain the semantics of multiplicity constraints in the presence of multiple node labels, consider the following: +The DDL shown above indicates that `LivesIn` has `n-1` multiplicity. This command enforces an additional constraint that each `User` node `v` might live in at most one `City` node (assuming our database has `City` nodes). It does not put any constraint in the "backward" direction, i.e., there can be multiple `User`s living in the same `City`. As another example to explain the semantics of multiplicity constraints in the presence of multiple node labels, consider the following: ```sql CREATE REL TABLE Likes(FROM Pet TO User, ONE_MANY) @@ -110,42 +138,21 @@ The DDL above indicates that `Likes` has 1-to-n multiplicity. This DDL command e In general in a relationship `E`'s multiplicity, if the "source side" is `ONE`, then for each node `v` that can be the destination of `E` relationships, `v` can have at most one backward edge. If the "destination side" is `ONE`, then each node `v` that can be the source of `E` relationships, `v` can have at most one forward edge. -## Create relationship table group - -You can use relationship table groups to gain added flexibility in your data modelling, by defining a relationship table with multiple node table pairs. This is done via the `CREATE REL TABLE GROUP` statement. This has a similar syntax to `CREATE REL TABLE`, but uses multiple `FROM ... TO ...` clauses. Internally, a relationship table group defines a relationship table for _each_ `FROM ... TO ...` block. Any query to a relationship table group is treated as a query on the union of _all_ relationship tables in the group. - +## Create relationship table group [Deprecated] :::note[Note] -Currently, Kùzu does not allow `COPY FROM` or `CREATE` using a relationship table group. 
You need to explicitly specify a relationship table -that you want to insert data into. +Relationship table group has been deprecated since our v0.8.0 release. Users can now define multiple node table pairs in rel table using multiple `FROM ... TO ...` clauses. ::: -```sql -CREATE REL TABLE GROUP Knows (FROM User To User, FROM User to City, year INT64); -``` -The statement above creates a Knows_User_User rel table and a Knows_User_City rel table. And a Knows rel table group refering these two rel tables. +You can use relationship table groups to gain added flexibility in your data modelling, by defining a relationship table with multiple node table pairs. This is done via the `CREATE REL TABLE GROUP` statement. This has a similar syntax to `CREATE REL TABLE`, but uses multiple `FROM ... TO ...` clauses. Internally, a relationship table group defines a relationship table for _each_ `FROM ... TO ...` block. Any query to a relationship table group is treated as a query on the union of _all_ relationship tables in the group. -```sql -CALL SHOW_TABLES() RETURN *; -``` -Output: +```sql +CREATE REL TABLE GROUP Knows (FROM User To User, FROM User to City, year INT64); ``` ----------------------------------------------- -| TableName | TableType | TableComment | ----------------------------------------------- -| Knows | REL_GROUP | | ----------------------------------------------- -| Knows_User_City | REL | | ----------------------------------------------- -| Knows_User_User | REL | | ----------------------------------------------- -| User | NODE | | ----------------------------------------------- -| City | NODE | | ----------------------------------------------- -``` + +The statement above creates a `Knows_User_User` rel table and a `Knows_User_City` rel table. And a `Knows` rel table group refering these two rel tables. A relationship table group can be used as a regular relationship table for querying purposes. 
@@ -173,4 +180,4 @@ CREATE NODE TABLE IF NOT EXISTS UW(ID INT64, PRIMARY KEY(ID)) ``` This query tells Kùzu to only create the `UW` table if it doesn't exist. -The same applies to relationship tables as well. \ No newline at end of file +The same applies to relationship tables as well. diff --git a/src/content/docs/cypher/query-clauses/call.md b/src/content/docs/cypher/query-clauses/call.md index 23fd4401..f2bb8192 100644 --- a/src/content/docs/cypher/query-clauses/call.md +++ b/src/content/docs/cypher/query-clauses/call.md @@ -19,6 +19,9 @@ The following tables lists the built-in schema functions you can use with the `C | `SHOW_WARNINGS()` | returns the contents of the [Warnings Table](/import#warnings-table-inspecting-skipped-rows) | | `CLEAR_WARNINGS()` | clears all warnings in the [Warnings Table](/import#warnings-table-inspecting-skipped-rows) | | `TABLE_INFO('tableName')` | returns metadata information of the given table | +| `SHOW_OFFICIAL_EXTENSIONS` | returns all official [extensions](/extensions) which can be installed by `INSTALL ` | +| `SHOW_LOADED_EXTENSIONS` | returns all loaded extensions | +| `SHOW_INDEXES` | returns all indexes built in the system | ### TABLE_INFO @@ -198,3 +201,151 @@ This function has no output. ```cypher CALL clear_warnings(); ``` + +### SHOW_OFFICIAL_EXTENSIONS +If you would like to know all official [extensions](../../extensions) available in Kùzu, you can run the `SHOW_OFFICIAL_EXTENSIONS` function. 
+ +| Column | Description | Type | +| ------ | ----------- | ---- | +| name | name of the extension | STRING | +| description | description of the extension | STRING | + +```cypher +CALL SHOW_OFFICIAL_EXTENSIONS() RETURN *; +``` + +Output: +``` +┌──────────┬─────────────────────────────────────────────────────────────────────────┐ +│ name │ description │ +│ STRING │ STRING │ +├──────────┼─────────────────────────────────────────────────────────────────────────┤ +│ SQLITE │ Adds support for reading from SQLITE tables │ +│ JSON │ Adds support for JSON operations │ +│ ICEBERG │ Adds support for reading from iceberg tables │ +│ HTTPFS │ Adds support for reading and writing files over a HTTP(S)/S3 filesystem │ +│ DELTA │ Adds support for reading from delta tables │ +│ POSTGRES │ Adds support for reading from POSTGRES tables │ +│ FTS │ Adds support for full-text search indexes │ +│ DUCKDB │ Adds support for reading from duckdb tables │ +└──────────┴─────────────────────────────────────────────────────────────────────────┘ +``` + +### SHOW_LOADED_EXTENSIONS +If you would like to know information about loaded extensions in Kùzu, you can run the `SHOW_LOADED_EXTENSIONS` function. 
+ +| Column | Description | Type | +| ------ | ----------- | ---- | +| extension name | name of the extension | STRING | +| extension source | whether the extension is officially supported by Kùzu Inc., or developed by a third-party | STRING | +| extension path | the path to the extension | STRING | + +```cypher +CALL SHOW_LOADED_EXTENSIONS() RETURN *; +``` + +``` +┌────────────────┬──────────────────┬─────────────────────────────────────────────────────────────────────────────┐ +│ extension name │ extension source │ extension path │ +│ STRING │ STRING │ STRING │ +├────────────────┼──────────────────┼─────────────────────────────────────────────────────────────────────────────┤ +│ FTS │ OFFICIAL │ extension/fts/build/libfts.kuzu_extension │ +└────────────────┴──────────────────┴─────────────────────────────────────────────────────────────────────────────┘ +``` + +### SHOW_INDEXES +If you would like to know information about indexes built in kuzu, you can run the `SHOW_INDEXES` function. + +| Column | Description | Type | +| ------ | ----------- | ---- | +| table name | the table which the index is built on | STRING | +| index name | the name of the index | STRING | +| index type | the type of the index (e.g. FTS, HNSW) | STRING | +| property names | the properties which the index is built on | STRING[] | +| extension loaded | whether the depended extension has been loaded | BOOL | +| index definition | the cypher query to create the index | STRING | + +Note: +Some indexes are implemented within extensions. If a required extension is not loaded, the extension loaded field will display false, and the index definition field will be null. 
+ +```cypher +CALL SHOW_INDEXES() RETURN *; +``` + +``` +┌────────────┬────────────┬────────────┬─────────────────────────┬──────────────────┬──────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ table name │ index name │ index type │ property names │ extension loaded │ index definition │ +│ STRING │ STRING │ STRING │ STRING[] │ BOOL │ STRING │ +├────────────┼────────────┼────────────┼─────────────────────────┼──────────────────┼──────────────────────────────────────────────────────────────────────────────────────────────────┤ +│ book │ bookIdx │ FTS │ [abstract,author,title] │ True │ CALL CREATE_FTS_INDEX('book', 'bookIdx', ['abstract', 'author', 'title' ], stemmer := 'porter'); │ +└────────────┴────────────┴────────────┴─────────────────────────┴──────────────────┴──────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +### Using yield +The `YIELD` clause in Kuzu is used to rename the return columns of a CALL function to avoid naming conflicition and better readability. +Usage: +``` +CALL FUNC() +YIELD COLUMN0 [AS ALIAS0], COLUMN1 [AS ALIAS1] +RETURN ALIAS0, ALIAS1 +``` + +Example: +To rename the output column name of `current_setting('threads')` from `threads` to `threads_num`, you can use the following query: +``` +CALL current_setting('threads') +YIELD threads as threads_num +RETURN *; +``` + +Result: +``` +┌─────────────┐ +│ threads_num │ +│ STRING │ +├─────────────┤ +│ 10 │ +└─────────────┘ +``` + +Another useful scenario is to avoid naming conflicition when two call functions in the same query returns a column with the same name. 
+```
+CALL table_info('person')
+YIELD `property id` as person_id, name as person_name, type as person_type, `default expression` as person_default, `primary key` as person_pk
+CALL table_info('student')
+YIELD `property id` as student_id, name as student_name, type as student_type, `default expression` as student_default, `primary key` as student_pk
+RETURN *;
+```
+
+Result:
+```
+┌───────────┬─────────────┬─────────────┬────────────────┬───────────┬────────────┬──────────────┬──────────────┬─────────────────┬────────────┐
+│ person_id │ person_name │ person_type │ person_default │ person_pk │ student_id │ student_name │ student_type │ student_default │ student_pk │
+│ INT32 │ STRING │ STRING │ STRING │ BOOL │ INT32 │ STRING │ STRING │ STRING │ BOOL │
+├───────────┼─────────────┼─────────────┼────────────────┼───────────┼────────────┼──────────────┼──────────────┼─────────────────┼────────────┤
+│ 0 │ id │ INT64 │ NULL │ True │ 0 │ id │ INT64 │ NULL │ True │
+└───────────┴─────────────┴─────────────┴────────────────┴───────────┴────────────┴──────────────┴──────────────┴─────────────────┴────────────┘
+```
+
+:::caution[Note]
+1. If the `YIELD` clause is used after a `CALL` function, **all** return columns of the function must appear in the `YIELD` clause.
+
+For example:
+```
+CALL table_info('person')
+YIELD `property id` as person_id
+RETURN person_id
+```
+The query throws an exception since not all return columns of the `table_info` function appear in the `YIELD` clause.
+
+2. The column names to yield must match the original return column names of the `CALL` function.
+For example:
+```
+CALL current_setting('threads')
+YIELD thread as threads_num
+RETURN *;
+```
+The query throws an exception since the column name to yield is `thread`, which doesn't match the return column name (`threads`) of the `CALL` function.
+
+3. The syntax in Kùzu Cypher is different from other systems like Neo4j. In Kùzu, the `YIELD` clause must be followed by a `RETURN` clause. 
`YIELD *` is not allowed in Kùzu. +::: diff --git a/src/content/docs/cypher/query-clauses/limit.md b/src/content/docs/cypher/query-clauses/limit.md index cbb0935e..acb9fe20 100644 --- a/src/content/docs/cypher/query-clauses/limit.md +++ b/src/content/docs/cypher/query-clauses/limit.md @@ -18,17 +18,58 @@ LIMIT 3; ``` Result: ``` ------------ -| u.name | ------------ -| Zhang | ------------ -| Karissa | ------------ -| Adam | ------------ +┌─────────┐ +│ u.name │ +│ STRING │ +├─────────┤ +│ Zhang │ +│ Karissa │ +│ Adam │ +└─────────┘ ``` If you omit the `ORDER BY`, you would get some k tuples in a `LIMIT k` query but you have no guarantee about which ones will be selected. + +The number of rows to limit can either be: +1. A parameter expression when used with prepared statement: + +Prepare: +```c++ +auto prepared = conn->prepare("MATCH (u:User) RETURN u.name limit $lt") +``` +Execution: +The number of rows to limit can be given at the time of execution. +```c++ +conn->execute(prepared.get(), std::make_pair(std::string{"lt"}, 1)) +``` + +Result: +``` +┌────────┐ +│ u.name │ +│ STRING │ +├────────┤ +│ Adam │ +└────────┘ +``` +2. A literal expression which can be evaluated at compile time. +```cypher +MATCH (u:User) +RETURN u.name +limit 1+2 +``` +Result: + +``` +┌─────────┐ +│ u.name │ +│ STRING │ +├─────────┤ +│ Adam │ +│ Karissa │ +│ Zhang │ +└─────────┘ +``` + diff --git a/src/content/docs/cypher/query-clauses/load-from.md b/src/content/docs/cypher/query-clauses/load-from.md index 756ae208..b9f06e93 100644 --- a/src/content/docs/cypher/query-clauses/load-from.md +++ b/src/content/docs/cypher/query-clauses/load-from.md @@ -21,14 +21,20 @@ Some example usage for the `LOAD FROM` clause is shown below. 
```cypher LOAD FROM "user.csv" (header = true) -WHERE CAST(age, INT64) > 25 +WHERE age > 25 RETURN COUNT(*); ----------------- -| COUNT_STAR() | ----------------- -| 3 | ----------------- ``` +This returns: +``` +┌──────────────┐ +│ COUNT_STAR() │ +│ INT64 │ +├──────────────┤ +│ 3 │ +└──────────────┘ +``` + +### Skipping lines To skip the first 2 lines of the CSV file, you can use the `SKIP` parameter as follows: @@ -38,20 +44,28 @@ RETURN *; ``` ### Create nodes from input file + +You can pass the contents of `LOAD FROM` to a + ```cypher +// Create a node table +// Scan file and use its contents to create nodes LOAD FROM "user.csv" (header = true) -CREATE (:User {name: name, age: CAST(age, INT64)}); +CREATE (:User {name: name, age: CAST(age AS INT64)}); -MATCH (u:User) RETURN u; ----------------------------------------------------- -| u | ----------------------------------------------------- -| {_ID: 0:0, _LABEL: User, name: Adam, age: 30} | ----------------------------------------------------- -| {_ID: 0:1, _LABEL: User, name: Karissa, age: 40} | ----------------------------------------------------- -| {_ID: 0:2, _LABEL: User, name: Zhang, age: 50} | ----------------------------------------------------- +// Return the nodes we just created +MATCH (u:User) RETURN u.name, u.age; +``` +``` +┌─────────┬───────┐ +│ u.name │ u.age │ +│ STRING │ INT64 │ +├─────────┼───────┤ +│ Adam │ 30 │ +│ Karissa │ 40 │ +│ Zhang │ 50 │ +│ Noura │ 25 │ +└─────────┴───────┘ ``` ### Reorder and subset columns @@ -64,16 +78,16 @@ input file has more columns specified in a different order. 
// Return age column before the name column LOAD FROM "user.csv" (header = true) RETURN age, name LIMIT 3; - --------------------- -| age | name | --------------------- -| 30 | Adam | --------------------- -| 40 | Karissa | --------------------- -| 50 | Zhang | --------------------- +``` +``` +┌───────┬─────────┐ +│ age │ name │ +│ INT64 │ STRING │ +├───────┼─────────┤ +│ 30 │ Adam │ +│ 40 │ Karissa │ +│ 50 │ Zhang │ +└───────┴─────────┘ ``` ### Bound variable names and data types @@ -97,11 +111,12 @@ WHERE name =~ 'Adam*' RETURN name, age; ``` ``` --------------- -| name | age | --------------- -| Adam | 30 | --------------- +┌────────┬───────┐ +│ name │ age │ +│ STRING │ INT64 │ +├────────┼───────┤ +│ Adam │ 30 │ +└────────┴───────┘ ``` :::caution[Note] @@ -144,7 +159,20 @@ You can also see the details of any warnings generated by the skipped lines usin See the [ignoring erroneous rows section of `COPY FROM`](import#ignore-erroneous-rows) for more details. ## Scan Data Formats -Load from can scan several raw or in-memory file formats, such as CSV, Parquet, Pandas, Polars, Arrow tables, and JSON. +`LOAD FROM` can scan several raw or in-memory file formats, such as CSV, Parquet, Pandas, Polars, Arrow tables, and JSON. + +### File format detection +`LOAD FROM` determines the file format based on the file extension if the `file_format` option is not given. For instance, files with a `.csv` extension are automatically recognized as CSV format. + +If the file format cannot be inferred from the extension, or if you need to override the default sniffing behaviour, the `file_format` option can be used. + +For example, to load a CSV file that has a `.tsv` extension (for tab-separated data), you must explicitly specify the file format using the `file_format` option, as shown below: +```cypher +LOAD FROM 'data.tsv' (file_format='csv') +RETURN * +``` + + Below we give examples of using `LOAD FROM` to scan data from each of these formats. 
We assume `WITH HEADERS` is not used in the examples below, so we discuss how Kùzu infers the variable names and data types of that bind to the scanned tuples. @@ -157,7 +185,7 @@ See the ](/import/csv#ignoring-erroneous-rows) documentation pages for the `COPY FROM` file. The configurations documented in those pages can also be specified after the `LOAD FROM` statement inside `()` when scanning CSV files. For example, you can indicate that the first line should -be interpreted as a header line by setting `(haders = true)` or that the CSV delimiter is '|' by setting `(DELIM="|")`. +be interpreted as a header line by setting `(headers = true)` or that the CSV delimiter is '|' by setting `(DELIM="|")`. Some of these configurations are also by default [automatically detected](/import/csv#auto-detecting-configurations) by Kùzu when scanning CSV files. These configurations determine the names and data types of the variables that bind to the fields scanned from CSV files. @@ -173,7 +201,7 @@ provide the names of the columns. The data types are always automatically inferr if `LOAD WITH HEADERS (...) FROM` is used, in which case the data types provided inside the `(...)` are used as described [above](#bound-variable-names-and-data-types)). 
-Suppose user.csv is a CSV file with the following contents: +Suppose `user.csv` is a CSV file with the following contents: ``` name,age Adam,30 @@ -185,15 +213,14 @@ Then if you run the following query, Kùzu will infer the column names `name` an ```cypher LOAD FROM "user.csv" (header = true) RETURN *; ------------------ -| name | age | ------------------ -| Adam | 30 | ------------------ -| Karissa | 40 | ------------------ -| Zhang | 50 | ------------------ +┌─────────┬───────┐ +│ name │ age │ +│ STRING │ INT64 │ +├─────────┼───────┤ +│ Adam │ 30 │ +│ Karissa │ 40 │ +│ Zhang │ 50 │ +└─────────┴───────┘ ``` @@ -207,15 +234,15 @@ Zhang,50 ```cypher LOAD FROM "user.csv" (header = false) RETURN *; ---------------------- -| column0 | column1 | ---------------------- -| Adam | 30 | ---------------------- -| Karissa | 40 | ---------------------- -| Zhang | 50 | ---------------------- +┌─────────┬─────────┐ +│ column0 │ column1 │ +│ STRING │ STRING │ +├─────────┼─────────┤ +│ name │ age │ +│ Adam │ 30 │ +│ Karissa │ 40 │ +│ Zhang │ 50 │ +└─────────┴─────────┘ ``` ### Parquet @@ -227,15 +254,14 @@ and the same content as in the `user.csv` file above. Then the query below will ```cypher LOAD FROM "user.parquet" RETURN *; ----------------- -| f0 | f1 | ----------------- -| Adam | 30 | ----------------- -| Karissa | 40 | ----------------- -| Zhang | 50 | ----------------- +┌─────────┬───────┐ +│ f0 │ f1 │ +│ STRING │ INT64 │ +├─────────┼───────┤ +│ Adam │ 30 │ +│ Karissa │ 40 │ +│ Zhang │ 50 │ +└─────────┴───────┘ ``` ### Pandas @@ -337,5 +363,5 @@ age: [[30,40,50]] ``` ### JSON -Kùzu can scan JSON files using `LOAD FROM`. -All JSON-related features are part of the JSON extension. See the documentation on the [JSON extension](/extensions/json#load-from) for details. +Kùzu can scan JSON files using `LOAD FROM`, but only upon installation of the JSON extension. +See the documentation on the [JSON extension](/extensions/json#load-from) for details. 
diff --git a/src/content/docs/cypher/query-clauses/skip.md b/src/content/docs/cypher/query-clauses/skip.md index 393f2953..1b5d4491 100644 --- a/src/content/docs/cypher/query-clauses/skip.md +++ b/src/content/docs/cypher/query-clauses/skip.md @@ -20,14 +20,57 @@ SKIP 2; ``` Result: ``` ------------ -| u.name | ------------ -| Karissa | ------------ -| Zhang | ------------ +┌─────────┐ +│ u.name │ +│ STRING │ +├─────────┤ +│ Karissa │ +│ Zhang │ +└─────────┘ ``` If you omit the `ORDER BY`, you would skip some k tuples in a `SKIP` k query but you have no guarantee about which ones will be skipped. + + +The number of rows to skip can either be: +1. A parameter expression when used with prepared statement: + +Prepare: +```c++ +auto prepared = conn->prepare("MATCH (u:User) RETURN u.name skip $sp") +``` + +Execution: + +The number of rows to skip can be given at the time of execution. +```c++ +conn->execute(prepared.get(), std::make_pair(std::string{"sp"}, 2)) +``` + +Result: +``` +┌────────┐ +│ u.name │ +│ STRING │ +├────────┤ +│ Zhang │ +│ Noura │ +└────────┘ +``` +2. A literal expression which can be evaluated at compile time. +```cypher +MATCH (u:User) +RETURN u.name +skip 2+1 +``` +Result: + +``` +┌────────┐ +│ u.name │ +│ STRING │ +├────────┤ +│ Noura │ +└────────┘ +``` diff --git a/src/content/docs/extensions/attach/rdbms.mdx b/src/content/docs/extensions/attach/rdbms.mdx index 60256ec7..4eb7305d 100644 --- a/src/content/docs/extensions/attach/rdbms.mdx +++ b/src/content/docs/extensions/attach/rdbms.mdx @@ -109,8 +109,42 @@ Result: └──────────────┘ ``` - -#### 3. Scan from DuckDB tables +#### 3. 
Data type mapping from DuckDB to Kùzu + +The table below shows the mapping from duckdb's type to Kùzu's type: +| Data type in DuckDB | Corresponding data type in Kùzu | +|-----------------------------|----------------------------------| +| BIGINT | INT64 | +| BIT | UNSUPPORTED | +| BLOB | BLOB | +| BOOLEAN | BOOL | +| DATE | DATE | +| DECIMAL(prec, scale) | DECIMAL(prec, scale) | +| DOUBLE | DOUBLE | +| FLOAT | FLOAT | +| HUGEINT | INT128 | +| INTEGER | INT32 | +| INTERVAL | INTERVAL | +| SMALLINT | INT16 | +| TIME | UNSUPPORTED | +| TIMESTAMP WITH TIME ZONE | UNSUPPORTED | +| TIMESTAMP | TIMESTAMP | +| TINYINT | INT8 | +| UBIGINT | UINT64 | +| UHUGEINT | UNSUPPORTED | +| UINTEGER | UINT32 | +| USMALLINT | UINT16 | +| UTINYINT | UINT8 | +| UUID | UUID | +| VARCHAR | STRING | +| ENUM | UNSUPPORTED | +| ARRAY | ARRAY | +| LIST | LIST | +| MAP | MAP | +| STRUCT | STRUCT | +| UNION | UNION | + +#### 4. Scan from DuckDB tables Finally, we can utilize the `LOAD FROM` statement to scan the `person` table. Note that you need to prefix the external `person` table with the database alias (in our example `uw`). See the `USE` statement which allows you to @@ -137,7 +171,7 @@ Result: --------------- ``` -#### 4. USE: Reference database without alias +#### 5. USE: Reference database without alias You can use the `USE` statement for attached databases to use a default database name for future operations. This can be used when reading from an attached database to avoid specifying the full database name @@ -164,7 +198,7 @@ LOAD FROM person RETURN * ``` -#### 5. Copy data from DuckDB tables +#### 6. Copy data from DuckDB tables One important use case of the external RDBMS extensions is to facilitate seamless data transfer from the external RDBMS to Kùzu. 
In this example, we continue using the `university.db` database created in the last step, but this time, @@ -187,7 +221,7 @@ If the schemas are not the same, e.g., `Person` contains only `name` property wh COPY Person FROM (LOAD FROM uw.person RETURN name); ``` -#### 6. Query the data in Kùzu +#### 7. Query the data in Kùzu Finally, we can verify the data in the `Person` table in Kùzu. @@ -210,7 +244,7 @@ Result: ------------------ ``` -#### 7. Clear attached database schema cache +#### 8. Clear attached database schema cache To avoid redundantly retrieving schema information from attached databases, Kùzu maintains a schema cache including table names and their respective columns and types. Should modifications occur in the schema @@ -219,7 +253,7 @@ schema data may become obsolete. You can use the `clear_attached_db_cache()` fun schema information in such cases. ```sql -CALL clear_attached_db_cache() RETURN * +CALL clear_attached_db_cache() ``` Note: If you have attached to databases from different RDBMSs, say Postgres, DuckDB, and Sqlite, this call will clear the cache for all of them. @@ -319,7 +353,56 @@ The below table lists some common connection string parameters: | `password` | Postgres password | [empty] | | `port` | Port number | 5432 | -#### 3. Scan from PostgreSQL tables +#### 3. 
Data type mapping from PostgreSQL to Kùzu + +The table below shows the mapping from PostgreSQL's type to Kùzu's type: +| PostgreSQL Data Type | Corresponding Data Type in Kùzu | +|-------------------------------------------|----------------------------------| +| bigint (int8) | INT64 | +| bigserial (serial8) | INT64 | +| bit [ (n) ] | STRING | +| bit varying [ (n) ] (varbit [ (n) ]) | STRING | +| boolean (bool) | BOOL | +| box | DOUBLE[] | +| bytea | BLOB | +| character [ (n) ] (char [ (n) ]) | STRING | +| character varying [ (n) ] (varchar [ (n)])| STRING | +| cidr | STRING | +| circle | DOUBLE[] | +| date | DATE | +| double precision (float8) | DOUBLE | +| inet | STRING | +| integer (int, int4) | INT32 | +| interval [ fields ] [ (p) ] | INTERVAL | +| json | JSON | +| line | DOUBLE[] | +| lseg | DOUBLE[] | +| macaddr | STRING | +| macaddr8 | STRING | +| money | STRING | +| numeric [ (p, s) ] (decimal [ (p, s) ]) | DECIMAL | +| path | DOUBLE[] | +| pg_lsn | STRING | +| pg_snapshot | STRING | +| point | STRUCT(x DOUBLE, y DOUBLE) | +| polygon | DOUBLE[] | +| real (float4) | FLOAT | +| smallint (int2) | INT16 | +| smallserial (serial2) | INT16 | +| serial (serial4) | INT32 | +| text | STRING | +| time [ (p) ] [ without time zone ] | UNSUPPORTED | +| time [ (p) ] with time zone (timetz) | UNSUPPORTED | +| timestamp [ (p) ] [ without time zone ] | TIMESTAMP | +| timestamp [ (p) ] with time zone (timestamptz) | UNSUPPORTED | +| tsquery | STRING | +| tsvector | STRING | +| txid_snapshot | STRING | +| uuid | UUID | +| xml | STRING | + + +#### 4. Scan from PostgreSQL tables Finally, we can utilize the `LOAD FROM` statement to scan the `Person` table. @@ -344,7 +427,7 @@ Result: --------------- ``` -#### 4. USE: Reference database without alias +#### 5. USE: Reference database without alias You can use the `USE` statement for attached databases to use a default database name for future operations. 
This can be used when reading from an attached database to avoid specifying the full database name @@ -371,7 +454,7 @@ LOAD FROM person RETURN * ``` -#### 5. Copy data from PostgreSQL tables +#### 6. Copy data from PostgreSQL tables One important use case of the external RDBMS extensions is to facilitate seamless data transfer from the external RDBMS to Kùzu. In this example, we continue using the `university.db` database created in the last step, but this time, @@ -394,7 +477,7 @@ If the schemas are not the same, e.g., `Person` contains only `name` property wh COPY Person FROM (LOAD FROM uw.person RETURN name); ``` -#### 6. Query the data in Kùzu +#### 7. Query the data in Kùzu Finally, we can verify the data in the `Person` table in Kùzu. @@ -417,7 +500,7 @@ Result: ------------------ ``` -#### 7. Clear attached database schema cache +#### 8. Clear attached database schema cache To avoid redundantly retrieving schema information from attached databases, Kùzu maintains a schema cache including table names and their respective columns and types. Should modifications occur in the schema @@ -426,12 +509,12 @@ schema data may become obsolete. You can use the `clear_attached_db_cache()` fun schema information in such cases. ```sql -CALL clear_attached_db_cache() RETURN * +CALL clear_attached_db_cache() ``` Note: If you have attached to databases from different RDBMSs, say Postgres, DuckDB, and Sqlite, this call will clear the cache for all of them. -#### 8. Detach database +#### 9. Detach database To detach a database, use `DETACH [ALIAS]` as follows: @@ -489,7 +572,29 @@ the alias `uw`: ATTACH 'university.db' AS uw (dbtype sqlite); ``` -#### 3. Scan from SQLite tables +#### 3. 
Data type mapping from SQLite to Kùzu + +The table below shows the mapping from SQLite's type to Kùzu's type: +| SQLite Storage Class / Datatype | Corresponding Data Type in Kùzu | +|--------------------------------------------|----------------------------------| +| NULL | BLOB | +| INTEGER | INT64 | +| REAL | DOUBLE | +| TEXT | STRING | +| BLOB | BLOB | +| BOOLEAN | INT64 | +| DATE | DATE | +| TIME | TIMESTAMP | + +Note: Sqlite uses a [dynamic type system](https://www.sqlite.org/datatype3.html), meaning that a column in sqlite can store values with different types. The option: `sqlite_all_varchar_option` is provided to scan such columns in Kùzu. +Usage: +``` +`CALL sqlite_all_varchar_option=