From cfad9605cb47cac4f49c6c7e67102ec581f2d357 Mon Sep 17 00:00:00 2001 From: Adam Marcus Date: Sun, 4 Jun 2023 19:19:03 -0400 Subject: [PATCH] Support for SQLite storage of ayb metadata (#100) * AybDb trait that supports SQLite and PostgreSQL depending on connection string * Remove reliance on sqlx offline mode (we support multiple databases now) * Add migrations & tests for SQLite, create data directory on start * Add missing files for SQLite and tests, remove base test config * Genericize duplicate constraint logic across SQLite and Postgres * Allow parallel tests by removing shared state * Cargo fmt * Servers on different ports * Update readme * Version bump * Clarification * Ignore local reset script * Ignore local reset script --- .github/workflows/tests.yml | 4 - .gitignore | 4 +- Cargo.lock | 142 ++++++++--- Cargo.toml | 8 +- README.md | 44 ++-- .../{ => postgres}/2022100901_initial.sql | 0 migrations/sqlite/2022100901_initial.sql | 17 ++ sqlx-data.json | 147 ------------ src/ayb_db.rs | 2 +- src/ayb_db/crud.rs | 117 --------- src/ayb_db/db_interfaces.rs | 227 ++++++++++++++++++ src/ayb_db/models.rs | 5 +- src/http/endpoints.rs | 20 +- src/http/server.rs | 19 +- tests/e2e.rs | 49 +++- tests/{reset_db.sh => reset_db_postgres.sh} | 2 +- tests/reset_db_sqlite.sh | 4 + ....toml => test-server-config-postgres.toml} | 2 +- tests/test-server-config-sqlite.toml | 4 + 19 files changed, 448 insertions(+), 369 deletions(-) rename migrations/{ => postgres}/2022100901_initial.sql (100%) create mode 100644 migrations/sqlite/2022100901_initial.sql delete mode 100644 sqlx-data.json delete mode 100644 src/ayb_db/crud.rs create mode 100644 src/ayb_db/db_interfaces.rs rename tests/{reset_db.sh => reset_db_postgres.sh} (78%) create mode 100755 tests/reset_db_sqlite.sh rename tests/{test-server-config.toml => test-server-config-postgres.toml} (71%) create mode 100644 tests/test-server-config-sqlite.toml diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 
2ab6890b..e51b2be2 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -21,10 +21,6 @@ jobs: --health-retries 5 ports: - 5432:5432 - env: - # Build binaries based on sqlx-data.json rather than live database, - # which isn't initialized yet for tests. - SQLX_OFFLINE: true steps: - uses: actions/checkout@v3 - name: Cargo cache diff --git a/.gitignore b/.gitignore index 46ea8984..f27c069a 100644 --- a/.gitignore +++ b/.gitignore @@ -11,5 +11,7 @@ # Ignore ayb-specific files ayb.toml e2e.sh -tests/ayb_data +tests/ayb_data_postgres +tests/ayb_data_sqlite ayb_data +reset_db.sh diff --git a/Cargo.lock b/Cargo.lock index 2a53eeda..df70d7ba 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,7 +8,7 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57a7559404a7f3573127aab53c08ce37a6c6a315c374a31070f3c91cd1b4a7fe" dependencies = [ - "bitflags 1.3.2", + "bitflags", "bytes", "futures-core", "futures-sink", @@ -31,7 +31,7 @@ dependencies = [ "actix-utils", "ahash 0.8.3", "base64 0.21.0", - "bitflags 1.3.2", + "bitflags", "brotli", "bytes", "bytestring", @@ -310,6 +310,17 @@ dependencies = [ "wait-timeout", ] +[[package]] +name = "async-trait" +version = "0.1.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.15", +] + [[package]] name = "atoi" version = "1.0.0" @@ -332,8 +343,10 @@ dependencies = [ "actix-web", "assert-json-diff", "assert_cmd", + "async-trait", "clap", "derive_more", + "dyn-clone", "env_logger", "prettytable-rs", "regex", @@ -365,12 +378,6 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" -[[package]] -name = "bitflags" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c70beb79cbb5ce9c4f8e20849978f34225931f665bb49efa6982875a4d5facb3" - [[package]] name = "block-buffer" version = "0.10.4" @@ -474,7 +481,7 @@ checksum = "914c8c79fb560f238ef6429439a30023c862f7a28e688c58f7203f12b29970bd" dependencies = [ "anstream", "anstyle", - "bitflags 1.3.2", + "bitflags", "clap_lex", "once_cell", "strsim", @@ -703,14 +710,17 @@ version = "0.15.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" +[[package]] +name = "dyn-clone" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68b0cf012f1230e43cd00ebb729c6bb58707ecfa8ad08b52ef3a4ccd2697fc30" + [[package]] name = "either" version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" -dependencies = [ - "serde", -] [[package]] name = "encode_unicode" @@ -798,6 +808,18 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "flume" +version = "0.10.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1657b4441c3403d9f7b3409e47575237dac27b1b5726df654a6ecbf92f0f7577" +dependencies = [ + "futures-core", + "futures-sink", + "pin-project", + "spin", +] + [[package]] name = "fnv" version = "1.0.7" @@ -844,6 +866,17 @@ version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" +[[package]] +name = "futures-executor" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + [[package]] name = "futures-intrusive" version = "0.4.2" @@ -878,6 +911,7 @@ dependencies = [ "futures-task", "pin-project-lite", "pin-utils", + "slab", ] [[package]] @@ -920,6 +954,15 
@@ dependencies = [ "tracing", ] +[[package]] +name = "hashbrown" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" +dependencies = [ + "ahash 0.7.6", +] + [[package]] name = "hashbrown" version = "0.12.3" @@ -929,13 +972,22 @@ dependencies = [ "ahash 0.7.6", ] +[[package]] +name = "hashlink" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7249a3129cbc1ffccd74857f81464a323a152173cdb134e0fd81bc803b29facf" +dependencies = [ + "hashbrown 0.11.2", +] + [[package]] name = "hashlink" version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69fe1fcf8b4278d860ad0548329f892a3631fb63f82574df68275f34cdbe0ffa" dependencies = [ - "hashbrown", + "hashbrown 0.12.3", ] [[package]] @@ -1080,7 +1132,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" dependencies = [ "autocfg", - "hashbrown", + "hashbrown 0.12.3", ] [[package]] @@ -1174,9 +1226,9 @@ checksum = "6a987beff54b60ffa6d51982e1aa1146bc42f19bd26be28b0586f252fccf5317" [[package]] name = "libsqlite3-sys" -version = "0.26.0" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afc22eff61b133b115c6e8c74e818c628d6d5e7a502afea6f64dee076dd94326" +checksum = "898745e570c7d0453cc1fbc4a701eb6c662ed54e8fec8b7d14be137ebeeb9d14" dependencies = [ "cc", "pkg-config", @@ -1333,7 +1385,7 @@ version = "0.10.51" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97ea2d98598bf9ada7ea6ee8a30fb74f9156b63bbe495d64ec2b87c269d2dda3" dependencies = [ - "bitflags 1.3.2", + "bitflags", "cfg-if", "foreign-types", "libc", @@ -1431,6 +1483,26 @@ version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e" +[[package]] +name = "pin-project" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c95a7476719eab1e366eaf73d0260af3021184f18177925b07f54b30089ceead" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39407670928234ebc5e6e580247dd567ad73a3578460c5990f9503df207e8f07" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.15", +] + [[package]] name = "pin-project-lite" version = "0.2.9" @@ -1551,7 +1623,7 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" dependencies = [ - "bitflags 1.3.2", + "bitflags", ] [[package]] @@ -1560,7 +1632,7 @@ version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" dependencies = [ - "bitflags 1.3.2", + "bitflags", ] [[package]] @@ -1636,15 +1708,16 @@ dependencies = [ [[package]] name = "rusqlite" -version = "0.29.0" +version = "0.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "549b9d036d571d42e6e85d1c1425e2ac83491075078ca9a15be021c56b1641f2" +checksum = "85127183a999f7db96d1a976a309eebbfb6ea3b0b400ddd8340190129de6eb7a" dependencies = [ - "bitflags 2.1.0", + "bitflags", "fallible-iterator", "fallible-streaming-iterator", - "hashlink", + "hashlink 0.7.0", "libsqlite3-sys", + "memchr", "smallvec", ] @@ -1663,7 +1736,7 @@ version = "0.37.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9b864d3c18a5785a05953adeed93e2dca37ed30f18e69bba9f30079d51f363f" dependencies = [ - "bitflags 1.3.2", + "bitflags", "errno", "io-lifetimes", "libc", @@ -1704,7 +1777,7 @@ version = "2.8.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "a332be01508d814fed64bf28f798a146d73792121129962fdf335bb3c49a4254" dependencies = [ - "bitflags 1.3.2", + "bitflags", "core-foundation", "core-foundation-sys", "libc", @@ -1846,6 +1919,15 @@ dependencies = [ "winapi", ] +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +dependencies = [ + "lock_api", +] + [[package]] name = "sqlformat" version = "0.2.1" @@ -1876,7 +1958,7 @@ dependencies = [ "ahash 0.7.6", "atoi", "base64 0.13.1", - "bitflags 1.3.2", + "bitflags", "byteorder", "bytes", "crc", @@ -1885,17 +1967,20 @@ dependencies = [ "dotenvy", "either", "event-listener", + "flume", "futures-channel", "futures-core", + "futures-executor", "futures-intrusive", "futures-util", - "hashlink", + "hashlink 0.8.1", "hex", "hkdf", "hmac", "indexmap", "itoa", "libc", + "libsqlite3-sys", "log", "md-5", "memchr", @@ -1926,12 +2011,9 @@ dependencies = [ "dotenvy", "either", "heck", - "hex", "once_cell", "proc-macro2", "quote", - "serde", - "serde_json", "sha2", "sqlx-core", "sqlx-rt", diff --git a/Cargo.toml b/Cargo.toml index 09febd1a..cb97381e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "ayb" -version = "0.1.3" +version = "0.1.4" edition = "2021" description = "ayb makes it easy to create, host, and share embedded databases like SQLite and DuckDB" homepage = "https://github.com/marcua/ayb" @@ -9,15 +9,17 @@ license = "Apache-2.0" [dependencies] actix-web = { version = "4.3.1" } +async-trait = { version = "0.1.68" } clap = { version = "4.2.7", features = ["cargo", "derive", "env"] } derive_more = { version = "0.99.0" } +dyn-clone = { version = "1.0.11" } env_logger = { version = "0.10.0" } reqwest = { version = "0.11.18", features = ["json"] } -rusqlite = { version = "0.29.0", features = ["bundled"] } +rusqlite = { version = "0.27.0", 
features = ["bundled"] } regex = { version = "1.8.0"} serde = { version = "1.0", features = ["derive"] } serde_repr = { version = "0.1.11" } -sqlx = { version = "0.6.3", features = ["runtime-actix-native-tls", "postgres", "offline"] } +sqlx = { version = "0.6.3", features = ["runtime-actix-native-tls", "postgres", "sqlite"] } toml = { version = "0.7.3" } tokio = { version = "1.28.1", features = ["macros", "rt"] } prettytable-rs = "0.10.0" diff --git a/README.md b/README.md index c97a5e2a..2d60f095 100644 --- a/README.md +++ b/README.md @@ -20,27 +20,19 @@ cargo install ayb ``` ### Running the server -An `ayb` server stores its metadata in [PostgreSQL](https://www.postgresql.org/), and its embedded databases on a local disk. First, create an `ayb.toml` configuration file to tell the server what host/port to listen for connections on, how to connect to Postgres, and the data path for the embedded databases: +An `ayb` server stores its metadata in [SQLite](https://www.sqlite.org/index.html) or [PostgreSQL](https://www.postgresql.org/), and its embedded databases on a local disk. First, create an `ayb.toml` configuration file to tell the server what host/port to listen for connections on, how to connect to the database, and the data path for the embedded databases: ```bash cat < ayb.toml host = "0.0.0.0" port = 5433 -database_url = "postgresql://postgres_user:test@localhost:5432/test_db" +database_url = "sqlite://ayb_data/ayb.sqlite" +# Or, for Postgres: +# database_url = "postgresql://postgres_user:test@localhost:5432/test_db" data_path = "./ayb_data" EOF ``` -The `database_url` above assumes you've created a `postgres_user` with password `test` and a `test_db` that this user can access. 
Here are the commands to make that possible, assuming you already have PostgreSQL installed (say, with `sudo apt-get install postgresql` on Debian-based systems): -```bash -$ sudo -u postgres createuser -P postgres_user # Enter password `test` -$ sudo -u postgres psql -c "alter user postgres_user createdb;" -# Edit /etc/postgresql/14/main/pg_hba.conf to have an entry like: -# local all all scram-sha-256 -$ service postgresql restart -$ createdb -U postgres_user -W test_db -``` - Running the server then requires one command ```bash $ ayb server @@ -123,22 +115,22 @@ Thank you for asking. [I hope the answer elicits some nostalgia](https://www.you Here's a rough roadmap for the project, with items near the top of the list more likely to be completed first. The nitty-gritty list of prioritized issues can be found on [this project board](https://github.com/marcua/ayb/projects/1), with the most-likely-to-be-completed issues near the top of the to-do list. * Make the single-user `ayb` experience excellent - * Reduce reliance on PostgreSQL. Given that the goal of `ayb` is to make it easier to create, share, and query databases, it's frustrating that running `ayb` requires you to pay the nontrivial cost of operationalizing PostgreSQL. While Postgres will be helpful for eventually coordinating between multiple `ayb` nodes, a single-node version should be able to store its metadata in SQLite with little setup costs. - * Authentication and permissions. Add authentication/the ability to log in, and add permissions to endpoints so that you can't just issue queries against any database. - * Clustering. Support for multiple `ayb` nodes to serve databases and requests. Whereas a single database will not span multiple machines, parallelism/distribution will happen across users and databases. - * Persistence beyond the node. 
Using projects like [LiteFS](https://github.com/superfly/litefs), stream updates to databases to persistent storage, and allow failover if an `ayb` node disappears. - * Isolation. Since an `ayb` instance can have multiple tenants/databases, we want to use one of the many container/isolate/microVM projects to ensure that one tenant isn't able to access another tenant's data. - * Sessions/transactions. `ayb`'s query API is a stateless request/response API, making it impossible to start a database transaction or issue multiple queries in a session. Exposing sessions in the API will allow multiple statements per session, and by extension, transactions. - * Import/export of databases. `ayb` already uses existing well-established file formats (e.g., SQLite). There should be endpoints to import existing databases into `ayb` in those formats or export the underlying files so you're not locked in. + * [x] Reduce reliance on PostgreSQL (SQLite metadata storage). Given that the goal of `ayb` is to make it easier to create, share, and query databases, it's frustrating that running `ayb` requires you to pay the nontrivial cost of operationalizing PostgreSQL. While Postgres will be helpful for eventually coordinating between multiple `ayb` nodes, a single-node version should be able to store its metadata in SQLite with little setup costs. + * [ ] Authentication and permissions. Add authentication/the ability to log in, and add permissions to endpoints so that you can't just issue queries against any database. + * [ ] Clustering. Support for multiple `ayb` nodes to serve databases and requests. Whereas a single database will not span multiple machines, parallelism/distribution will happen across users and databases. + * [ ] Persistence beyond the node. Using projects like [LiteFS](https://github.com/superfly/litefs), stream updates to databases to persistent storage, and allow failover if an `ayb` node disappears. + * [ ] Isolation. 
Since an `ayb` instance can have multiple tenants/databases, we want to use one of the many container/isolate/microVM projects to ensure that one tenant isn't able to access another tenant's data. + * [ ] Sessions/transactions. `ayb`'s query API is a stateless request/response API, making it impossible to start a database transaction or issue multiple queries in a session. Exposing sessions in the API will allow multiple statements per session, and by extension, transactions. + * [ ] Import/export of databases. `ayb` already uses existing well-established file formats (e.g., SQLite). There should be endpoints to import existing databases into `ayb` in those formats or export the underlying files so you're not locked in. * Extend `ayb` to more people and software - * Collaboration. In addition to making it easy to create and query databases, it should be easy to share databases with others. Two use cases include adding private collaborators and allowing public read-only access. - * Forking. Allowing a user to fork their own copy of a database will enable collaborators to remix and build on each others' work. - * Versioning. To both make it less scary to execute sensitive operations and to make it possible for scientists to reference and publish checkpoints of their work, a user should be able to snapshot and revert to a database at a point in time. - * DuckDB. Allowing users to create a DuckDB database in addition to a SQLite database would allow you to create a data warehouse with a single command. This effort is dependent on the DuckDB project. First, the DuckDB file format is rapidly changing ahead of the project's 1.0 release. Additionally, I don't know of an equivalent streaming replication project to LiteFS for DuckDB that handles *persistence beyond the node*. - * PostgreSQL wire protocol. 
While an HTTP API makes it easy to build new web apps, exposing `ayb` over the PostgreSQL wire protocol will allow existing tools and libraries to connect to and query an `ayb` database. + * [ ] Collaboration. In addition to making it easy to create and query databases, it should be easy to share databases with others. Two use cases include adding private collaborators and allowing public read-only access. + * [ ] Forking. Allowing a user to fork their own copy of a database will enable collaborators to remix and build on each others' work. + * [ ] Versioning. To both make it less scary to execute sensitive operations and to make it possible for scientists to reference and publish checkpoints of their work, a user should be able to snapshot and revert to a database at a point in time. + * [ ] DuckDB. Allowing users to create a DuckDB database in addition to a SQLite database would allow you to create a data warehouse with a single command. This effort is dependent on the DuckDB project. First, the DuckDB file format is rapidly changing ahead of the project's 1.0 release. Additionally, I don't know of an equivalent streaming replication project to LiteFS for DuckDB that handles *persistence beyond the node*. + * [ ] PostgreSQL wire protocol. While an HTTP API makes it easy to build new web apps, exposing `ayb` over the PostgreSQL wire protocol will allow existing tools and libraries to connect to and query an `ayb` database. * Increase discoverability with a web frontend - * Provide a web interface analogous to the command line interface. Much like GitHub/Gitea/Forgejo make git more approachable, you shouldn't have to pay a command line knowledge tax in order to create, share, and query an `ayb` database. - * Explore people's public datasets. Beyond simplifying the command line, platforms like GitHub also make it easier to find a user's publicly shared repositories, follow along in their work, and fork a copy for your own exploration. 
That same experience should be possible for `ayb`-hosted databases. + * [ ] Provide a web interface analogous to the command line interface. Much like GitHub/Gitea/Forgejo make git more approachable, you shouldn't have to pay a command line knowledge tax in order to create, share, and query an `ayb` database. + * [ ] Explore people's public datasets. Beyond simplifying the command line, platforms like GitHub also make it easier to find a user's publicly shared repositories, follow along in their work, and fork a copy for your own exploration. That same experience should be possible for `ayb`-hosted databases. ## Contributing (This section is inspired by the [LiteFS project](https://github.com/superfly/litefs#contributing), and is just one of the many things to love about that project.) diff --git a/migrations/2022100901_initial.sql b/migrations/postgres/2022100901_initial.sql similarity index 100% rename from migrations/2022100901_initial.sql rename to migrations/postgres/2022100901_initial.sql diff --git a/migrations/sqlite/2022100901_initial.sql b/migrations/sqlite/2022100901_initial.sql new file mode 100644 index 00000000..c593f1d7 --- /dev/null +++ b/migrations/sqlite/2022100901_initial.sql @@ -0,0 +1,17 @@ +CREATE TABLE entity ( + id INTEGER PRIMARY KEY, + slug VARCHAR(64) NOT NULL, + entity_type SMALLINT NOT NULL, + + UNIQUE(slug) +); + +CREATE TABLE database ( + id INTEGER PRIMARY KEY, + slug VARCHAR(64) NOT NULL, + db_type SMALLINT NOT NULL, + entity_id INT NOT NULL, + + FOREIGN KEY(entity_id) REFERENCES entity(id), + UNIQUE(entity_id, slug) +); diff --git a/sqlx-data.json b/sqlx-data.json deleted file mode 100644 index 2904df11..00000000 --- a/sqlx-data.json +++ /dev/null @@ -1,147 +0,0 @@ -{ - "db": "PostgreSQL", - "643125a84e9b3b29ee3baa8ed476ba7be3e7b9b29a39bc531f46f589f0d14483": { - "describe": { - "columns": [ - { - "name": "id", - "ordinal": 0, - "type_info": "Int4" - }, - { - "name": "entity_id", - "ordinal": 1, - "type_info": "Int4" - }, - { - 
"name": "slug", - "ordinal": 2, - "type_info": "Varchar" - }, - { - "name": "db_type", - "ordinal": 3, - "type_info": "Int2" - } - ], - "nullable": [ - false, - false, - false, - false - ], - "parameters": { - "Left": [ - "Int4", - "Varchar", - "Int2" - ] - } - }, - "query": "\nINSERT INTO database ( entity_id, slug, db_type )\nVALUES ( $1, $2, $3 )\nRETURNING id, entity_id, slug, db_type\n " - }, - "88d4c10a9b791b1955482e2b95e74912ed733f89e2e4f8291621c2a1319c2994": { - "describe": { - "columns": [ - { - "name": "id", - "ordinal": 0, - "type_info": "Int4" - }, - { - "name": "slug", - "ordinal": 1, - "type_info": "Varchar" - }, - { - "name": "entity_type", - "ordinal": 2, - "type_info": "Int2" - } - ], - "nullable": [ - false, - false, - false - ], - "parameters": { - "Left": [ - "Varchar", - "Int2" - ] - } - }, - "query": "\nINSERT INTO entity ( slug, entity_type )\nVALUES ( $1, $2 )\nRETURNING id, slug, entity_type\n " - }, - "9155f37421915d53940ade5307a7b993e6b57d92f03ba735ff29bac164d8f0f4": { - "describe": { - "columns": [ - { - "name": "id", - "ordinal": 0, - "type_info": "Int4" - }, - { - "name": "slug", - "ordinal": 1, - "type_info": "Varchar" - }, - { - "name": "entity_id", - "ordinal": 2, - "type_info": "Int4" - }, - { - "name": "db_type", - "ordinal": 3, - "type_info": "Int2" - } - ], - "nullable": [ - false, - false, - false, - false - ], - "parameters": { - "Left": [ - "Text", - "Text" - ] - } - }, - "query": "\nSELECT\n database.id,\n database.slug,\n database.entity_id,\n database.db_type\nFROM database\nJOIN entity on database.entity_id = entity.id\nWHERE\n entity.slug = $1\n AND database.slug = $2\n " - }, - "b0be4cc26e844ab1a23fbabcaf8c457651a707b238b89730b0a9bd8b21accc1e": { - "describe": { - "columns": [ - { - "name": "id", - "ordinal": 0, - "type_info": "Int4" - }, - { - "name": "slug", - "ordinal": 1, - "type_info": "Varchar" - }, - { - "name": "entity_type", - "ordinal": 2, - "type_info": "Int2" - } - ], - "nullable": [ - false, - false, - 
false - ], - "parameters": { - "Left": [ - "Text" - ] - } - }, - "query": "\nSELECT\n id,\n slug,\n entity_type\nFROM entity\nWHERE slug = $1\n " - } -} \ No newline at end of file diff --git a/src/ayb_db.rs b/src/ayb_db.rs index fd2bd785..376324ab 100644 --- a/src/ayb_db.rs +++ b/src/ayb_db.rs @@ -1,2 +1,2 @@ -pub mod crud; +pub mod db_interfaces; pub mod models; diff --git a/src/ayb_db/crud.rs b/src/ayb_db/crud.rs deleted file mode 100644 index 0b9006e5..00000000 --- a/src/ayb_db/crud.rs +++ /dev/null @@ -1,117 +0,0 @@ -use crate::ayb_db::models::{Database, Entity, InstantiatedDatabase, InstantiatedEntity}; -use crate::error::AybError; -use sqlx; -use sqlx::postgres::PgPool; - -pub async fn create_database( - database: &Database, - pool: &PgPool, -) -> Result { - let rec = sqlx::query_as!( - InstantiatedDatabase, - r#" -INSERT INTO database ( entity_id, slug, db_type ) -VALUES ( $1, $2, $3 ) -RETURNING id, entity_id, slug, db_type - "#, - database.entity_id, - database.slug, - database.db_type - ) - .fetch_one(pool) - .await - .or_else(|err| match err { - // TODO(marcua): Figure out why `db_error.code() == "23505"`, which is less brittle and should work according to the sqlx docs, thinks it's receiving an `Option` for `code()`. 
- sqlx::Error::Database(db_error) if db_error.message() == "duplicate key value violates unique constraint \"database_entity_id_slug_key\"" => Err(AybError { - message: format!("Database already exists") - }), - _ => Err(AybError::from(err)), - })?; - - Ok(rec) -} - -pub async fn create_entity(entity: &Entity, pool: &PgPool) -> Result { - let rec = sqlx::query_as!( - InstantiatedEntity, - r#" -INSERT INTO entity ( slug, entity_type ) -VALUES ( $1, $2 ) -RETURNING id, slug, entity_type - "#, - entity.slug, - entity.entity_type - ) - .fetch_one(pool) - .await - .or_else(|err| match err { - // TODO(marcua): Figure out why `db_error.code() == "23505"`, which is less brittle and should work according to the sqlx docs, thinks it's receiving an `Option` for `code()`. - sqlx::Error::Database(db_error) - if db_error.message() - == "duplicate key value violates unique constraint \"entity_slug_key\"" => - { - Err(AybError { - message: format!("Entity already exists"), - }) - } - _ => Err(AybError::from(err)), - })?; - - Ok(rec) -} - -pub async fn get_database( - entity_slug: &String, - database_slug: &String, - pool: &PgPool, -) -> Result { - let rec = sqlx::query_as!( - InstantiatedDatabase, - r#" -SELECT - database.id, - database.slug, - database.entity_id, - database.db_type -FROM database -JOIN entity on database.entity_id = entity.id -WHERE - entity.slug = $1 - AND database.slug = $2 - "#, - entity_slug, - database_slug - ) - .fetch_one(pool) - .await?; - - Ok(rec) -} - -pub async fn get_entity( - entity_slug: &String, - pool: &PgPool, -) -> Result { - let rec = sqlx::query_as!( - InstantiatedEntity, - r#" -SELECT - id, - slug, - entity_type -FROM entity -WHERE slug = $1 - "#, - entity_slug - ) - .fetch_one(pool) - .await - .or_else(|err| match err { - sqlx::Error::RowNotFound => Err(AybError { - message: format!("Entity not found: {:?}", entity_slug), - }), - _ => Err(AybError::from(err)), - })?; - - Ok(rec) -} diff --git a/src/ayb_db/db_interfaces.rs 
b/src/ayb_db/db_interfaces.rs new file mode 100644 index 00000000..ee2b44f7 --- /dev/null +++ b/src/ayb_db/db_interfaces.rs @@ -0,0 +1,227 @@ +use crate::ayb_db::models::{Database, Entity, InstantiatedDatabase, InstantiatedEntity}; +use crate::error::AybError; +use async_trait::async_trait; +use dyn_clone::{clone_trait_object, DynClone}; +use sqlx::{ + migrate, + postgres::PgPoolOptions, + sqlite::{SqliteConnectOptions, SqlitePoolOptions}, + Pool, Postgres, Sqlite, +}; +use std::str::FromStr; + +// AybDb is a trait for a database interface for storing `ayb`'s +// metadata. To support different databases (e.g., SQLite and +// Postgres) via `sqlx`, which requires static types for connection +// pools and query execution, the AybDb trait is implemented for each +// database, with shared code provided by the `implement_ayb_db` +// macro. This is inspired by the `seafowl` project's implementation, +// the details of which can be found here: +// https://github.com/splitgraph/seafowl/blob/542159ebb42cada59cea6bd82fef4ab9e9724a94/src/repository/default.rs#L28 +#[async_trait] +pub trait AybDb: DynClone + Send + Sync { + fn is_duplicate_constraint_error(&self, db_error: &dyn sqlx::error::DatabaseError) -> bool; + async fn create_database(&self, database: &Database) -> Result; + async fn create_entity(&self, entity: &Entity) -> Result; + async fn get_database( + &self, + entity_slug: &String, + database_slug: &String, + ) -> Result; + async fn get_entity(&self, entity_slug: &String) -> Result; +} + +clone_trait_object!(AybDb); + +#[macro_export] +macro_rules!
implement_ayb_db { + ($db_type: ident) => { + #[async_trait] + impl AybDb for $db_type { + fn is_duplicate_constraint_error( + &self, + db_error: &dyn sqlx::error::DatabaseError, + ) -> bool { + match db_error.code() { + Some(code) => code.to_string() == $db_type::DUPLICATE_CONSTRAINT_ERROR_CODE, + None => false, + } + } + + async fn create_database( + &self, + database: &Database, + ) -> Result { + let db: InstantiatedDatabase = sqlx::query_as( + r#" + INSERT INTO database ( entity_id, slug, db_type ) + VALUES ( $1, $2, $3 ) + RETURNING id, entity_id, slug, db_type + "#, + ) + .bind(database.entity_id) + .bind(&database.slug) + .bind(database.db_type) + .fetch_one(&self.pool) + .await + .or_else(|err| match err { + sqlx::Error::Database(db_error) + if self.is_duplicate_constraint_error(&*db_error) => + { + Err(AybError { + message: format!("Database already exists"), + }) + } + _ => Err(AybError::from(err)), + })?; + + Ok(db) + } + + async fn create_entity(&self, entity: &Entity) -> Result { + let entity: InstantiatedEntity = sqlx::query_as( + r#" + INSERT INTO entity ( slug, entity_type ) + VALUES ( $1, $2 ) + RETURNING id, slug, entity_type + "#, + ) + .bind(&entity.slug) + .bind(entity.entity_type) + .fetch_one(&self.pool) + .await + .or_else(|err| match err { + sqlx::Error::Database(db_error) + if self.is_duplicate_constraint_error(&*db_error) => + { + Err(AybError { + message: format!("Entity already exists"), + }) + } + _ => Err(AybError::from(err)), + })?; + + Ok(entity) + } + + async fn get_database( + &self, + entity_slug: &String, + database_slug: &String, + ) -> Result { + let db: InstantiatedDatabase = sqlx::query_as( + r#" +SELECT + database.id, + database.slug, + database.entity_id, + database.db_type +FROM database +JOIN entity on database.entity_id = entity.id +WHERE + entity.slug = $1 + AND database.slug = $2 + "#, + ) + .bind(entity_slug) + .bind(database_slug) + .fetch_one(&self.pool) + .await?; + + Ok(db) + } + + async fn get_entity( + &self, + 
entity_slug: &String, + ) -> Result { + let entity: InstantiatedEntity = sqlx::query_as( + r#" +SELECT + id, + slug, + entity_type +FROM entity +WHERE slug = $1 + "#, + ) + .bind(entity_slug) + .fetch_one(&self.pool) + .await + .or_else(|err| match err { + sqlx::Error::RowNotFound => Err(AybError { + message: format!("Entity not found: {:?}", entity_slug), + }), + _ => Err(AybError::from(err)), + })?; + + Ok(entity) + } + } + }; +} + +#[derive(Clone)] +struct SqliteAybDb { + pub pool: Pool, +} + +impl SqliteAybDb { + pub const DUPLICATE_CONSTRAINT_ERROR_CODE: &str = "2067"; + + pub async fn connect(url: String) -> SqliteAybDb { + let connection_options = SqliteConnectOptions::from_str(&url) + .expect("Unable to interpret SQLite connection uri") + .create_if_missing(true); + let pool = SqlitePoolOptions::new() + .connect_with(connection_options) + .await + .expect("Unable to connect to database"); + migrate!("./migrations/sqlite") + .run(&pool) + .await + .expect("Unable to run migrations"); + return Self { pool: pool }; + } +} + +implement_ayb_db!(SqliteAybDb); + +#[derive(Clone)] +struct PostgresAybDb { + pub pool: Pool, +} + +impl PostgresAybDb { + pub const DUPLICATE_CONSTRAINT_ERROR_CODE: &str = "23505"; + + pub async fn connect(url: String) -> PostgresAybDb { + let pool = PgPoolOptions::new() + .max_connections(20) + .connect(&url) + .await + .expect("Unable to connect to database"); + migrate!("./migrations/postgres") + .run(&pool) + .await + .expect("Unable to run migrations"); + return Self { pool: pool }; + } +} + +implement_ayb_db!(PostgresAybDb); + +pub async fn connect_to_ayb_db(url: String) -> Result, AybError> { + if url.starts_with("sqlite") { + Ok(Box::new(SqliteAybDb::connect(url).await)) + } else if url.starts_with("postgres") { + Ok(Box::new(PostgresAybDb::connect(url).await)) + } else { + Err(AybError { + message: format!( + "Database type for {} is not supported (currently only SQLite and PostgreSQL)", + url + ), + }) + } +} diff --git 
a/src/ayb_db/models.rs b/src/ayb_db/models.rs index 5f698aec..773878a0 100644 --- a/src/ayb_db/models.rs +++ b/src/ayb_db/models.rs @@ -1,6 +1,7 @@ use clap::ValueEnum; use serde::{Deserialize, Serialize}; use serde_repr::{Deserialize_repr, Serialize_repr}; +use sqlx::FromRow; use std::fmt; #[derive( @@ -90,7 +91,7 @@ pub struct Database { pub db_type: i16, } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, FromRow, Serialize, Deserialize)] pub struct InstantiatedDatabase { pub id: i32, pub entity_id: i32, @@ -104,7 +105,7 @@ pub struct Entity { pub entity_type: i16, } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, FromRow, Serialize, Deserialize)] pub struct InstantiatedEntity { pub id: i32, pub slug: String, diff --git a/src/http/endpoints.rs b/src/http/endpoints.rs index 641e72f6..04c3cbd9 100644 --- a/src/http/endpoints.rs +++ b/src/http/endpoints.rs @@ -1,7 +1,4 @@ -use crate::ayb_db::crud::{ - create_database as create_database_crud, create_entity as create_entity_crud, - get_database as get_database_crud, get_entity as get_entity_crud, -}; +use crate::ayb_db::db_interfaces::AybDb; use crate::ayb_db::models::{DBType, Database, Entity, EntityType}; use crate::error::AybError; use crate::hosted_db::paths::database_path; @@ -11,23 +8,22 @@ use crate::http::structs::{ }; use crate::http::utils::get_header; use actix_web::{post, web, HttpRequest, HttpResponse}; -use sqlx; #[post("/v1/{entity}/{database}")] async fn create_database( path: web::Path, req: HttpRequest, - db_pool: web::Data, + ayb_db: web::Data>, ) -> Result { let entity_slug = &path.entity; - let entity = get_entity_crud(entity_slug, &db_pool).await?; + let entity = ayb_db.get_entity(entity_slug).await?; let db_type = get_header(req, "db-type")?; let database = Database { entity_id: entity.id, slug: path.database.clone(), db_type: DBType::from_str(&db_type) as i16, }; - let created_database = create_database_crud(&database, &db_pool).await?; + let created_database = 
ayb_db.create_database(&database).await?; Ok(HttpResponse::Created().json(APIDatabase::from_persisted(&entity, &created_database))) } @@ -35,12 +31,12 @@ async fn create_database( async fn query( path: web::Path, query: String, - db_pool: web::Data, + ayb_db: web::Data>, ayb_config: web::Data, ) -> Result, AybError> { let entity_slug = &path.entity; let database_slug = &path.database; - let database = get_database_crud(entity_slug, database_slug, &db_pool).await?; + let database = ayb_db.get_database(entity_slug, database_slug).await?; let db_type = DBType::from_i16(database.db_type); let db_path = database_path(entity_slug, database_slug, &ayb_config.data_path)?; let result = run_query(&db_path, &query, &db_type)?; @@ -51,13 +47,13 @@ async fn query( async fn register( path: web::Path, req: HttpRequest, - db_pool: web::Data, + ayb_db: web::Data>, ) -> Result { let entity_type = get_header(req, "entity-type")?; let entity = Entity { slug: path.entity.clone(), entity_type: EntityType::from_str(&entity_type) as i16, }; - let created_entity = create_entity_crud(&entity, &db_pool).await?; + let created_entity = ayb_db.create_entity(&entity).await?; Ok(HttpResponse::Created().json(APIEntity::from_persisted(&created_entity))) } diff --git a/src/http/server.rs b/src/http/server.rs index 678f9f33..86f4011a 100644 --- a/src/http/server.rs +++ b/src/http/server.rs @@ -1,8 +1,8 @@ +use crate::ayb_db::db_interfaces::connect_to_ayb_db; use crate::http::endpoints::{create_database, query, register}; use crate::http::structs::AybConfig; use actix_web::{middleware, web, App, HttpServer}; -use sqlx::migrate; -use sqlx::postgres::PgPoolOptions; +use dyn_clone::clone_box; use std::fs; use std::path::PathBuf; use toml; @@ -19,24 +19,15 @@ pub async fn run_server(config_path: &PathBuf) -> std::io::Result<()> { let contents = fs::read_to_string(config_path)?; let ayb_conf: AybConfig = toml::from_str(&contents).unwrap(); let ayb_conf_for_server = ayb_conf.clone(); - - let pool = 
PgPoolOptions::new() - .max_connections(20) - .connect(&ayb_conf.database_url) - .await - .expect("Unable to connect to database"); - - migrate!() - .run(&pool) - .await - .expect("Unable to run migrations"); + fs::create_dir_all(&ayb_conf.data_path).expect("Unable to create data directory"); + let ayb_db = connect_to_ayb_db(ayb_conf.database_url).await.unwrap(); println!("Starting server {}:{}...", ayb_conf.host, ayb_conf.port); HttpServer::new(move || { App::new() .wrap(middleware::Compress::default()) .configure(config) - .app_data(web::Data::new(pool.clone())) + .app_data(web::Data::new(clone_box(&*ayb_db))) .app_data(web::Data::new(ayb_conf_for_server.clone())) }) .bind((ayb_conf.host, ayb_conf.port))? diff --git a/tests/e2e.rs b/tests/e2e.rs index c17cf9ce..d489fbb6 100644 --- a/tests/e2e.rs +++ b/tests/e2e.rs @@ -4,12 +4,17 @@ use std::process::Command; use std::thread; use std::time; -fn client_query(query: &str, format: &str, result: &str) -> Result<(), Box> { +fn client_query( + server_url: &str, + query: &str, + format: &str, + result: &str, +) -> Result<(), Box> { Command::cargo_bin("ayb")? .args([ "client", "--url", - "http://127.0.0.1:5433", + server_url, "query", "e2e/test.sqlite", "--format", @@ -23,19 +28,37 @@ fn client_query(query: &str, format: &str, result: &str) -> Result<(), Box Result<(), Box> { - Command::new("tests/reset_db.sh").assert().success(); +fn client_server_integration_postgres() -> Result<(), Box> { + return client_server_integration("postgres", "http://127.0.0.1:5433"); +} + +#[test] +fn client_server_integration_sqlite() -> Result<(), Box> { + return client_server_integration("sqlite", "http://127.0.0.1:5434"); +} + +fn client_server_integration( + db_type: &str, + server_url: &str, +) -> Result<(), Box> { + Command::new(format!("tests/reset_db_{}.sh", db_type)) + .assert() + .success(); // Run server, give it a few seconds to start let mut server = Command::cargo_bin("ayb")? 
- .args(["server", "--config", "tests/test-server-config.toml"]) + .args([ + "server", + "--config", + &*format!("tests/test-server-config-{}.toml", db_type), + ]) .spawn()?; thread::sleep(time::Duration::from_secs(1)); // Register an entity. Command::cargo_bin("ayb")? .args(["client", "register", "e2e"]) - .env("AYB_SERVER_URL", "http://127.0.0.1:5433") + .env("AYB_SERVER_URL", server_url) .assert() .success() .stdout("Successfully registered e2e\n"); @@ -43,7 +66,7 @@ fn client_server_integration() -> Result<(), Box> { // Can't register an entity twice. Command::cargo_bin("ayb")? .args(["client", "register", "e2e"]) - .env("AYB_SERVER_URL", "http://127.0.0.1:5433") + .env("AYB_SERVER_URL", server_url) .assert() .success() .stdout("Error: Entity already exists\n"); @@ -53,7 +76,7 @@ fn client_server_integration() -> Result<(), Box> { .args([ "client", "--url", - "http://127.0.0.1:5433", + server_url, "create_database", "e2e/test.sqlite", "sqlite", @@ -67,7 +90,7 @@ fn client_server_integration() -> Result<(), Box> { .args([ "client", "--url", - "http://127.0.0.1:5433", + server_url, "create_database", "e2e/test.sqlite", "sqlite", @@ -78,24 +101,30 @@ fn client_server_integration() -> Result<(), Box> { // Populate and query database. 
client_query( + server_url, "CREATE TABLE test_table(fname varchar, lname varchar);", "table", "\nRows: 0", )?; client_query( + server_url, "INSERT INTO test_table (fname, lname) VALUES (\"the first\", \"the last\");", "table", "\nRows: 0", )?; client_query( + server_url, "INSERT INTO test_table (fname, lname) VALUES (\"the first2\", \"the last2\");", "table", "\nRows: 0", )?; - client_query("SELECT * FROM test_table;", + client_query( + server_url, + "SELECT * FROM test_table;", "table", " fname | lname \n------------+-----------\n the first | the last \n the first2 | the last2 \n\nRows: 2")?; client_query( + server_url, "SELECT * FROM test_table;", "csv", "fname,lname\nthe first,the last\nthe first2,the last2\n\nRows: 2", diff --git a/tests/reset_db.sh b/tests/reset_db_postgres.sh similarity index 78% rename from tests/reset_db.sh rename to tests/reset_db_postgres.sh index a4d783a6..1d558261 100755 --- a/tests/reset_db.sh +++ b/tests/reset_db_postgres.sh @@ -3,6 +3,6 @@ export PGHOST=localhost export PGUSER=postgres_user export PGPASSWORD=test -rm -rf ./tests/ayb_data +rm -rf ./tests/ayb_data_postgres dropdb test_db createdb test_db diff --git a/tests/reset_db_sqlite.sh b/tests/reset_db_sqlite.sh new file mode 100755 index 00000000..248810f9 --- /dev/null +++ b/tests/reset_db_sqlite.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +rm -rf ./tests/ayb_data_sqlite + diff --git a/tests/test-server-config.toml b/tests/test-server-config-postgres.toml similarity index 71% rename from tests/test-server-config.toml rename to tests/test-server-config-postgres.toml index 5924fccc..39ad84f0 100644 --- a/tests/test-server-config.toml +++ b/tests/test-server-config-postgres.toml @@ -1,4 +1,4 @@ host = "0.0.0.0" port = 5433 database_url = "postgresql://postgres_user:test@localhost:5432/test_db" -data_path = "./tests/ayb_data" +data_path = "./tests/ayb_data_postgres" diff --git a/tests/test-server-config-sqlite.toml b/tests/test-server-config-sqlite.toml new file mode 100644 index 
00000000..baffab96 --- /dev/null +++ b/tests/test-server-config-sqlite.toml @@ -0,0 +1,4 @@ +host = "0.0.0.0" +port = 5434 +database_url = "sqlite://tests/ayb_data_sqlite/ayb.sqlite" +data_path = "./tests/ayb_data_sqlite"