diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 2b2a8d99..01df0d78 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -37,5 +37,9 @@ jobs:
         run: cargo fmt --check
       - name: Ensure clippy finds no issues
         run: cargo clippy
+      - name: Install nsjail requirements
+        run: sudo apt-get install -y libprotobuf-dev protobuf-compiler libnl-route-3-dev
+      - name: Build nsjail
+        run: scripts/build_nsjail.sh && mv nsjail tests/
       - name: Run tests
         run: cargo test --verbose
diff --git a/.gitignore b/.gitignore
index 512407a2..5f869f57 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,3 +21,4 @@ tests/ayb_data_postgres
 tests/ayb_data_sqlite
 tests/smtp_data_10025
 tests/smtp_data_10026
+tests/nsjail
diff --git a/Cargo.toml b/Cargo.toml
index 66711b9f..964afb17 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -6,6 +6,7 @@ description = "ayb makes it easy to create, host, and share embedded databases l
 homepage = "https://github.com/marcua/ayb"
 documentation = "https://github.com/marcua/ayb#readme"
 license = "Apache-2.0"
+default-run = "ayb"
 
 [dependencies]
 actix-web = { version = "4.4.0" }
@@ -19,14 +20,14 @@ fernet = { version = "0.2.1" }
 lettre = { version = "0.10.4", features = ["tokio1-native-tls"] }
 quoted_printable = { version = "0.5.0" }
 reqwest = { version = "0.11.22", features = ["json"] }
-rusqlite = { version = "0.27.0", features = ["bundled"] }
+rusqlite = { version = "0.27.0", features = ["bundled", "limits"] }
 regex = { version = "1.10.2"}
 serde = { version = "1.0", features = ["derive"] }
 serde_json = { version = "1.0.108" }
 serde_repr = { version = "0.1.17" }
 sqlx = { version = "0.6.3", features = ["runtime-actix-native-tls", "postgres", "sqlite"] }
 toml = { version = "0.8.8" }
-tokio = { version = "1.35.1", features = ["macros", "rt"] }
+tokio = { version = "1.35.1", features = ["macros", "process", "rt"] }
 prefixed-api-key = { version = "0.1.0", features = ["sha2"]}
 prettytable-rs = { version = "0.10.0"}
 urlencoding = { version = "2.1.3" }
@@ -36,3 +37,11 @@ url = { version = "2.5.0", features = ["serde"] }
 [dev-dependencies]
 assert_cmd = "2.0"
 assert-json-diff = "2.0.2"
+
+[[bin]]
+name = "ayb"
+path = "src/bin/ayb.rs"
+
+[[bin]]
+name = "ayb_isolated_runner"
+path = "src/bin/ayb_isolated_runner.rs"
diff --git a/README.md b/README.md
index a92885e6..3d3b0897 100644
--- a/README.md
+++ b/README.md
@@ -146,6 +146,58 @@ $ curl -w "\n" -X POST http://127.0.0.1:5433/v1/marcua/test.sqlite/query -H "aut
 {"fields":["name","score"],"rows":[["PostgreSQL","10"],["SQLite","9"],["DuckDB","9"]]}
 ```
 
+### Isolation
+`ayb` allows multiple users to run queries against databases that are
+stored on the same machine. Isolation enables you to prevent one user
+from accessing another user's data, and allows you to restrict the
+resources any one user is able to utilize.
+
+By default, `ayb` uses the
+[SQLITE_DBCONFIG_DEFENSIVE](https://www.sqlite.org/c3ref/c_dbconfig_defensive.html)
+flag and sets
+[SQLITE_LIMIT_ATTACHED](https://www.sqlite.org/c3ref/c_limit_attached.html#sqlitelimitattached)
+to `0` in order to prevent users from corrupting the database or
+attaching to other databases on the filesystem.
+
+For further isolation, `ayb` uses [nsjail](https://nsjail.dev/) to
+isolate each query's filesystem access and resources. When this form
+of isolation is enabled, `ayb` starts a new `nsjail`-managed process
+to execute the query against the database. We have not yet benchmarked
+the performance overhead of this approach.
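As an illustration of the SQLite-level protections above, an `ATTACH` attempt fails once `SQLITE_LIMIT_ATTACHED` is `0`. This sketch reuses the entity, database, and query endpoint from the earlier README examples; the authorization header value and the exact error payload are placeholders, not output captured from `ayb`.

```bash
$ curl -w "\n" -X POST http://127.0.0.1:5433/v1/marcua/test.sqlite/query \
    -H "authorization: <your-api-token>" \
    -d 'ATTACH DATABASE "/tmp/other.sqlite" AS other'
# Returns an error response rather than query results: with
# SQLITE_LIMIT_ATTACHED set to 0, SQLite refuses to attach databases.
```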
+
+To enable isolation, you must first build `nsjail`, which you can do
+through [scripts/build_nsjail.sh](scripts/build_nsjail.sh). Note that
+`nsjail` depends on a few other packages. If you run into issues
+building it, it might be helpful to see its
+[Dockerfile](https://github.com/google/nsjail/blob/master/Dockerfile)
+to get a sense of those requirements.
+
+Once you have a path to the
+`nsjail` binary, add the following to your `ayb.toml`:
+
+```toml
+[isolation]
+nsjail_path = "path/to/nsjail"
+```
+
+## Testing
+`ayb` is largely tested through [end-to-end
+tests](tests/e2e.rs) that mimic as realistic an environment as
+possible. Individual modules may also provide more specific unit
+tests. To run the tests, type:
+
+```bash
+cargo test --verbose
+```
+
+Because the tests cover [isolation](#isolation), an `nsjail` binary is
+required for running the end-to-end tests. To build and place `nsjail`
+in the appropriate directory, run:
+
+```bash
+scripts/build_nsjail.sh && mv nsjail tests/
+```
+
 ## FAQ
 
 ### Who is `ayb` for?
diff --git a/scripts/build_nsjail.sh b/scripts/build_nsjail.sh
new file mode 100755
index 00000000..ae7aec21
--- /dev/null
+++ b/scripts/build_nsjail.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+
+git clone https://github.com/google/nsjail.git nsjail-checkout
+cd nsjail-checkout
+make
+mv nsjail ..
+cd ..
+rm -rf nsjail-checkout
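The script above builds whatever is currently on nsjail's default branch and does not stop on partial failures. A slightly more defensive variant is sketched below; it is not part of the repository, and the pinned tag (`3.1`) is only an example of an nsjail release you might choose.

```bash
#!/usr/bin/env bash
# Sketch: stop on the first error and build a pinned nsjail release.
set -euo pipefail

git clone --depth 1 --branch 3.1 https://github.com/google/nsjail.git nsjail-checkout
cd nsjail-checkout
make
mv nsjail ..
cd ..
rm -rf nsjail-checkout
```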
diff --git a/src/main.rs b/src/bin/ayb.rs
similarity index 100%
rename from src/main.rs
rename to src/bin/ayb.rs
diff --git a/src/bin/ayb_isolated_runner.rs b/src/bin/ayb_isolated_runner.rs
new file mode 100644
index 00000000..22c3026f
--- /dev/null
+++ b/src/bin/ayb_isolated_runner.rs
@@ -0,0 +1,23 @@
+use ayb::hosted_db::sqlite::query_sqlite;
+use std::env;
+use std::path::PathBuf;
+
+/// This binary runs a query against a database and returns the
+/// result in QueryResult format. To run it, you would type:
+/// $ ayb_isolated_runner database.sqlite SELECT xyz FROM ...
+///
+/// This command is meant to be run inside a sandbox that isolates
+/// parallel invocations of the command from accessing each
+/// others' data, memory, and resources. That sandbox can be found
+/// in src/hosted_db/sandbox.rs.
+fn main() -> Result<(), serde_json::Error> {
+    let args: Vec<String> = env::args().collect();
+    let db_file = &args[1];
+    let query = (args[2..]).to_vec();
+    let result = query_sqlite(&PathBuf::from(db_file), &query.join(" "));
+    match result {
+        Ok(result) => println!("{}", serde_json::to_string(&result)?),
+        Err(error) => eprintln!("{}", serde_json::to_string(&error)?),
+    }
+    Ok(())
+}
diff --git a/src/hosted_db.rs b/src/hosted_db.rs
index 7fe6c282..7268374d 100644
--- a/src/hosted_db.rs
+++ b/src/hosted_db.rs
@@ -1,9 +1,11 @@
 pub mod paths;
-mod sqlite;
+mod sandbox;
+pub mod sqlite;
 
 use crate::ayb_db::models::DBType;
 use crate::error::AybError;
-use crate::hosted_db::sqlite::run_sqlite_query;
+use crate::hosted_db::sqlite::potentially_isolated_sqlite_query;
+use crate::http::structs::AybConfigIsolation;
 use prettytable::{format, Cell, Row, Table};
 use serde::{Deserialize, Serialize};
 use std::path::PathBuf;
@@ -53,9 +55,14 @@ impl QueryResult {
     }
 }
 
-pub fn run_query(path: &PathBuf, query: &str, db_type: &DBType) -> Result<QueryResult, AybError> {
+pub async fn run_query(
+    path: &PathBuf,
+    query: &str,
+    db_type: &DBType,
+    isolation: &Option<AybConfigIsolation>,
+) -> Result<QueryResult, AybError> {
     match db_type {
-        DBType::Sqlite => Ok(run_sqlite_query(path, query)?),
+        DBType::Sqlite => Ok(potentially_isolated_sqlite_query(path, query, isolation).await?),
         _ => Err(AybError::Other {
             message: "Unsupported DB type".to_string(),
         }),
diff --git a/src/hosted_db/paths.rs b/src/hosted_db/paths.rs
index 8dc71b75..36f7f1e2 100644
--- a/src/hosted_db/paths.rs
+++ b/src/hosted_db/paths.rs
@@ -6,13 +6,22 @@ pub fn database_path(
     entity_slug: &str,
     database_slug: &str,
     data_path: &str,
+    create_database: bool,
 ) -> Result<PathBuf, AybError> {
     let mut path: PathBuf = [data_path, entity_slug].iter().collect();
-    if let Err(e) = fs::create_dir_all(&path) {
-        return Err(AybError::Other {
-            message: format!("Unable to create entity path for {}: {}", entity_slug, e),
-        });
+    if create_database {
+        if let Err(e) = fs::create_dir_all(&path) {
+            return Err(AybError::Other {
+                message: format!("Unable to create entity path for {}: {}", entity_slug, e),
+            });
+        }
     }
+
     path.push(database_slug);
+
+    if create_database && !path.exists() {
+        fs::File::create(path.clone())?;
+    }
+
     Ok(path)
 }
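To make the effect of `database_path` and its new `create_database` flag concrete: with a `data_path` of `./ayb_data`, creating a database `test.sqlite` for the entity `marcua` produces a layout like the one below. The concrete paths are illustrative; only the `{data_path}/{entity}/{database}` structure comes from the code above.

```bash
# Illustrative layout after database_path("marcua", "test.sqlite", "./ayb_data", true):
$ find ./ayb_data
./ayb_data
./ayb_data/marcua
./ayb_data/marcua/test.sqlite
```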
diff --git a/src/hosted_db/sandbox.rs b/src/hosted_db/sandbox.rs
new file mode 100644
index 00000000..18cc49bb
--- /dev/null
+++ b/src/hosted_db/sandbox.rs
@@ -0,0 +1,146 @@
+/* Retrieved and modified from
+   https://raw.githubusercontent.com/Defelo/sandkasten/83f629175d02ebc70fbb16b8b9e05663ea67ccc7/src/sandbox.rs
+   On December 6, 2023.
+   Original license:
+
+   MIT License
+
+   Copyright (c) 2023 Defelo
+
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+   The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
+
+use crate::error::AybError;
+use serde::{Deserialize, Serialize};
+use std::env::current_exe;
+use std::fs::canonicalize;
+use std::{
+    path::{Path, PathBuf},
+    process::Stdio,
+};
+use tokio::io::{AsyncReadExt, BufReader};
+
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+pub struct RunResult {
+    /// The exit code of the process.
+    pub status: i32,
+    /// The stdout output the process produced.
+    pub stdout: String,
+    /// The stderr output the process produced.
+    pub stderr: String,
+}
+
+pub async fn run_in_sandbox(
+    nsjail: &Path,
+    db_path: &PathBuf,
+    query: &str,
+) -> Result<RunResult, AybError> {
+    let mut cmd = tokio::process::Command::new(nsjail);
+
+    cmd.arg("--really_quiet") // log fatal messages only
+        .arg("--iface_no_lo")
+        .args(["--mode", "o"]) // run once
+        .args(["--hostname", "ayb"])
+        .args(["--bindmount_ro", "/lib:/lib"])
+        .args(["--bindmount_ro", "/lib64:/lib64"])
+        .args(["--bindmount_ro", "/usr:/usr"]);
+
+    // Set resource limits for the process. In the future, we will
+    // allow entities to control the resources they dedicate to
+    // different databases/queries.
+    cmd.args(["--mount", "none:/tmp:tmpfs:size=100000000"]) // ~95 MB tmpfs
+        .args(["--max_cpus", "1"]) // One CPU
+        .args(["--rlimit_as", "64"]) // 64 MB memory limit
+        .args(["--time_limit", "10"]) // 10 second maximum run
+        .args(["--rlimit_fsize", "75"]) // 75 MB file size limit
+        .args(["--rlimit_nofile", "10"]) // 10 files maximum
+        .args(["--rlimit_nproc", "2"]); // 2 processes maximum
+
+    // Generate a /local/path/to/file:/tmp/file mapping.
+    let absolute_db_path = canonicalize(db_path)?;
+    let db_file_name = absolute_db_path
+        .file_name()
+        .ok_or(AybError::Other {
+            message: format!(
+                "Could not parse file name from path: {}",
+                absolute_db_path.display()
+            ),
+        })?
+        .to_str()
+        .ok_or(AybError::Other {
+            message: format!(
+                "Could not convert path to string: {}",
+                absolute_db_path.display()
+            ),
+        })?;
+    let tmp_db_path = Path::new("/tmp").join(db_file_name);
+    let db_file_mapping = format!("{}:{}", absolute_db_path.display(), tmp_db_path.display());
+    cmd.args(["--bindmount", &db_file_mapping]);
+
+    // Generate a /local/path/to/ayb_isolated_runner:/tmp/ayb_isolated_runner mapping.
+    // We assume `ayb` and `ayb_isolated_runner` will always be in the same directory,
+    // so we see what the path to the current `ayb` executable is to build the path.
+    let ayb_path = current_exe()?;
+    let isolated_runner_path = ayb_path
+        .parent()
+        .ok_or(AybError::Other {
+            message: format!(
+                "Unable to find parent directory of ayb from {}",
+                ayb_path.display()
+            ),
+        })?
+        .join("ayb_isolated_runner");
+    cmd.args([
+        "--bindmount_ro",
+        &format!(
+            "{}:/tmp/ayb_isolated_runner",
+            isolated_runner_path.display()
+        ),
+    ]);
+
+    let mut child = cmd
+        .arg("--")
+        .arg("/tmp/ayb_isolated_runner")
+        .arg(tmp_db_path)
+        .arg(query)
+        .stdout(Stdio::piped())
+        .stderr(Stdio::piped())
+        .spawn()?;
+
+    let mut stdout_reader = BufReader::new(child.stdout.take().unwrap());
+    let mut stderr_reader = BufReader::new(child.stderr.take().unwrap());
+
+    let output = child.wait_with_output().await?;
+
+    // read stdout and stderr from process
+    let mut stdout = Vec::new();
+    let mut stderr = Vec::new();
+    stdout_reader.read_to_end(&mut stdout).await?;
+    stderr_reader.read_to_end(&mut stderr).await?;
+    let stdout = String::from_utf8_lossy(&stdout).into_owned();
+    let stderr = String::from_utf8_lossy(&stderr).into_owned();
+
+    Ok(RunResult {
+        status: output.status.code().ok_or(AybError::Other {
+            message: "Process exited with signal".to_string(),
+        })?,
+        stdout,
+        stderr,
+    })
+}
diff --git a/src/hosted_db/sqlite.rs b/src/hosted_db/sqlite.rs
index 402aaad3..4b438ca3 100644
--- a/src/hosted_db/sqlite.rs
+++ b/src/hosted_db/sqlite.rs
@@ -1,11 +1,22 @@
 use crate::error::AybError;
-use crate::hosted_db::QueryResult;
-use rusqlite;
+use crate::hosted_db::{sandbox::run_in_sandbox, QueryResult};
+use crate::http::structs::AybConfigIsolation;
+use rusqlite::config::DbConfig;
+use rusqlite::limits::Limit;
 use rusqlite::types::ValueRef;
-use std::path::PathBuf;
+use serde_json;
+use std::path::{Path, PathBuf};
 
-pub fn run_sqlite_query(path: &PathBuf, query: &str) -> Result<QueryResult, AybError> {
+pub fn query_sqlite(path: &PathBuf, query: &str) -> Result<QueryResult, AybError> {
     let conn = rusqlite::Connection::open(path)?;
+
+    // Disable the usage of ATTACH
+    // https://www.sqlite.org/lang_attach.html
+    conn.set_limit(Limit::SQLITE_LIMIT_ATTACHED, 0);
+    // Prevent queries from deliberately corrupting the database
+    // https://www.sqlite.org/c3ref/c_dbconfig_defensive.html
+    conn.db_config(DbConfig::SQLITE_DBCONFIG_DEFENSIVE)?;
+
     let mut prepared = conn.prepare(query)?;
     let num_columns = prepared.column_count();
     let mut fields: Vec<String> = Vec::new();
@@ -36,3 +47,37 @@ pub fn run_sqlite_query(path: &PathBuf, query: &str) -> Result<QueryResult, AybError> {
+
+pub async fn potentially_isolated_sqlite_query(
+    path: &PathBuf,
+    query: &str,
+    isolation: &Option<AybConfigIsolation>,
+) -> Result<QueryResult, AybError> {
+    if let Some(isolation) = isolation {
+        let result = run_in_sandbox(Path::new(&isolation.nsjail_path), path, query).await?;
+
+        if !result.stderr.is_empty() {
+            let error: AybError = serde_json::from_str(&result.stderr)?;
+            return Err(error);
+        } else if result.status != 0 {
+            return Err(AybError::Other {
+                message: format!(
+                    "Error status from sandboxed query runner: {}",
+                    result.status
+                ),
+            });
+        } else if !result.stdout.is_empty() {
+            let query_result: QueryResult = serde_json::from_str(&result.stdout)?;
+            return Ok(query_result);
+        } else {
+            return Err(AybError::Other {
+                message: "No results from sandboxed query runner".to_string(),
+            });
+        }
+    }
+
+    // No isolation configuration, so run the query without a sandbox.
+    Ok(query_sqlite(path, query)?)
+}
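For orientation, the `nsjail` invocation that `run_in_sandbox` assembles for `potentially_isolated_sqlite_query` amounts to roughly the command below. The flags mirror the ones set in `src/hosted_db/sandbox.rs`; the local paths and the query are placeholders.

```bash
tests/nsjail --really_quiet --iface_no_lo --mode o --hostname ayb \
  --bindmount_ro /lib:/lib --bindmount_ro /lib64:/lib64 --bindmount_ro /usr:/usr \
  --mount none:/tmp:tmpfs:size=100000000 \
  --max_cpus 1 --rlimit_as 64 --time_limit 10 \
  --rlimit_fsize 75 --rlimit_nofile 10 --rlimit_nproc 2 \
  --bindmount /path/to/ayb_data/marcua/test.sqlite:/tmp/test.sqlite \
  --bindmount_ro /path/to/target/debug/ayb_isolated_runner:/tmp/ayb_isolated_runner \
  -- /tmp/ayb_isolated_runner /tmp/test.sqlite 'SELECT 1'
```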
diff --git a/src/http/config.rs b/src/http/config.rs
index aaa24f01..a8f6bc74 100644
--- a/src/http/config.rs
+++ b/src/http/config.rs
@@ -33,6 +33,7 @@ pub fn default_server_config() -> AybConfig {
             origin: "*".to_string(),
         },
         web: None,
+        isolation: None,
     }
 }
diff --git a/src/http/endpoints/create_database.rs b/src/http/endpoints/create_database.rs
index 29934732..f0d03d33 100644
--- a/src/http/endpoints/create_database.rs
+++ b/src/http/endpoints/create_database.rs
@@ -4,9 +4,9 @@ use std::str::FromStr;
 
 use crate::error::AybError;
+use crate::hosted_db::paths::database_path;
 use crate::http::permissions::can_create_database;
-use crate::http::structs::{Database as APIDatabase, EntityDatabasePath};
-
+use crate::http::structs::{AybConfig, Database as APIDatabase, EntityDatabasePath};
 use crate::http::utils::{get_header, unwrap_authenticated_entity};
 use actix_web::{post, web, HttpRequest, HttpResponse};
 
@@ -15,6 +15,7 @@ async fn create_database(
     path: web::Path<EntityDatabasePath>,
     req: HttpRequest,
     ayb_db: web::Data<Box<dyn AybDb>>,
+    ayb_config: web::Data<AybConfig>,
     authenticated_entity: Option<ReqData<InstantiatedEntity>>,
 ) -> Result<HttpResponse, AybError> {
     let entity_slug = &path.entity;
@@ -28,6 +29,8 @@ async fn create_database(
     let authenticated_entity = unwrap_authenticated_entity(&authenticated_entity)?;
     if can_create_database(&authenticated_entity, &entity) {
         let created_database = ayb_db.create_database(&database).await?;
+        // Create the database file at the appropriate path
+        let _ = database_path(entity_slug, &path.database, &ayb_config.data_path, true)?;
         Ok(HttpResponse::Created().json(APIDatabase::from_persisted(&entity, &created_database)))
     } else {
         Err(AybError::Other {
diff --git a/src/http/endpoints/query.rs b/src/http/endpoints/query.rs
index 5248db1c..3df55f3e 100644
--- a/src/http/endpoints/query.rs
+++ b/src/http/endpoints/query.rs
@@ -25,8 +25,8 @@ async fn query(
     if can_query(&authenticated_entity, &database) {
         let db_type = DBType::try_from(database.db_type)?;
-        let db_path = database_path(entity_slug, database_slug, &ayb_config.data_path)?;
-        let result = run_query(&db_path, &query, &db_type)?;
+        let db_path = database_path(entity_slug, database_slug, &ayb_config.data_path, false)?;
+        let result = run_query(&db_path, &query, &db_type, &ayb_config.isolation).await?;
         Ok(web::Json(result))
     } else {
         Err(AybError::Other {
diff --git a/src/http/server.rs b/src/http/server.rs
index 02d67872..e0a71965 100644
--- a/src/http/server.rs
+++ b/src/http/server.rs
@@ -92,6 +92,9 @@ pub async fn run_server(config_path: &PathBuf) -> std::io::Result<()> {
     };
 
     println!("Starting server {}:{}...", ayb_conf.host, ayb_conf.port);
+    if ayb_conf.isolation.is_none() {
+        println!("Note: Server is running without full isolation. Read more about isolating users from one-another: https://github.com/marcua/ayb/#isolation");
+    }
 
     HttpServer::new(move || {
         let cors = build_cors(ayb_conf.cors.clone());
diff --git a/src/http/structs.rs b/src/http/structs.rs
index 688e7186..af20126f 100644
--- a/src/http/structs.rs
+++ b/src/http/structs.rs
@@ -32,6 +32,11 @@ pub struct AybConfigEmail {
     pub smtp_password: String,
 }
 
+#[derive(Clone, Serialize, Deserialize)]
+pub struct AybConfigIsolation {
+    pub nsjail_path: String,
+}
+
 #[derive(Clone, Serialize, Deserialize)]
 pub struct AybConfig {
     pub host: String,
@@ -43,6 +48,7 @@ pub struct AybConfig {
     pub email: AybConfigEmail,
     pub web: Option<AybConfigWeb>,
     pub cors: AybConfigCors,
+    pub isolation: Option<AybConfigIsolation>,
 }
 
 impl AybConfig {
diff --git a/tests/test-server-config-postgres.toml b/tests/test-server-config-postgres.toml
index c76db3f3..664c7200 100644
--- a/tests/test-server-config-postgres.toml
+++ b/tests/test-server-config-postgres.toml
@@ -18,3 +18,6 @@ token_expiration_seconds = 3600
 
 [cors]
 origin = "*"
+
+[isolation]
+nsjail_path = "tests/nsjail"
diff --git a/tests/test-server-config-sqlite.toml b/tests/test-server-config-sqlite.toml
index 45f02296..033324cd 100644
--- a/tests/test-server-config-sqlite.toml
+++ b/tests/test-server-config-sqlite.toml
@@ -18,3 +18,6 @@ token_expiration_seconds = 3600
 
 [cors]
 origin = "*"
+
+[isolation]
+nsjail_path = "tests/nsjail"
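A possible way to exercise the pieces in this change locally, based on the README and the test configuration above (the binary and database paths in the last command are illustrative):

```bash
# Build nsjail and place it where tests/test-server-config-*.toml expect it.
scripts/build_nsjail.sh && mv nsjail tests/

# Run the end-to-end tests, which cover the isolation path.
cargo test --verbose

# For manual debugging, the isolated runner can be invoked directly; it prints
# a JSON-serialized QueryResult on stdout or a serialized AybError on stderr.
cargo build
./target/debug/ayb_isolated_runner path/to/database.sqlite SELECT 1
```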