Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Isolate databases/queries from one-another #235

Merged
merged 29 commits into from
Dec 28, 2023
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
5eb2038
A version that won't build
marcua Nov 21, 2023
7b1efe3
Uncomment
marcua Nov 21, 2023
591f7cd
Newline
marcua Nov 21, 2023
f1e9f40
Newline
marcua Nov 21, 2023
1231f24
Compiles
marcua Nov 22, 2023
5225ec8
No need for only temp
marcua Nov 22, 2023
4b86391
Compiles AND runs
marcua Nov 25, 2023
c2382a1
Move toward nsjail
marcua Dec 5, 2023
e38f428
Make room for nsjail, but still as a noop
marcua Dec 9, 2023
7d236ca
Bring in #234
marcua Dec 9, 2023
a273155
Works end-to-end (need to implement 'touch' for new DBs)
marcua Dec 22, 2023
b03b696
Create DB file in create_database
marcua Dec 22, 2023
24d839d
Move isolated runner into original crate as second binary, dynamicall…
marcua Dec 23, 2023
8eae1f3
Remove hosted_db_runner
marcua Dec 23, 2023
ef0aeba
Move nsjail builder to scripts dir
marcua Dec 23, 2023
647929f
Resolve conflicts
marcua Dec 23, 2023
9159053
fmt
marcua Dec 23, 2023
0ca9cbe
tokio typo
marcua Dec 23, 2023
665b6d0
New AybError variants
marcua Dec 23, 2023
c52aad1
Code review part 1
marcua Dec 25, 2023
a222227
Update docs, remove binary, add nsjail build step
marcua Dec 25, 2023
5ac62f1
Testing docs and fmt
marcua Dec 25, 2023
6822d7d
Fix build command
marcua Dec 25, 2023
db13cb8
nsjail requirements
marcua Dec 25, 2023
59f4974
More nsjail requirements
marcua Dec 25, 2023
ce97699
Docs cleanup
marcua Dec 25, 2023
a37f9ab
Clippy and code review
marcua Dec 25, 2023
3d07ada
Warn if not fully isolated
marcua Dec 26, 2023
ea30777
Clean up for clarity
marcua Dec 27, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ description = "ayb makes it easy to create, host, and share embedded databases l
homepage = "https://github.com/marcua/ayb"
marcua marked this conversation as resolved.
Show resolved Hide resolved
documentation = "https://github.com/marcua/ayb#readme"
marcua marked this conversation as resolved.
Show resolved Hide resolved
license = "Apache-2.0"
marcua marked this conversation as resolved.
Show resolved Hide resolved
default-run = "ayb"

[dependencies]
actix-web = { version = "4.4.0" }
Expand All @@ -19,14 +20,14 @@ fernet = { version = "0.2.1" }
lettre = { version = "0.10.4", features = ["tokio1-native-tls"] }
quoted_printable = { version = "0.5.0" }
reqwest = { version = "0.11.22", features = ["json"] }
rusqlite = { version = "0.27.0", features = ["bundled"] }
rusqlite = { version = "0.27.0", features = ["bundled", "limits"] }
regex = { version = "1.10.2"}
serde = { version = "1.0", features = ["derive"] }
serde_json = { version = "1.0.108" }
serde_repr = { version = "0.1.17" }
sqlx = { version = "0.6.3", features = ["runtime-actix-native-tls", "postgres", "sqlite"] }
toml = { version = "0.8.8" }
tokio = { version = "1.35.1", features = ["macros", "rt"] }
tokio = { version = "1.35.1", features = ["macros", "process", "rt"] }
prefixed-api-key = { version = "0.1.0", features = ["sha2"]}
prettytable-rs = { version = "0.10.0"}
urlencoding = { version = "2.1.3" }
Expand All @@ -36,3 +37,11 @@ url = { version = "2.5.0", features = ["serde"] }
[dev-dependencies]
assert_cmd = "2.0"
assert-json-diff = "2.0.2"

[[bin]]
marcua marked this conversation as resolved.
Show resolved Hide resolved
name = "ayb"
path = "src/bin/ayb.rs"

[[bin]]
name = "ayb_isolated_runner"
path = "src/bin/ayb_isolated_runner.rs"
9 changes: 9 additions & 0 deletions scripts/build_nsjail.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/usr/bin/env bash

git clone https://github.com/google/nsjail.git nsjail-checkout
cd nsjail-checkout
make
mv nsjail ..
cd ..
rm -rf nsjail-checkout

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

File renamed without changes.
16 changes: 16 additions & 0 deletions src/bin/ayb_isolated_runner.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
use ayb::hosted_db::sqlite::query_sqlite;
use serde_json;
use std::env;
use std::path::PathBuf;

fn main() -> Result<(), serde_json::Error> {
marcua marked this conversation as resolved.
Show resolved Hide resolved
let args: Vec<String> = env::args().collect();
let db_file = &args[1];
let query = (&args[2..]).to_vec();
let result = query_sqlite(&PathBuf::from(db_file), &query.join(" "));
match result {
Ok(result) => println!("{}", serde_json::to_string(&result)?),
Err(error) => eprintln!("{}", serde_json::to_string(&error)?),
}
Ok(())
}
13 changes: 10 additions & 3 deletions src/hosted_db.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
pub mod paths;
mod sqlite;
mod sandbox;
pub mod sqlite;

use crate::ayb_db::models::DBType;
use crate::error::AybError;
use crate::hosted_db::sqlite::run_sqlite_query;
use crate::http::structs::AybConfigIsolation;
use prettytable::{format, Cell, Row, Table};
use serde::{Deserialize, Serialize};
use std::path::PathBuf;
Expand Down Expand Up @@ -53,9 +55,14 @@ impl QueryResult {
}
}

pub fn run_query(path: &PathBuf, query: &str, db_type: &DBType) -> Result<QueryResult, AybError> {
pub async fn run_query(
path: &PathBuf,
query: &str,
db_type: &DBType,
isolation: &Option<AybConfigIsolation>,
) -> Result<QueryResult, AybError> {
match db_type {
DBType::Sqlite => Ok(run_sqlite_query(path, query)?),
DBType::Sqlite => Ok(run_sqlite_query(path, query, isolation).await?),
_ => Err(AybError::Other {
message: "Unsupported DB type".to_string(),
}),
Expand Down
17 changes: 13 additions & 4 deletions src/hosted_db/paths.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,22 @@ pub fn database_path(
entity_slug: &str,
database_slug: &str,
data_path: &str,
create_database: bool,
) -> Result<PathBuf, AybError> {
let mut path: PathBuf = [data_path, entity_slug].iter().collect();
if let Err(e) = fs::create_dir_all(&path) {
return Err(AybError::Other {
message: format!("Unable to create entity path for {}: {}", entity_slug, e),
});
if create_database {
if let Err(e) = fs::create_dir_all(&path) {
return Err(AybError::Other {
message: format!("Unable to create entity path for {}: {}", entity_slug, e),
});
}
}

path.push(database_slug);

if create_database && !path.exists() {
fs::File::create(path.clone())?;
}

Ok(path)
}
120 changes: 120 additions & 0 deletions src/hosted_db/sandbox.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
/* Retrieved and modified from
https://raw.githubusercontent.com/Defelo/sandkasten/83f629175d02ebc70fbb16b8b9e05663ea67ccc7/src/sandbox.rs
On December 6, 2023.
Original license: MIT.
marcua marked this conversation as resolved.
Show resolved Hide resolved
*/

use crate::error::AybError;
use serde::{Deserialize, Serialize};
use std::env::current_exe;
use std::fs::canonicalize;
use std::{
path::{Path, PathBuf},
process::Stdio,
};
use tokio::io::{AsyncReadExt, BufReader};

#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct RunResult {
/// The exit code of the processes.
pub status: i32,
/// The stdout output the process produced.
pub stdout: String,
/// The stderr output the process produced.
pub stderr: String,
}

pub async fn run_in_sandbox(
nsjail: &Path,
db_path: &PathBuf,
query: &str,
) -> Result<RunResult, AybError> {
let mut cmd = tokio::process::Command::new(nsjail);

cmd.arg("--really_quiet") // log fatal messages only
marcua marked this conversation as resolved.
Show resolved Hide resolved
.arg("--iface_no_lo")
.args(["--mode", "o"]) // run once
.args(["--hostname", "ayb"])
.args(["--bindmount_ro", "/lib:/lib"])
.args(["--bindmount_ro", "/lib64:/lib64"])
.args(["--bindmount_ro", "/usr:/usr"])
.args(["--mount", "none:/tmp:tmpfs:size=100000000"]) // TODO(marcua): Restrict disk size more configurably?
marcua marked this conversation as resolved.
Show resolved Hide resolved
// TODO(marcua): Set resource limits more configurably?
.args(["--max_cpus", "1"])
.args(["--rlimit_as", "64"]) // in MB
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the builder idea I left in the previous comment is not done, you could add some comments here to explain what is the meaning of each limit :)

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perfect! Done!

.args(["--time_limit", "10"]) // in seconds
.args(["--rlimit_fsize", "75"]) // in MB
.args(["--rlimit_nofile", "10"])
.args(["--rlimit_nproc", "2"]);

// Generate a /local/path/to/file:/tmp/file mapping.
let absolute_db_path = canonicalize(db_path)?;
let db_file_name = absolute_db_path
.file_name()
.ok_or(AybError::Other {
message: format!("Invalid DB path {}", absolute_db_path.display()),
})?
.to_str()
.ok_or(AybError::Other {
message: format!("Invalid DB path {}", absolute_db_path.display()),
marcua marked this conversation as resolved.
Show resolved Hide resolved
})?;
let tmp_db_path = Path::new("/tmp").join(db_file_name);
let db_file_mapping = format!("{}:{}", absolute_db_path.display(), tmp_db_path.display());
cmd.args(["--bindmount", &db_file_mapping]);

// Generate a /local/path/to/ayb_isolated_runner:/tmp/ayb_isolated_runner mapping.
marcua marked this conversation as resolved.
Show resolved Hide resolved
let ayb_path = current_exe()?;
let isolated_runner_path = ayb_path
.parent()
.ok_or(AybError::Other {
message: format!(
"Unable to find parent directory of ayb from {}",
ayb_path.display()
),
})?
.join("ayb_isolated_runner");
cmd.args([
"--bindmount_ro",
&format!(
"{}:/tmp/ayb_isolated_runner",
isolated_runner_path.display()
),
]);

let mut child = cmd
.arg("--")
.arg("/tmp/ayb_isolated_runner")
.arg(tmp_db_path)
.arg(query)
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.stdin(Stdio::piped())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is piping stdin required?

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good eye! :) I was able to remove it with no effect.

.spawn()?;

let stdout_reader = BufReader::new(child.stdout.take().unwrap());
let stderr_reader = BufReader::new(child.stderr.take().unwrap());

let output = child.wait_with_output().await?;

// read stdout and stderr from process
let mut stdout = Vec::new();
let mut stderr = Vec::new();
stdout_reader
marcua marked this conversation as resolved.
Show resolved Hide resolved
.take(1024 * 1024 * 1024)
.read_to_end(&mut stdout)
.await?;
stderr_reader
.take(1024 * 1024 * 1024)
.read_to_end(&mut stderr)
.await?;
let stdout = String::from_utf8_lossy(&stdout).into_owned();
let stderr = String::from_utf8_lossy(&stderr).into_owned();

Ok(RunResult {
status: output.status.code().ok_or(AybError::Other {
message: "Process exited with signal".to_string(),
})?,
stdout,
stderr,
})
}
51 changes: 47 additions & 4 deletions src/hosted_db/sqlite.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,22 @@
use crate::error::AybError;
use crate::hosted_db::QueryResult;
use rusqlite;
use crate::hosted_db::{sandbox::run_in_sandbox, QueryResult};
use crate::http::structs::AybConfigIsolation;
use rusqlite::config::DbConfig;
use rusqlite::limits::Limit;
use rusqlite::types::ValueRef;
use std::path::PathBuf;
use serde_json;
use std::path::{Path, PathBuf};

pub fn run_sqlite_query(path: &PathBuf, query: &str) -> Result<QueryResult, AybError> {
pub fn query_sqlite(path: &PathBuf, query: &str) -> Result<QueryResult, AybError> {
let conn = rusqlite::Connection::open(path)?;

// Disable the usage of ATTACH
// https://www.sqlite.org/lang_attach.html
conn.set_limit(Limit::SQLITE_LIMIT_ATTACHED, 0);
// Prevent queries from deliberately corrupting the database
// https://www.sqlite.org/c3ref/c_dbconfig_defensive.html
conn.db_config(DbConfig::SQLITE_DBCONFIG_DEFENSIVE)?;

let mut prepared = conn.prepare(query)?;
let num_columns = prepared.column_count();
let mut fields: Vec<String> = Vec::new();
Expand Down Expand Up @@ -36,3 +47,35 @@ pub fn run_sqlite_query(path: &PathBuf, query: &str) -> Result<QueryResult, AybE
rows: results,
})
}

pub async fn run_sqlite_query(
marcua marked this conversation as resolved.
Show resolved Hide resolved
path: &PathBuf,
query: &str,
isolation: &Option<AybConfigIsolation>,
) -> Result<QueryResult, AybError> {
match isolation {
marcua marked this conversation as resolved.
Show resolved Hide resolved
Some(isolation) => {
let result = run_in_sandbox(Path::new(&isolation.nsjail_path), path, query).await?;

if result.stderr.len() > 0 {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if result.stderr.len() > 0 {
if !result.stderr.is_empty() {

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice! Thank you!

(I noticed cargo clippy also caught this, and realized clippy isn't breaking the build, so I created #250)

let error: AybError = serde_json::from_str(&result.stderr)?;
Err(error)
} else if result.status != 0 {
Err(AybError::Other {
message: format!(
"Error status from sandboxed query runner: {}",
result.status.to_string()
),
})
} else if result.stdout.len() > 0 {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
} else if result.stdout.len() > 0 {
} else if !result.stdout.is_empty() {

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done! Thank you!

let query_result: QueryResult = serde_json::from_str(&result.stdout)?;
Ok(query_result)
} else {
Err(AybError::Other {
message: "No results from sandboxed query runner".to_string(),
})
}
}
None => Ok(query_sqlite(path, query)?.into()),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could remove a level of nesting by doing the following at the top

if isolation.is_none() {
   return Ok(query_sqlite(path, query)?.into());
}

Copy link
Owner Author

@marcua marcua Dec 25, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm conflicted on this one. We'd benefit from one lower level of nesting in a relatively short function, but in exchange

  • We would stop using a language feature like match, which I feel is being used appropriately
  • We'd have to introduce an unwrap, since the compiler won't be able to reason about the knowledge we'd gain with the if

Given we're using match in the way it was intended and forcing ourselves to enumerate all of the variants, I wonder if we'd gain much in exchange for reducing indentation in a short function.

Copy link
Contributor

@sofiaritz sofiaritz Dec 26, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

New idea! What about using if let Some(isolation) = isolation and returning the None case at the end of the function? Seems like a reasonable middleground :)

Note: Deleted a previous comment I made because this suggestion replaces it. The two key-points from that comment:

  • Enums like Option or Result are stable and thus won't add any new variants.
  • When ayb handles a request and panics the server keeps running, thus it's not as bad as it may look like :) (not saying that we should just panic :p)
pub async fn potentially_isolated_sqlite_query(
    path: &PathBuf,
    query: &str,
    isolation: &Option<AybConfigIsolation>,
) -> Result<QueryResult, AybError> {
    if let Some(isolation) = isolation {
        let result = run_in_sandbox(Path::new(&isolation.nsjail_path), path, query).await?;
        if !result.stderr.is_empty() {
            let error: AybError = serde_json::from_str(&result.stderr)?;
            return Err(error);
        }
        
        if result.status != 0 {
            return Err(AybError::Other {
                message: format!(
                    "Error status from sandboxed query runner: {}",
                    result.status
                ),
            });
        }
        
        if !result.stdout.is_empty() {
            let query_result: QueryResult = serde_json::from_str(&result.stdout)?;
            return Ok(query_result);
        }
        
        return Err(AybError::Other {
            message: "No results from sandboxed query runner".to_string(),
        });
    }
    
    Ok(query_sqlite(path, query)?)
}

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Clean! I like it! I appreciated both of your bullet points on the broader point. I'll implement this and we should be good to go! Woohoo! Thank you!!!

}
}
1 change: 1 addition & 0 deletions src/http/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ pub fn default_server_config() -> AybConfig {
origin: "*".to_string(),
},
web: None,
isolation: None,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should add a warning on start-up when isolation is disabled briefly explaining to the instance administrator the consequences of running ayb without any isolation

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Very nice! Done!

}
}

Expand Down
7 changes: 5 additions & 2 deletions src/http/endpoints/create_database.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ use std::str::FromStr;

use crate::error::AybError;

use crate::hosted_db::paths::database_path;
use crate::http::permissions::can_create_database;
use crate::http::structs::{Database as APIDatabase, EntityDatabasePath};

use crate::http::structs::{AybConfig, Database as APIDatabase, EntityDatabasePath};
use crate::http::utils::{get_header, unwrap_authenticated_entity};
use actix_web::{post, web, HttpRequest, HttpResponse};

Expand All @@ -15,6 +15,7 @@ async fn create_database(
path: web::Path<EntityDatabasePath>,
req: HttpRequest,
ayb_db: web::Data<Box<dyn AybDb>>,
ayb_config: web::Data<AybConfig>,
authenticated_entity: Option<web::ReqData<InstantiatedEntity>>,
) -> Result<HttpResponse, AybError> {
let entity_slug = &path.entity;
Expand All @@ -28,6 +29,8 @@ async fn create_database(
let authenticated_entity = unwrap_authenticated_entity(&authenticated_entity)?;
if can_create_database(&authenticated_entity, &entity) {
let created_database = ayb_db.create_database(&database).await?;
// Create the database file at the appropriate path
let _ = database_path(entity_slug, &path.database, &ayb_config.data_path, true)?;
Ok(HttpResponse::Created().json(APIDatabase::from_persisted(&entity, &created_database)))
} else {
Err(AybError::Other {
Expand Down
4 changes: 2 additions & 2 deletions src/http/endpoints/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ async fn query(

if can_query(&authenticated_entity, &database) {
let db_type = DBType::try_from(database.db_type)?;
let db_path = database_path(entity_slug, database_slug, &ayb_config.data_path)?;
let result = run_query(&db_path, &query, &db_type)?;
let db_path = database_path(entity_slug, database_slug, &ayb_config.data_path, false)?;
let result = run_query(&db_path, &query, &db_type, &ayb_config.isolation).await?;
Ok(web::Json(result))
} else {
Err(AybError::Other {
Expand Down
6 changes: 6 additions & 0 deletions src/http/structs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ pub struct AybConfigEmail {
pub smtp_password: String,
}

#[derive(Clone, Serialize, Deserialize)]
pub struct AybConfigIsolation {
pub nsjail_path: String,
}

#[derive(Clone, Serialize, Deserialize)]
pub struct AybConfig {
pub host: String,
Expand All @@ -43,6 +48,7 @@ pub struct AybConfig {
pub email: AybConfigEmail,
pub web: Option<AybConfigWeb>,
pub cors: AybConfigCors,
pub isolation: Option<AybConfigIsolation>,
}

impl AybConfig {
marcua marked this conversation as resolved.
Show resolved Hide resolved
Expand Down
Binary file added tests/nsjail
Binary file not shown.
3 changes: 3 additions & 0 deletions tests/test-server-config-postgres.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,6 @@ token_expiration_seconds = 3600

[cors]
origin = "*"

[isolation]
nsjail_path = "tests/nsjail"
3 changes: 3 additions & 0 deletions tests/test-server-config-sqlite.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,6 @@ token_expiration_seconds = 3600

[cors]
origin = "*"

[isolation]
nsjail_path = "tests/nsjail"
Loading