From f4bfb7e1b49e90635bd19dc2f3b3a9d3e4a66f48 Mon Sep 17 00:00:00 2001 From: Marko Malenic Date: Wed, 18 Dec 2024 14:08:04 +1100 Subject: [PATCH 1/5] fix: service info group, artifact and version, and add flexibility in configuration --- htsget-actix/src/handlers/service_info.rs | 2 +- htsget-axum/src/handlers/service_info.rs | 2 +- htsget-config/src/config/service_info.rs | 80 +++++++++++ htsget-http/src/service_info.rs | 163 ++++++++-------------- 4 files changed, 141 insertions(+), 106 deletions(-) create mode 100644 htsget-config/src/config/service_info.rs diff --git a/htsget-actix/src/handlers/service_info.rs b/htsget-actix/src/handlers/service_info.rs index 8df7c6dc0..f69e2a6b1 100644 --- a/htsget-actix/src/handlers/service_info.rs +++ b/htsget-actix/src/handlers/service_info.rs @@ -21,7 +21,7 @@ pub fn get_service_info_json( PrettyJson(get_base_service_info_json( endpoint, app_state.htsget.clone(), - &app_state.config_service_info, + app_state.config_service_info.clone(), )) } diff --git a/htsget-axum/src/handlers/service_info.rs b/htsget-axum/src/handlers/service_info.rs index 737203ba6..89dc77168 100644 --- a/htsget-axum/src/handlers/service_info.rs +++ b/htsget-axum/src/handlers/service_info.rs @@ -16,7 +16,7 @@ pub fn get_service_info_json( ErasedJson::pretty(get_base_service_info_json( endpoint, app_state.htsget, - &app_state.service_info, + app_state.service_info, )) } diff --git a/htsget-config/src/config/service_info.rs b/htsget-config/src/config/service_info.rs new file mode 100644 index 000000000..d0d4d259f --- /dev/null +++ b/htsget-config/src/config/service_info.rs @@ -0,0 +1,80 @@ +//! Service info configuration. +//! + +use serde::de::Error; +use serde::{Deserialize, Deserializer, Serialize}; +use serde_json::Value; +use std::collections::HashMap; + +/// Service info config. +#[derive(Serialize, Debug, Clone, Default, PartialEq, Eq)] +#[serde(default)] +pub struct ServiceInfo(HashMap); + +impl ServiceInfo { + /// Create a service info. 
+ pub fn new(fields: HashMap) -> Self { + Self(fields) + } + + /// Get the inner value. + pub fn into_inner(self) -> HashMap { + self.0 + } +} + +impl AsRef> for ServiceInfo { + fn as_ref(&self) -> &HashMap { + &self.0 + } +} + +impl<'de> Deserialize<'de> for ServiceInfo { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let fields: HashMap = HashMap::::deserialize(deserializer)? + .into_iter() + .map(|(key, value)| (key.to_lowercase(), value)) + .collect(); + + let err_msg = |invalid_key| format!("reserved service info field `{}`", invalid_key); + + if fields.contains_key("type") { + return Err(Error::custom(err_msg("type"))); + } + + if fields.contains_key("htsget") { + return Err(Error::custom(err_msg("htsget"))); + } + + Ok(ServiceInfo::new(fields)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::config::tests::test_serialize_and_deserialize; + use crate::config::Config; + use serde_json::json; + + #[test] + fn service_info() { + test_serialize_and_deserialize( + r#" + service_info.environment = "dev" + service_info.organization = { name = "name", url = "https://example.com/" } + "#, + HashMap::from_iter(vec![ + ("environment".to_string(), json!("dev")), + ( + "organization".to_string(), + json!({ "name": "name", "url": "https://example.com/" }), + ), + ]), + |result: Config| result.service_info.0, + ); + } +} diff --git a/htsget-http/src/service_info.rs b/htsget-http/src/service_info.rs index 5a529dea1..b8af5d2a9 100644 --- a/htsget-http/src/service_info.rs +++ b/htsget-http/src/service_info.rs @@ -1,42 +1,33 @@ +use htsget_config::config; +use htsget_config::types::Format; +use htsget_search::HtsGet; use serde::{Deserialize, Serialize}; +use serde_json::Value; +use std::collections::HashMap; use tracing::debug; use tracing::instrument; -use htsget_config::types::Format; -use htsget_search::HtsGet; - -use crate::ConfigServiceInfo; use crate::Endpoint; const READS_FORMATS: [&str; 2] = ["BAM", "CRAM"]; const 
VARIANTS_FORMATS: [&str; 2] = ["VCF", "BCF"]; +const HTSGET_GROUP: &str = "org.ga4gh"; +const HTSGET_ARTIFACT: &str = "htsget"; +const HTSGET_VERSION: &str = "1.3.0"; + /// A struct representing the information that should be present in a service-info response. #[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Default)] #[serde(rename_all = "camelCase")] pub struct ServiceInfo { - pub id: String, - pub name: String, - pub version: String, - pub organization: Organisation, + #[serde(flatten)] + pub fields: HashMap, #[serde(rename = "type")] pub service_type: Type, pub htsget: Htsget, - pub contact_url: String, - pub documentation_url: String, - pub created_at: String, - pub updated_at: String, - pub environment: String, } -#[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Default)] -#[serde(rename_all = "camelCase")] -pub struct Organisation { - pub name: String, - pub url: String, -} - -#[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Default)] +#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct Type { pub group: String, @@ -44,6 +35,16 @@ pub struct Type { pub version: String, } +impl Default for Type { + fn default() -> Self { + Self { + group: HTSGET_GROUP.to_string(), + artifact: HTSGET_ARTIFACT.to_string(), + version: HTSGET_VERSION.to_string(), + } + } +} + #[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Default)] #[serde(rename_all = "camelCase")] pub struct Htsget { @@ -53,42 +54,37 @@ pub struct Htsget { pub tags_parameters_effective: bool, } -pub fn get_service_info_with( - endpoint: Endpoint, - supported_formats: &[Format], - fields_effective: bool, - tags_effective: bool, -) -> ServiceInfo { - let htsget_info = Htsget { - datatype: match endpoint { - Endpoint::Reads => "reads", - Endpoint::Variants => "variants", - } - .to_string(), - formats: supported_formats - .iter() - .map(|format| format.to_string()) - .filter(|format| match endpoint { - Endpoint::Reads => 
READS_FORMATS.contains(&format.as_str()), - Endpoint::Variants => VARIANTS_FORMATS.contains(&format.as_str()), - }) - .collect(), - fields_parameters_effective: fields_effective, - tags_parameters_effective: tags_effective, - }; +impl ServiceInfo { + pub fn new( + endpoint: Endpoint, + supported_formats: &[Format], + fields_effective: bool, + tags_effective: bool, + fields: HashMap, + ) -> Self { + let htsget_info = Htsget { + datatype: match endpoint { + Endpoint::Reads => "reads", + Endpoint::Variants => "variants", + } + .to_string(), + formats: supported_formats + .iter() + .map(|format| format.to_string()) + .filter(|format| match endpoint { + Endpoint::Reads => READS_FORMATS.contains(&format.as_str()), + Endpoint::Variants => VARIANTS_FORMATS.contains(&format.as_str()), + }) + .collect(), + fields_parameters_effective: fields_effective, + tags_parameters_effective: tags_effective, + }; - ServiceInfo { - id: "".to_string(), - name: "".to_string(), - version: "".to_string(), - organization: Default::default(), - service_type: Default::default(), - htsget: htsget_info, - contact_url: "".to_string(), - documentation_url: "".to_string(), - created_at: "".to_string(), - updated_at: "".to_string(), - environment: "".to_string(), + Self { + fields, + service_type: Default::default(), + htsget: htsget_info, + } } } @@ -96,55 +92,14 @@ pub fn get_service_info_with( pub fn get_service_info_json( endpoint: Endpoint, searcher: impl HtsGet + Send + Sync + 'static, - config: &ConfigServiceInfo, + config: config::service_info::ServiceInfo, ) -> ServiceInfo { debug!(endpoint = ?endpoint,"getting service-info response for endpoint"); - fill_out_service_info_json( - get_service_info_with( - endpoint, - &searcher.get_supported_formats(), - searcher.are_field_parameters_effective(), - searcher.are_tag_parameters_effective(), - ), - config, + ServiceInfo::new( + endpoint, + &searcher.get_supported_formats(), + searcher.are_field_parameters_effective(), + 
searcher.are_tag_parameters_effective(), + config.into_inner(), ) } - -/// Fills the service-info json with the data from the server config -fn fill_out_service_info_json( - mut service_info_json: ServiceInfo, - config: &ConfigServiceInfo, -) -> ServiceInfo { - if let Some(id) = config.id() { - service_info_json.id = id.to_string(); - } - if let Some(name) = config.name() { - service_info_json.name = name.to_string(); - } - if let Some(version) = config.version() { - service_info_json.version = version.to_string(); - } - if let Some(organization_name) = config.organization_name() { - service_info_json.organization.name = organization_name.to_string(); - } - if let Some(organization_url) = config.organization_url() { - service_info_json.organization.url = organization_url.to_string(); - } - if let Some(contact_url) = config.contact_url() { - service_info_json.contact_url = contact_url.to_string(); - } - if let Some(documentation_url) = config.documentation_url() { - service_info_json.documentation_url = documentation_url.to_string(); - } - if let Some(created_at) = config.created_at() { - service_info_json.created_at = created_at.to_string(); - } - if let Some(updated_at) = config.updated_at() { - service_info_json.updated_at = updated_at.to_string(); - } - if let Some(environment) = config.environment() { - service_info_json.environment = environment.to_string(); - } - - service_info_json -} From a38e48528c23fa511836ed0cdce1c35178ebefeb Mon Sep 17 00:00:00 2001 From: Marko Malenic Date: Thu, 19 Dec 2024 09:26:47 +1100 Subject: [PATCH 2/5] feat(config): implement path-based locations --- htsget-config/src/config/location.rs | 362 ++++++++++++++++++++++++++- htsget-config/src/config/mod.rs | 179 ++++++++++++- htsget-config/src/resolver.rs | 79 ++++-- htsget-search/src/from_storage.rs | 28 +++ 4 files changed, 624 insertions(+), 24 deletions(-) diff --git a/htsget-config/src/config/location.rs b/htsget-config/src/config/location.rs index 766c07096..b9db7b61f 100644 
--- a/htsget-config/src/config/location.rs +++ b/htsget-config/src/config/location.rs @@ -2,13 +2,20 @@ //! use crate::config::advanced::regex_location::RegexLocation; -use crate::error::Result; +use crate::error::{Error::ParseError, Result}; +use crate::storage; +use crate::storage::file::default_authority; use crate::storage::Backend; -use serde::{Deserialize, Serialize}; +use crate::types::Scheme; +use serde::de::Error; +use serde::{Deserialize, Deserializer, Serialize}; +use std::result; +#[cfg(feature = "url-storage")] +use {crate::config::advanced::url::Url, crate::error, http::Uri}; /// The locations of data. #[derive(Serialize, Deserialize, Debug, Clone)] -#[serde(default)] +#[serde(default, from = "LocationsOneOrMany")] pub struct Locations(Vec); impl Locations { @@ -39,10 +46,11 @@ impl Default for Locations { } } -/// Either simple or regex based location. +/// Either simple or regex based location #[derive(Serialize, Deserialize, Debug, Clone)] #[serde(untagged)] pub enum LocationEither { + Simple(Location), Regex(RegexLocation), } @@ -50,19 +58,359 @@ impl LocationEither { /// Get the storage backend. pub fn backend(&self) -> &Backend { match self { + LocationEither::Simple(location) => location.backend(), LocationEither::Regex(regex_location) => regex_location.backend(), } } + /// Get the simple location variant, returning an error otherwise. + pub fn as_simple(&self) -> Result<&Location> { + if let LocationEither::Simple(simple) = self { + Ok(simple) + } else { + Err(ParseError("not a `Simple` variant".to_string())) + } + } + /// Get the regex location variant, returning an error otherwise. 
pub fn as_regex(&self) -> Result<&RegexLocation> { - let LocationEither::Regex(regex) = self; - Ok(regex) + if let LocationEither::Regex(regex) = self { + Ok(regex) + } else { + Err(ParseError("not a `Regex` variant".to_string())) + } } } impl Default for LocationEither { fn default() -> Self { - Self::Regex(Default::default()) + Self::Simple(Default::default()) + } +} + +/// Location config. +#[derive(Serialize, Deserialize, Debug, Clone, Default)] +#[serde(default, from = "LocationWrapper", deny_unknown_fields)] +pub struct Location { + backend: Backend, + prefix: String, +} + +impl Location { + /// Create a new location. + pub fn new(backend: Backend, prefix: String) -> Self { + Self { backend, prefix } + } + + /// Get the storage backend. + pub fn backend(&self) -> &Backend { + &self.backend + } + + /// Get the prefix. + pub fn prefix(&self) -> &str { + &self.prefix + } +} + +/// Either a single or many locations +#[derive(Serialize, Deserialize, Debug, Clone)] +#[serde(untagged, deny_unknown_fields)] +enum LocationsOneOrMany { + Many(Vec), + One(LocationEither), +} + +impl From for Locations { + fn from(locations: LocationsOneOrMany) -> Self { + match locations { + LocationsOneOrMany::One(location) => Self(vec![location]), + LocationsOneOrMany::Many(locations) => Self(locations), + } + } +} + +/// Deserialize the location from a string with a protocol. +#[derive(Serialize, Debug, Clone, Default)] +#[serde(default, deny_unknown_fields)] +struct StringLocation { + backend: Backend, + prefix: String, +} + +/// Deserialize the location from a map with regular field and values. +#[derive(Serialize, Deserialize, Debug, Clone, Default)] +#[serde(default, deny_unknown_fields)] +struct MapLocation { + backend: Backend, + prefix: String, +} + +/// A wrapper around location deserialization that can deserialize either a string +/// or a map. This is required so that default values behave correctly when deserializing +/// the `Location`. 
For example, if a location string isn't specified, the `Deserialize` +/// implementation for `StringLocation` can't account for this as it gets passed default values +/// which contain map elements. This wrapper allows deserializing using regular semantics by +/// falling back to the regular `MapLocation` derived deserializer. The reason there needs to be a +/// `StringLocation` and `MapLocation` type is so that `Location` can be deserialized using the +/// `from` attribute without recursion. +#[derive(Serialize, Deserialize, Debug, Clone)] +#[serde(untagged, deny_unknown_fields)] +enum LocationWrapper { + String(StringLocation), + Map(MapLocation), +} + +impl From for Location { + fn from(location: LocationWrapper) -> Self { + match location { + LocationWrapper::String(location) => Location::new(location.backend, location.prefix), + LocationWrapper::Map(location) => Location::new(location.backend, location.prefix), + } + } +} + +impl From for LocationEither { + fn from(location: Location) -> Self { + Self::Simple(location) + } +} + +impl<'de> Deserialize<'de> for StringLocation { + fn deserialize(deserializer: D) -> result::Result + where + D: Deserializer<'de>, + { + let split = |s: &str| { + let (s1, s2) = if let Some(split) = s.split_once("/").map(|(s1, s2)| { + ( + s1.to_string(), + s2.strip_suffix('/').unwrap_or(s2).to_string(), + ) + }) { + split + } else { + (s.to_string(), "".to_string()) + }; + + if s1.is_empty() { + Err(Error::custom("cannot have empty location")) + } else { + Ok((s1, s2)) + } + }; + + let s = String::deserialize(deserializer)?.to_lowercase(); + + if let Some(s) = s.strip_prefix("file://") { + let (path, prefix) = split(s)?; + return Ok(StringLocation { + backend: Backend::File(storage::file::File::new( + Scheme::Http, + default_authority(), + path.to_string(), + )), + prefix, + }); + } + + #[cfg(feature = "s3-storage")] + if let Some(s) = s.strip_prefix("s3://") { + let (bucket, prefix) = split(s)?; + return Ok(StringLocation { + 
backend: Backend::S3(storage::s3::S3::new(bucket.to_string(), None, false)), + prefix, + }); + } + + #[cfg(feature = "url-storage")] + if let Some(s_stripped) = s + .strip_prefix("http://") + .or_else(|| s.strip_prefix("https://")) + { + let (mut uri, prefix) = split(s_stripped)?; + + if s.starts_with("http://") { + uri = format!("http://{}", uri); + } + if s.starts_with("https://") { + uri = format!("https://{}", uri); + } + + let uri: Uri = uri.parse().map_err(Error::custom)?; + let url = Url::new(uri.clone(), Some(uri), true, vec![], Default::default()) + .try_into() + .map_err(|err: error::Error| Error::custom(err.to_string()))?; + + return Ok(StringLocation { + backend: Backend::Url(url), + prefix, + }); + } + + Err(Error::custom( + "expected file://, s3://, http:// or https:// scheme", + )) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::config::tests::test_serialize_and_deserialize; + use crate::config::Config; + + #[test] + fn location_single() { + test_serialize_and_deserialize( + r#" + locations = "file://path/prefix1" + "#, + ("path".to_string(), "prefix1".to_string()), + |result: Config| assert_file_location(result), + ); + test_serialize_and_deserialize( + r#" + locations = "file://path/prefix1/" + "#, + ("path".to_string(), "prefix1".to_string()), + |result: Config| assert_file_location(result), + ); + } + + #[test] + fn location_no_prefix() { + test_serialize_and_deserialize( + r#" + locations = "file://path" + "#, + ("path".to_string(), "".to_string()), + |result: Config| assert_file_location(result), + ); + test_serialize_and_deserialize( + r#" + locations = "file://path/" + "#, + ("path".to_string(), "".to_string()), + |result: Config| assert_file_location(result), + ); + } + + #[test] + fn location_file() { + test_serialize_and_deserialize( + r#" + locations = [ "file://path/prefix1", "file://path/prefix2" ] + "#, + ( + "path".to_string(), + "prefix1".to_string(), + "path".to_string(), + "prefix2".to_string(), + ), + |result: 
Config| { + let result = result.locations.0; + assert_eq!(result.len(), 2); + if let (LocationEither::Simple(location1), LocationEither::Simple(location2)) = + (result.first().unwrap(), result.get(1).unwrap()) + { + let file1 = location1.backend().as_file().unwrap(); + let file2 = location2.backend().as_file().unwrap(); + + return ( + file1.local_path().to_string(), + location1.prefix().to_string(), + file2.local_path().to_string(), + location2.prefix().to_string(), + ); + } + + panic!(); + }, + ); + } + + #[cfg(feature = "s3-storage")] + #[test] + fn location_s3() { + test_serialize_and_deserialize( + r#" + locations = [ "s3://bucket/prefix1", "s3://bucket/prefix2" ] + "#, + ( + "bucket".to_string(), + "prefix1".to_string(), + "bucket".to_string(), + "prefix2".to_string(), + ), + |result: Config| { + let result = result.locations.0; + assert_eq!(result.len(), 2); + if let (LocationEither::Simple(location1), LocationEither::Simple(location2)) = + (result.first().unwrap(), result.get(1).unwrap()) + { + if let (Backend::S3(s31), Backend::S3(s32)) = (location1.backend(), location2.backend()) { + return ( + s31.bucket().to_string(), + location1.prefix().to_string(), + s32.bucket().to_string(), + location2.prefix().to_string(), + ); + } + } + + panic!(); + }, + ); + } + + #[cfg(feature = "url-storage")] + #[test] + fn location_url() { + test_serialize_and_deserialize( + r#" + locations = [ "https://example.com/prefix1", "http://example.com/prefix2" ] + "#, + ( + "https://example.com/".to_string(), + "prefix1".to_string(), + "http://example.com/".to_string(), + "prefix2".to_string(), + ), + |result: Config| { + let result = result.locations.0; + assert_eq!(result.len(), 2); + if let (LocationEither::Simple(location1), LocationEither::Simple(location2)) = + (result.first().unwrap(), result.get(1).unwrap()) + { + if let (Backend::Url(url1), Backend::Url(url2)) = + (location1.backend(), location2.backend()) + { + return ( + url1.url().to_string(), + 
location1.prefix().to_string(), + url2.url().to_string(), + location2.prefix().to_string(), + ); + } + } + + panic!(); + }, + ); + } + + fn assert_file_location(result: Config) -> (String, String) { + let result = result.locations.0; + assert_eq!(result.len(), 1); + if let LocationEither::Simple(location1) = result.first().unwrap() { + let file1 = location1.backend().as_file().unwrap(); + return ( + file1.local_path().to_string(), + location1.prefix().to_string(), + ); + } + + panic!(); } } diff --git a/htsget-config/src/config/mod.rs b/htsget-config/src/config/mod.rs index 7d91412c7..58c96f297 100644 --- a/htsget-config/src/config/mod.rs +++ b/htsget-config/src/config/mod.rs @@ -7,12 +7,14 @@ use std::path::{Path, PathBuf}; use crate::config::advanced::FormattingStyle; use crate::config::data_server::DataServerEnabled; -use crate::config::location::{LocationEither, Locations}; +use crate::config::location::{Location, LocationEither, Locations}; use crate::config::parser::from_path; use crate::config::service_info::ServiceInfo; use crate::config::ticket_server::TicketServerConfig; use crate::error::Error::{ArgParseError, TracingError}; use crate::error::Result; +use crate::storage::file::File; +use crate::storage::Backend; use clap::{Args as ClapArgs, Command, FromArgMatches, Parser}; use serde::{Deserialize, Serialize}; use tracing::subscriber::set_global_default; @@ -159,7 +161,35 @@ impl Config { } /// Set the local resolvers from the data server config. 
- pub fn resolvers_from_data_server_config(self) -> Result { + pub fn resolvers_from_data_server_config(mut self) -> Result { + self + .locations + .as_mut_slice() + .iter_mut() + .map(|location| { + if let LocationEither::Simple(simple) = location { + // Fall through only if the backend is File and default + let file_location = if let Ok(location) = simple.backend().as_file() { + location + } else { + return Ok(()); + }; + + if let DataServerEnabled::Some(ref data_server) = self.data_server { + let prefix = simple.prefix().to_string(); + + // Don't update the local path as that comes in from the config. + let file: File = data_server.try_into()?; + let file = file.set_local_path(file_location.local_path().to_string()); + + *location = LocationEither::Simple(Location::new(Backend::File(file), prefix)); + } + } + + Ok(()) + }) + .collect::>>()?; + Ok(self) } } @@ -182,11 +212,12 @@ pub(crate) mod tests { use super::*; use crate::config::parser::from_str; - use crate::storage::Backend; use crate::tls::tests::with_test_certificates; use crate::types::Scheme; use figment::Jail; use http::uri::Authority; + #[cfg(feature = "url-storage")] + use http::Uri; use serde::de::DeserializeOwned; use serde_json::json; @@ -503,4 +534,146 @@ pub(crate) mod tests { }, ); } + + #[test] + fn simple_locations_env() { + test_config_from_env( + vec![ + ("HTSGET_DATA_SERVER_ADDR", "127.0.0.1:8080"), + ("HTSGET_LOCATIONS", "[file://data/bam, file://data/cram]"), + ], + |config| { + assert_multiple(config); + }, + ); + } + + #[test] + fn simple_locations() { + test_config_from_file( + r#" + data_server.addr = "127.0.0.1:8080" + data_server.local_path = "path" + + locations = "file://data" + "#, + |config| { + assert_eq!(config.locations().len(), 1); + let config = config.locations.into_inner(); + let location = config[0].as_simple().unwrap(); + assert_eq!(location.prefix(), ""); + assert_file_location(location, "data"); + }, + ); + } + + #[cfg(feature = "s3-storage")] + #[test] + fn 
simple_locations_s3() { + test_config_from_file( + r#" + locations = "s3://bucket" + "#, + |config| { + assert_eq!(config.locations().len(), 1); + let config = config.locations.into_inner(); + let location = config[0].as_simple().unwrap(); + assert_eq!(location.prefix(), ""); + assert!(matches!(location.backend(), + Backend::S3(s3) if s3.bucket() == "bucket")); + }, + ); + } + + #[cfg(feature = "url-storage")] + #[test] + fn simple_locations_url() { + test_config_from_file( + r#" + locations = "https://example.com" + "#, + |config| { + assert_eq!(config.locations().len(), 1); + let config = config.locations.into_inner(); + let location = config[0].as_simple().unwrap(); + assert_eq!(location.prefix(), ""); + assert!(matches!(location.backend(), + Backend::Url(url) if url.url() == &"https://example.com".parse::().unwrap())); + }, + ); + } + + #[test] + fn simple_locations_multiple() { + test_config_from_file( + r#" + data_server.addr = "127.0.0.1:8080" + locations = ["file://data/bam", "file://data/cram"] + "#, + |config| { + assert_multiple(config); + }, + ); + } + + #[cfg(feature = "s3-storage")] + #[test] + fn simple_locations_multiple_mixed() { + test_config_from_file( + r#" + data_server.addr = "127.0.0.1:8080" + data_server.local_path = "root" + locations = ["file://dir_one/bam", "file://dir_two/cram", "s3://bucket/vcf"] + "#, + |config| { + assert_eq!(config.locations().len(), 3); + let config = config.locations.into_inner(); + + let location = config[0].as_simple().unwrap(); + assert_eq!(location.prefix(), "bam"); + assert_file_location(location, "dir_one"); + + let location = config[1].as_simple().unwrap(); + assert_eq!(location.prefix(), "cram"); + assert_file_location(location, "dir_two"); + + let location = config[2].as_simple().unwrap(); + assert_eq!(location.prefix(), "vcf"); + assert!(matches!(location.backend(), + Backend::S3(s3) if s3.bucket() == "bucket")); + }, + ); + } + + #[test] + fn no_data_server() { + test_config_from_file( + r#" + 
data_server = "None" + "#, + |config| { + assert!(config.data_server().as_data_server_config().is_err()); + }, + ); + } + + fn assert_multiple(config: Config) { + assert_eq!(config.locations().len(), 2); + let config = config.locations.into_inner(); + + println!("{:#?}", config); + + let location = config[0].as_simple().unwrap(); + assert_eq!(location.prefix(), "bam"); + assert_file_location(location, "data"); + + let location = config[1].as_simple().unwrap(); + assert_eq!(location.prefix(), "cram"); + assert_file_location(location, "data"); + } + + fn assert_file_location(location: &Location, local_path: &str) { + assert!(matches!(location.backend(), + Backend::File(file) if file.local_path() == local_path && file.scheme() == Scheme::Http && file.authority() == &Authority::from_static("127.0.0.1:8080"))); + } } diff --git a/htsget-config/src/resolver.rs b/htsget-config/src/resolver.rs index d551464b0..aebdaf948 100644 --- a/htsget-config/src/resolver.rs +++ b/htsget-config/src/resolver.rs @@ -7,6 +7,7 @@ use crate::storage; use crate::storage::{Backend, ResolvedId}; use crate::types::{Query, Response, Result}; use async_trait::async_trait; +use std::path::PathBuf; use tracing::instrument; /// A trait which matches the query id, replacing the match in the substitution text. @@ -80,6 +81,16 @@ impl IdResolver for LocationEither { }; match self { + LocationEither::Simple(location) => { + if query.id().starts_with(location.prefix()) { + return Some(ResolvedId::new( + PathBuf::from(location.prefix()) + .join(query.id()) + .to_str()? 
+ .to_string(), + )); + } + } LocationEither::Regex(regex_location) => { if regex_location.regex().is_match(query.id()) { if let Some(guard) = regex_location.guard() { @@ -113,16 +124,18 @@ impl StorageResolver for LocationEither { Backend::File(file) => Some(T::from_file(file, query).await), #[cfg(feature = "s3-storage")] Backend::S3(s3) => { - let Self::Regex(regex_location) = self; - - let s3 = if s3.bucket().is_empty() { - let first_match = regex_location - .regex() - .captures(&_matched_id)? - .get(1)? - .as_str() - .to_string(); - &s3.clone().with_bucket(first_match) + let s3 = if let Self::Regex(regex_location) = self { + if s3.bucket().is_empty() { + let first_match = regex_location + .regex() + .captures(&_matched_id)? + .get(1)? + .as_str() + .to_string(); + &s3.clone().with_bucket(first_match) + } else { + s3 + } } else { s3 }; @@ -180,6 +193,7 @@ impl StorageResolver for Locations { #[cfg(test)] mod tests { use super::*; + use crate::config::location::Location; use crate::config::tests::{test_config_from_env, test_config_from_file}; use crate::storage; use crate::types::Format::Bam; @@ -244,6 +258,9 @@ mod tests { Default::default(), ); expected_resolved_request(vec![regex_location.into()], "127.0.0.1:8080/id-test-1").await; + + let location = Location::new(Backend::File(file), "".to_string()); + expected_resolved_request(vec![location.into()], "127.0.0.1:8080/id-1").await; } #[cfg(feature = "s3-storage")] @@ -257,6 +274,9 @@ mod tests { Default::default(), ); expected_resolved_request(vec![regex_location.into()], "id2/id-test").await; + + let location = Location::new(Backend::S3(s3_storage), "".to_string()); + expected_resolved_request(vec![location.into()], "id2/id-1").await; } #[cfg(feature = "s3-storage")] @@ -277,6 +297,12 @@ mod tests { Default::default(), ); expected_resolved_request(vec![regex_location.clone().into()], "id/1").await; + + let location = Location::new( + Backend::S3(storage::s3::S3::new("bucket".to_string(), None, false)), + 
"".to_string(), + ); + expected_resolved_request(vec![location.into()], "bucket/id-1").await; } #[cfg(feature = "url-storage")] @@ -302,6 +328,9 @@ mod tests { "https://example.com/id-test", ) .await; + + let location = Location::new(Backend::Url(url_storage), "".to_string()); + expected_resolved_request(vec![location.into()], "https://example.com/id-1").await; } #[test] @@ -339,6 +368,28 @@ mod tests { .into_inner(), "id-2-test-2" ); + + let resolver = Locations::new(vec![ + Location::new(Default::default(), "id-1".to_string()).into(), + Location::new(Default::default(), "id-2".to_string()).into(), + ]); + + assert_eq!( + resolver + .as_slice() + .resolve_id(&Query::new_with_default_request("id-1", Bam)) + .unwrap() + .into_inner(), + "id-1/id-1" + ); + assert_eq!( + resolver + .as_slice() + .resolve_id(&Query::new_with_default_request("id-2", Bam)) + .unwrap() + .into_inner(), + "id-2/id-2" + ); } #[test] @@ -349,7 +400,7 @@ mod tests { regex = "regex" "#, |config| { - let LocationEither::Regex(regex) = config.locations().first().unwrap(); + let regex = config.locations().first().unwrap().as_regex().unwrap(); assert_eq!(regex.regex().as_str(), "regex"); }, ); @@ -366,7 +417,7 @@ mod tests { allow_formats = ["BAM"] "#, |config| { - let LocationEither::Regex(regex) = config.locations().first().unwrap(); + let regex = config.locations().first().unwrap().as_regex().unwrap(); assert_eq!(regex.guard().unwrap().allow_formats(), &vec![Bam]); }, ); @@ -375,7 +426,7 @@ mod tests { #[test] fn config_resolvers_env() { test_config_from_env(vec![("HTSGET_LOCATIONS", "[{regex=regex}]")], |config| { - let LocationEither::Regex(regex) = config.locations().first().unwrap(); + let regex = config.locations().first().unwrap().as_regex().unwrap(); assert_eq!(regex.regex().as_str(), "regex"); }); } @@ -411,7 +462,7 @@ mod tests { assert_eq!(storage.endpoint(), expected_storage.endpoint()); assert_eq!(storage.path_style(), expected_storage.path_style()); - let 
LocationEither::Regex(regex) = config.locations().first().unwrap(); + let regex = config.locations().first().unwrap().as_regex().unwrap(); assert_eq!(regex.regex().to_string(), "regex"); assert_eq!(regex.substitution_string(), "substitution_string"); assert_eq!(regex.guard().unwrap(), &allow_guard); diff --git a/htsget-search/src/from_storage.rs b/htsget-search/src/from_storage.rs index 8fcec3a75..ca72580b8 100644 --- a/htsget-search/src/from_storage.rs +++ b/htsget-search/src/from_storage.rs @@ -95,7 +95,9 @@ pub(crate) mod tests { htsget_storage::s3::S3Storage, htsget_test::aws_mocks::with_s3_test_server, std::fs::create_dir, }; + use htsget_config::config::location::{Location, LocationEither}; use htsget_config::storage; + use htsget_config::storage::Backend; use htsget_config::types::Class::Body; use htsget_config::types::Scheme::Http; use htsget_storage::local::FileStorage; @@ -176,6 +178,32 @@ pub(crate) mod tests { .await; } + #[tokio::test] + async fn search_resolvers() { + with_config_local_storage( + |_, local_storage| async { + let locations = Locations::new(vec![LocationEither::Simple(Location::new( + Backend::File(local_storage), + "".to_string(), + ))]); + + let filename = "spec-v4.3"; + let query = Query::new_with_default_request(filename, Format::Vcf); + let response = locations.search(query).await; + + assert_eq!(response, expected_vcf_response(filename)); + + Some(( + VCF_FILE_NAME_SPEC.to_string(), + (response.unwrap(), Body).into(), + )) + }, + "data/vcf", + &[], + ) + .await; + } + fn expected_vcf_response(filename: &str) -> Result { Ok(Response::new( Format::Vcf, From 0a13f96099dae3015a340c2be771533872147715 Mon Sep 17 00:00:00 2001 From: Marko Malenic Date: Thu, 19 Dec 2024 09:27:02 +1100 Subject: [PATCH 3/5] docs: update docs for path-based and regex config --- htsget-config/README.md | 669 ++++++++---------- htsget-config/examples/config-files/c4gh.toml | 24 +- .../examples/config-files/default.toml | 71 +- 
.../examples/config-files/s3_storage.toml | 23 +- .../config-files/tls_data_server.toml | 17 +- .../config-files/tls_ticket_server.toml | 18 +- .../examples/config-files/url_storage.toml | 27 +- htsget-config/src/storage/local.rs | 180 ----- 8 files changed, 370 insertions(+), 659 deletions(-) delete mode 100644 htsget-config/src/storage/local.rs diff --git a/htsget-config/README.md b/htsget-config/README.md index 4f9ec7d27..560448f5e 100644 --- a/htsget-config/README.md +++ b/htsget-config/README.md @@ -8,142 +8,145 @@ [actions-badge]: https://github.com/umccr/htsget-rs/actions/workflows/action.yml/badge.svg [actions-url]: https://github.com/umccr/htsget-rs/actions?query=workflow%3Atests+branch%3Amain +## Overview + Configuration for [htsget-rs]. [htsget-rs]: https://github.com/umccr/htsget-rs -## Overview +## Quickstart +The simplest way to use htsget-rs is to create a [toml] config file and specify a storage location: -This crate is used to configure htsget-rs using a config file or environment variables. +```toml +locations = "file://data" +``` -## Usage +Then launch the server using the config file: -To configure htsget-rs, a TOML config file can be defined. There is also support for reading config from environment variables. -Any config options set by environment variables override values in the config file. +```sh +cargo run --all-features -p htsget-axum -- --config +``` -The configuration consists of TOML tables, such as config for the ticket server, data server, service-info, or resolvers. +This will serve files under the [`data`][data] directory: -As a starting point, see the [basic TOML][basic] example file which should work for many use-cases. +```sh +curl 'http://localhost:8080/reads/bam/htsnexus_test_NA12878' +``` -#### Ticket server config +Locations allow htsget-rs access to bioinformatics files and indexes. 
Instead of local files, htsget-rs can access +files on s3, which returns pre-signed URLs for tickets: -The ticket server responds to htsget requests by returning a set of URL tickets that the client must fetch and concatenate. -To configure the ticket server, set the following options: +```toml +locations = "s3://bucket" +``` -| Option | Description | Type | Default | -|-----------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------|-----------------------------| -| `ticket_server_addr` | The address for the ticket server. | Socket address | `'127.0.0.1:8080'` | -| `ticket_server_tls` | Enable TLS for the ticket server. See [TLS](#tls) for more details. | TOML table | Not enabled | -| `ticket_server_cors_allow_credentials` | Controls the CORS Access-Control-Allow-Credentials for the ticket server. | Boolean | `false` | -| `ticket_server_cors_allow_origins` | Set the CORS Access-Control-Allow-Origin returned by the ticket server, this can be set to `All` to send a wildcard, `Mirror` to echo back the request sent by the client, or a specific array of origins. | `'All'`, `'Mirror'` or a array of origins | `['http://localhost:8080']` | -| `ticket_server_cors_allow_headers` | Set the CORS Access-Control-Allow-Headers returned by the ticket server, this can be set to `All` to allow all headers, or a specific array of headers. | `'All'`, or a array of headers | `'All'` | -| `ticket_server_cors_allow_methods` | Set the CORS Access-Control-Allow-Methods returned by the ticket server, this can be set to `All` to allow all methods, or a specific array of methods. 
| `'All'`, or a array of methods | `'All'` | -| `ticket_server_cors_max_age` | Set the CORS Access-Control-Max-Age for the ticket server which controls how long a preflight request can be cached for. | Seconds | `86400` | -| `ticket_server_cors_expose_headers` | Set the CORS Access-Control-Expose-Headers returned by the ticket server, this can be set to `All` to expose all headers, or a specific array of headers. | `'All'`, or a array of headers | `[]` | +or on a remote HTTP server (either `http://` or `https://`): -TLS is supported by setting the `ticket_server_key` and `ticket_server_cert` options. An example of config for the ticket server: ```toml -ticket_server_addr = '127.0.0.1:8080' -ticket_server_cors_allow_credentials = false -ticket_server_cors_allow_origins = 'Mirror' -ticket_server_cors_allow_headers = ['Content-Type'] -ticket_server_cors_allow_methods = ['GET', 'POST'] -ticket_server_cors_max_age = 86400 -ticket_server_cors_expose_headers = [] +locations = "https://example.com" ``` -#### Data server config - -The local data server responds to tickets produced by the ticket server by serving local filesystem data. -To configure the data server, set the following options: - -| Option | Description | Type | Default | -|-------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------|-----------------------------| -| `data_server_addr` | The address for the data server. | Socket address | `'127.0.0.1:8081'` | -| `data_server_local_path` | The local path which the data server can access to serve files. | Filesystem path | `'./'` | -| `data_server_serve_at` | The path which the data server will prefix to all response URLs for tickets. 
| URL path | `''` | -| `data_server_tls` | Enable TLS for the data server. See [TLS](#tls) for more details. | TOML table | Not enabled | -| `data_server_cors_allow_credentials` | Controls the CORS Access-Control-Allow-Credentials for the data server. | Boolean | `false` | -| `data_server_cors_allow_origins` | Set the CORS Access-Control-Allow-Origin returned by the data server, this can be set to `All` to send a wildcard, `Mirror` to echo back the request sent by the client, or a specific array of origins. | `'All'`, `'Mirror'` or a array of origins | `['http://localhost:8080']` | -| `data_server_cors_allow_headers` | Set the CORS Access-Control-Allow-Headers returned by the data server, this can be set to `All` to allow all headers, or a specific array of headers. | `'All'`, or a array of headers | `'All'` | -| `data_server_cors_allow_methods` | Set the CORS Access-Control-Allow-Methods returned by the data server, this can be set to `All` to allow all methods, or a specific array of methods. | `'All'`, or a array of methods | `'All'` | -| `data_server_cors_max_age` | Set the CORS Access-Control-Max-Age for the data server which controls how long a preflight request can be cached for. | Seconds | `86400` | -| `data_server_cors_expose_headers` | Set the CORS Access-Control-Expose-Headers returned by the data server, this can be set to `All` to expose all headers, or a specific array of headers. | `'All'`, or a array of headers | `[]` | - -TLS is supported by setting the `data_server_key` and `data_server_cert` options. 
An example of config for the data server: +Multiple locations can be specified by providing a list and an id prefix after the location: + ```toml -data_server_addr = '127.0.0.1:8081' -data_server_local_path = './' -data_server_serve_at = '' -data_server_key = 'key.pem' -data_server_cert = 'cert.pem' -data_server_cors_allow_credentials = false -data_server_cors_allow_origins = 'Mirror' -data_server_cors_allow_headers = ['Content-Type'] -data_server_cors_allow_methods = ['GET', 'POST'] -data_server_cors_max_age = 86400 -data_server_cors_expose_headers = [] +locations = ["file://data/bam", "file://data/cram"] ``` -Sometimes it may be useful to disable the data server as all responses to the ticket server will be handled elsewhere, such as with an AWS S3 data server. +This allows htsget-rs to serve data only when the request also contains the prefix: -To disable the data server, set the following option: +```sh +curl 'http://localhost:8080/reads/bam/htsnexus_test_NA12878' +curl 'http://localhost:8080/reads/cram/htsnexus_test_NA12878?format=CRAM' +``` -
-data_server_enabled = false
-
+Locations can be mixed, and don't all need to have the same directory or resource: -#### Service info config +```toml +data_server.local_path = "root" +locations = ["file://dir_two/bam", "file://dir_one/cram", "s3://bucket/vcf"] +``` -The service info config controls what is returned when the [`service-info`][service-info] path is queried.
-To configure the service-info, set the following options: +htsget-rs spawns a separate server process to respond to htsget tickets for file locations, +so setting `data_server.local_path` to the root directory which contains all subdirectories is +required to give this server access to the local directory. -| Option | Description | Type | Default | -|---------------------------------------------------------|---------------------------------------------|-----------|----------| -| `id` | Service ID. | String | Not set | -| `name` | Service name. | String | Not set | -| `version` | Service version. | String | Not set | -| `organization_name` | Organization name. | String | Not set | -| `organization_url` | Organization URL. | String | Not set | -| `contact_url` | Service contact URL | String | Not set | -| `documentation_url` | Service documentation URL. | String | Not set | -| `created_at` | When the service was created. | String | Not set | -| `updated_at` | When the service was last updated. | String | Not set | -| `environment` | The environment the service is running in. | String | Not set | +The data server process can be disabled by setting it to `None` if no file locations are being used: -An example of config for the service info: ```toml -id = 'id' -name = 'name' -version = '0.1' -organization_name = 'name' -organization_url = 'https://example.com/' -contact_url = 'mailto:nobody@example.com' -documentation_url = 'https://example.com/' -created_at = '2022-01-01T12:00:00Z' -updated_at = '2022-01-01T12:00:00Z' -environment = 'dev' +data_server = "None" ``` -#### Resolvers +> [!NOTE] +> For S3 locations, the bucket is not included in the request to htsget-rs. To include the bucket as well, +> see deriving the bucket from the first capture group in [advanced config](#bucket). + +> [!IMPORTANT] +> Some parts of htsget-rs require extra feature flags for conditional compilation, that's why the examples specify +> using `--all-features`. 
Notably, `--features s3-storage` enables the `S3` location type, and `--features url-storage` +> enables the remote HTTP server location type. If using a subset of features, for example S3 locations only, then +> a single feature can be enabled instead of using `--all-features`. -The resolvers component of htsget-rs is used to map query IDs to the location of the resource. This is the component of the -code that takes the [`id`][id], which is everything after `reads/` or `variants/` in the http path, and maps it to a data location. +### Server config -For example, if the request to htsget-rs is: +htsget-rs spawns up to two server instances - the ticket server responds to the initial htsget request, and optionally, +the data server, which responds to the htsget tickets. -```sh -curl 'http://localhost:8080/reads/some_id/file' +The socket address of the servers can be changed by specifying `addr`: + +```toml +ticket_server.addr = "127.0.0.1:8000" +data_server.addr = "127.0.0.1:8001" ``` -Then the resolvers controls how the server finds `some_id/file`, which may be stored locally, in the cloud, or at an arbitrary URL location. -The resolvers maps `some_id/file` to a location using regexes and substitution strings. The location of the file does not -need to have the same name as the id. +TLS can be configured to enable HTTPS support by providing a certificate and private key: -A query ID is matched with a regex, and is then mapped with a substitution string that has access to the regex capture groups. -Resolvers are configured in an array, where the first matching resolver is resolver used to map the ID. +```toml +ticket_server.tls.key = "key.pem" +ticket_server.tls.cert = "cert.pem" + +data_server.tls.key = "key.pem" +data_server.tls.cert = "cert.pem" +``` + +### Service info config + +The service info config controls what is returned when the [`service-info`][service-info] path is queried.
The following +option accepts any nested value, which gets converted to a JSON response: + +```toml +service_info.environment = "dev" +service_info.organization = { name = "name", url = "https://example.com/" } +``` + +### Environment variables -To create a resolver, add a `[[resolvers]]` array of tables, and set the following options: +Most options can also be set using environment variables. Any environment variables will override options set in the +config file. Arrays are delimited with `[` and `]`, and items are separated by commas: + +| Variable | Description | Example | +|---------------------------------|----------------------------------------------------------------|----------------------------------------------------| +| `HTSGET_TICKET_SERVER_ADDR` | Set the ticket server socket address. | "127.0.0.1:8080" | +| `HTSGET_TICKET_SERVER_TLS_KEY` | See [server config](#server-config) | "key.pem" | +| `HTSGET_TICKET_SERVER_TLS_CERT` | See [server config](#server-config) | "cert.pem" | +| `HTSGET_DATA_SERVER_ADDR` | Set the data server socket address. | "127.0.0.1:8081" | +| `HTSGET_DATA_SERVER_LOCAL_PATH` | Set the path that the data server has access to. | "dir/path" | +| `HTSGET_DATA_SERVER_TLS_KEY` | See [server config](#server-config) | "key.pem" | +| `HTSGET_DATA_SERVER_TLS_CERT` | See [server config](#server-config) | "cert.pem" | +| `HTSGET_SERVICE_INFO` | Set the service info, see [service info](#service-info-config) | "{ organization = { name = name, url = url }}" | +| `HTSGET_LOCATIONS` | Set the locations. | "[file://data/prefix_one, s3://bucket/prefix_two]" | +| `HTSGET_CONFIG` | Set the config file location. | "dir/config.toml" | + +## Advanced config + +The following section describes advanced configuration which is more flexible, but adds complexity. + +### Regex-based location + +Instead of the simple path-based locations described above, htsget-rs supports arbitrary regex-based id resolution.
+This allows matching an [`id`][id], which is everything after `reads/` or `variants/` in the http path, and mapping +it to a location using regex substitution. + +To create a regex location, add a `[[locations]]` array of tables, and set the following options: | Option | Description | Type | Default | |-----------------------|-------------------------------------------------------------------------------------------------------------------------|---------------------------------------|---------| @@ -151,106 +154,114 @@ To create a resolver, add a `[[resolvers]]` array of tables, and set the followi | `substitution_string` | The replacement expression used to map the matched query ID. This has access to the match groups in the `regex` option. | String with access to capture groups | `'$0'` | For example, below is a `regex` option which matches a `/` between two groups, and inserts an additional `data` -in between the groups with the `substitution_string`. +in between the groups with the `substitution_string`: ```toml -[[resolvers]] +[[locations]] regex = '(?P.*?)/(?P.*)' substitution_string = '$group1/data/$group2' ``` -This would mean that a request to `http://localhost:8080/reads/some_id/file` would search for files at `some_id/data/file.bam` and `some_id/data/file.bam.bai`. +This would mean that a request to `http://localhost:8080/reads/some_id/file` would search for files at `some_id/data/file.bam`. -For more information about regex options see the [regex crate](https://docs.rs/regex/). +The regex locations also have access to further configuration of storage locations for `file://`, `s3://`, or `http://` +locations. These are called `File`, `S3`, and `Url` respectively. -Each resolver also maps to a certain storage backend. This storage backend can be used to set query IDs which are served from local storage, from S3-style bucket storage, or from HTTP URLs. -To set the storage backend for a resolver, add a `[resolvers.storage]` table. 
Some storage backends require feature flags to be set when compiling htsget-rs. +To manually configure `File` locations, set `backend.kind = "File"`, and specify any additional options from below the `backend` table: -To use `LocalStorage`, set `backend = 'Local'` under `[resolvers.storage]`, and specify any additional options from below: +| Option | Description | Type | Default | +|--------------------------|------------------------------------------------------------------------------------------------------------------------------------|------------------------------|--------------------| +| `scheme` | The scheme present on URL tickets. | Either `'Http'` or `'Https'` | `'Http'` | +| `authority` | The authority present on URL tickets. This should likely match the `data_server.addr`. | URL authority | `'127.0.0.1:8081'` | +| `local_path` | The local filesystem path which the data server uses to respond to tickets. This should likely match the `data_server.local_path`. | Filesystem path | `'./'` | -| Option | Description | Type | Default | -|--------------------------|-------------------------------------------------------------------------------------------------------------------------------------|------------------------------|--------------------| -| `scheme` | The scheme present on URL tickets. | Either `'Http'` or `'Https'` | `'Http'` | -| `authority` | The authority present on URL tickets. This should likely match the `data_server_addr`. | URL authority | `'127.0.0.1:8081'` | -| `local_path` | The local filesystem path which the data server uses to respond to tickets. This should likely match the `data_server_local_path`. | Filesystem path | `'./'` | -| `path_prefix` | The path prefix which the URL tickets will have. This should likely match the `data_server_serve_at` path. | URL path | `''` | -| `use_data_server_config` | Whether to use the data server config to fill in the above values. This overrides any other options specified from this table. 
| Boolean | `false` | - -By default, if the above options are left unspecified, they inherit values from the [`data_server`][data-server] config. -For example, the following sets the `scheme`, `authority`, `local_path` and `path_prefix` to values used by the `data_server`. +For example: ```toml -[[resolvers]] -regex = '.*' -substitution_string = '$0' +data_server.addr = "127.0.0.1:8000" -[resolvers.storage] -backend = 'Local' +[[locations]] +regex = ".*" +substitution_string = "$0" + +backend.kind = "File" +backend.scheme = "Http" +backend.authority = "127.0.0.1:8000" +backend.local_path = "path" ``` -To use `S3Storage`, build htsget-rs with the `s3-storage` feature enabled, set `backend = 'S3'` under `[resolvers.storage]`, and specify: +To manually configure `S3` locations, set `backend.kind = "S3"`, and specify options from below under the `backend` table: + +| Option | Description | Type | Default | +|------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------|--------------------------------------------------------------------------------------------------------------------------| +| `bucket` | The AWS S3 bucket where resources can be retrieved from. | String | Derived from the `location` `regex` property if empty. This uses the first capture group in the `regex` as the `bucket`. | +| `endpoint` | A custom endpoint to override the default S3 service address. This is useful for using S3 locally or with storage backends such as MinIO. See [MinIO](#minio). | String | Not set, uses regular AWS S3 services. | +| `path_style` | The S3 path style to request from the storage backend. If `true`, "path style" is used, e.g. `host.com/bucket/object.bam`, otherwise `bucket.host.com/object` style is used.
| Boolean | `false` | -| Option | Description | Type | Default | -|--------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------|---------------------------------------------------------------------------------------------------------------------------| -| `bucket` | The AWS S3 bucket where resources can be retrieved from. | String | Derived from the `resolvers` `regex` property if empty. This uses the first capture group in the `regex` as the `bucket`. | -| `endpoint` | A custom endpoint to override the default S3 service address. This is useful for using S3 locally or with storage backends such as MinIO. See [MinIO](#minio). | String | Not set, uses regular AWS S3 services. | -| `path_style` | The S3 path style to request from the storage backend. If `true`, "path style" is used, e.g. `host.com/bucket/object.bam`, otherwise `bucket.host.com/object` style is used. | Boolean | `false` | +For example, the following backend manually sets the `bucket` and uses path style requests: -For example, a `resolvers` value of: ```toml -[[resolvers]] -regex = '^(example_bucket)/(?P.*)$' -substitution_string = '$key' +[[locations]] +regex = "prefix/(?P.*)$" +substitution_string = "$key" -[resolvers.storage] -backend = 'S3' -# Uses the first capture group in the regex as the bucket. +backend.kind = "S3" +backend.bucket = "bucket" +backend.path_style = true ``` -Will use "example_bucket" as the S3 bucket if that resolver matches, because this is the first capture group in the `regex`. -Note, to use this feature, at least one capture group must be defined in the `regex`. +To manually configure `Url` locations, set `backend.kind = "Url"`, specify any additional options from below under the `backend` table: -`UrlStorage` is a storage backend which can be used to serve data from a remote HTTP URL. 
When using this storage backend, htsget-rs will fetch data from a `url` which is set in the config. It will also forward any headers received with the initial query, which is useful for authentication. -To use `UrlStorage`, build htsget-rs with the `url-storage` feature enabled, set `backend = 'Url'` under `[resolvers.storage]`, and specify any additional options from below: +| Option | Description | Type | Default | +|--------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------|-----------------------------------------------------------------------------------------------------------------| +| `url` | The URL to fetch data from. | HTTP URL | `"https://127.0.0.1:8081/"` | +| `response_url` | The URL to return to the client for fetching tickets. | HTTP URL | `"https://127.0.0.1:8081/"` | +| `forward_headers` | When constructing the URL tickets, copy HTTP headers received in the initial query. | Boolean | `true` | +| `header_blacklist` | List of headers that should not be forwarded. | Array of headers | `[]` | +| `tls` | Additionally enables client authentication, or sets non-native root certificates for TLS. See [server configuration](#server-configuration) for more details. | TOML table | TLS is always allowed, however the default performs no client authentication and uses native root certificates. | -| Option | Description | Type | Default | -|--------------------------------------|-----------------------------------------------------------------------------------------------------------------------------|--------------------------|-----------------------------------------------------------------------------------------------------------------| -| `url` | The URL to fetch data from. | HTTP URL | `"https://127.0.0.1:8081/"` | -| `response_url` | The URL to return to the client for fetching tickets. 
| HTTP URL | `"https://127.0.0.1:8081/"` | -| `forward_headers` | When constructing the URL tickets, copy HTTP headers received in the initial query. | Boolean | `true` | -| `header_blacklist` | List of headers that should not be forwarded. | Array of headers | `[]` | -| `tls` | Additionally enables client authentication, or sets non-native root certificates for TLS. See [TLS](#tls) for more details. | TOML table | TLS is always allowed, however the default performs no client authentication and uses native root certificates. | +For example, the following forwards all headers to response tickets except `Host`, and constructs tickets using `https://example.com` instead of `http://localhost:8080`: -When using `UrlStorage`, the following requests will be made to the `url`. -* `GET` request to fetch only the headers of the data file (e.g. `GET /data.bam`, with `Range: bytes=0-`). -* `GET` request to fetch the entire index file (e.g. `GET /data.bam.bai`). -* `HEAD` request on the data file to get its length (e.g. `HEAD /data.bam`). +```toml +[[locations]] +regex = ".*" +substitution_string = "$0" -By default, all headers received in the initial query will be included when making these requests. To exclude certain headers from being forwarded, set the `header_blacklist` option. Note that the blacklisted headers are removed from the requests made to `url` and from the URL tickets as well. 
+backend.kind = "Url" +backend.url = "http://localhost:8080" +backend.response_url = "https://example.com" +backend.forward_headers = true +backend.header_blacklist = ["Host"] +``` -Example of a resolver with `UrlStorage`: +Regex-based locations also support multiple locations: ```toml -[[resolvers]] +[[locations]] +regex = "prefix/(?P<key>.*)$" +substitution_string = "$key" +backend.kind = "S3" +backend.bucket = "bucket" +backend.path_style = true + +[[locations]] regex = ".*" substitution_string = "$0" - -[resolvers.storage] -backend = 'Url' -url = "http://localhost:8080" -response_url = "https://example.com" -forward_headers = true -header_blacklist = ["Host"] +backend.kind = "Url" +backend.url = "http://localhost:8080" +backend.forward_headers = false ``` -There are additional examples of config files located under [`examples/config-files`][examples-config-files]. +If there is an overlap in regex matches, the first location specified will be the one used. + +Additional config file examples are available under [`examples/config-files`][examples-config-files]. -#### Allow guard -Additionally, the resolver component has a feature, which allows resolving IDs based on the other fields present in a query. -This is useful as it allows the resolver to match an ID only if a particular set of query parameters are also present. For example, -a resolver can be set to only resolve IDs if the format is also BAM. +### Allow guard -This component can be configured by setting the `[resolver.allow_guard]` table with. The following options are available to restrict which queries are resolved by a resolver: +Additionally, locations support resolving IDs based on the other fields present in a query. +This is useful to allow the location to match an ID only if a particular set of query parameters are also present.
+ +This component can be configured by setting the `guard` table with: | Option | Description | Type | Default | |-------------------------|-----------------------------------------------------------------------------------------|-----------------------------------------------------------------------|-------------------------------------| @@ -259,59 +270,42 @@ This component can be configured by setting the `[resolver.allow_guard]` table w | `allow_tags` | Resolve the query ID if the query also contains the tags set by this option. | Array of tags or `'All'` | `'All'` | | `allow_formats` | Resolve the query ID if the query is one of the formats specified by this option. | An array of formats containing `'BAM'`, `'CRAM'`, `'VCF'`, or `'BCF'` | `['BAM', 'CRAM', 'VCF', 'BCF']` | | `allow_classes` | Resolve the query ID if the query is one of the classes specified by this option. | An array of classes containing eithr `'body'` or `'header'` | `['body', 'header']` | -| `allow_interval_start` | Resolve the query ID if the query reference start position is at least this option. | Unsigned 32-bit integer start position, 0-based, inclusive | Not set, allows all start positions | -| `allow_interval_end` | Resolve the query ID if the query reference end position is at most this option. | Unsigned 32-bit integer end position, 0-based exclusive. | Not set, allows all end positions | +| `allow_interval.start` | Resolve the query ID if the query reference start position is at least this option. | Unsigned 32-bit integer start position, 0-based, inclusive | Not set, allows all start positions | +| `allow_interval.end` | Resolve the query ID if the query reference end position is at most this option. 
| Unsigned 32-bit integer end position, 0-based exclusive | Not set, allows all end positions | -An example of a fully configured resolver: +For example, match only if the request queries `chr1` with positions between `100` and `1000`: ```toml -[[resolvers]] -regex = '.*' -substitution_string = '$0' - -[resolvers.storage] -backend = 'S3' -bucket = 'bucket' - -[resolvers.allow_guard] -allow_reference_names = ['chr1'] -allow_fields = ['QNAME'] -allow_tags = ['RG'] -allow_formats = ['BAM'] -allow_classes = ['body'] -allow_interval_start = 100 -allow_interval_end = 1000 -``` - -In this example, the resolver will only match the query ID if the query is for `chr1` with positions between `100` and `1000`. +[[locations]] +regex = ".*" +substitution_string = "$0" -#### TLS +backend.kind = "S3" +backend.bucket = "bucket" -TLS can be configured for the ticket server, data server, or the url storage client. These options read private keys and -certificates from PEM-formatted files. Certificates must be in X.509 format and private keys can be RSA, PKCS8, or SEC1 (EC) encoded. -The following options are available: +guard.allow_reference_names = ["chr1"] +guard.allow_interval.start = 100 +guard.allow_interval.end = 1000 +``` -| Option | Description | Type | Default | -|------------------------|----------------------------------------------------------------------------------------------------------------------------------------------|-------------------|---------| -| `key` | The path to the PEM formatted X.509 certificate. Specifies TLS for servers or client authentication for clients. | Filesystem path | Not Set | -| `cert` | The path to the PEM formatted RSA, PKCS8, or SEC1 encoded EC private key. Specifies TLS for servers or client authentication for clients. | Filesystem path | Not Set | -| `root_store` | The path to the PEM formatted root certificate store. Only used to specify non-native root certificates for the HTTP client in `UrlStorage`. 
| Filesystem path | Not Set | +### Server configuration -When used by the ticket and data servers, `key` and `cert` enable TLS, and when used with the url storage client, they enable client authentication. -The root store is only used by the url storage client. Note, the url storage client always allows TLS, however the default configuration performs no client authentication -and uses the native root certificate store. +To use custom root certificates for `Url` locations, set the following: -For example, TLS for the ticket server can be enabled by specifying the key and cert options: ```toml -ticket_server_tls.cert = "cert.pem" -ticket_server_tls.key = "key.pem" +[[locations]] +regex = ".*" +substitution_string = "$0" + +backend.kind = "Url" +backend.tls.root_store = "root.crt" ``` This project uses [rustls] for all TLS logic, and it does not depend on OpenSSL. The rustls library can be more strict when accepting certificates and keys. If generating certificates for `root_store` using OpenSSL, the correct extensions, such as `subjectAltName` should be included. -An example of generating a custom root CA and certificates for a `UrlStorage` backend: +An example of generating a custom root CA and certificates for a `Url` backend: ```sh # Create a root CA @@ -320,173 +314,43 @@ openssl req -x509 -noenc -subj '/CN=localhost' -newkey rsa -keyout root.key -out # Create a certificate signing request openssl req -noenc -newkey rsa -keyout server.key -out server.csr -subj '/CN=localhost' -addext subjectAltName=DNS:localhost -# Create the `UrlStorage` server's certificate +# Create the `Url` server's certificate openssl x509 -req -in server.csr -CA root.crt -CAkey root.key -days 365 -out server.crt -copy_extensions copy # An additional client certificate signing request and certificate can be created in the same way as the server # certificate if using client authentication. 
``` -The `root.crt` can then be used in htsget-rs to allow authenticating to a `UrlStorage` backend using `server.crt`: +CORS can also be configured for the data and ticket servers by specifying the `cors` option: ```toml -# Trust the root CA that signed the server's certificate. -tls.root_store = "root.crt" -``` - -Alternatively, projects such as [mkcert] can be used to simplify this process. - -Further TLS examples are available under [`examples/config-files`][examples-config-files]. - -[examples-config-files]: examples/config-files -[rustls]: https://github.com/rustls/rustls -[mkcert]: https://github.com/FiloSottile/mkcert - -#### Config file location - -The htsget-rs binaries ([htsget-axum], [htsget-actix] and [htsget-lambda]) support some command line options. The config file location can -be specified by setting the `--config` option: - -```shell -cargo run -p htsget-axum -- --config "config.toml" +ticket_server.cors.allow_credentials = false +ticket_server.cors.allow_origins = "Mirror" +ticket_server.cors.allow_headers = "All" +ticket_server.cors.allow_methods = ["GET", "POST"] +ticket_server.cors.max_age = 86400 +ticket_server.cors.expose_headers = [] ``` -The config can also be read from an environment variable: - -```shell -export HTSGET_CONFIG="config.toml" -``` - -If no config file is specified, the default configuration is used. Further, the default configuration file can be printed to stdout by passing -the `--print-default-config` flag: - -```shell -cargo run -p htsget-axum -- --print-default-config -``` - -Use the `--help` flag to see more details on command line options. - -[htsget-actix]: ../htsget-actix -[htsget-axum]: ../htsget-axum -[htsget-lambda]: ../htsget-lambda - -#### Log formatting - -The [Tracing][tracing] crate is used by htsget-rs is for logging functionality. The `RUST_LOG` variable is -read to configure the level that trace logs are emitted. 
- -For example, the following indicates trace level for all htsget crates, and info level for all other crates: - -```sh -export RUST_LOG='info,htsget_lambda=trace,htsget_lambda=trace,htsget_config=trace,htsget_http=trace,htsget_search=trace,htsget_test=trace' -``` - -See [here][rust-log] for more information on setting this variable. - -The style of formatting can be configured by setting the following option: - -| Option | Description | Type | Default | -|---------------------------------------------------------|--------------------------------------|--------------------------------------------------------|----------| -| `formatting_style` | The style of log formatting to use. | One of `'Full'`, `'Compact'`, `'Pretty'`, or `'Json'` | `'Full'` | - -See [here][formatting-style] for more information on how these values look. - -[tracing]: https://github.com/tokio-rs/tracing -[rust-log]: https://rust-lang-nursery.github.io/rust-cookbook/development_tools/debugging/config_log.html -[formatting-style]: https://docs.rs/tracing-subscriber/latest/tracing_subscriber/fmt/index.html#formatters - -#### Environment variables - -All the htsget-rs config options can be set using environment variables, which is convenient for runtimes such as AWS Lambda. -The ticket server, data server and service info options are flattened and can be set directly using -environment variable. It is not recommended to set the resolvers using environment variables, however it can be done by setting a single environment variable which -contains a list of structures, where a key name and value pair is used to set the nested options. - -Environment variables will override options set in the config file. Note, arrays are delimited with `[` and `]` in environment variables, and items are separated by commas. 
- -The following environment variables - corresponding to the TOML config - are available: - -| Variable | Description | -|-----------------------------------------------|-------------------------------------------------------------------------------------| -| `HTSGET_TICKET_SERVER_ADDR` | See [`ticket_server_addr`](#ticket_server_addr) | -| `HTSGET_TICKET_SERVER_TLS_KEY` | See [`TLS`](#tls) | -| `HTSGET_TICKET_SERVER_TLS_CERT` | See [`TLS`](#tls) | -| `HTSGET_TICKET_SERVER_CORS_ALLOW_CREDENTIALS` | See [`ticket_server_cors_allow_credentials`](#ticket_server_cors_allow_credentials) | -| `HTSGET_TICKET_SERVER_CORS_ALLOW_ORIGINS` | See [`ticket_server_cors_allow_origins`](#ticket_server_cors_allow_origins) | -| `HTSGET_TICKET_SERVER_CORS_ALLOW_HEADERS` | See [`ticket_server_cors_allow_headers`](#ticket_server_cors_allow_headers) | -| `HTSGET_TICKET_SERVER_CORS_ALLOW_METHODS` | See [`ticket_server_cors_allow_methods`](#ticket_server_cors_allow_methods) | -| `HTSGET_TICKET_SERVER_CORS_MAX_AGE` | See [`ticket_server_cors_max_age`](#ticket_server_cors_max_age) | -| `HTSGET_TICKET_SERVER_CORS_EXPOSE_HEADERS` | See [`ticket_server_cors_expose_headers`](#ticket_server_cors_expose_headers) | -| `HTSGET_DATA_SERVER_ADDR` | See [`data_server_addr`](#data_server_addr) | -| `HTSGET_DATA_SERVER_LOCAL_PATH` | See [`data_server_local_path`](#data_server_local_path) | -| `HTSGET_DATA_SERVER_SERVE_AT` | See [`data_server_serve_at`](#data_server_serve_at) | -| `HTSGET_DATA_SERVER_TLS_KEY` | See [`TLS`](#tls) | -| `HTSGET_DATA_SERVER_TLS_CERT` | See [`TLS`](#tls) | -| `HTSGET_DATA_SERVER_CORS_ALLOW_CREDENTIALS` | See [`data_server_cors_allow_credentials`](#data_server_cors_allow_credentials) | -| `HTSGET_DATA_SERVER_CORS_ALLOW_ORIGINS` | See [`data_server_cors_allow_origins`](#data_server_cors_allow_origins) | -| `HTSGET_DATA_SERVER_CORS_ALLOW_HEADERS` | See [`data_server_cors_allow_headers`](#data_server_cors_allow_headers) | -| `HTSGET_DATA_SERVER_CORS_ALLOW_METHODS` | See 
[`data_server_cors_allow_methods`](#data_server_cors_allow_methods) | -| `HTSGET_DATA_SERVER_CORS_MAX_AGE` | See [`data_server_cors_max_age`](#data_server_cors_max_age) | -| `HTSGET_DATA_SERVER_CORS_EXPOSE_HEADERS` | See [`data_server_cors_expose_headers`](#data_server_cors_expose_headers) | -| `HTSGET_ID` | See [`id`](#id) | -| `HTSGET_NAME` | See [`name`](#name) | -| `HTSGET_VERSION` | See [`version`](#version) | -| `HTSGET_ORGANIZATION_NAME` | See [`organization_name`](#organization_name) | -| `HTSGET_ORGANIZATION_URL` | See [`organization_url`](#organization_url) | -| `HTSGET_CONTACT_URL` | See [`contact_url`](#contact_url) | -| `HTSGET_DOCUMENTATION_URL` | See [`documentation_url`](#documentation_url) | -| `HTSGET_CREATED_AT` | See [`created_at`](#created_at) | -| `HTSGET_UPDATED_AT` | See [`updated_at`](#updated_at) | -| `HTSGET_ENVIRONMENT` | See [`environment`](#environment) | -| `HTSGET_RESOLVERS` | See [`resolvers`](#resolvers) | -| `HTSGET_FORMATTING_STYLE` | See [`formatting_style`](#formatting_style) | - -In order to use `HTSGET_RESOLVERS`, the entire resolver config array must be set. The nested array of resolvers structure can be set using name key and value pairs, for example: - -```shell -export HTSGET_RESOLVERS="[{ - regex=regex, - substitution_string=substitution_string, - storage={ - type=S3, - bucket=bucket - }, - allow_guard={ - allow_reference_names=[chr1], - allow_fields=[QNAME], - allow_tags=[RG], - allow_formats=[BAM], - allow_classes=[body], - allow_interval_start=100, - allow_interval_end=1000 - } -}]" -``` - -Similar to the [data_server](#data_server) option, the data server can be disabled by setting the equivalent environment variable: - -```shell -export HTSGET_DATA_SERVER_ENABLED=false -``` -[service-info]: https://samtools.github.io/hts-specs/htsget.html#ga4gh-service-info +Use `"Mirror"` to mirror CORS requests, and `"All"` to allow all methods, headers, or origins. 
The `ticket_server` table +above can be replaced with `data_server` to configure CORS for the data server. ### MinIO -Operating a local object storage like [MinIO][minio] can be achieved by leveraging the `endpoint` directive as shown below: +Operating a local object storage like [MinIO][minio] can be achieved by using `endpoint` under `"S3"` locations as shown below: ```toml -[[resolvers]] -regex = '.*' -substitution_string = '$0' - -[resolvers.storage] -backend = 'S3' -bucket = 'bucket' -endpoint = 'http://127.0.0.1:9000' -path_style = true +[[locations]] +regex = ".*" +substitution_string = "$0" + +backend.kind = 'S3' +backend.bucket = 'bucket' +backend.endpoint = 'http://127.0.0.1:9000' +backend.path_style = true ``` -Care must be taken to ensure that the [correct][env-variables] `AWS_DEFAULT_REGION`, `AWS_ACCESS_KEY` and `AWS_SECRET_ACCESS_KEY` is set to allow +Care must be taken to ensure that the [correct][env-variables] `AWS_DEFAULT_REGION`, `AWS_ACCESS_KEY` and `AWS_SECRET_ACCESS_KEY` are set to allow the AWS sdk to reach the endpoint. Additional configuration of the MinIO server is required to use [virtual-hosted][virtual-addressing] style addressing by setting the `MINIO_DOMAIN` environment variable. [Path][path-addressing] style addressing can be forced using `path_style = true`. @@ -494,73 +358,110 @@ See the MinIO deployment [example][minio-deployment] for more information on how ### Crypt4GH -There is experimental support for serving [Crypt4GH][c4gh] encrypted files. This can be enabled by compiling with the -`experimental` feature flag. +There is experimental support for serving [Crypt4GH][c4gh] encrypted files. This allows htsget-rs to read Crypt4GH files and serve them encrypted, directly to the client. In the process of serving the data, htsget-rs will decrypt the headers of the Crypt4GH files and re-encrypt them so that the client can read them. 
When the client receives byte ranges from htsget-rs and concatenates them, the output bytes will be Crypt4GH encrypted, and will need to be decrypted before they can be read. All file formats (BAM, CRAM, VCF, and BCF) are supported using Crypt4GH. -To use this feature, set `location = 'Local'` under `resolvers.storage.keys` to specify the private and public keys: +To use this feature, set `keys.kind = "File"` under the `location` table to specify the private and public keys: -| Option | Description | Type | Default | -|------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------|---------| -| `private_key` | The path to PEM formatted private key which htsget-rs uses to decrypt Crypt4GH data. | Filesystem path | Not Set | -| `recipient_public_key` | The path to the PEM formatted public key which the recipient of the data will use. This is what the client will use to decrypt the returned data, using the corresponding private key. | Filesystem path | Not Set | +| Option | Description | Type | Default | +|-----------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------|---------| +| `private` | The path to PEM formatted private key which htsget-rs uses to decrypt Crypt4GH data. | Filesystem path | Not Set | +| `public` | The path to the PEM formatted public key which the recipient of the data will use. This is what the client will use to decrypt the returned data, using the corresponding private key. 
| Filesystem path | Not Set | For example: ```toml [[resolvers]] -regex = '.*' -substitution_string = '$0' +regex = ".*" +substitution_string = "$0" -[resolvers.storage.keys] -location = 'Local' -private_key = 'data/c4gh/keys/bob.sec' # pragma: allowlist secret -recipient_public_key = 'data/c4gh/keys/alice.pub' +location.keys.kind = "File" +location.keys.private = "data/c4gh/keys/bob.sec" # pragma: allowlist secret +location.keys.public = "data/c4gh/keys/alice.pub" ``` -Keys can also be retrieved from [AWS Secrets Manager][secrets-manager]. Compile with the `s3-storage` feature flag and specify `location = 'SecretsManager'` under -`resolvers.storage.keys` to fetch keys from Secrets Manager. When using Secrets Manager, the `private_key` and `recipient_public_key` +Keys can also be retrieved from [AWS Secrets Manager][secrets-manager]. Compile with the `s3-storage` feature flag and specify `keys.kind = "SecretsManager"` under +`location` to fetch keys from Secrets Manager. When using Secrets Manager, the `private` and `public` correspond to ARNs or secret names in Secrets Manager storing PEM formatted keys. For example: ```toml -[[resolvers]] -regex = '.*' -substitution_string = '$0' +[[locations]] +regex = ".*" +substitution_string = "$0" -[resolvers.storage.keys] -location = 'SecretsManager' -private_key = 'private_key_secret_name' # pragma: allowlist secret -recipient_public_key = 'public_key_secret_name' +location.keys.kind = "SecretsManager" +location.keys.private = "private_key_secret_name" # pragma: allowlist secret +location.keys.public = "public_key_secret_name" ``` The htsget-rs server expects the Crypt4GH file to end with `.c4gh`, and the index file to be unencrypted. See the [`data/c4gh`][data-c4gh] for examples of file structure. Any of the storage types are supported, i.e. `Local`, `S3`, or `Url`. +### Log formatting + +The `RUST_LOG` variable is read to configure the level at which trace logs are emitted.
+ +For example, the following indicates trace level for all htsget crates, and info level for all other crates: + +```sh +export RUST_LOG='info,htsget_lambda=trace,htsget_config=trace,htsget_http=trace,htsget_search=trace,htsget_test=trace' +``` + +See [here][rust-log] for more information on setting this variable. + +The style of formatting can be configured by setting the following option: + +| Option | Description | Type | Default | +|---------------------------------------------------------|--------------------------------------|--------------------------------------------------------|----------| +| `formatting_style` | The style of log formatting to use. | One of `'Full'`, `'Compact'`, `'Pretty'`, or `'Json'` | `'Full'` | + +See [here][formatting-style] for more information on how these values look. + +### Environment variables + +Advanced configuration options also support environment variables. Generally, options separated by `.` in a config file +are separated by `_` in the corresponding environment variable. For example, to set the ticket server allow origins, +use `HTSGET_TICKET_SERVER_CORS_ALLOW_ORIGINS`. It is not recommended to set regex-based locations using environment +variables because the variables need to contain the nested array structure of storage backends. + ### As a library This crate reads config files and environment variables using [figment], and accepts command-line arguments using clap. The main function for this is `from_config`, -which is used to obtain the `Config` struct. The crate also contains the `regex_resolver` abstraction, which is used for matching a query ID with -regex, and changing it by using a substitution string. +which is used to obtain the `Config` struct. The crate also contains the `resolver` abstraction, which is used for matching a query ID with +regex, and changing it by using a substitution string. Advanced configuration options are specified in the [`advanced.rs`][advanced] submodule.
+[advanced]: src/config/advanced/mod.rs [figment]: https://github.com/SergioBenitez/Figment -#### Feature flags +### Feature flags This crate has the following features: -* `s3-storage`: used to enable `S3Storage` functionality. -* `url-storage`: used to enable `UrlStorage` functionality. +* `s3-storage`: used to enable `S3` location functionality. +* `url-storage`: used to enable `Url` location functionality. * `experimental`: used to enable experimental features that aren't necessarily part of the htsget spec, such as Crypt4GH support through `C4GHStorage`. ## License This project is licensed under the [MIT license][license]. +[tracing]: https://github.com/tokio-rs/tracing +[rust-log]: https://rust-lang-nursery.github.io/rust-cookbook/development_tools/debugging/config_log.html +[formatting-style]: https://docs.rs/tracing-subscriber/latest/tracing_subscriber/fmt/index.html#formatters +[examples-config-files]: examples/config-files +[rustls]: https://github.com/rustls/rustls +[htsget-actix]: ../htsget-actix +[htsget-axum]: ../htsget-axum +[htsget-lambda]: ../htsget-lambda +[tracing]: https://github.com/tokio-rs/tracing +[rust-log]: https://rust-lang-nursery.github.io/rust-cookbook/development_tools/debugging/config_log.html +[formatting-style]: https://docs.rs/tracing-subscriber/latest/tracing_subscriber/fmt/index.html#formatters +[service-info]: https://samtools.github.io/hts-specs/htsget.html#ga4gh-service-info [path-addressing]: https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html#path-style-access [env-variables]: https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-envvars.html [virtual-addressing]: https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html#virtual-hosted-style-access @@ -571,5 +472,5 @@ This project is licensed under the [MIT license][license]. 
[data-c4gh]: ../data/c4gh [secrets-manager]: https://docs.aws.amazon.com/secretsmanager/latest/userguide/intro.html [id]: https://samtools.github.io/hts-specs/htsget.html#url-parameters -[basic]: examples/config-files/basic.toml -[data-server]: README.md#data-server-config \ No newline at end of file +[toml]: https://toml.io/en/ +[data]: ../data \ No newline at end of file diff --git a/htsget-config/examples/config-files/c4gh.toml b/htsget-config/examples/config-files/c4gh.toml index 4a350fec5..00ccd1e17 100644 --- a/htsget-config/examples/config-files/c4gh.toml +++ b/htsget-config/examples/config-files/c4gh.toml @@ -1,23 +1,23 @@ # An example of running htsget-rs with Crypt4GH enabled. # Run with `cargo run -p htsget-axum --features experimental -- --config htsget-config/examples/config-files/c4gh.toml` -ticket_server_addr = "127.0.0.1:8080" -data_server_addr = "127.0.0.1:8081" +ticket_server.addr = "127.0.0.1:8080" +data_server.addr = "127.0.0.1:8081" [[resolvers]] regex = ".*" substitution_string = "$0" -[resolvers.storage] -backend = 'Local' +[locations.backend] +kind = "File" -[resolvers.storage.keys] -location = "Local" -private_key = "data/c4gh/keys/bob.sec" # pragma: allowlist secret -recipient_public_key = "data/c4gh/keys/alice.pub" +[locations.backend.keys] +kind = "Local" +private = "data/c4gh/keys/bob.sec" # pragma: allowlist secret +public = "data/c4gh/keys/alice.pub" # Or, use AWS secrets manager to store keys. 
-#[resolvers.storage.keys] -#location = "SecretsManager" -#private_key = "htsget/test_c4gh_private_key" # pragma: allowlist secret -#recipient_public_key = "htsget/test_c4gh_public_key" +#[locations.backend.keys] +#kind = "SecretsManager" +#private = "htsget/test_c4gh_private_key" # pragma: allowlist secret +#public = "htsget/test_c4gh_public_key" diff --git a/htsget-config/examples/config-files/default.toml b/htsget-config/examples/config-files/default.toml index 93ea36080..6845e7905 100644 --- a/htsget-config/examples/config-files/default.toml +++ b/htsget-config/examples/config-files/default.toml @@ -1,40 +1,37 @@ # Config generated by running `cargo run -p htsget-axum -- -p` formatting_style = "Full" -ticket_server_addr = "127.0.0.1:8080" -ticket_server_cors_allow_credentials = false -ticket_server_cors_allow_origins = ["http://localhost:8080"] -ticket_server_cors_allow_headers = "All" -ticket_server_cors_allow_methods = "All" -ticket_server_cors_max_age = 86400 -ticket_server_cors_expose_headers = [] -data_server_enabled = true -data_server_addr = "127.0.0.1:8081" -data_server_local_path = "./" -data_server_serve_at = "" -data_server_cors_allow_credentials = false -data_server_cors_allow_origins = ["http://localhost:8080"] -data_server_cors_allow_headers = "All" -data_server_cors_allow_methods = "All" -data_server_cors_max_age = 86400 -data_server_cors_expose_headers = [] - -[[resolvers]] -regex = ".*" -substitution_string = "$0" -storage = "Local" - -[resolvers.allow_guard] -allow_reference_names = "All" -allow_fields = "All" -allow_tags = "All" -allow_formats = [ - "BAM", - "CRAM", - "VCF", - "BCF", -] -allow_classes = [ - "body", - "header", -] + +[ticket_server] +addr = "127.0.0.1:8080" + +[ticket_server.cors] +allow_credentials = false +allow_origins = "Mirror" +allow_headers = "Mirror" +allow_methods = "Mirror" +max_age = 2592000 +expose_headers = "All" + +[data_server] +addr = "127.0.0.1:8081" +local_path = "./" + +[data_server.cors] +allow_credentials 
= false +allow_origins = "Mirror" +allow_headers = "Mirror" +allow_methods = "Mirror" +max_age = 2592000 +expose_headers = "All" + +[service_info] + +[[locations]] +prefix = "" + +[locations.backend] +kind = "File" +scheme = "HTTP" +authority = "127.0.0.1:8081" +local_path = "./" diff --git a/htsget-config/examples/config-files/s3_storage.toml b/htsget-config/examples/config-files/s3_storage.toml index 5cde4dffe..7948e4576 100644 --- a/htsget-config/examples/config-files/s3_storage.toml +++ b/htsget-config/examples/config-files/s3_storage.toml @@ -1,19 +1,18 @@ # An example for a server which uses s3 storage with data located in "bucket". # Run with `cargo run -p htsget-axum --features s3-storage -- --config htsget-config/examples/config-files/s3_storage.toml` -ticket_server_cors_allow_headers = "All" -ticket_server_cors_allow_methods = "All" -ticket_server_cors_allow_credentials = true -ticket_server_cors_max_age = 300 +ticket_server.cors.allow_headers = "All" +ticket_server.cors.allow_methods = "All" +ticket_server.cors.allow_credentials = true +ticket_server.cors.max_age = 300 -data_server_enabled = false +data_server = "None" -[[resolvers]] -regex = '^(bucket)/(?P.*)$' -substitution_string = '$key' -storage.backend = 'S3' +[[locations]] +regex = "^(bucket)/(?P.*)$" +substitution_string = "$key" +backend.kind = "S3" # Or, set the bucket manually -#[resolvers.storage] -#backend = 'S3' -#bucket = 'bucket' +#backend.kind = "S3" +#backend.bucket = "bucket" diff --git a/htsget-config/examples/config-files/tls_data_server.toml b/htsget-config/examples/config-files/tls_data_server.toml index d2e4316e5..5d8987d85 100644 --- a/htsget-config/examples/config-files/tls_data_server.toml +++ b/htsget-config/examples/config-files/tls_data_server.toml @@ -1,16 +1,13 @@ # An example config file for a TLS data server that uses a local storage backend. 
# Run with `cargo run -p htsget-axum -- --config htsget-config/examples/config-files/tls_data_server.toml` -ticket_server_addr = "0.0.0.0:8080" -data_server_addr = "0.0.0.0:8081" -data_server_cors_allow_origins = "All" -data_server_tls.cert = "cert.pem" -data_server_tls.key = "key.pem" +ticket_server.addr = "0.0.0.0:8080" +data_server.addr = "0.0.0.0:8081" +data_server.cors.allow_origins = "All" +data_server.tls.cert = "cert.pem" +data_server.tls.key = "key.pem" -[[resolvers]] +[[locations]] regex = ".*" substitution_string = "$0" - -[resolvers.storage] -backend = 'Local' -use_data_server_config = true +backend.kind = "Local" diff --git a/htsget-config/examples/config-files/tls_ticket_server.toml b/htsget-config/examples/config-files/tls_ticket_server.toml index 9bd196ffa..d73bb5968 100644 --- a/htsget-config/examples/config-files/tls_ticket_server.toml +++ b/htsget-config/examples/config-files/tls_ticket_server.toml @@ -1,16 +1,14 @@ # An example config file for a TLS ticket server that uses S3 as a storage backend. 
# Run with `cargo run -p htsget-axum --features s3-storage -- --config htsget-config/examples/config-files/tls_ticket_server.toml` -ticket_server_addr = "0.0.0.0:8080" -ticket_server_cors_allow_origins = "All" -ticket_server_tls.cert = "cert.pem" -ticket_server_tls.key = "key.pem" -data_server_addr = "0.0.0.0:8081" +ticket_server.addr = "0.0.0.0:8080" +ticket_server.cors_allow_origins = "All" +ticket_server.tls.cert = "cert.pem" +ticket_server.tls.key = "key.pem" +data_server.addr = "0.0.0.0:8081" -[[resolvers]] +[[locations]] regex = ".*" substitution_string = "$0" - -[resolvers.storage] -backend = 'S3' -bucket = "bucket" +backend.kind = "S3" +backend.bucket = "bucket" diff --git a/htsget-config/examples/config-files/url_storage.toml b/htsget-config/examples/config-files/url_storage.toml index 372b00800..078a41b4c 100644 --- a/htsget-config/examples/config-files/url_storage.toml +++ b/htsget-config/examples/config-files/url_storage.toml @@ -3,27 +3,26 @@ # `cargo run -p htsget-axum --features url-storage -- --config htsget-config/examples/config-files/url_storage.toml` # in the project directory. 
-ticket_server_addr = "127.0.0.1:8082" -ticket_server_cors_allow_origins = "All" +ticket_server.addr = "127.0.0.1:8082" +ticket_server.cors.allow_origins = "All" -ticket_server_cert = "cert.pem" -ticket_server_key = "key.pem" +ticket_server.cert = "cert.pem" +ticket_server.key = "key.pem" -data_server_enabled = false +data_server = "None" -[[resolvers]] +[[locations]] regex = ".*" substitution_string = "$0" -[resolvers.storage] -backend = 'Url' -url = "http://127.0.0.1:8081" -response_url = "https://127.0.0.1:8081" -forward_headers = true +backend.kind = "Url" +backend.url = "http://127.0.0.1:8081" +backend.response_url = "https://127.0.0.1:8081" +backend.forward_headers = true # Set client authentication -#tls.key = "key.pem" -#tls.cert = "cert.pem" +#backend.tls.key = "key.pem" +#backend.tls.cert = "cert.pem" # Set root certificates -#tls.root_store = "cert.pem" +#backend.tls.root_store = "cert.pem" diff --git a/htsget-config/src/storage/local.rs b/htsget-config/src/storage/local.rs deleted file mode 100644 index a514d916f..000000000 --- a/htsget-config/src/storage/local.rs +++ /dev/null @@ -1,180 +0,0 @@ -use std::str::FromStr; - -use http::uri::Authority; -use serde::{Deserialize, Serialize}; - -use crate::config::{default_localstorage_addr, default_path, DataServerConfig}; -#[cfg(feature = "experimental")] -use crate::storage::c4gh::C4GHKeys; -use crate::tls::KeyPairScheme; -use crate::types::Scheme; - -pub(crate) fn default_authority() -> Authority { - Authority::from_static(default_localstorage_addr()) -} - -fn default_local_path() -> String { - default_path().into() -} - -#[derive(Serialize, Deserialize, Debug, Clone)] -#[serde(default)] -pub struct Local { - scheme: Scheme, - #[serde(with = "http_serde::authority")] - authority: Authority, - local_path: String, - path_prefix: String, - use_data_server_config: bool, - #[serde(skip_serializing)] - #[cfg(feature = "experimental")] - keys: Option, -} - -impl Local { - /// Create a new local storage. 
- pub fn new( - scheme: Scheme, - authority: Authority, - local_path: String, - path_prefix: String, - use_data_server_config: bool, - ) -> Self { - Self { - scheme, - authority, - local_path, - path_prefix, - use_data_server_config, - #[cfg(feature = "experimental")] - keys: None, - } - } - - /// Get the scheme. - pub fn scheme(&self) -> Scheme { - self.scheme - } - - /// Get the authority. - pub fn authority(&self) -> &Authority { - &self.authority - } - - /// Get the local path. - pub fn local_path(&self) -> &str { - &self.local_path - } - - /// Get the path prefix. - pub fn path_prefix(&self) -> &str { - &self.path_prefix - } - - /// Get whether config should be inherited from the data server config. - pub fn use_data_server_config(&self) -> bool { - self.use_data_server_config - } - - #[cfg(feature = "experimental")] - /// Set the C4GH keys. - pub fn set_keys(mut self, keys: Option) -> Self { - self.keys = keys; - self - } - - #[cfg(feature = "experimental")] - /// Get the C4GH keys. 
- pub fn keys(&self) -> Option<&C4GHKeys> { - self.keys.as_ref() - } -} - -impl Default for Local { - fn default() -> Self { - Self::new( - Scheme::Http, - default_authority(), - default_local_path(), - Default::default(), - false, - ) - } -} - -impl From<&DataServerConfig> for Local { - fn from(config: &DataServerConfig) -> Self { - Self::new( - config.tls().get_scheme(), - Authority::from_str(&config.addr().to_string()).expect("expected valid authority"), - config.local_path().to_string_lossy().to_string(), - config.serve_at().to_string(), - true, - ) - } -} - -#[cfg(test)] -mod tests { - use std::net::SocketAddr; - use std::path::PathBuf; - - use crate::config::cors::CorsConfig; - use crate::config::tests::test_config_from_file; - use crate::storage::Storage; - use crate::types::Scheme::Http; - - use super::*; - - #[test] - fn config_storage_local_file() { - test_config_from_file( - r#" - [[resolvers]] - regex = "regex" - - [resolvers.storage] - backend = "Local" - local_path = "path" - scheme = "HTTPS" - path_prefix = "path" - "#, - |config| { - println!("{:?}", config.resolvers().first().unwrap().storage()); - assert!(matches!( - config.resolvers().first().unwrap().storage(), - Storage::Local(local_storage) if local_storage.local_path() == "path" && local_storage.scheme() == Scheme::Https && local_storage.path_prefix() == "path" - )); - }, - ); - } - - #[test] - fn local_storage_from_data_server_config() { - let data_server_config = DataServerConfig::new( - true, - SocketAddr::from_str("127.0.0.1:8080").unwrap(), - PathBuf::from("data"), - "/data".to_string(), - None, - CorsConfig::default(), - ); - let result: Local = (&data_server_config).into(); - let expected = Local::new( - Http, - Authority::from_static("127.0.0.1:8080"), - "data".to_string(), - "/data".to_string(), - true, - ); - - assert_eq!(result.scheme(), expected.scheme()); - assert_eq!(result.authority(), expected.authority()); - assert_eq!(result.local_path(), expected.local_path()); - 
assert_eq!(result.path_prefix(), expected.path_prefix()); - assert_eq!( - result.use_data_server_config(), - expected.use_data_server_config() - ); - } -} From 26844d6d98b4b4887a75500f097deb36516f367b Mon Sep 17 00:00:00 2001 From: Marko Malenic Date: Mon, 6 Jan 2025 10:41:42 +1100 Subject: [PATCH 4/5] docs(config): ensure all examples and example files are working --- htsget-config/README.md | 29 ++++++++++------- .../examples/config-files/basic.toml | 16 ++-------- htsget-config/examples/config-files/c4gh.toml | 24 ++++++-------- .../examples/config-files/s3_storage.toml | 12 +++---- .../config-files/tls_data_server.toml | 12 ++++--- .../config-files/tls_ticket_server.toml | 13 +++++--- .../examples/config-files/url_storage.toml | 31 ++++++++++--------- 7 files changed, 69 insertions(+), 68 deletions(-) diff --git a/htsget-config/README.md b/htsget-config/README.md index 560448f5e..6ed67398b 100644 --- a/htsget-config/README.md +++ b/htsget-config/README.md @@ -49,7 +49,7 @@ locations = "https://example.com" Multiple locations can be specified by providing a list and an id prefix after the location: ```toml -locations = ["file://data/bam", "file://data/cram"] +locations = [ "file://data/bam", "file://data/cram" ] ``` This allows htsget-rs to serve data only when the request also contains the prefix: @@ -63,7 +63,7 @@ Locations can be mixed, and don't all need to have the same directory or resourc ```toml data_server.local_path = "root" -locations = ["file://dir_two/bam", "file://dir_one/cram", "s3://bucket/vcf"] +locations = [ "file://dir_two/bam", "file://dir_one/cram", "s3://bucket/vcf" ] ``` htsget-rs spawns a separate server process to respond to htsget tickets for file locations, @@ -184,7 +184,7 @@ data_server.addr = "127.0.0.1:8000" regex = ".*" substitution_string = "$0" -backend.kind = "Local" +backend.kind = "File" backend.scheme = "Http" backend.authority = "127.0.0.1:8000" backend.local_path = "path" @@ -298,6 +298,7 @@ regex = ".*" 
substitution_string = "$0" backend.kind = "Url" +backend.url = "https://example.com" backend.tls.root_store = "root.crt" ``` @@ -312,10 +313,10 @@ An example of generating a custom root CA and certificates for a `Url` backend: openssl req -x509 -noenc -subj '/CN=localhost' -newkey rsa -keyout root.key -out root.crt # Create a certificate signing request -openssl req -noenc -newkey rsa -keyout server.key -out server.csr -subj '/CN=localhost' -addext subjectAltName=DNS:localhost +openssl req -noenc -newkey rsa -keyout key.pem -out server.csr -subj '/CN=localhost' -addext subjectAltName=DNS:localhost # Create the `Url` server's certificate -openssl x509 -req -in server.csr -CA root.crt -CAkey root.key -days 365 -out server.crt -copy_extensions copy +openssl x509 -req -in server.csr -CA root.crt -CAkey root.key -days 365 -out cert.pem -copy_extensions copy # An additional client certificate signing request and certificate can be created in the same way as the server # certificate if using client authentication. @@ -375,13 +376,15 @@ To use this feature, set `keys.kind = "File"` under the `location` table to spec For example: ```toml -[[resolvers]] +[[locations]] regex = ".*" substitution_string = "$0" -location.keys.kind = "File" -location.keys.private = "data/c4gh/keys/bob.sec" # pragma: allowlist secret -location.keys.public = "data/c4gh/keys/alice.pub" +backend.kind = "File" + +backend.keys.kind = "File" +backend.keys.private = "data/c4gh/keys/bob.sec" # pragma: allowlist secret +backend.keys.public = "data/c4gh/keys/alice.pub" ``` Keys can also be retrieved from [AWS Secrets Manager][secrets-manager]. 
Compile with the `s3-storage` feature flag and specify `keys.kind = "SecretsManager"` under @@ -395,9 +398,11 @@ For example: regex = ".*" substitution_string = "$0" -location.keys.kind = "SecretsManager" -location.keys.private = "private_key_secret_name" # pragma: allowlist secret -location.keys.public = "public_key_secret_name" +backend.kind = "File" + +backend.keys.kind = "SecretsManager" +backend.keys.private = "private_key_secret_name" # pragma: allowlist secret +backend.keys.public = "public_key_secret_name" ``` The htsget-rs server expects the Crypt4GH file to end with `.c4gh`, and the index file to be unencrypted. See the [`data/c4gh`][data-c4gh] for examples of file structure. diff --git a/htsget-config/examples/config-files/basic.toml b/htsget-config/examples/config-files/basic.toml index 49457147c..5d73e70dd 100644 --- a/htsget-config/examples/config-files/basic.toml +++ b/htsget-config/examples/config-files/basic.toml @@ -1,18 +1,8 @@ # An example of running htsget-rs. -# Run with `cargo run --all-features -- --config htsget-config/examples/config-files/basic.toml` +# Run with `cargo run -p htsget-axum --all-features -- --config htsget-config/examples/config-files/basic.toml` ticket_server_addr = "127.0.0.1:8080" data_server_addr = "127.0.0.1:8081" -# Serve data locally from the `data` directory. -[[resolvers]] -regex = '.*' -substitution_string = '$0' -storage.backend = 'Local' - -# Serve data from S3 if the id is prefixed with `example_bucket`. -[[resolvers]] -regex = '^(example_bucket)/(?P.*)$' -substitution_string = '$key' -storage.backend = 'S3' -# Uses the first capture group in the regex as the bucket. +# Serve data locally from the `data` directory or from an S3 bucket called `bucket` depending on the prefix. 
+locations = [ "file://data/file_prefix", "s3://bucket/s3_prefix" ] diff --git a/htsget-config/examples/config-files/c4gh.toml b/htsget-config/examples/config-files/c4gh.toml index 00ccd1e17..84645ee16 100644 --- a/htsget-config/examples/config-files/c4gh.toml +++ b/htsget-config/examples/config-files/c4gh.toml @@ -1,23 +1,19 @@ # An example of running htsget-rs with Crypt4GH enabled. -# Run with `cargo run -p htsget-axum --features experimental -- --config htsget-config/examples/config-files/c4gh.toml` +# Run with `cargo run -p htsget-axum --features experimental,s3-storage -- --config htsget-config/examples/config-files/c4gh.toml` ticket_server.addr = "127.0.0.1:8080" data_server.addr = "127.0.0.1:8081" -[[resolvers]] +[[locations]] regex = ".*" substitution_string = "$0" +backend.kind = "File" -[locations.backend] -kind = "File" +backend.keys.kind = "File" +backend.keys.private = "data/c4gh/keys/bob.sec" # pragma: allowlist secret +backend.keys.public = "data/c4gh/keys/alice.pub" -[locations.backend.keys] -kind = "Local" -private = "data/c4gh/keys/bob.sec" # pragma: allowlist secret -public = "data/c4gh/keys/alice.pub" - -# Or, use AWS secrets manager to store keys. -#[locations.backend.keys] -#kind = "SecretsManager" -#private = "htsget/test_c4gh_private_key" # pragma: allowlist secret -#public = "htsget/test_c4gh_public_key" +## Or, use AWS secrets manager to store keys. 
+#backend.keys.kind = "SecretsManager" +#backend.keys.private = "htsget/test_c4gh_private_key" # pragma: allowlist secret +#backend.keys.public = "htsget/test_c4gh_public_key" diff --git a/htsget-config/examples/config-files/s3_storage.toml b/htsget-config/examples/config-files/s3_storage.toml index 7948e4576..5b7b123ca 100644 --- a/htsget-config/examples/config-files/s3_storage.toml +++ b/htsget-config/examples/config-files/s3_storage.toml @@ -3,16 +3,16 @@ ticket_server.cors.allow_headers = "All" ticket_server.cors.allow_methods = "All" -ticket_server.cors.allow_credentials = true +ticket_server.cors.allow_credentials = false ticket_server.cors.max_age = 300 data_server = "None" -[[locations]] -regex = "^(bucket)/(?P.*)$" -substitution_string = "$key" -backend.kind = "S3" +locations = "s3://bucket" -# Or, set the bucket manually +## Or, set using regex locations +#[[locations]] +#regex = "^(bucket)/(?P.*)$" +#substitution_string = "$key" #backend.kind = "S3" #backend.bucket = "bucket" diff --git a/htsget-config/examples/config-files/tls_data_server.toml b/htsget-config/examples/config-files/tls_data_server.toml index 5d8987d85..e7274323c 100644 --- a/htsget-config/examples/config-files/tls_data_server.toml +++ b/htsget-config/examples/config-files/tls_data_server.toml @@ -7,7 +7,11 @@ data_server.cors.allow_origins = "All" data_server.tls.cert = "cert.pem" data_server.tls.key = "key.pem" -[[locations]] -regex = ".*" -substitution_string = "$0" -backend.kind = "Local" +locations = "file://data" + +## Or, set using regex locations +#[[locations]] +#regex = ".*" +#substitution_string = "$0" +#backend.kind = "File" +#backend.local_path = "data" diff --git a/htsget-config/examples/config-files/tls_ticket_server.toml b/htsget-config/examples/config-files/tls_ticket_server.toml index d73bb5968..d5600d7bc 100644 --- a/htsget-config/examples/config-files/tls_ticket_server.toml +++ b/htsget-config/examples/config-files/tls_ticket_server.toml @@ -7,8 +7,11 @@ 
ticket_server.tls.cert = "cert.pem" ticket_server.tls.key = "key.pem" data_server.addr = "0.0.0.0:8081" -[[locations]] -regex = ".*" -substitution_string = "$0" -backend.kind = "S3" -backend.bucket = "bucket" +locations = "s3://bucket" + +## Or, set using regex locations +#[[locations]] +#regex = ".*" +#substitution_string = "$0" +#backend.kind = "S3" +#backend.bucket = "bucket" diff --git a/htsget-config/examples/config-files/url_storage.toml b/htsget-config/examples/config-files/url_storage.toml index 078a41b4c..b25a16ef6 100644 --- a/htsget-config/examples/config-files/url_storage.toml +++ b/htsget-config/examples/config-files/url_storage.toml @@ -6,23 +6,26 @@ ticket_server.addr = "127.0.0.1:8082" ticket_server.cors.allow_origins = "All" -ticket_server.cert = "cert.pem" -ticket_server.key = "key.pem" +ticket_server.tls.cert = "cert.pem" +ticket_server.tls.key = "key.pem" data_server = "None" -[[locations]] -regex = ".*" -substitution_string = "$0" +locations = "https://127.0.0.1:8081" -backend.kind = "Url" -backend.url = "http://127.0.0.1:8081" -backend.response_url = "https://127.0.0.1:8081" -backend.forward_headers = true - -# Set client authentication +## Or, set using regex locations +#[[locations]] +#regex = ".*" +#substitution_string = "$0" +# +#backend.kind = "Url" +#backend.url = "http://127.0.0.1:8081" +#backend.response_url = "https://127.0.0.1:8081" +#backend.forward_headers = true +# +## Set client authentication #backend.tls.key = "key.pem" #backend.tls.cert = "cert.pem" - -# Set root certificates -#backend.tls.root_store = "cert.pem" +# +## Set root certificates +#backend.tls.root_store = "root.crt" From 95b3c9ba4d7393223d1f7f37fec2d78fff00aca4 Mon Sep 17 00:00:00 2001 From: Marko Malenic Date: Mon, 6 Jan 2025 10:44:15 +1100 Subject: [PATCH 5/5] fix(config): locations prefix should not be appended to the id so that there is not a double prefix --- htsget-config/src/resolver.rs | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff 
--git a/htsget-config/src/resolver.rs b/htsget-config/src/resolver.rs index aebdaf948..4638814ca 100644 --- a/htsget-config/src/resolver.rs +++ b/htsget-config/src/resolver.rs @@ -7,7 +7,6 @@ use crate::storage; use crate::storage::{Backend, ResolvedId}; use crate::types::{Query, Response, Result}; use async_trait::async_trait; -use std::path::PathBuf; use tracing::instrument; /// A trait which matches the query id, replacing the match in the substitution text. @@ -83,12 +82,7 @@ impl IdResolver for LocationEither { match self { LocationEither::Simple(location) => { if query.id().starts_with(location.prefix()) { - return Some(ResolvedId::new( - PathBuf::from(location.prefix()) - .join(query.id()) - .to_str()? - .to_string(), - )); + return Some(ResolvedId::new(query.id().to_string())); } } LocationEither::Regex(regex_location) => { @@ -380,7 +374,7 @@ mod tests { .resolve_id(&Query::new_with_default_request("id-1", Bam)) .unwrap() .into_inner(), - "id-1/id-1" + "id-1" ); assert_eq!( resolver @@ -388,7 +382,7 @@ mod tests { .resolve_id(&Query::new_with_default_request("id-2", Bam)) .unwrap() .into_inner(), - "id-2/id-2" + "id-2" ); }