Skip to content

Commit

Permalink
Improve handling of operator-related errors (metalbear-co#2063)
Browse files Browse the repository at this point in the history
* Abort on error, suggest config change

* General http error -> request build failed

* Fixed fetching crds

* Error message improved

* Changelog entry

* Update mirrord/cli/src/error.rs

Co-authored-by: Eyal Bukchin <[email protected]>

* Update mirrord/cli/src/error.rs

Co-authored-by: Eyal Bukchin <[email protected]>

---------

Co-authored-by: Eyal Bukchin <[email protected]>
  • Loading branch information
Razz4780 and eyalb181 authored Nov 23, 2023
1 parent e7c52ba commit 451edb3
Show file tree
Hide file tree
Showing 5 changed files with 188 additions and 166 deletions.
1 change: 1 addition & 0 deletions changelog.d/2049.fixed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Improved handling of operator-related errors.
145 changes: 55 additions & 90 deletions mirrord/cli/src/connection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,10 @@ use mirrord_analytics::AnalyticsReporter;
use mirrord_config::{feature::network::outgoing::OutgoingFilterConfig, LayerConfig};
use mirrord_intproxy::agent_conn::AgentConnectInfo;
use mirrord_kube::api::{kubernetes::KubernetesAPI, AgentManagment};
use mirrord_operator::client::{OperatorApi, OperatorApiError, OperatorSessionConnection};
use mirrord_operator::client::OperatorApi;
use mirrord_progress::Progress;
use mirrord_protocol::{ClientMessage, DaemonMessage};
use tokio::sync::mpsc;
use tracing::{debug, trace};

use crate::{CliError, Result};

Expand All @@ -17,57 +16,6 @@ pub(crate) struct AgentConnection {
pub receiver: mpsc::Receiver<DaemonMessage>,
}

pub(crate) async fn create_operator_session<P>(
config: &LayerConfig,
progress: &P,
analytics: &mut AnalyticsReporter,
) -> Result<Option<OperatorSessionConnection>, CliError>
where
P: Progress + Send + Sync,
{
let mut sub_progress = progress.subtask("checking operator");

match OperatorApi::create_session(config, progress, analytics).await {
Ok(Some(session)) => {
sub_progress.success(Some("connected to operator"));
Ok(Some(session))
}
Ok(None) => {
sub_progress.success(Some("no operator detected"));

Ok(None)
}
Err(OperatorApiError::ConcurrentStealAbort) => {
sub_progress.failure(Some("operator concurrent port steal lock"));

Err(CliError::OperatorConcurrentSteal)
}
Err(OperatorApiError::UnsupportedFeature {
feature,
operator_version,
}) => {
sub_progress.failure(Some("unsupported operator feature"));

Err(CliError::FeatureNotSupportedInOperatorError {
feature,
operator_version,
})
}
Err(err) => {
sub_progress.failure(Some(
"unable to check if operator exists, probably due to RBAC",
));

trace!(
"{}",
miette::Error::from(CliError::OperatorConnectionFailed(err))
);

Ok(None)
}
}
}

/// Creates an agent if needed then connects to it.
pub(crate) async fn create_and_connect<P>(
config: &LayerConfig,
Expand All @@ -89,49 +37,66 @@ where
}
}

if config.operator && let Some(session) = create_operator_session(config, progress, analytics).await? {
Ok((
AgentConnectInfo::Operator(session.info),
AgentConnection { sender: session.tx, receiver: session.rx },
))
} else {
if config.feature.copy_target.enabled {
return Err(CliError::FeatureRequiresOperatorError("copy target".into()));
}

if matches!(config.target, mirrord_config::target::TargetConfig{ path: Some(mirrord_config::target::Target::Deployment{..}), ..}) {
// This is CLI Only because the extensions also implement this check with better messaging.
progress.print( "When targeting multi-pod deployments, mirrord impersonates the first pod in the deployment.");
progress.print("Support for multi-pod impersonation requires the mirrord operator, which is part of mirrord for Teams.");
progress.print("To try it out, join the waitlist with `mirrord waitlist <email address>`, or at this link: https://metalbear.co/#waitlist-form");
if config.operator {
let mut subtask = progress.subtask("checking operator");

match OperatorApi::create_session(config, &subtask, analytics).await? {
Some(session) => {
subtask.success(Some("connected to the operator"));
return Ok((
AgentConnectInfo::Operator(session.info),
AgentConnection {
sender: session.tx,
receiver: session.rx,
},
));
}
None => subtask.success(Some("no operator detected")),
}
}

let k8s_api = KubernetesAPI::create(config)
.await
.map_err(CliError::KubernetesApiFailed)?;
if config.feature.copy_target.enabled {
return Err(CliError::FeatureRequiresOperatorError("copy target".into()));
}

let _ = k8s_api.detect_openshift(progress).await.map_err(|err| {
debug!("couldn't determine OpenShift: {err}");
});
if matches!(
config.target,
mirrord_config::target::TargetConfig {
path: Some(mirrord_config::target::Target::Deployment { .. }),
..
}
) {
// This is CLI Only because the extensions also implement this check with better messaging.
progress.print( "When targeting multi-pod deployments, mirrord impersonates the first pod in the deployment.");
progress.print("Support for multi-pod impersonation requires the mirrord operator, which is part of mirrord for Teams.");
progress.print("To try it out, join the waitlist with `mirrord waitlist <email address>`, or at this link: https://metalbear.co/#waitlist-form");
}

let agent_connect_info = tokio::time::timeout(
Duration::from_secs(config.agent.startup_timeout),
k8s_api.create_agent(progress, Some(config)),
)
let k8s_api = KubernetesAPI::create(config)
.await
.map_err(|_| CliError::AgentReadyTimeout)?
.map_err(CliError::CreateAgentFailed)?;

let (sender, receiver) = k8s_api
.create_connection(agent_connect_info.clone())
.await
.map_err(CliError::AgentConnectionFailed)?;
.map_err(CliError::KubernetesApiFailed)?;

let _ = k8s_api.detect_openshift(progress).await.map_err(|err| {
tracing::debug!("couldn't determine OpenShift: {err}");
});

let agent_connect_info = tokio::time::timeout(
Duration::from_secs(config.agent.startup_timeout),
k8s_api.create_agent(progress, Some(config)),
)
.await
.map_err(|_| CliError::AgentReadyTimeout)?
.map_err(CliError::CreateAgentFailed)?;

let (sender, receiver) = k8s_api
.create_connection(agent_connect_info.clone())
.await
.map_err(CliError::AgentConnectionFailed)?;

Ok((
AgentConnectInfo::DirectKubernetes(agent_connect_info),
AgentConnection { sender, receiver },
))
}
Ok((
AgentConnectInfo::DirectKubernetes(agent_connect_info),
AgentConnection { sender, receiver },
))
}

pub const AGENT_CONNECT_INFO_ENV_KEY: &str = "MIRRORD_AGENT_CONNECT_INFO";
45 changes: 41 additions & 4 deletions mirrord/cli/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use miette::Diagnostic;
use mirrord_console::error::ConsoleError;
use mirrord_intproxy::error::IntProxyError;
use mirrord_kube::error::KubeApiError;
use mirrord_operator::client::OperatorApiError;
use mirrord_operator::client::{HttpError, OperatorApiError};
use thiserror::Error;

pub(crate) type Result<T, E = CliError> = miette::Result<T, E>;
Expand Down Expand Up @@ -43,19 +43,26 @@ pub(crate) enum InternalProxySetupError {

#[derive(Debug, Error, Diagnostic)]
pub(crate) enum CliError {
#[error("Failed to connect to the operator. We have found the operator and unable to connect to it. {0:#?}")]
#[error("Failed to connect to the operator, probably due to RBAC: {0}")]
#[diagnostic(help(
r#"
Please check the following:
1. The operator is running and the logs are not showing any errors.
2. You have sufficient permissions to port forward to the operator.
If you want to run without the operator, please set the following in the mirrord configuration file:
{{
"operator": false
}}
Please remember that some features are supported only when using mirrord operator (https://mirrord.dev/docs/teams/introduction/#supported-features).
{GENERAL_HELP}"#
))]
OperatorConnectionFailed(#[from] OperatorApiError),
OperatorConnectionFailed(String),
#[error("Failed to connect to the operator. Someone else is stealing traffic from the requested target")]
#[diagnostic(help(
r#"
If you want to run anyway please set the following:
If you want to run anyway, please set the following:
{{
"feature": {{
Expand Down Expand Up @@ -237,8 +244,38 @@ pub(crate) enum CliError {
))]
FeatureRequiresOperatorError(String),
#[error("Feature `{feature}` is not supported in mirrord operator {operator_version}.")]
#[diagnostic(help("{GENERAL_HELP}"))]
FeatureNotSupportedInOperatorError {
feature: String,
operator_version: String,
},
#[error("Selected mirrord target is not valid: {0}")]
#[diagnostic(help("{GENERAL_HELP}"))]
InvalidTargetError(String),
#[error("Failed to build a websocket connect request: {0:#?}")]
#[diagnostic(help(
r#"This is a bug. Please report it in our Discord or GitHub repository. {GENERAL_HELP}"#
))]
ConnectRequestBuildError(HttpError),
}

impl From<OperatorApiError> for CliError {
fn from(value: OperatorApiError) -> Self {
match value {
OperatorApiError::ConcurrentStealAbort => Self::OperatorConcurrentSteal,
OperatorApiError::UnsupportedFeature {
feature,
operator_version,
} => Self::FeatureNotSupportedInOperatorError {
feature,
operator_version,
},
OperatorApiError::CreateApiError(e) => Self::KubernetesApiFailed(e),
OperatorApiError::InvalidTarget { reason } => Self::InvalidTargetError(reason),
OperatorApiError::ConnectRequestBuildError(e) => Self::ConnectRequestBuildError(e),
OperatorApiError::KubeError { error, operation } => {
Self::OperatorConnectionFailed(format!("{operation} failed: {error}"))
}
}
}
}
10 changes: 6 additions & 4 deletions mirrord/cli/src/operator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use mirrord_config::{
config::{ConfigContext, MirrordConfig},
LayerFileConfig,
};
use mirrord_kube::{api::kubernetes::create_kube_api, error::KubeApiError};
use mirrord_kube::api::kubernetes::create_kube_api;
use mirrord_operator::{
client::OperatorApiError,
crd::{LicenseInfoOwned, MirrordOperatorCrd, MirrordOperatorSpec, OPERATOR_STATUS_NAME},
Expand Down Expand Up @@ -135,9 +135,11 @@ async fn operator_status(config: Option<String>) -> Result<()> {
let mirrord_status = match status_api
.get(OPERATOR_STATUS_NAME)
.await
.map_err(KubeApiError::KubeError)
.map_err(OperatorApiError::KubeApiError)
.map_err(CliError::OperatorConnectionFailed)
.map_err(|error| OperatorApiError::KubeError {
error,
operation: "getting operator status".into(),
})
.map_err(CliError::from)
{
Ok(status) => status,
Err(err) => {
Expand Down
Loading

0 comments on commit 451edb3

Please sign in to comment.