Skip to content

Commit

Permalink
Report setup failures in a single section
Browse files Browse the repository at this point in the history
Also re-work logging in affected places a bit.

CMK-18277
  • Loading branch information
jherbel committed Jul 15, 2024
1 parent b5d0d60 commit 8d37f86
Show file tree
Hide file tree
Showing 8 changed files with 319 additions and 260 deletions.
33 changes: 24 additions & 9 deletions src/bin/scheduler/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use clap::Parser;
use log::info;
use logging::log_and_return_error;
use robotmk::lock::Locker;
use robotmk::results::SchedulerPhase;
use robotmk::results::{SchedulerPhase, SetupFailure, SetupFailures};
use robotmk::section::WriteSection;
use robotmk::termination::Cancelled;
use std::time::Duration;
Expand Down Expand Up @@ -46,16 +46,12 @@ fn run() -> AnyhowResult<()> {
bail!("Terminated")
}

let plans = setup::general::setup(&global_config, plans).context("General setup failed")?;
let (plans, general_setup_failures) =
setup::general::setup(&global_config, plans).context("General setup failed")?;
info!("General setup completed");

write_phase(&SchedulerPhase::ManagedRobots, &global_config)?;
let plans = setup::unpack_managed::setup(
&global_config.results_directory,
&global_config.results_directory_locker,
plans,
)
.context("Writing robotmk_management_errors section failed")?;
let (plans, unpacking_managed_failures) = setup::unpack_managed::setup(plans);
info!("Managed robot setup completed");

if let Some(grace_period) = args.grace_period {
Expand All @@ -67,9 +63,18 @@ fn run() -> AnyhowResult<()> {
}

write_phase(&SchedulerPhase::RCCSetup, &global_config)?;
let plans = setup::rcc::setup(&global_config, plans).context("RCC-specific setup failed")?;
let (plans, rcc_setup_failures) =
setup::rcc::setup(&global_config, plans).context("RCC-specific setup failed")?;
info!("RCC-specific setup completed");

write_setup_failures(
general_setup_failures
.into_iter()
.chain(unpacking_managed_failures)
.chain(rcc_setup_failures),
&global_config,
)?;

if global_config.cancellation_token.is_cancelled() {
bail!("Terminated")
}
Expand Down Expand Up @@ -98,6 +103,16 @@ fn write_phase(
)
}

/// Gathers every reported setup failure into a single `SetupFailures` value and
/// writes it to `setup_failures.json` inside the configured results directory.
///
/// * `failures` - the individual failures to report; the iterator is drained
///   completely before anything is written.
/// * `global_config` - supplies the results directory and the locker that is
///   handed to `write`.
///
/// Returns whatever `write` returns (presumably the `WriteSection`
/// implementation imported by this file; confirm against `robotmk::section`).
fn write_setup_failures(
    failures: impl Iterator<Item = SetupFailure>,
    global_config: &internal_config::GlobalConfig,
) -> AnyhowResult<()> {
    let section = SetupFailures(failures.collect());
    let section_path = global_config.results_directory.join("setup_failures.json");
    section.write(section_path, &global_config.results_directory_locker)
}

#[tokio::main]
async fn await_grace_period(
grace_period: u64,
Expand Down
170 changes: 111 additions & 59 deletions src/bin/scheduler/setup/general.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,16 @@ use crate::internal_config::{sort_plans_by_grouping, GlobalConfig, Plan, Source}

use anyhow::{anyhow, Context, Result as AnyhowResult};
use camino::{Utf8Path, Utf8PathBuf};
use log::info;
use log::error;
use robotmk::environment::Environment;
use robotmk::results::{plan_results_directory, GeneralSetupFailures};
use robotmk::section::WriteSection;
use std::collections::{HashMap, HashSet};
use robotmk::results::{plan_results_directory, SetupFailure};
use std::collections::HashSet;
use std::fs::{create_dir_all, remove_dir_all, remove_file};

pub fn setup(global_config: &GlobalConfig, plans: Vec<Plan>) -> AnyhowResult<Vec<Plan>> {
pub fn setup(
global_config: &GlobalConfig,
plans: Vec<Plan>,
) -> AnyhowResult<(Vec<Plan>, Vec<SetupFailure>)> {
if global_config.working_directory.exists() {
remove_dir_all(&global_config.working_directory)
.context("Failed to remove working directory")?;
Expand All @@ -30,25 +32,21 @@ pub fn setup(global_config: &GlobalConfig, plans: Vec<Plan>) -> AnyhowResult<Vec
let (surviving_plans, managed_dir_failures) = setup_managed_directories(plans);
let (mut surviving_plans, working_dir_failures) =
setup_working_directories(global_config, surviving_plans);
GeneralSetupFailures {
working_directories: working_dir_failures,
managed_directories: managed_dir_failures,
}
.write(
global_config
.results_directory
.join("general_setup_failures.json"),
&global_config.results_directory_locker,
)?;

sort_plans_by_grouping(&mut surviving_plans);
Ok(surviving_plans)
Ok((
surviving_plans,
managed_dir_failures
.into_iter()
.chain(working_dir_failures)
.collect(),
))
}

fn setup_working_directories(
global_config: &GlobalConfig,
plans: Vec<Plan>,
) -> (Vec<Plan>, HashMap<String, String>) {
) -> (Vec<Plan>, Vec<SetupFailure>) {
let (surviving_plans, plan_failures) = setup_plans_working_directory(plans);
let (surviving_plans, rcc_failures) =
setup_rcc_working_directories(&global_config.working_directory, surviving_plans);
Expand All @@ -58,36 +56,49 @@ fn setup_working_directories(
)
}

fn setup_plans_working_directory(plans: Vec<Plan>) -> (Vec<Plan>, HashMap<String, String>) {
fn setup_plans_working_directory(plans: Vec<Plan>) -> (Vec<Plan>, Vec<SetupFailure>) {
let mut surviving_plans = Vec::new();
let mut failures = HashMap::new();
let mut failures = vec![];
for plan in plans.into_iter() {
if let Err(e) = create_dir_all(&plan.working_directory) {
let error = anyhow!(e).context(format!(
"Failed to create working directory {} of plan {}",
plan.working_directory, plan.id
));
info!("{error:#}");
failures.insert(plan.id.clone(), format!("{error:#}"));
let error = anyhow!(e);
error!(
"Plan {}: Failed to create working directory. Plan won't be scheduled.
Error: {error:#}",
plan.id
);
failures.push(SetupFailure {
plan_id: plan.id.clone(),
summary: "Failed to create working directory".to_string(),
details: format!("{error:#}"),
});
continue;
}
#[cfg(windows)]
{
use super::windows_permissions::grant_full_access;
use log::info;
use robotmk::session::Session;

if let Session::User(user_session) = &plan.session {
info!(
"Granting full access for {} to user `{}`.",
&plan.working_directory, &user_session.user_name
);
if let Err(e) = grant_full_access(&user_session.user_name, &plan.working_directory)
{
let error = anyhow!(e).context(format!(
"Failed to set permissions for working directory {} of plan {}",
plan.working_directory, plan.id
));
info!("{error:#}");
failures.insert(plan.id.clone(), format!("{error:#}"));
let error = anyhow!(e);
error!(
"Plan {}: Failed to set permissions for working directory. \
Plan won't be scheduled.
Error: {error:#}",
plan.id
);
failures.push(SetupFailure {
plan_id: plan.id.clone(),
summary: "Failed to set permissions for working directory".to_string(),
details: format!("{error:#}"),
});
continue;
};
}
Expand All @@ -100,17 +111,19 @@ fn setup_plans_working_directory(plans: Vec<Plan>) -> (Vec<Plan>, HashMap<String
fn setup_rcc_working_directories(
working_directory: &Utf8Path,
plans: Vec<Plan>,
) -> (Vec<Plan>, HashMap<String, String>) {
) -> (Vec<Plan>, Vec<SetupFailure>) {
let (rcc_plans, system_plans): (Vec<Plan>, Vec<Plan>) = plans
.into_iter()
.partition(|plan| matches!(plan.environment, Environment::Rcc(_)));
let (surviving_plans, environment_failures) = setup_with_one_directory_per_user(
&environment_building_working_directory(working_directory),
rcc_plans,
"environment building",
);
let (mut surviving_plans, rcc_setup_failures) = setup_with_one_directory_per_user(
&rcc_setup_working_directory(working_directory),
surviving_plans,
"RCC setup",
);
surviving_plans.extend(system_plans);
(
Expand All @@ -125,47 +138,71 @@ fn setup_rcc_working_directories(
fn setup_with_one_directory_per_user(
target: &Utf8Path,
plans: Vec<Plan>,
) -> (Vec<Plan>, HashMap<String, String>) {
description_for_failure_reporting: &str,
) -> (Vec<Plan>, Vec<SetupFailure>) {
let mut surviving_plans = Vec::new();
let mut failures = HashMap::new();
let mut failures = vec![];
if let Err(e) = create_dir_all(target) {
let error = anyhow!(e).context(format!("Failed to create directory {target}",));
info!("{error:#}");
let error = anyhow!(e);
for plan in plans {
failures.insert(plan.id.clone(), format!("{error:#}"));
error!(
"Plan {}: Failed to create {description_for_failure_reporting} directory. \
Plan won't be scheduled.
Error: {error:#}",
plan.id
);
failures.push(SetupFailure {
plan_id: plan.id.clone(),
summary: format!("Failed to create {description_for_failure_reporting} directory"),
details: format!("{error:#}"),
});
}
return (surviving_plans, failures);
}
for (session, plans_in_session) in plans_by_sessions(plans) {
let user_target = &target.join(&session.id());
if let Err(e) = create_dir_all(user_target) {
let error = anyhow!(e).context(format!(
"Failed to create directory {} for session {}",
user_target, &session
));
info!("{error:#}");
let error = anyhow!(e);
for plan in plans_in_session {
failures.insert(plan.id.clone(), format!("{error:#}"));
error!(
"Plan {}: Failed to create user-specific {description_for_failure_reporting} \
directory. Plan won't be scheduled.
Error: {error:#}",
plan.id
);
failures.push(SetupFailure {
plan_id: plan.id.clone(),
summary: format!("Failed to create user-specific {description_for_failure_reporting} directory"),
details: format!("{error:#}"),
});
}
continue;
}
#[cfg(windows)]
{
use super::windows_permissions::grant_full_access;
use log::info;
use robotmk::session::Session;

if let Session::User(user_session) = &session {
info!(
"Granting full access for {} to user `{}`.",
user_target, &user_session.user_name
);
if let Err(e) = grant_full_access(&user_session.user_name, user_target) {
let error = anyhow!(e).context(format!(
"Failed to grant full access for {} to user `{}`.",
user_target, &user_session.user_name
));
info!("{error:#}");
let error = anyhow!(e);
for plan in plans_in_session {
failures.insert(plan.id.clone(), format!("{error:#}"));
error!(
"Plan {}: Failed to adjust permissions for user-specific \
{description_for_failure_reporting} directory. Plan won't be scheduled.
Error: {error:#}",
plan.id
);
failures.push(SetupFailure {
plan_id: plan.id.clone(),
summary: format!("Failed to adjust permissions for user-specific {description_for_failure_reporting} directory"),
details: format!("{error:#}"),
});
}
continue;
};
Expand All @@ -184,29 +221,44 @@ fn setup_results_directories(global_config: &GlobalConfig, plans: &[Plan]) -> An
clean_up_results_directory(global_config, plans).context("Failed to clean up results directory")
}

fn setup_managed_directories(plans: Vec<Plan>) -> (Vec<Plan>, HashMap<String, String>) {
fn setup_managed_directories(plans: Vec<Plan>) -> (Vec<Plan>, Vec<SetupFailure>) {
let mut surviving_plans = Vec::new();
let mut failures = HashMap::new();
let mut failures = vec![];
for plan in plans {
if let Source::Managed { target, .. } = &plan.source {
if let Err(e) = create_dir_all(target) {
let error = anyhow!(e).context(anyhow!(
"Failed to create managed directory {} for plan {}",
target,
let error = anyhow!(e);
error!(
"Plan {}: Failed to create managed directory. Plan won't be scheduled.
Error: {error:#}",
plan.id
));
info!("{error:#}");
failures.insert(plan.id.clone(), format!("{error:#}"));
);
failures.push(SetupFailure {
plan_id: plan.id.clone(),
summary: "Failed to create managed directory".to_string(),
details: format!("{error:#}"),
});
continue;
}
#[cfg(windows)]
{
use super::windows_permissions::grant_full_access;
use log::info;
use robotmk::session::Session;

if let Session::User(user_session) = &plan.session {
if let Err(error) = grant_full_access(&user_session.user_name, target) {
info!("{error:#}");
failures.insert(plan.id.clone(), format!("{error:#}"));
error!(
"Plan {}: Failed to adjust permissions of managed directory. Plan won't be scheduled.
Error: {error:#}",
plan.id
);
failures.push(SetupFailure {
plan_id: plan.id.clone(),
summary: "Failed to adjust permissions of managed directory"
.to_string(),
details: format!("{error:#}"),
});
continue;
}
info!(
Expand Down
7 changes: 0 additions & 7 deletions src/bin/scheduler/setup/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,3 @@ fn plans_by_sessions(plans: Vec<Plan>) -> HashMap<Session, Vec<Plan>> {
}
plans_by_session
}

/// Renders plan IDs as one human-readable, comma-separated string.
///
/// The IDs are emitted in iteration order, separated by `", "`. An empty
/// iterator yields an empty string.
fn failed_plan_ids_human_readable<'a>(failed_plan_ids: impl Iterator<Item = &'a String>) -> String {
    let borrowed_ids: Vec<&str> = failed_plan_ids.map(String::as_str).collect();
    borrowed_ids.join(", ")
}
Loading

0 comments on commit 8d37f86

Please sign in to comment.