Skip to content

Commit

Permalink
feat: change cluster state management (#235)
Browse files Browse the repository at this point in the history
  • Loading branch information
mtweeman authored Nov 7, 2024
1 parent fd0efe9 commit 0d4088c
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 37 deletions.
14 changes: 7 additions & 7 deletions components/terraform/instance/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -111,13 +111,13 @@ resource "oci_identity_dynamic_group" "servers" {

# IAM policy that grants the "servers" dynamic group the permissions listed in
# `statements`, each scoped to the compartment identified by var.compartment_ocid.
# NOTE(review): `name` and `description` each appear twice below; a Terraform block
# cannot set the same argument twice. This looks like both sides of a diff rendered
# together - confirm which pair is meant to remain.
resource "oci_identity_policy" "compute_instances_list" {
compartment_id = var.compartment_ocid
name = "compute-instances-list"
description = "Listing compute instances in servers pool"
name = "cluster-state-mgmt"
description = "Cluster state management"
statements = [
# listing compute instances in user-data script for machine image
"allow dynamic-group ${oci_identity_dynamic_group.servers.name} to inspect instances in compartment id ${var.compartment_ocid}",
# using OCI KMS service to get key for vault auto-unsealing
# https://developer.hashicorp.com/vault/docs/configuration/seal/ocikms#authentication
"allow dynamic-group ${oci_identity_dynamic_group.servers.name} to use keys in compartment id ${var.compartment_ocid}",
# Cluster state management in machine-images user-data.sh script
"allow dynamic-group ${oci_identity_dynamic_group.servers.name} to inspect vaults in compartment id ${var.compartment_ocid}",
"allow dynamic-group ${oci_identity_dynamic_group.servers.name} to inspect secrets in compartment id ${var.compartment_ocid}",
# NOTE(review): the OCI policy reference appears to name these resource types in the
# plural (`secret-bundles`, `secrets`) - confirm the singular forms below are accepted.
"allow dynamic-group ${oci_identity_dynamic_group.servers.name} to read secret-bundle in compartment id ${var.compartment_ocid}",
"allow dynamic-group ${oci_identity_dynamic_group.servers.name} to use secret in compartment id ${var.compartment_ocid}",
]
}
73 changes: 43 additions & 30 deletions machine-images/scripts/user-data.sh
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
#!/usr/bin/env bash

# Everything the script prints (stdout and stderr) is collected in this file.
LOGFILE="/root/user-data.log"
# -e          abort on the first failing command
# -E          (errtrace) let functions, command substitutions and subshells inherit the
#             ERR trap; without it a failure inside a function ends the script under -e
#             without the ERR message below ever being printed
# -o pipefail a pipeline fails when any of its stages fails
set -eEo pipefail
# Keep the original stdout/stderr reachable on fds 3 and 4, then point fds 1 and 2 at the log.
exec 3>&1 4>&2 1>"${LOGFILE}" 2>&1
# Report failures on the original stdout (fd 3), because fd 1 now goes to the log file.
trap "echo 'ERROR: An error occurred during execution, check log ${LOGFILE} for details.' >&3" ERR
# Before each command: silence xtrace, print a UTC timestamp prefix (no newline) so the
# following xtrace line is timestamped, then switch xtrace on again.
# NOTE(review): the DEBUG trap is not inherited by functions unless functrace (`set -T`) is
# enabled, so only top-level commands get the prefix. Enabling it as-is would presumably
# also write the prefix inside command substitutions and end up in captured values - verify
# before changing.
trap '{ set +x; } 2>/dev/null; echo -n "[$(date -uIs)] "; set -x' DEBUG


function main() {
USER_DATA_OUTPUT_LOG="user-data-output.log"

echo "$(date -uIs): Checking running instances" >> "${USER_DATA_OUTPUT_LOG}"
all_running_instances=$(oci compute instance list --compartment-id "${COMPARTMENT_OCID}" --all --sort-by TIMECREATED | jq -r '[.data[] | select(.["lifecycle-state"] == "RUNNING")] | length')
availability_domain_0_running_instances=$(oci compute instance list --compartment-id "${COMPARTMENT_OCID}" --availability-domain "${AVAILABILITY_DOMAIN}" --sort-by TIMECREATED | jq -r '[.data[] | select(.["lifecycle-state"] == "RUNNING")] | length')
echo "$(date -uIs): Quantity of all running instances: ${all_running_instances}" >> "${USER_DATA_OUTPUT_LOG}"
echo "$(date -uIs): Quantity of availability domain '0' running instances: ${availability_domain_0_running_instances}" >> "${USER_DATA_OUTPUT_LOG}"

if [[ "${all_running_instances}" -le 1 && "${availability_domain_0_running_instances}" -le 1 ]]; then
get_cluster_initiated_flag "hs-prod-fra-vault" "cluster-initiated"

if [[ "${CLUSTER_INITIATED}" == "false" ]]; then
set_cluster_initiated_flag
initiate_cluster
set_env_variables
deploy_cd_tool_for_container_orchestration_tool
Expand All @@ -24,8 +25,31 @@ function main() {
}


#######################################
# Looks up the cluster-initiated flag that is stored as a secret in an OCI vault.
# Globals:
#   COMPARTMENT_OCID  (read)    compartment searched for the vault and the secret
#   SECRET_ID         (written) OCID of the secret; set_cluster_initiated_flag reads it
#   CLUSTER_INITIATED (written) base64-decoded content of the secret
# Arguments:
#   $1 - display name of the vault
#   $2 - name of the secret inside that vault
# Returns:
#   1 when the vault or the secret does not resolve to exactly one OCID.
#######################################
function get_cluster_initiated_flag() {
  local vault_name=$1
  local secret_name=$2
  local vault_id
  local secret_id

  vault_id=$(oci kms management vault list \
    --compartment-id "${COMPARTMENT_OCID}" \
    --all | jq -r --arg vault_name "${vault_name}" '.data[] | select(.["display-name"] == $vault_name) | .id')
  # jq prints one line per matching vault and nothing when none matches; anything other
  # than a single line cannot be used as --vault-id, so stop here with a clear message
  # instead of passing a bad value on to the next oci call.
  if [[ -z "${vault_id}" || "${vault_id}" == *$'\n'* ]]; then
    echo "ERROR: expected exactly one vault named '${vault_name}', got: '${vault_id}'" >&2
    return 1
  fi

  secret_id=$(oci vault secret list \
    --compartment-id "${COMPARTMENT_OCID}" \
    --name "${secret_name}" \
    --vault-id "${vault_id}" | jq -r '.data[].id')
  # Same reasoning as for the vault lookup above.
  if [[ -z "${secret_id}" || "${secret_id}" == *$'\n'* ]]; then
    echo "ERROR: expected exactly one secret named '${secret_name}', got: '${secret_id}'" >&2
    return 1
  fi
  # Publish the OCID only once it is known to be usable.
  SECRET_ID="${secret_id}"

  CLUSTER_INITIATED=$(oci secrets secret-bundle get \
    --secret-id "${SECRET_ID}" | jq -r '.data."secret-bundle-content".content' | base64 -d)
}


# Marks the cluster as initiated by overwriting the content of the secret
# identified by the global SECRET_ID with the base64 form of "true".
function set_cluster_initiated_flag() {
  local flag_value_b64="dHJ1ZQ=="  # Base64 encoded "true"
  local -a update_args=(
    --secret-id "${SECRET_ID}"
    --secret-content-content "${flag_value_b64}"
  )

  oci vault secret update-base64 "${update_args[@]}"
}


function initiate_cluster() {
echo "$(date -uIs): First node, initiating cluster" >> "${USER_DATA_OUTPUT_LOG}"
curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION="${K3S_VERSION}" sh -s - server \
--cluster-init \
--write-kubeconfig-mode 600 \
Expand All @@ -34,29 +58,23 @@ function initiate_cluster() {
}


# Creates the "argocd" namespace and applies the Argo CD v2.11.3 install manifest to it.
# Stdout of every step, preceded by a timestamped headline, is appended to
# USER_DATA_OUTPUT_LOG.
function deploy_cd_tool_for_container_orchestration_tool() {
  local target_namespace="argocd"
  local install_manifest="https://raw.githubusercontent.com/argoproj/argo-cd/v2.11.3/manifests/install.yaml"

  {
    echo "$(date -uIs): Deploying CD tool for container orchestration tool"
    k3s kubectl create namespace "${target_namespace}"
    curl -sSfL "${install_manifest}" \
      | k3s kubectl apply -n "${target_namespace}" -f -
  } >> "${USER_DATA_OUTPUT_LOG}"
}


# Appends the KUBECONFIG export to .bashrc (resolved relative to the current
# working directory) and then sources that file into the running shell.
function set_env_variables() {
  printf '%s\n' "export KUBECONFIG=/etc/rancher/k3s/k3s.yaml" >> .bashrc

  source .bashrc
}


# Creates the "argocd" namespace and applies the Argo CD v2.11.3 install manifest to it.
function deploy_cd_tool_for_container_orchestration_tool() {
  local target_namespace="argocd"
  local install_manifest="https://raw.githubusercontent.com/argoproj/argo-cd/v2.11.3/manifests/install.yaml"

  k3s kubectl create namespace "${target_namespace}"
  curl -sSfL "${install_manifest}" \
    | k3s kubectl apply -n "${target_namespace}" -f -
}


# Registers the hajle-silesia Helm repository, refreshes the repository index and
# installs or upgrades the "hajle-silesia" release in the argocd namespace.
# NOTE(review): the helm repo add / repo update / upgrade sequence appears twice below -
# once inside the logged group and once after it. This looks like both sides of a diff
# rendered together; confirm which variant is meant to remain.
function deploy_business_application() {
# Logged variant: stdout of the whole group is appended to USER_DATA_OUTPUT_LOG;
# the helm upgrade additionally folds its stderr into that stream via 2>&1.
{
echo "$(date -uIs): Deploying business application"
helm repo add hajle-silesia https://raw.githubusercontent.com/hajle-silesia/cd-config/master/docs
helm repo update
helm upgrade --install hajle-silesia hajle-silesia/helm -n argocd 2>&1
} >> "${USER_DATA_OUTPUT_LOG}"
# Plain variant: same three helm steps, output left on the script's own stdout/stderr.
helm repo add hajle-silesia https://raw.githubusercontent.com/hajle-silesia/cd-config/master/docs
helm repo update
helm upgrade --install hajle-silesia hajle-silesia/helm -n argocd
}


Expand All @@ -67,13 +85,11 @@ function wait_lb() {
break
fi
sleep 5
echo "$(date -uIs): Waiting for internal load balancer availability" >> "${USER_DATA_OUTPUT_LOG}"
done
}


function join_cluster() {
echo "$(date -uIs): Joining cluster" >> "${USER_DATA_OUTPUT_LOG}"
curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION="${K3S_VERSION}" sh -s - server \
--server "https://${INTERNAL_LB}:6443" \
--write-kubeconfig-mode 600 \
Expand All @@ -83,15 +99,12 @@ function join_cluster() {


#######################################
# Deletes every node that `kubectl get nodes` reports as NotReady, except the node
# whose name equals this machine's hostname.
# Globals:
#   USER_DATA_OUTPUT_LOG (read) - file the progress messages are appended to
# Arguments:
#   None
#######################################
function delete_unready_nodes() {
  local this_host
  local unready_nodes
  local node

  echo "$(date -uIs): Deleting unready nodes" >> "${USER_DATA_OUTPUT_LOG}"
  this_host="$(hostname)"
  # Filter with awk rather than grep: grep exits 1 when nothing matches, and with
  # `set -eo pipefail` that would abort the whole script whenever all nodes are healthy.
  # awk matches the same lines (any line containing "NotReady") but exits 0 on no match.
  unready_nodes=$(kubectl get nodes --no-headers | awk '/NotReady/ {print $1}')

  # Intentionally unquoted: the list holds one node name per whitespace-separated word.
  for node in ${unready_nodes}; do
    # Never delete the node this script is running on.
    if [[ "${node}" != "${this_host}" ]]; then
      echo "$(date -uIs): Deleting node ${node}" >> "${USER_DATA_OUTPUT_LOG}"
      kubectl delete node "${node}"
      echo "$(date -uIs): Deleted node ${node}" >> "${USER_DATA_OUTPUT_LOG}"
    fi
  done
}
Expand Down

0 comments on commit 0d4088c

Please sign in to comment.