diff --git a/02_configure_host.sh b/02_configure_host.sh index 011c3eefd..862316d46 100755 --- a/02_configure_host.sh +++ b/02_configure_host.sh @@ -345,7 +345,7 @@ ANSIBLE_FORCE_COLOR=true ansible-playbook \ -e "{use_firewalld: True}" \ -e "provisioning_interface=$PROVISIONING_NETWORK_NAME" \ -e "external_interface=$BAREMETAL_NETWORK_NAME" \ - -e "{vm_host_ports: [80, ${LOCAL_REGISTRY_PORT}, 8000, ${INSTALLER_PROXY_PORT}, ${AGENT_BOOT_SERVER_PORT}]}" \ + -e "{vm_host_ports: [80, ${LOCAL_REGISTRY_PORT}, 8000, ${INSTALLER_PROXY_PORT}, ${AGENT_BOOT_SERVER_PORT}, 3260]}" \ -e "vbmc_port_range=$VBMC_BASE_PORT:$VBMC_MAX_PORT" \ $ALMA_PYTHON_OVERRIDE \ -i ${VM_SETUP_PATH}/inventory.ini \ diff --git a/agent/01_agent_requirements.sh b/agent/01_agent_requirements.sh index 9a61058d9..24eddff0d 100755 --- a/agent/01_agent_requirements.sh +++ b/agent/01_agent_requirements.sh @@ -45,3 +45,8 @@ if [[ "${MIRROR_COMMAND}" == oc-mirror ]]; then rm -f ${oc_mirror_file} fi fi + +if [[ "${AGENT_E2E_TEST_BOOT_MODE}" == "ISCSI" ]]; then + # Install targetcli, the administration shell used to configure the iSCSI targets + sudo dnf -y install targetcli +fi diff --git a/agent/05_agent_configure.sh b/agent/05_agent_configure.sh index ed0c4655d..0d4f39cc6 100755 --- a/agent/05_agent_configure.sh +++ b/agent/05_agent_configure.sh @@ -324,6 +324,10 @@ function generate_cluster_manifests() { export AGENT_NO_PROXY=${NO_PROXY} fi + if [[ ${AGENT_E2E_TEST_BOOT_MODE} == ISCSI ]]; then + export AGENT_ROOT_DEVICE_HINTS=${ISCSI_DEVICE_NAME} + fi + # Create manifests ansible-playbook -vvv \ -e install_path=${SCRIPTDIR}/${INSTALL_CONFIG_PATH} \ diff --git a/agent/06_agent_create_cluster.sh b/agent/06_agent_create_cluster.sh index 61afe868b..18478e1f7 100755 --- a/agent/06_agent_create_cluster.sh +++ b/agent/06_agent_create_cluster.sh @@ -11,6 +11,7 @@ source $SCRIPTDIR/utils.sh source $SCRIPTDIR/validation.sh source $SCRIPTDIR/release_info.sh source $SCRIPTDIR/agent/common.sh +source $SCRIPTDIR/agent/iscsi_utils.sh early_deploy_validation
@@ -95,14 +96,19 @@ function set_file_acl() { fi } -function attach_agent_iso() { - - set_file_acl - +function get_agent_iso() { local agent_iso="${OCP_DIR}/agent.$(uname -p).iso" if [ ! -f "${agent_iso}" -a -f "${OCP_DIR}/agent.iso" ]; then agent_iso="${OCP_DIR}/agent.iso" fi + echo "${agent_iso}" +} + +function attach_agent_iso() { + + set_file_acl + + local agent_iso=$(get_agent_iso) for (( n=0; n<${2}; n++ )) do @@ -321,8 +327,11 @@ function setup_pxe_boot() { # Set up a local http server for files needed for PXE or minimal ISO function setup_boot_server() { + local boot_artifacts_dir=${SCRIPTDIR}/${OCP_DIR}/boot-artifacts + if [[ -d ${boot_artifacts_dir} ]] && [[ "$(ls -A ${boot_artifacts_dir})" ]]; then # Copy the generated artifacts to the http server location - cp ${SCRIPTDIR}/${OCP_DIR}/boot-artifacts/* ${BOOT_SERVER_DIR} + cp ${boot_artifacts_dir}/* ${BOOT_SERVER_DIR} + fi # Run a local http server to provide the necessary artifacts echo "package main; import (\"net/http\"); func main() { http.Handle(\"/\", http.FileServer(http.Dir(\"${BOOT_SERVER_DIR}\"))); if err := http.ListenAndServe(\":${AGENT_BOOT_SERVER_PORT}\", nil); err != nil { panic(err) } }" > ${BOOT_SERVER_DIR}/agentpxeserver.go @@ -339,6 +348,56 @@ function agent_pxe_boot() { done } +# Configure the instances for booting off an iSCSI disk +function agent_setup_iscsi_boot() { + set_file_acl + + # The boot server is started since iSCSI uses a similar mechanism to + # retrieve the file for iSCSI boot + mkdir -p ${BOOT_SERVER_DIR} + setup_boot_server + + # Start server iscsid + sudo systemctl enable --now iscsid + + # Create the separate network used for iSCSI booting + agent_create_iscsi_network +} + +# Create the iscsi targets +function agent_iscsi_targets() { + local agent_iso=$(get_agent_iso) + + for (( n=0; n<${2}; n++ )) + do + # Note that the name used for the target must not have an underscore + local name=${1}-${n} + local iscsi_disk=${SCRIPTDIR}/"iscsi-${name}" + agent_create_iscsi_target ${name} ${agent_iso}
${iscsi_disk} + agent_create_iscsi_pxe_file ${BOOT_SERVER_DIR} + done +} + +# Add the network to the domain and restart to boot the nodes +function agent_iscsi_update_nodes() { + for (( n=0; n<${2}; n++ )) + do + local domain_name=${CLUSTER_NAME}_${1}_${n} + local name=${1}-${n} + local index=${n} + if [[ ${1} == "worker" ]]; then + index=$((${NUM_MASTERS} + $index)) + fi + + agent_add_iscsi_network_to_domain ${domain_name} ${name} ${index} + domain_running=$(sudo virsh list --name) + if grep -qxF "${domain_name}" <<< "${domain_running}"; then + sudo virsh destroy ${domain_name} + fi + sudo virsh start ${domain_name} + done +} + function create_appliance() { local asset_dir="$(realpath "${1}")" @@ -380,6 +439,20 @@ case "${AGENT_E2E_TEST_BOOT_MODE}" in agent_pxe_boot worker $NUM_WORKERS ;; + "ISCSI" ) + # TODO - check that MINIMAL_ISO is set + create_image ${asset_dir} ${openshift_install} + + agent_setup_iscsi_boot + + agent_iscsi_targets master $NUM_MASTERS + agent_iscsi_targets worker $NUM_WORKERS + + # Update the nodes and restart + agent_iscsi_update_nodes master $NUM_MASTERS + agent_iscsi_update_nodes worker $NUM_WORKERS + ;; + "DISKIMAGE" ) # Create the config ISO mkdir -p ${config_image_dir} diff --git a/agent/cleanup.sh b/agent/cleanup.sh index 45b0e64fa..dfae6a663 100755 --- a/agent/cleanup.sh +++ b/agent/cleanup.sh @@ -15,6 +15,14 @@ early_cleanup_validation rm -rf "${OCP_DIR}/manifests" rm -rf "${OCP_DIR}/output" +function agent_remove_iscsi_disks() { + for (( n=0; n<${2}; n++ )) + do + local iscsi_disk=${SCRIPTDIR}/"iscsi-${1}-${n}" + sudo rm -f ${iscsi_disk} + done +} + if [[ "${AGENT_E2E_TEST_BOOT_MODE}" == "DISKIMAGE" ]]; then sudo rm -rf "${OCP_DIR}/cache" sudo rm -rf "${OCP_DIR}/temp" @@ -37,3 +45,24 @@ if [[ $NUM_MASTERS == 1 && $IP_STACK == "v6" ]]; then sudo sed -i "/${AGENT_NODE0_IPSV6} oauth-openshift.apps.${CLUSTER_DOMAIN}/d" /etc/hosts sudo sed -i "/${AGENT_NODE0_IPSV6} thanos-querier-openshift-monitoring.apps.${CLUSTER_DOMAIN}/d" /etc/hosts fi + +if [[
"${AGENT_E2E_TEST_BOOT_MODE}" == "ISCSI" ]]; then + # Remove the network created for ISCSI: stop it if active, then undefine it + iscsi_network=$(sudo virsh net-list --name) + if grep -qxF "${ISCSI_NETWORK}" <<< "${iscsi_network}"; then + sudo virsh net-destroy ${ISCSI_NETWORK} + fi + + iscsi_inactive=$(sudo virsh net-list --inactive --name) + if grep -qxF "${ISCSI_NETWORK}" <<< "${iscsi_inactive}"; then + sudo virsh net-undefine ${ISCSI_NETWORK} + fi + + # Remove ISCSI targets. clearconfig wipes the whole targetcli configuration on this host, not only this cluster's targets + if [[ -x "$(command -v targetcli)" ]] ; then + sudo targetcli clearconfig confirm=True + fi + + agent_remove_iscsi_disks master $NUM_MASTERS + agent_remove_iscsi_disks worker $NUM_WORKERS +fi diff --git a/agent/common.sh b/agent/common.sh index 3757bba03..e114c7584 100644 --- a/agent/common.sh +++ b/agent/common.sh @@ -10,6 +10,7 @@ export AGENT_USE_APPLIANCE_MODEL=${AGENT_USE_APPLIANCE_MODEL:-"false"} export AGENT_APPLIANCE_HOTPLUG=${AGENT_APPLIANCE_HOTPLUG:-"false"} export AGENT_PLATFORM_TYPE=${AGENT_PLATFORM_TYPE:-"baremetal"} export AGENT_PLATFORM_NAME=${AGENT_PLATFORM_NAME:-"oci"} +export AGENT_ROOT_DEVICE_HINTS=${AGENT_ROOT_DEVICE_HINTS:-""} export AGENT_BM_HOSTS_IN_INSTALL_CONFIG=${AGENT_BM_HOSTS_IN_INSTALL_CONFIG:-"false"} @@ -17,6 +18,10 @@ export AGENT_MINIMAL_ISO=${AGENT_MINIMAL_ISO:-"false"} export BOND_CONFIG=${BOND_CONFIG:-"none"} +export ISCSI_NETWORK="iscsi" +export ISCSI_NETWORK_SUBNET=${ISCSI_NETWORK_SUBNET:-"192.168.145"} +export ISCSI_DEVICE_NAME=${ISCSI_DEVICE_NAME:-"/dev/sdb"} + # Image reference for OpenShift-based Appliance Builder. # See: https://github.com/openshift/appliance export APPLIANCE_IMAGE=${APPLIANCE_IMAGE:-"quay.io/edge-infrastructure/openshift-appliance:latest"} @@ -31,12 +36,11 @@ export EXTRA_MANIFESTS_PATH="${OCP_DIR}/openshift" # The necessary files will be copied to boot-artifacts by the installer for either: # 1. PXE, when the 'openshift-install agent create pxe-files' command is run # 2.
Minimal ISO, when the 'openshift-install agent create image' command is run and bootArtifacts is set -# in install-config.yaml +# in install-config.yaml, OR +# 3. ISCSI, to contain the iPXE file needed for iSCSI booting export BOOT_SERVER_DIR=${WORKING_DIR}/boot-artifacts export PXE_BOOT_FILE=agent.x86_64.ipxe -if [[ "${AGENT_E2E_TEST_BOOT_MODE}" == "PXE" || "${AGENT_MINIMAL_ISO}" == "true" ]]; then - export BOOT_SERVER_URL=http://$(wrap_if_ipv6 ${PROVISIONING_HOST_EXTERNAL_IP}):${AGENT_BOOT_SERVER_PORT} -fi +export BOOT_SERVER_URL=http://$(wrap_if_ipv6 ${PROVISIONING_HOST_EXTERNAL_IP}):${AGENT_BOOT_SERVER_PORT} # Configure the instances for PXE booting function agent_pxe_boot() { diff --git a/agent/iscsi_utils.sh b/agent/iscsi_utils.sh new file mode 100755 index 000000000..9a411bee1 --- /dev/null +++ b/agent/iscsi_utils.sh @@ -0,0 +1,87 @@ +#!/usr/bin/env bash +set -euxo pipefail + +SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/.." && pwd )" + +source $SCRIPTDIR/agent/common.sh + +ISCSI_INITIATOR_BASE="iqn.2024-01.ostest.test.metalkube.org" + +function agent_add_iscsi_network_to_domain() { + local domain_name=${1} + local host_name=${2} + local index=${3} + + # add the iscsi network + sudo virt-xml ${domain_name} --add-device --network network=${ISCSI_NETWORK},model=virtio,"boot_order=1" + + # add the hostname binding (mac, name and ip of this host) so that the host can resolve the 'hostname' variable in pxe file + host_mac=$(sudo virsh domiflist ${domain_name} | grep ${ISCSI_NETWORK} | awk '{print $5}') + iscsi_addr=$((20 + $index)) + host_ip="${ISCSI_NETWORK_SUBNET}."${iscsi_addr} + sudo virsh net-update ${ISCSI_NETWORK} add-last ip-dhcp-host "<host mac='${host_mac}' name='${host_name}' ip='${host_ip}'/>" +} + +function agent_create_iscsi_network() { + sudo virsh net-define /dev/stdin < + ${ISCSI_NETWORK} + + + + + + + + + +EOF + + sudo virsh net-start ${ISCSI_NETWORK} +} + +function agent_create_iscsi_target() { + + local name=${1} + local agent_iso=${2} + local iscsi_disk=${3} + + # create disks + sudo qemu-img create -f raw ${iscsi_disk} 120G +
+ # Create a fileio backstore on top of the disk image + sudo targetcli backstores/fileio create name=$name size=120G file_or_dev=${iscsi_disk} + + # Create the iSCSI target, named by its IQN + sudo targetcli /iscsi create ${ISCSI_INITIATOR_BASE}:$name + + # Create a LUN on the target that exports the backstore + sudo targetcli /iscsi/${ISCSI_INITIATOR_BASE}:$name/tpg1/luns create /backstores/fileio/$name + + # Add an ACL so the initiator using the same IQN is allowed access + sudo targetcli /iscsi/${ISCSI_INITIATOR_BASE}:$name/tpg1/acls create ${ISCSI_INITIATOR_BASE}:$name + + # Override iscsi timeout values. Not setting this can result in the error on the target machine: + # Unable to recover from DataOut timeout while in ERL=0, closing iSCSI connection + sudo targetcli /iscsi/${ISCSI_INITIATOR_BASE}:$name/tpg1/acls/${ISCSI_INITIATOR_BASE}:$name set attribute dataout_timeout=60 + sudo targetcli /iscsi/${ISCSI_INITIATOR_BASE}:$name/tpg1/acls/${ISCSI_INITIATOR_BASE}:$name set attribute dataout_timeout_retries=10 + + # Save configuration. + sudo targetcli / saveconfig + + # Copy the ISO onto the start of the disk image; conv=notrunc keeps the image from being truncated to the ISO size + sudo dd conv=notrunc if=${agent_iso} of=${iscsi_disk} status=progress +} + +function agent_create_iscsi_pxe_file() { + + local boot_dir=${1} + + # Set 'hostname' variable in file. It will be resolved by host during PXE boot + # in order to access a unique target for this host.
+cat > "${boot_dir}/agent.x86_64-iscsi.ipxe" << EOF +#!ipxe +set initiator-iqn ${ISCSI_INITIATOR_BASE}:\${hostname} +sanboot --keep iscsi:${ISCSI_NETWORK_SUBNET}.1::::${ISCSI_INITIATOR_BASE}:\${hostname} +EOF +} diff --git a/agent/roles/manifests/templates/agent-config_bond_yaml.j2 b/agent/roles/manifests/templates/agent-config_bond_yaml.j2 index d3650836f..9ea4d6789 100644 --- a/agent/roles/manifests/templates/agent-config_bond_yaml.j2 +++ b/agent/roles/manifests/templates/agent-config_bond_yaml.j2 @@ -13,7 +13,7 @@ rendezvousIP: {{ ips[0] }} {% else %} rendezvousIP: {{ ipsv6[0] }} {% endif %} -{% if (boot_mode == "PXE") or (agent_minimal_iso == "true" and mirror_images %} +{% if (boot_mode == "PXE") or (boot_mode == "ISCSI") or (agent_minimal_iso == "true" and mirror_images) %} bootArtifactsBaseURL: {{ boot_server_url }} {% endif %} hosts: diff --git a/agent/roles/manifests/templates/agent-config_yaml.j2 b/agent/roles/manifests/templates/agent-config_yaml.j2 index 5f9c078d4..a99d034c4 100644 --- a/agent/roles/manifests/templates/agent-config_yaml.j2 +++ b/agent/roles/manifests/templates/agent-config_yaml.j2 @@ -18,19 +18,24 @@ rendezvousIP: {{ ips[0] }} {% else %} rendezvousIP: {{ ipsv6[0] }} {% endif %} -{% if (boot_mode == "PXE") or (agent_minimal_iso == "true" and mirror_images) %} +{% if (boot_mode == "PXE") or (boot_mode == "ISCSI") or (agent_minimal_iso == "true" and mirror_images) %} bootArtifactsBaseURL: {{ boot_server_url }} {% endif %} {% if agent_minimal_iso == "true" %} minimalISO: true {% endif %} -{% if (agent_install_config_bm_hosts == "false") and (networking_mode != "DHCP" or agent_nmstate_dhcp == 'true') %} +{% if (agent_install_config_bm_hosts == "false") %} hosts: {% for hostname in hostnames %} - hostname: {{ hostname }} +{% if agent_root_device_hints %} + rootDeviceHints: + deviceName: {{ agent_root_device_hints }} +{% endif %} interfaces: - name: eth0 macAddress: {{ macs[loop.index0] }} +{% if (networking_mode != "DHCP" or agent_nmstate_dhcp
== 'true') %} networkConfig: interfaces: {{ net.interfaces("eth0", macs[loop.index0])|indent(4, True) }} @@ -56,5 +61,6 @@ hosts: {{ net.dns_dualstack(provisioning_host_external_ip, provisioning_host_external_ip_dualstack)|indent(4, True) }} {{ net.route_dualstack("eth0", provisioning_host_external_ip, provisioning_host_external_ip_dualstack)|indent(4, True) }} {% endif %} +{% endif %} {% endfor %} {% endif %} diff --git a/agent/roles/manifests/vars/main.yml b/agent/roles/manifests/vars/main.yml index 13279a948..5bdf1035d 100644 --- a/agent/roles/manifests/vars/main.yml +++ b/agent/roles/manifests/vars/main.yml @@ -14,6 +14,7 @@ agent_nodes_macs: "{{ lookup('env', 'AGENT_NODES_MACS_STR') }}" agent_nodes_ips: "{{ lookup('env', 'AGENT_NODES_IPS_STR') }}" agent_nodes_ipsv6: "{{ lookup('env', 'AGENT_NODES_IPSV6_STR') }}" agent_nodes_hostnames: "{{ lookup('env', 'AGENT_NODES_HOSTNAMES_STR') }}" +agent_root_device_hints: "{{ lookup('env', 'AGENT_ROOT_DEVICE_HINTS', default='') }}" agent_use_ztp_manifests: "{{ lookup('env', 'AGENT_USE_ZTP_MANIFESTS') }}" agent_test_cases: "{{ lookup('env', 'AGENT_TEST_CASES') }}" base_domain: "{{ lookup('env', 'BASE_DOMAIN') }}" diff --git a/common.sh b/common.sh index dd086da0a..d7d909f55 100644 --- a/common.sh +++ b/common.sh @@ -486,18 +486,18 @@ if [[ ! -z ${AGENT_E2E_TEST_SCENARIO} ]]; then # We're interested in booting a plain iPXE, so setting back the libivirt # firmware to the default - if [[ "${AGENT_E2E_TEST_BOOT_MODE}" == "PXE" ]]; then + if [[ "${AGENT_E2E_TEST_BOOT_MODE}" == "PXE" ]] || [[ "${AGENT_E2E_TEST_BOOT_MODE}" == "ISCSI" ]]; then LIBVIRT_FIRMWARE=bios fi fi if [[ ! -z ${AGENT_E2E_TEST_BOOT_MODE} ]]; then case "$AGENT_E2E_TEST_BOOT_MODE" in - "ISO" | "PXE" | "DISKIMAGE") + "ISO" | "PXE" | "DISKIMAGE" | "ISCSI") # Valid value ;; *) - printf "Found invalid value \"$AGENT_E2E_TEST_BOOT_MODE\" for AGENT_E2E_TEST_BOOT_MODE. Supported values: ISO (default), PXE, DISKIMAGE." 
+ printf "Found invalid value \"$AGENT_E2E_TEST_BOOT_MODE\" for AGENT_E2E_TEST_BOOT_MODE. Supported values: ISO (default), PXE, DISKIMAGE, or ISCSI." exit 1 ;; esac diff --git a/config_example.sh b/config_example.sh index 06322f85e..97f19cb53 100755 --- a/config_example.sh +++ b/config_example.sh @@ -719,7 +719,7 @@ set -x # Set a single config variable AGENT_E2E_TEST_SCENARIO to create a cluster for the different scenarios # i.e. Single Node Openshift(SNO), Highly Available (HA), Compact cluster, control plane with 5 replicas # and no workers (5CONTROL), or control plane with 4 replicas and no workers (4CONTROL). -# The boot mode for the agent machines can only be set to ISO or PXE. +# The boot mode for the agent machines can be set to ISO, PXE, DISKIMAGE, or ISCSI. # For backward compatibility of CI jobs, the default boot mode is ISO. # The only supported values for AGENT_E2E_TEST_SCENARIO are # - 4CONTROL_IPV4 @@ -851,3 +851,13 @@ set -x # The location won't be synced or updated preserving any local # changes applied to it. # export METAL3_DEV_ENV='' + +# AGENT_ROOT_DEVICE_HINTS +# Default: Undefined +# +# Setting this to a non-empty string will set the RootDeviceHint value for the +# assisted-installer to use on the node when installing the final image. When +# the boot mode is ISCSI, the value of ISCSI_DEVICE_NAME will be used for the +# hint. +# +# export AGENT_ROOT_DEVICE_HINTS=""