OpenShift的安装方式很多了,现在又多了一种:agent based installer。它最大的特点是,不需要额外的bootstrap节点了。这可是天大的好消息,因为以前在安装之前和客户交流时,客户总是不理解,为什么红帽说支持3节点部署,却要求提供4台服务器。这也不能怪客户,按照一般的理解,之前红帽确实不支持严格意义上的3节点部署,就因为有这个bootstrap。现在好了,agent based installer是真正世俗意义上的支持3节点部署了。
从官方文档来看,能压缩掉bootstrap,是因为bootstrap相关的服务,都压缩到一个master节点上,并使用了assisted installer流程,来达到真正的3节点安装的。
本文,就用agent based installer来装一个单节点的ocp集群。
# Select the OpenShift release to install. The client and installer tarballs
# are read from /data/ocp4/<version>/.
export BUILDNUMBER=4.12.9

# Unpack the oc client and openshift-install into /usr/local/bin.
# The tarballs are addressed by relative name, so extract them only when the
# directory change succeeded; otherwise tar would run against whatever the
# current directory happens to be and popd would pop an unrelated entry.
if pushd "/data/ocp4/${BUILDNUMBER}"; then
  tar -xzf "openshift-client-linux-${BUILDNUMBER}.tar.gz" -C /usr/local/bin/
  tar -xzf "openshift-install-linux-${BUILDNUMBER}.tar.gz" -C /usr/local/bin/
  # tar -xzf oc-mirror.tar.gz -C /usr/local/bin/
  # chmod +x /usr/local/bin/oc-mirror
  popd
fi

# These two helpers are addressed by absolute path, so they do not depend on
# the directory change above.
install -m 755 /data/ocp4/clients/butane-amd64 /usr/local/bin/butane
install -m 755 /data/ocp4/clients/coreos-installer_amd64 /usr/local/bin/coreos-installer
# Create a dedicated user and run the cluster installation as that user.
useradd -m 3node
su - 3node

# Key pair for the install user; the public half is read later from
# ${BASE_DIR}/.ssh/id_rsa.pub.
ssh-keygen

# Lab convenience: never prompt for, or record, SSH host keys.
cat << EOF > ~/.ssh/config
StrictHostKeyChecking no
UserKnownHostsFile=/dev/null
EOF
chmod 600 ~/.ssh/config

# Persist BASE_DIR for future logins. Append it only once so that re-running
# this section does not keep growing ~/.bashrc.
grep -qxF "export BASE_DIR='/home/3node/'" ~/.bashrc 2>/dev/null \
  || cat << 'EOF' >> ~/.bashrc
export BASE_DIR='/home/3node/'
EOF

# ~/.bashrc is not re-read by the shell that is already running, so BASE_DIR
# has to be set here as well; otherwise every ${BASE_DIR}/... path below
# expands to a path directly under "/".
export BASE_DIR='/home/3node/'
export BUILDNUMBER=4.12.9

mkdir -p "${BASE_DIR}"/data/{sno/disconnected,install}
# ---- Parameters of your cluster -------------------------------------------

# Public SSH key of the install user.
NODE_SSH_KEY="$(cat "${BASE_DIR}/.ssh/id_rsa.pub")"

# Mirror registry that serves the release images.
INSTALL_IMAGE_REGISTRY='quaylab.infra.wzhlab.top:5443'

# PULL_SECRET='{"auths":{"registry.redhat.io": {"auth": "ZHVtbXk6ZHVtbXk=","email": "noemail@localhost"},"registry.ocp4.redhat.ren:5443": {"auth": "ZHVtbXk6ZHVtbXk=","email": "noemail@localhost"},"'${INSTALL_IMAGE_REGISTRY}'": {"auth": "'$( echo -n 'admin:shadowman' | openssl base64 )'","email": "noemail@localhost"}}}'
PULL_SECRET="$(cat /data/pull-secret.json)"

NTP_SERVER='192.168.77.11'

# HELP_SERVER=192.168.7.11
# KVM_HOST=192.168.7.11
# API_VIP=192.168.77.99
# INGRESS_VIP=192.168.77.98
# CLUSTER_PROVISION_IP=192.168.7.103
# BOOTSTRAP_IP=192.168.7.12

# ---- Shared network settings ----------------------------------------------
MACHINE_NETWORK='192.168.77.0/24'
OCP_GW='192.168.77.11'
OCP_NETMASK='255.255.255.0'
OCP_NETMASK_S='24'
OCP_DNS='192.168.77.11'
OCP_GW_v6='fd03::11'
OCP_NETMASK_v6='64'

# ---- Node definitions for the single-node cluster -------------------------
SNO_CLUSTER_NAME='osp-demo'
SNO_BASE_DOMAIN='wzhlab.top'

# bootstrap
BOOTSTRAP_IP='192.168.77.42'
BOOTSTRAP_IPv6='fd03::42'
BOOTSTRAP_HOSTNAME='bootstrap-demo'
BOOTSTRAP_INTERFACE='enp1s0'
BOOTSTRAP_DISK='/dev/vda'

# master 01
MASTER_01_IP='192.168.77.43'
MASTER_01_IPv6='fd03::43'
MASTER_01_HOSTNAME='master-01-demo'
MASTER_01_INTERFACE='enp1s0'
MASTER_01_INTERFACE_MAC='52:54:00:12:A1:01'
MASTER_01_DISK='/dev/vda'

# master 02
MASTER_02_IP='192.168.77.44'
MASTER_02_IPv6='fd03::44'
MASTER_02_HOSTNAME='master-02-demo'
MASTER_02_INTERFACE='enp1s0'
MASTER_02_INTERFACE_MAC='52:54:00:12:A1:02'
MASTER_02_DISK='/dev/vda'

# master 03
MASTER_03_IP='192.168.77.45'
MASTER_03_IPv6='fd03::45'
MASTER_03_HOSTNAME='master-03-demo'
MASTER_03_INTERFACE='enp1s0'
MASTER_03_INTERFACE_MAC='52:54:00:12:A1:03'
MASTER_03_DISK='/dev/vda'

# echo ${SNO_IF_MAC} > /data/sno/sno.mac
# Start from an empty installer asset directory.
mkdir -p "${BASE_DIR}/data/install"

# Wipe leftovers from a previous run, but only when the cd succeeded;
# otherwise the trailing "*" would delete the contents of whatever directory
# the shell happens to be in. The "*" already matches every non-hidden entry;
# the hidden state file has to be named explicitly.
cd "${BASE_DIR}/data/install" \
  && /bin/rm -rf -- *.ign .openshift_install_state.json auth bootstrap manifests master*[0-9] worker*[0-9] *
# Render install-config.yaml for a single control-plane node with no workers.
# The heredoc delimiter is unquoted on purpose: the shell variables and the
# $( ... ) substitutions below are expanded while the file is written.
# YAML nesting is significant, so the indentation inside the heredoc must be
# kept exactly as laid out here.
cat << EOF > "${BASE_DIR}/data/install/install-config.yaml"
apiVersion: v1
baseDomain: $SNO_BASE_DOMAIN
compute:
- name: worker
  replicas: 0
controlPlane:
  name: master
  replicas: 1
metadata:
  name: $SNO_CLUSTER_NAME
networking:
  # OVNKubernetes , OpenShiftSDN
  clusterNetwork:
  - cidr: 172.21.0.0/16
    hostPrefix: 23
  # - cidr: fd02::/48
  #   hostPrefix: 64
  machineNetwork:
  - cidr: $MACHINE_NETWORK
  # - cidr: 2001:DB8::/32
  serviceNetwork:
  - 172.22.0.0/16
  # - fd03::/112
platform:
  none: {}
pullSecret: '${PULL_SECRET}'
sshKey: |
$( sed 's/^/  /' "${BASE_DIR}/.ssh/id_rsa.pub" )
additionalTrustBundle: |
$( sed 's/^/  /' /etc/crts/redhat.ren.ca.crt )
imageContentSources:
- mirrors:
  - ${INSTALL_IMAGE_REGISTRY}/ocp4/openshift4
  source: quay.io/openshift-release-dev/ocp-release
- mirrors:
  - ${INSTALL_IMAGE_REGISTRY}/ocp4/openshift4
  source: quay.io/openshift-release-dev/ocp-v4.0-art-dev
EOF
# Render agent-config.yaml: one master host with a static IPv4 address, DNS
# server and default route. The rendezvous IP is that same master's address.
# The heredoc delimiter is unquoted so the shell variables are expanded while
# the file is written. YAML nesting is significant; keep the indentation.
cat << EOF > "${BASE_DIR}/data/install/agent-config.yaml"
apiVersion: v1alpha1
kind: AgentConfig
metadata:
  name: $SNO_CLUSTER_NAME
rendezvousIP: $MASTER_01_IP
additionalNTPSources:
- $NTP_SERVER
hosts:
  - hostname: $MASTER_01_HOSTNAME
    role: master
    rootDeviceHints:
      deviceName: "$MASTER_01_DISK"
    interfaces:
      - name: $MASTER_01_INTERFACE
        macAddress: $MASTER_01_INTERFACE_MAC
    networkConfig:
      interfaces:
        - name: $MASTER_01_INTERFACE
          type: ethernet
          state: up
          mac-address: $MASTER_01_INTERFACE_MAC
          ipv4:
            enabled: true
            address:
              - ip: $MASTER_01_IP
                prefix-length: $OCP_NETMASK_S
            dhcp: false
      dns-resolver:
        config:
          server:
            - $OCP_DNS
      routes:
        config:
          - destination: 0.0.0.0/0
            next-hop-address: $OCP_GW
            next-hop-interface: $MASTER_01_INTERFACE
            table-id: 254
EOF
# Keep a copy of install-config.yaml next to the original before running the
# installer against this directory.
/bin/cp -f "${BASE_DIR}/data/install/install-config.yaml" "${BASE_DIR}/data/install/install-config.yaml.bak"

openshift-install --dir="${BASE_DIR}/data/install" agent create cluster-manifests

# Make the generated mirror configuration available to the local container
# tools. The file is addressed by absolute path so this step does not depend
# on the current directory.
sudo bash -c "/bin/cp -f ${BASE_DIR}/data/install/mirror/registries.conf /etc/containers/registries.conf.d/; chmod +r /etc/containers/registries.conf.d/*"

# /bin/cp -f /data/ocp4/ansible-helper/files/* ${BASE_DIR}/data/install/openshift/

# Generate the registry MachineConfig manifests and hand them to the installer.
sudo bash -c "cd /data/ocp4 ; bash image.registries.conf.sh quaylab.infra.wzhlab.top:5443 ;"

# The target must be a directory: if openshift/ were missing, cp would create
# a regular file named "openshift" and the second copy would overwrite it.
mkdir -p "${BASE_DIR}/data/install/openshift"
/bin/cp -f /data/ocp4/99-worker-container-registries.yaml "${BASE_DIR}/data/install/openshift"
/bin/cp -f /data/ocp4/99-master-container-registries.yaml "${BASE_DIR}/data/install/openshift"

cd "${BASE_DIR}/data/install/"

# openshift-install --dir=${BASE_DIR}/data/install create ignition-configs

# Pre-seed the installer's image cache with the RHCOS live ISO; the sample
# output below shows the installer finding and verifying this cached file.
# NOTE(review): this reads from /data/ocp-$BUILDNUMBER/, while the tarballs at
# the top of this file come from /data/ocp4/${BUILDNUMBER}/ - confirm the path.
mkdir -p ~/.cache/agent/image_cache/
/bin/cp -f "/data/ocp-$BUILDNUMBER/rhcos-live.x86_64.iso" ~/.cache/agent/image_cache/coreos-x86_64.iso

openshift-install --dir="${BASE_DIR}/data/install" agent create image --log-level=debug
# ......
# DEBUG Fetching image from OCP release (oc adm release info --image-for=machine-os-images --insecure=true --icsp-file=/tmp/icsp-file3636774741 quay.io/openshift-release-dev/ocp-release@sha256:96bf74ce789ccb22391deea98e0c5050c41b67cc17defbb38089d32226dba0b8)
# DEBUG The file was found in cache: /home/3node/.cache/agent/image_cache/coreos-x86_64.iso
# INFO Verifying cached file
# DEBUG extracting /coreos/coreos-x86_64.iso.sha256 to /tmp/cache1876698393, oc image extract --path /coreos/coreos-x86_64.iso.sha256:/tmp/cache1876698393 --confirm --icsp-file=/tmp/icsp-file455852761 quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:052130abddf741195b6753888cf8a00757dedeb7010f7d4dcc4b842b5bc705f6
# ......
# Extract the ignition config embedded in the generated agent ISO.
coreos-installer iso ignition show agent.x86_64.iso > ignition.ign

# HTTP_PATH=http://192.168.7.11:8080/ignition

source /data/ocp4/acm.fn.sh

# Add a user "wzh" with password "redhat" so that, on first boot, one can log
# in directly from the console or over ssh with a username and password.
# This makes troubleshooting and experimenting easier.
VAR_PWD_HASH="$(python3 -c 'import crypt,getpass; print(crypt.crypt("redhat"))')"

# One jq pass: append the user, add the debug-shell kernel argument, and emit
# compact JSON.
jq -c --arg VAR "$VAR_PWD_HASH" --arg VAR_SSH "$NODE_SSH_KEY" '
    .passwd.users += [{ "name": "wzh", "system": true, "passwordHash": $VAR , "sshAuthorizedKeys": [ $VAR_SSH ], "groups": [ "adm", "wheel", "sudo", "systemd-journal" ] }]
    | . += { "kernel_arguments" : { "should_exist" : [ "systemd.debug-shell=1" ] } }
  ' "${BASE_DIR}/data/install/ignition.ign" \
  > "${BASE_DIR}/data/install/ignition-iso.ign"

# Put the modified ignition back into the ISO, replacing the embedded one.
coreos-installer iso ignition embed -f -i ignition-iso.ign agent.x86_64.iso

# VAR_IMAGE_VER=rhcos-410.86.202303200936-AnolisOS-0-live.x86_64.iso
#######################################
# Remove a logical volume and, on request, recreate it as a thin volume.
# Arguments:
#   $1 - volume group name
#   $2 - thin pool name inside that volume group
#   $3 - logical volume name
#   $4 - virtual size passed to `lvcreate -V` (for example 500G)
#   $5 - optional action; only the value "recreate" creates the volume again
# Returns:
#   status of wipefs when recreating, otherwise 0
#######################################
create_lv() {
  # Locals keep these names from leaking into (or clobbering) the caller's shell.
  local vg=$1
  local pool=$2
  local lv=$3
  local size=$4
  local action=${5:-}

  # Best effort: the volume may not exist yet, so a failure here is ignored.
  lvremove -f "${vg}/${lv}"
  # lvcreate -y -L $var_size -n $var_lv $var_vg
  if [[ "${action}" == "recreate" ]]; then
    lvcreate --type thin -n "${lv}" -V "${size}" --thinpool "${vg}/${pool}"
    # Clear any stale filesystem/partition signatures on the new volume.
    wipefs --all --force "/dev/${vg}/${lv}"
  fi
}
# Tear down any previous VMs and their disks. create_lv is called without a
# fifth argument here, so it only removes the volumes and does not recreate
# them.
for i in bootstrap master-01 master-02 master-03; do
  virsh destroy "ocp4-acm-one-${i}"
  virsh undefine "ocp4-acm-one-${i}"
  create_lv vgdata poolA "lvacm-one-${i}" 500G
  create_lv vgdata poolA "lvacm-one-${i}-data" 500G
done
# Append the vm.overcommit_memory setting to the local sysctl drop-in, then
# reload all sysctl configuration files.
printf '%s\n' 'vm.overcommit_memory = 1' >> /etc/sysctl.d/99-wzh-sysctl.conf
sysctl --system
# Create the virtual network used by the lab.
mkdir -p /data/kvm
cd /data/kvm
# Write bridge.sh. The quoted 'EOF' delimiter keeps every $VAR in the body
# literal, so they are expanded when bridge.sh runs, not while it is written.
# The script deletes the existing connections for the physical NIC and for
# "baremetal", creates a bridge named "baremetal" with a static address,
# gateway and DNS server, and attaches the physical NIC to it as a bridge
# slave.
cat << 'EOF' > /data/kvm/bridge.sh
#!/usr/bin/env bash
PUB_CONN='eno1'
PUB_IP='172.21.6.103/24'
PUB_GW='172.21.6.254'
PUB_DNS='172.21.1.1'
nmcli con down "$PUB_CONN"
nmcli con delete "$PUB_CONN"
nmcli con down baremetal
nmcli con delete baremetal
# RHEL 8.1 appends the word "System" in front of the connection,delete in case it exists
nmcli con down "System $PUB_CONN"
nmcli con delete "System $PUB_CONN"
nmcli connection add ifname baremetal type bridge con-name baremetal ipv4.method 'manual' \
ipv4.address "$PUB_IP" \
ipv4.gateway "$PUB_GW" \
ipv4.dns "$PUB_DNS"
nmcli con add type bridge-slave ifname "$PUB_CONN" master baremetal
nmcli con down "$PUB_CONN";pkill dhclient;dhclient baremetal
nmcli con up baremetal
EOF
# Run the generated script. It takes the physical connection down, so
# connectivity over that NIC is interrupted while it runs.
bash /data/kvm/bridge.sh
# Add a second IPv4 address to the bridge and re-activate the connection so
# the change takes effect.
# NOTE(review): this adds an address in 192.168.7.0/24, while the cluster
# parameters earlier in this file use 192.168.77.0/24 - confirm it is intended.
nmcli con mod baremetal +ipv4.addresses "192.168.7.103/24"
nmcli con up baremetal
# Lab convenience for root on this host: never prompt for, or record, SSH
# host keys.
cat << EOF > /root/.ssh/config
StrictHostKeyChecking no
UserKnownHostsFile=/dev/null
EOF
# Restrict the file to its owner, matching the other places in this file that
# write an ssh client config.
chmod 600 /root/.ssh/config
# Build the LVM thin pool that backs the VM disks.
pvcreate -y /dev/vdb
# The volume group must be named "vgdata": every lvcreate/lvconvert/lvextend
# below, and every create_lv call in this file, refers to it by that name.
vgcreate vgdata /dev/vdb

# https://access.redhat.com/articles/766133
# Create the data LV and a separate metadata LV, then combine them into a
# thin pool.
lvcreate -y -n poolA -L 500G vgdata
lvcreate -y -n poolA_meta -L 10G vgdata
lvconvert -y --thinpool vgdata/poolA --poolmetadata vgdata/poolA_meta
# Grow the pool into all remaining free space of the volume group.
lvextend -l +100%FREE vgdata/poolA
mkdir -p /data/kvm/one/
# Copy the generated agent ISO to this host.
# NOTE(review): the source here had been replaced by the literal text
# "[email protected]", which is not a usable scp source. It is restored on
# the assumption that the ISO is on the host at 192.168.77.11 (the address
# used for DNS, NTP and gateway in this file) and is fetched as root -
# confirm both the user and the address.
scp root@192.168.77.11:/home/3node/data/install/agent.x86_64.iso /data/kvm/one/
#######################################
# Remove a logical volume and, on request, recreate it as a thin volume.
# Arguments:
#   $1 - volume group name
#   $2 - thin pool name inside that volume group
#   $3 - logical volume name
#   $4 - virtual size passed to `lvcreate -V` (for example 500G)
#   $5 - optional action; only the value "recreate" creates the volume again
# Returns:
#   status of wipefs when recreating, otherwise 0
#######################################
create_lv() {
  # Locals keep these names from leaking into (or clobbering) the caller's shell.
  local vg=$1
  local pool=$2
  local lv=$3
  local size=$4
  local action=${5:-}

  # Best effort: the volume may not exist yet, so a failure here is ignored.
  lvremove -f "${vg}/${lv}"
  # lvcreate -y -L $var_size -n $var_lv $var_vg
  if [[ "${action}" == "recreate" ]]; then
    lvcreate --type thin -n "${lv}" -V "${size}" --thinpool "${vg}/${pool}"
    # Clear any stale filesystem/partition signatures on the new volume.
    wipefs --all --force "/dev/${vg}/${lv}"
  fi
}
# Memory for the VM in GiB; converted to MiB for virt-install below.
SNO_MEM=64

# Remove any previous instance of the VM, then recreate both of its disks.
virsh destroy ocp4-acm-one-master-01
virsh undefine ocp4-acm-one-master-01

create_lv vgdata poolA lvacm-one-master-01 500G recreate
create_lv vgdata poolA lvacm-one-master-01-data 500G recreate

# Boot the VM from the agent ISO. The MAC address is the one agent-config.yaml
# assigns to the first master.
virt-install \
  --name=ocp4-acm-one-master-01 \
  --vcpus=16 \
  --ram=$(( $SNO_MEM * 1024 )) \
  --cpu=host-model \
  --disk path=/dev/vgdata/lvacm-one-master-01,device=disk,bus=virtio,format=raw \
  --disk path=/dev/vgdata/lvacm-one-master-01-data,device=disk,bus=virtio,format=raw \
  --os-variant rhel8.3 \
  --network bridge=baremetal,model=virtio,mac=52:54:00:12:A1:01 \
  --graphics vnc,port=59003 \
  --noautoconsole \
  --boot menu=on \
  --cdrom /data/kvm/one/agent.x86_64.iso
For an unknown reason, the VM shuts down instead of rebooting, so you have to power it on manually.
# Work from the installer asset directory for the rest of this section.
cd "${BASE_DIR}/data/install"

# Use the admin kubeconfig written by the installer, now and in future shells.
export KUBECONFIG="${BASE_DIR}/data/install/auth/kubeconfig"
printf 'export KUBECONFIG=%s\n' "${BASE_DIR}/data/install/auth/kubeconfig" >> ~/.bashrc

# oc completion bash | sudo tee /etc/bash_completion.d/openshift > /dev/null

# Phase 1: block until the installer reports that bootstrap is complete.
openshift-install --dir="${BASE_DIR}/data/install" agent wait-for bootstrap-complete --log-level=debug
# ......
# DEBUG RendezvousIP from the AgentConfig 192.168.77.43
# INFO Bootstrap Kube API Initialized
# INFO Bootstrap configMap status is complete
# INFO cluster bootstrap is complete

# Phase 2: block until the installer reports that the install is complete.
openshift-install --dir="${BASE_DIR}/data/install" agent wait-for install-complete --log-level=debug
# ......
# INFO Install complete!
# INFO To access the cluster as the system:admin user when using 'oc', run
# INFO export KUBECONFIG=/home/3node/data/install/auth/kubeconfig
# INFO Access the OpenShift web-console here: https://console-openshift-console.apps.osp-demo.wzhlab.top
# INFO Login to the console with user: "kubeadmin", and password: "UmfI2-99uAb-BRdaS-LLjQ9"
# Initial settings on the helper node: never prompt for, or record, SSH host
# keys, and keep the config file private to its owner.
printf '%s\n' \
  'StrictHostKeyChecking no' \
  'UserKnownHostsFile=/dev/null' \
  > ~/.ssh/config
chmod 600 ~/.ssh/config

# Manual variant of the node preparation, kept for reference:
# ssh core@*****
# sudo -i
# # change password for root
# echo 'redhat' | passwd --stdin root
# sed -i "s|^PasswordAuthentication no$|PasswordAuthentication yes|g" /etc/ssh/sshd_config
# sed -i "s|^PermitRootLogin no$|PermitRootLogin yes|g" /etc/ssh/sshd_config
# sed -i "s|^#ClientAliveInterval 180$|ClientAliveInterval 1800|g" /etc/ssh/sshd_config
# systemctl restart sshd
# # set env, so oc can be used
# cat << EOF >> ~/.bashrc
# export KUBECONFIG=/etc/kubernetes/static-pod-resources/kube-apiserver-certs/secrets/node-kubeconfigs/localhost.kubeconfig
# RET=`oc config use-context system:admin`
# EOF
# Write crack.txt, a list of commands that is later fed to a node over ssh
# stdin. The commands set root's password to "redhat", switch sshd to allow
# password and root logins, raise ClientAliveInterval, restart sshd, and
# append a KUBECONFIG export plus an `oc config use-context` line to
# /root/.bashrc.
# The heredoc delimiter is unquoted, so the shell processes backslash escapes
# while writing the file: each \\\` below ends up as \` in crack.txt. The
# "$|" sequences in the sed patterns are not expansions and are written
# unchanged.
# WARNING: this deliberately weakens SSH security on the nodes; it is meant
# for a lab environment only.
cat > ${BASE_DIR}/data/install/crack.txt << EOF
echo redhat | sudo passwd --stdin root
sudo sed -i "s|^PasswordAuthentication no$|PasswordAuthentication yes|g" /etc/ssh/sshd_config
sudo sed -i "s|^PermitRootLogin no$|PermitRootLogin yes|g" /etc/ssh/sshd_config
sudo sed -i "s|^#ClientAliveInterval 180$|ClientAliveInterval 1800|g" /etc/ssh/sshd_config
sudo systemctl restart sshd
sudo sh -c 'echo "export KUBECONFIG=/etc/kubernetes/static-pod-resources/kube-apiserver-certs/secrets/node-kubeconfigs/localhost.kubeconfig" >> /root/.bashrc'
sudo sh -c 'echo "RET=\\\`oc config use-context system:admin\\\`" >> /root/.bashrc'
EOF
# NOTE(review): the ssh targets in this section had been replaced by the
# literal text "[email protected]" followed by ".$i", which is not a usable
# address. They are restored on the assumption that the nodes are reached as
# 192.168.7.<i>: as "core" for the first pass (crack.txt uses sudo) and as
# "root" afterwards (crack.txt sets root's password to "redhat"). Also note
# that the host numbers 23/24/25 do not match the MASTER_0x_IP values
# (192.168.77.43-45) defined earlier in this file - confirm users and
# addresses before running.

# Pass 1: run the commands from crack.txt on every node.
for i in 23 24 25
do
  ssh "core@192.168.7.$i" < "${BASE_DIR}/data/install/crack.txt"
done

# https://unix.stackexchange.com/questions/230084/send-the-password-through-stdin-in-ssh-copy-id
dnf install -y sshpass

# Pass 2: install our public key for root, answering the password prompt
# non-interactively.
for i in 23 24 25
do
  sshpass -p 'redhat' ssh-copy-id "root@192.168.7.$i"
done

# Pass 3: power the nodes off.
for i in 23 24 25
do
  ssh "root@192.168.7.$i" poweroff
done
# Start the three master VMs, in order.
for i in 01 02 03; do
  virsh start "ocp4-acm-one-master-${i}"
done
# Keep timestamped copies of the cluster credentials, then merge the cluster's
# kubeconfig into ~/.kube/config.
# The credentials are read from ${BASE_DIR}/data/install/auth, which is the
# --dir the installer was run with and the location KUBECONFIG is exported
# from in this file. With BASE_DIR unset this still resolves to
# /data/install/auth.
mkdir -p ~/.kube/bak/

var_date=$(date '+%Y-%m-%d-%H%M')

/bin/cp -f "${BASE_DIR}/data/install/auth/kubeconfig" ~/.kube/bak/kubeconfig-"$var_date"
/bin/cp -f "${BASE_DIR}/data/install/auth/kubeadmin-password" ~/.kube/bak/kubeadmin-password-"$var_date"

# Rewrite every "admin" in the kubeconfig to "admin/<cluster name>" before
# merging it.
sed "s/admin/admin\/$SNO_CLUSTER_NAME/g" "${BASE_DIR}/data/install/auth/kubeconfig" > /tmp/config.new

# https://medium.com/@jacobtomlinson/how-to-merge-kubernetes-kubectl-config-files-737b61bd517d
# Back up the current config, flatten both files into one, and replace the
# config only if the previous steps succeeded.
/bin/cp -f ~/.kube/config ~/.kube/config.bak \
  && KUBECONFIG=~/.kube/config:/tmp/config.new kubectl config view --flatten > /tmp/config \
  && /bin/mv -f /tmp/config ~/.kube/config

# Drop the per-shell override so later commands fall back to ~/.kube/config.
unset KUBECONFIG
我们装好了single node,那么接下来,我们还可以给这个single node添加worker节点,让这个single node cluster变成一个单master的集群。
# First, pin the ingress router to the master: label the node, then make the
# default ingress controller select nodes carrying that label.
# NOTE(review): the node name "acm-demo-hub-master" does not match the
# hostnames defined earlier in this file (master-01-demo, ...) - confirm.
oc label node acm-demo-hub-master ocp-ingress-run="true"

oc -n openshift-ingress-operator patch ingresscontroller/default \
  --type=merge \
  --patch='{"spec":{"nodePlacement":{"nodeSelector": {"matchLabels":{"ocp-ingress-run":"true"}}}}}'

# This is a test environment, so a single ingress replica is enough.
oc -n openshift-ingress-operator patch ingresscontroller/default \
  --type=merge \
  --patch='{"spec": {"replicas": 1}}'

oc -n openshift-ingress-operator get ingresscontroller/default -o yaml

# Then export the worker ignition file; the worker node boots with it and
# joins the cluster.
oc -n openshift-machine-api extract secret/worker-user-data --keys=userData --to=- \
  > /var/www/html/ignition/sno-worker.ign
# Host that serves the worker ignition file over HTTP on port 8080.
HELP_SERVER='192.168.7.11'

# Node definition for the worker that is added to the single-node cluster.
SNO_IP='192.168.7.16'
SNO_GW='192.168.7.11'
SNO_NETMAST='255.255.255.0'
SNO_HOSTNAME='acm-demo-hub-worker-01'
SNO_IF='enp1s0'
SNO_DNS='192.168.7.11'
SNO_DISK='/dev/vda'
SNO_MEM='16'

# Kernel arguments for the live ISO: static network configuration, the target
# disk (device name only, without the /dev/ prefix) and the ignition URL.
# The leading space is part of the value.
BOOT_ARG=" ip=${SNO_IP}::${SNO_GW}:${SNO_NETMAST}:${SNO_HOSTNAME}:${SNO_IF}:none"
BOOT_ARG+=" nameserver=${SNO_DNS}"
BOOT_ARG+=" coreos.inst.install_dev=${SNO_DISK##*/}"
BOOT_ARG+=" coreos.inst.ignition_url=http://${HELP_SERVER}:8080/ignition/sno-worker.ign"

# Work on a copy of the stock live ISO and append the kernel arguments to it.
/bin/cp -f /data/ocp4/rhcos-live.x86_64.iso sno.iso
coreos-installer iso kargs modify -a "$BOOT_ARG" sno.iso
# Go to the KVM host ( 103 ).
# NOTE(review): the scp source here had been replaced by the literal text
# "[email protected]", which is not a usable address. It is restored on the
# assumption that the ISO is fetched as root from 192.168.7.11 (the
# HELP_SERVER address used for the worker ISO in this file) - confirm both
# the user and the address.
scp root@192.168.7.11:/data/install/sno.iso /data/kvm/

# Remove any previous instance of the worker VM.
virsh destroy ocp4-acm-hub-worker01
virsh undefine ocp4-acm-hub-worker01
#######################################
# Remove a logical volume and, on request, recreate it as a thin volume.
# Arguments:
#   $1 - volume group name
#   $2 - thin pool name inside that volume group
#   $3 - logical volume name
#   $4 - virtual size passed to `lvcreate -V` (for example 500G)
#   $5 - optional action; only the value "recreate" creates the volume again
# Returns:
#   status of wipefs when recreating, otherwise 0
#######################################
create_lv() {
  # Locals keep these names from leaking into (or clobbering) the caller's shell.
  local vg=$1
  local pool=$2
  local lv=$3
  local size=$4
  local action=${5:-}

  # Best effort: the volume may not exist yet, so a failure here is ignored.
  lvremove -f "${vg}/${lv}"
  # lvcreate -y -L $var_size -n $var_lv $var_vg
  if [[ "${action}" == "recreate" ]]; then
    lvcreate --type thin -n "${lv}" -V "${size}" --thinpool "${vg}/${pool}"
    # Clear any stale filesystem/partition signatures on the new volume.
    wipefs --all --force "/dev/${vg}/${lv}"
  fi
}
# Recreate the worker's system disk.
create_lv vgdata poolA lvacmhub-worker01 500G recreate
# create_lv vgdata poolA lvacmhub-worker01-data 500G remove

# Boot the worker VM from the customised live ISO.
# Optional second disk, currently disabled:
#   --disk path=/dev/vgdata/lvacmhub-data,device=disk,bus=virtio,format=raw
# NOTE(review): VNC port 59003 is also used by the master VM defined earlier
# in this file - confirm the two never run on the same host at the same time.
virt-install \
  --name=ocp4-acm-hub-worker01 \
  --vcpus=16 \
  --ram=$(( $SNO_MEM * 1024 )) \
  --cpu=host-model \
  --disk path=/dev/vgdata/lvacmhub-worker01,device=disk,bus=virtio,format=raw \
  --os-variant rhel8.3 \
  --network bridge=baremetal,model=virtio \
  --graphics vnc,port=59003 \
  --boot menu=on \
  --cdrom /data/kvm/sno.iso
# After the worker VM has booted up twice, go back to the helper node and
# approve its certificate signing requests.
oc get csr

# Approve every CSR whose status is still empty (i.e. not yet decided).
# --no-run-if-empty keeps xargs from calling `oc adm certificate approve`
# without any names when nothing is pending, which would only print an error.
oc get csr -o json \
  | jq -r '.items[] | select(.status == {} ) | .metadata.name' \
  | xargs --no-run-if-empty oc adm certificate approve