From 54622d4d0244df9bea5f8105f76be29f9784cf17 Mon Sep 17 00:00:00 2001 From: James Denton Date: Thu, 18 Jul 2019 13:03:55 +0000 Subject: [PATCH 01/15] Initial commit for SRIOV support with Express --- group_vars/hypervisors.yml | 12 +++++-- pf9-express.yml | 9 ++++- roles/map-role/tasks/main.yml | 9 +++++ .../templates/pf9-neutron-ovs-agent.j2 | 8 +++-- .../templates/pf9-neutron-sriov-agent.j2 | 5 +++ .../templates/pf9-ostackhost-neutron.j2 | 5 ++- roles/neutron-prerequisites/tasks/main.yml | 9 ++--- .../tasks/main.yml | 9 +++++ .../tasks/prerequisites-sriov.yml | 35 +++++++++++++++++++ .../pre-flight-checks-openstack/vars/main.yml | 7 ++++ 10 files changed, 96 insertions(+), 12 deletions(-) create mode 100644 roles/map-role/templates/pf9-neutron-sriov-agent.j2 create mode 100644 roles/pre-flight-checks-openstack/tasks/prerequisites-sriov.yml create mode 100644 roles/pre-flight-checks-openstack/vars/main.yml diff --git a/group_vars/hypervisors.yml b/group_vars/hypervisors.yml index 2942f3ec..f4f0ed0c 100644 --- a/group_vars/hypervisors.yml +++ b/group_vars/hypervisors.yml @@ -11,7 +11,15 @@ glance: "off" multipath: False nova_instances_path: /opt/pf9/data/instances/ neutron_ovs_allow_dhcp_vms: "False" -neutron_ovs_bridge_name: "br-pf9" -neutron_ovs_bridge_mappings: "external:br-pf9" +neutron_ovs_bridge_name: "br-pf9, br-sriov" +neutron_ovs_bridge_mappings: "external:br-pf9, sriov:br-sriov" ceilometer_customize: False ceilometer_cpu_interval: 600 + +#################### +# SRIOV +#################### + +sriov: "on" +physical_device_mappings: "" +pci_passthrough_whitelist: "{}" diff --git a/pf9-express.yml b/pf9-express.yml index 1c43b037..cc54a24f 100644 --- a/pf9-express.yml +++ b/pf9-express.yml @@ -85,11 +85,17 @@ - { role: "bond-config", when: manage_network == True } - { role: "pf9-auth", when: manage_network == True and ansible_distribution == "Ubuntu" } -# OpenStack Hypervisor Nodes +# OpenStack Hypervisor Nodes - Pre-Flight Checks +# Runs pre-checks and prompts users to reboot if necessary - hosts: hypervisors + any_errors_fatal: true become: true roles: - pre-flight-checks-openstack + +- hosts: hypervisors + become: true + roles: - common - ntp - pf9-hostagent @@ -103,6 +109,7 @@ - { role: "map-role", rolename: "pf9-neutron-l3-agent", when: autoreg == "on" } - { role: "map-role", rolename: "pf9-neutron-metadata-agent", when: autoreg == "on" } - { role: "map-role", rolename: "pf9-neutron-dhcp-agent", when: autoreg == "on" and dhcp == "on" } + - { role: "map-role", rolename: "pf9-neutron-sriov-agent", when: sriov == "on" } - { role: "wait-for-convergence", when: autoreg == "on" } - { role: "multipath", when: multipath == True } - { role: "enable-nested-virt", when: nested_virt == True } diff --git a/roles/map-role/tasks/main.yml b/roles/map-role/tasks/main.yml index cc07206a..ffb0a179 100644 --- a/roles/map-role/tasks/main.yml +++ b/roles/map-role/tasks/main.yml @@ -132,6 +132,15 @@ shell: "cat /tmp/keystone-token.txt" register: api_token +# JD DEBUG +- name: print out role json for debugging + debug: + msg: "{{ role_json }}" + +#- name: fail! 
+# fail: +# when: 1==1 + - name: "Assigning Role - {{rolename}}" uri: url: "https://{{ctrl_ip}}/resmgr/v1/hosts/{{host_id.stdout.strip()}}/roles/{{rolename}}" diff --git a/roles/map-role/templates/pf9-neutron-ovs-agent.j2 b/roles/map-role/templates/pf9-neutron-ovs-agent.j2 index cf495870..3d1be73e 100644 --- a/roles/map-role/templates/pf9-neutron-ovs-agent.j2 +++ b/roles/map-role/templates/pf9-neutron-ovs-agent.j2 @@ -3,9 +3,13 @@ "bridge_mappings": "{{neutron_ovs_bridge_mappings}}", "enable_distributed_routing": "{{neutron_ovs_enable_distributed_routing}}", "enable_tunneling": "{{neutron_ovs_enable_tunneling}}", -{% if neutron_tunnel_types is defined %} "local_ip": "{{tunnel_ip}}", +{% if neutron_tunnel_types is defined %} + "local_ip": "{{tunnel_ip}}", +{% endif %} +{% if sriov == "on" %} + "extensions": "fdb", + "shared_physical_device_mappings": "{% for mapping in physical_device_mappings %}{{ mapping }}{% if not loop.last %},{% endif %}{% endfor %}", {% endif %} "net_type": "{{neutron_ovs_net_type}}"{% if neutron_tunnel_types is defined %}, "tunnel_types": "{{neutron_tunnel_types}}"{%- endif %} - } diff --git a/roles/map-role/templates/pf9-neutron-sriov-agent.j2 b/roles/map-role/templates/pf9-neutron-sriov-agent.j2 new file mode 100644 index 00000000..4bf23932 --- /dev/null +++ b/roles/map-role/templates/pf9-neutron-sriov-agent.j2 @@ -0,0 +1,5 @@ +{ +{% if sriov == "on" %} + "physical_device_mappings": "{% for mapping in physical_device_mappings %}{{ mapping }}{% if not loop.last %},{% endif %}{% endfor %}" +{% endif %} +} diff --git a/roles/map-role/templates/pf9-ostackhost-neutron.j2 b/roles/map-role/templates/pf9-ostackhost-neutron.j2 index d682c455..473934f4 100644 --- a/roles/map-role/templates/pf9-ostackhost-neutron.j2 +++ b/roles/map-role/templates/pf9-ostackhost-neutron.j2 @@ -1,5 +1,8 @@ { "cluster_ip": "{{ha_cluster_ip}}", "instances_path": "{{nova_instances_path}}", - "novncproxy_base_url": "{{neutron_novncproxy_base_url}}" + "novncproxy_base_url": "{{neutron_novncproxy_base_url}}", +{% if sriov == "on" %} + "pci_passthrough_whitelist": "[{% for physical_device_mapping in physical_device_mappings %}{% set provider,devname = physical_device_mapping.split(':') %}{\"physical_network\":\"{{ provider }}\", \"devname\":\"{{ devname }}\"}{% if not loop.last %},{% endif %}{% endfor %}]" +{% endif %} } diff --git a/roles/neutron-prerequisites/tasks/main.yml b/roles/neutron-prerequisites/tasks/main.yml index 76695c8b..457f7256 100644 --- a/roles/neutron-prerequisites/tasks/main.yml +++ b/roles/neutron-prerequisites/tasks/main.yml @@ -17,13 +17,10 @@ - include: ubuntu.yml when: ansible_distribution == "Ubuntu" -- name: check if OVS bridge already exists - shell: "ifconfig -a | grep ^{{neutron_ovs_bridge_name}} > /dev/null 2>&1; if [ $? 
-eq 0 ]; then echo 'exists'; else echo 'not-exist'; fi" - register: ovs_bridge_check - - name: Create required OVS bridges openvswitch_bridge: - bridge: "{{neutron_ovs_bridge_name}}" + bridge: "{{ item }}" + fail_mode: secure state: present - when: ovs_bridge_check.stdout.strip() == "not-exist" + with_items: "{{ neutron_ovs_bridge_name.split(',') }}" diff --git a/roles/pre-flight-checks-openstack/tasks/main.yml b/roles/pre-flight-checks-openstack/tasks/main.yml index 51ef3e8e..f02d851c 100644 --- a/roles/pre-flight-checks-openstack/tasks/main.yml +++ b/roles/pre-flight-checks-openstack/tasks/main.yml @@ -20,6 +20,15 @@ - kvm_result is defined and kvm_result.stat.exists == False - inventory_hostname in groups['hypervisors'] +################################################## +## Include tasks that verify individual components +################################################## + +- include_tasks: prerequisites-sriov.yml + when: + - sriov == "on" + - ansible_virtualization_role == "host" + ########################################################################################### ## Verify DVR is on, step 1 in the UI is complete, import vars, and test credentials ########################################################################################### diff --git a/roles/pre-flight-checks-openstack/tasks/prerequisites-sriov.yml b/roles/pre-flight-checks-openstack/tasks/prerequisites-sriov.yml new file mode 100644 index 00000000..96af5bd9 --- /dev/null +++ b/roles/pre-flight-checks-openstack/tasks/prerequisites-sriov.yml @@ -0,0 +1,35 @@ +--- +- name: Fail on incompatible CPU architecture + fail: + msg: "Detected {{ cpu_vendor }} CPU not supported! Must be {{ supported_cpus }}." + when: + - cpu_vendor not in supported_cpus + +- name: Register IOMMU DMARs + find: paths=/sys/class/iommu file_type=directory patterns="*" + register: iommus + +# We need to +# - Check to see if IOMMU is already enabled. If not, let's check grub (and break out iommu/pt) +# - Check for SRIOV compatibility (via NIC) +# - Update grub and reboot if necessary. Wait for reboot. + +- name: Check GRUB defaults and enable IOMMU if necessary + lineinfile: + path: /etc/default/grub + backrefs: true + regexp: '^GRUB_CMDLINE_LINUX="((?!.*{{ iommu_kernel_cmds }}).*)"$' + line: 'GRUB_CMDLINE_LINUX="\1 {{ iommu_kernel_cmds }}"' + backup: yes + register: grub + +- name: Update GRUB config + command: update-grub + +- name: Fail if IOMMU is not enabled + fail: + msg: | + IOMMU is not currently enabled in the kernel but has been configured. + Please reboot the host and rerun Express. 
+ Refer to https://platform9.com/knowledge/KB12345 + when: iommus.examined < 1 diff --git a/roles/pre-flight-checks-openstack/vars/main.yml b/roles/pre-flight-checks-openstack/vars/main.yml new file mode 100644 index 00000000..1d93b9c4 --- /dev/null +++ b/roles/pre-flight-checks-openstack/vars/main.yml @@ -0,0 +1,7 @@ +--- +# SR-IOV support requires Intel or AMD CPUs +cpu_vendor: "{{ ansible_facts['processor'][1] | lower | regex_replace('(authentic)|(genuine)', '') }}" +iommu_kernel_cmds: '{{ cpu_vendor }}_iommu=on iommu=pt' +supported_cpus: + - intel + - amd From def929fa4ed6fcb1e63c738a1a8a1b948e755f35 Mon Sep 17 00:00:00 2001 From: James Denton Date: Thu, 18 Jul 2019 17:29:54 +0000 Subject: [PATCH 02/15] Updated documentation --- README.md | 141 +++++++++++++------ docs/SRIOV.md | 236 ++++++++++++++++++++++++++++++++ host_vars/compute01.yml.example | 3 + host_vars/compute02.yml.example | 4 + 4 files changed, 345 insertions(+), 39 deletions(-) create mode 100644 docs/SRIOV.md create mode 100644 host_vars/compute01.yml.example create mode 100644 host_vars/compute02.yml.example diff --git a/README.md b/README.md index 43bb599a..712dee46 100644 --- a/README.md +++ b/README.md @@ -1,68 +1,104 @@ # Platform9 Express -Platform9 Express (pf9-express) is a Customer Success developed tool for bringing hosts under management by a Platform9 management plane. It can bring a host to the point where it shows up in the Clarity UI as a host waiting to be authorized, or it can (optionally) perform Platform9 role deployments for both OpenStack and Kubernetes. Platform9 Express includes a CLI and can be installed on a CentOS or Ubuntu control host. + +Platform9 Express (**pf9-express**) is a Customer Success developed tool for bringing hosts under management by a Platform9 management plane. It can bring a host to the point where it shows up in the Clarity UI as a host waiting to be authorized, or it can (optionally) perform Platform9 role deployments for both OpenStack and Kubernetes. Platform9 Express includes a CLI and can be installed on a CentOS or Ubuntu control host. + +### Table of Contents + +- [Prerequisites](#prerequisites) +- [Installing Express on Control Host](#installation) +- [Configuring Access to the Management Plane](#configure-access-to-the-management-plane-cli-only) +- [Install Prerequisite Packages on Control Host](#install-prerequisite-packages) +- [Configuring the Inventory](#configuring-the-inventory-cli-only) +- [CSV Import](#csv-import) +- [Running Platform9 Express](#running-platform9-express) + +#### Advanced Topics + +- [Overriding Variables](#overriding-inventory-variables) +- [Using SR-IOV](docs/SRIOV.md) ## Prerequisites + Platform9 Express must be installed on a control host with IP connectivity to the hosts to be brought under management. CentOS 7.4+, Ubuntu 16.04, or Ubuntu 18.04 are supported on the control host. Before installing Platform9 Express, you'll need administrator credentials for the Platform9 management plane. If a proxy is required for HTTP/HTTPS traffic, you'll need the URL for the proxy. +> There are strict requirements for hosts whose software is deployed by Platform9 Express. Please refer to your Customer Success team for further details. + ## Installation + Perform the following steps to install Platform9 Express: -1. Login as root (or a user with sudo access) on the host that you plan to install Platform9 Express on. +1. Login as **root** (or a user with sudo access) on the host that you plan to install Platform9 Express on. + +2. Install **git** -2. 
Install git ``` yum install git # CentOS apt update && apt install git # Ubuntu ``` -3. Clone the Platform9 Express repository. +3. Clone the Platform9 Express repository. ``` git clone https://github.com/platform9/express.git /opt/pf9-express ``` -NOTE: In this example, the installation directory is /opt/pf9-express, but any directory can be used. + +> In this example, the installation directory is **/opt/pf9-express**, but any directory can be used. ## Configure Access to the Management Plane (CLI Only) -To configure the Platform9 Express CLI to communicate with the Platform9 management plane, run the following command (a sample session is included): + +To configure the Platform9 Express CLI to communicate with the Platform9 management plane, run the following command: + +``` +./pf9-express -s +``` + +Example: ``` # ./pf9-express -s NOTE: to enter a NULL value for prompt, enter '-' - + PF9 Management Plane URL [https://company.platform9.net]: --> accepted: https://company.platform9.net - + Admin Username [user@company.com]: --> accepted: user@company.com - + Admin Password [********]: --> accepted: ******** - + Region [Sunnyvale]: --> accepted: Sunnyvale - + Tenant [service]: --> accepted: service - + Manage Hostname [true false] [false]: --> accepted: false - + Manage DNS Resolver [true false] [false]: --> accepted: false - + DNS Resolver 1 [8.8.8.8]: --> accepted: 8.8.8.8 - + DNS Resolver 2 [8.8.4.4]: --> accepted: 8.8.4.4 - + Proxy URL: --> accepted: - ``` ## Install Prerequisite Packages -To install prerequisite packages on the Platform9 Express control host, run the following command (a sample session is included): + +To install prerequisite packages on the Platform9 Express control host, run the following command: + +``` +./pf9-express -i +``` + +Example: ``` # ./pf9-express -i @@ -70,13 +106,20 @@ To install prerequisite packages on the Platform9 Express control host, run the --> Validating package dependencies: epel-release ntp nginx gcc python-devel python2-pip bc shade docker-py ansible ``` -## Configuration Inventory (CLI Only) -Platform9 Express uses Ansible to execute commands on the hosts to be taken under management. In order to configure Ansible to run remote commands on the managed hosts, the Ansible Inventory file must be configured. This file is located in /opt/pf9-express/inventory/hosts. +## Configuring the Inventory (CLI Only) + +Platform9 Express uses Ansible to execute commands on the hosts to be taken under management. In order to configure Ansible to run remote commands on the managed hosts, the Ansible Inventory file must be configured. This file is located in **/opt/pf9-express/inventory/hosts**. -NOTE: A sample template is installed in the previous command ("./pf9-express -s"). A breakdown of the Inventory File is below: +> Platform9 Express supports Ansible's `group_vars` and `host_vars` methods of defining variables. + +A sample template is installed in the setup command (**./pf9-express -s**). A breakdown of the inventory file is below: ## Sample Inventory File Part 1 - Authentication Portion -This is where you enter the credentials for your control host to log into the target VM hosts to be managed by the Platform9 management plane (through either a password or SSH key, comment out any password lines if using SSH authentication and vice versa as needed) + +This is where you enter the credentials for your control host to log into the target hosts to be managed by the Platform9 management plane. 
+ +> When using password authentication, comment out `ansible_ssh_private_key_file`. When using a private key, comment out `ansible_sudo_pass`. + ``` ## ## Ansible Inventory @@ -90,7 +133,9 @@ ansible_ssh_pass=winterwonderland ``` ## Sample Inventory File Part 2 - Network Portion -This is where you can configure optional network settings to create a bond with single or multiple interfaces. + +This is where you can configure optional network settings to create a bond with single or multiple interfaces. + ``` ################################################################################################ ## Optional Settings @@ -112,7 +157,9 @@ cv01 bond_members='["eth1","eth2"]' bond_sub_interfaces='[{"vlanid":"100","ip":" ``` ## Sample Inventory File Part 3 - OpenStack Portion + You can configure the OpenStack hosts and their pertinent roles (Hypervisor, Image Host, Storage Host, DNS Host) + ``` ################################################################################################ ## OpenStack Groups @@ -131,7 +178,7 @@ cinder hv01 ansible_host=10.0.0.11 vm_console_ip=10.0.0.11 ha_cluster_ip=10.0.1.11 tunnel_ip=10.0.2.11 dhcp=on snat=on hv02 ansible_host=10.0.0.12 vm_console_ip=10.0.0.12 tunnel_ip=10.0.2.12 dhcp=on snat=on hv03 ansible_host=10.0.0.13 vm_console_ip=10.0.0.13 tunnel_ip=10.0.2.13 -hv04 ansible_host=10.0.0.14 +hv04 ansible_host=10.0.0.14 ## global variables defined in group_vars/glance.yml ## note: if the following variables are not defined, the value of ansible_host will be inherited @@ -153,7 +200,9 @@ hv02 cinder_ip=10.0.4.14 pvs=["/dev/sdb","/dev/sdc","/dev/sdd","/dev/sde"] ``` ## Sample Inventory File Part 4 - Kubernetes Portion -This is where you can configure your Kubernetes cluster members under their own roles (either master or worker). For a worker, you can optionally add it into a running cluster using the "cluster_uuid" variable. For any new workers, you can omit this variable assignment. + +This is where you can configure your Kubernetes cluster members under their own roles (either master or worker). For a worker, you can optionally add it into a running cluster using the **cluster_uuid** variable. For any new workers, you can omit this variable assignment. + ``` ################################################################################################ ## Kubernetes Groups @@ -176,9 +225,11 @@ cv05 ansible_host=10.0.0.19 cluster_uuid=7273706d-afd5-44ea-8fbf-901ceb6bef27 ``` ## CSV Import -Instead of manually configuring the inventory file, you can use the '-f ' option to auto-configure it from a CSV definition file. + +Instead of manually configuring the inventory file, you can use the **-f ** option to auto-configure it from a CSV definition file. 
Here's a sample CSV definition file: + ``` hostname,username,key,ip,dhcp,snat,glance,glance-public,nic1,nic2,mgmtvlan,mgmtip,mgmtnetmask,Storagevlan,storageip,storagenetmask,tunnelvlan,tunnelip,tunnelnetmask fake01,centos,~/.ssh/id_rsa,172.16.7.182,TRUE,TRUE,TRUE,TRUE,ens160,,243,172.16.243.11,255.255.255.0,244,172.16.244.11,255.255.255.0,245,172.16.245.11,255.255.255.0 @@ -186,18 +237,22 @@ fake02,ubuntu,~/.ssh/id_rsa,172.16.7.47,TRUE,FALSE,FALSE,FALSE,ens192,,243,172.1 ``` ## Controlling UID/GID for the Platform9 Host Agent -If you want to control the UID and GID values for the Platform9 service account (pf9/pf9group), set the following inventory variables: + +If you want to control the UID and GID values for the Platform9 service account (pf9:pf9group), set the following inventory variables: + * pf9_uid * pf9_gid If these variables are not defined, the Host Agent Installer will allow the system to auto-assign the UID and GID. -NOTE: This feature is not idempotent. If the 'pf9' user had not been created yet, Platform9 Express will create the 'pf9' user and 'pf9group' group based on the values of pf9_uid and pf9_gid. If the 'pf9' user already exists, Platform9 Express will skip the user/group management section; it will not attempt to alter the UID/GID settings. +> This feature is not idempotent. If the **pf9** user had not been created yet, Platform9 Express will create the **pf9** user and **pf9group** group based on the values of **pf9_uid** and **pf9_gid**. If the **pf9** user already exists, Platform9 Express will skip the user/group management section; it will not attempt to alter the UID/GID settings. ## Running Platform9 Express -The basic syntax for starting Platform9 Express includes a target (host group, individual host, comma-delimited list of hosts, or "all" to run all groups) and an optional flag ('-a') that instructs it to perform role deployment. + +The basic syntax for starting Platform9 Express includes a target (host group, individual host, comma-delimited list of hosts, or "all" to run all groups) and an optional flag (**-a**) that instructs it to perform role deployment. Here's an example of invoking Platform9 Express against a number of hosts without registering them automatically to the management plane: + ``` # ./pf9-express hv01,hv02,hv03 ################################################################ @@ -207,13 +262,15 @@ Here's an example of invoking Platform9 Express against a number of hosts withou --> Validating package dependencies: epel-release ntp nginx gcc python-devel python2-pip bc shade docker-py ansible setupd --> Updating setupd libraries: pf9_master_setup.py pf9_utils.py pf9_mgmt_setup.py attach-node add-cluster --> ansible_version = 2.5 - + [Executing: ansible-playbook ./pf9-express.yml] . . . 
``` -Here's an example of invoking Platform9 Express against a single host group (host groups are either "pmo" for OpenStack and "pmk" for Kubernetes), performing role deployments (based on metadata defined in /opt/pf9-express/inventory/hosts), and registering them automatically to the management plane + +Here's an example of invoking Platform9 Express against a single host group (host groups are either "pmo" for OpenStack and "pmk" for Kubernetes), performing role deployments (based on metadata defined in **/opt/pf9-express/inventory/hosts**), and registering them automatically to the management plane + ``` # ./pf9-express -a pmk ################################################################ @@ -223,13 +280,15 @@ Here's an example of invoking Platform9 Express against a single host group (hos --> Validating package dependencies: epel-release ntp nginx gcc python-devel python2-pip bc shade docker-py ansible setupd --> Updating setupd libraries: pf9_master_setup.py pf9_utils.py pf9_mgmt_setup.py attach-node add-cluster --> ansible_version = 2.5 - + [Executing: ansible-playbook ./pf9-express.yml] . . . ``` -Here's an example of invoking Platform9 Express against all host groups and performing role deployments (based on metadata defined in /opt/pf9-express/inventory/hosts): + +Here's an example of invoking Platform9 Express against all host groups and performing role deployments (based on metadata defined in **/opt/pf9-express/inventory/hosts**): + ``` # ./pf9-express -a all ################################################################ @@ -239,19 +298,21 @@ Here's an example of invoking Platform9 Express against all host groups and perf --> Validating package dependencies: epel-release ntp nginx gcc python-devel python2-pip bc shade docker-py ansible setupd --> Updating setupd libraries: pf9_master_setup.py pf9_utils.py pf9_mgmt_setup.py attach-node add-cluster --> ansible_version = 2.5 - + [Executing: ansible-playbook ./pf9-express.yml] . . . ``` + Here's the usage statement showing all command-line options: + ``` # ./pf9-express Usage: ./pf9-express [Args] - + Args (Optional): - + -a|--autoRegister : auto-register host with management plane -i|--installPrereqs : install pre-requisites and exit -s|--setup : run setup and exit @@ -264,20 +325,22 @@ Args (Optional): -h|--help : display this message ``` -## Managing Multiple Cloud Management Regions (DUs) -If you have more than one Platform9 region to manage, you can create a configuration file for each one (using pf9-express.conf as a template) and start pf9-express with the '-c' flag: + +If you have more than one Platform9 region to manage, you can create a configuration file for each one (using pf9-express.conf as a template) and start **pf9-express** with the **-c** flag: ``` ./pf9-express -c ~/pf9-site1.conf -a hv01 ``` ## Overriding Inventory Variables -If you want to override an Ansible variable defined in Inventory or dynamically within playbooks, you can invoke pf9-express with the '-e' flag: + +If you want to override an Ansible variable defined in Inventory or dynamically within playbooks, you can invoke **pf9-express** with the **-e** flag: ``` ./pf9-express -c ~/pf9-express.conf -a -e "proxy_url=https://proxy1.platform9.net" hv01 ``` -NOTE: Variables passed as extra-vars have the highest precedence. + +> Variables passed as extra-vars have the highest precedence. 
## License diff --git a/docs/SRIOV.md b/docs/SRIOV.md new file mode 100644 index 00000000..15f69715 --- /dev/null +++ b/docs/SRIOV.md @@ -0,0 +1,236 @@ +# Using SR-IOV with Platform9 Express + +In PMO version 3.11.x, support for SR-IOV has been introduced. SR-IOV provides increased network performance including higher throughput, lower latency, and lower jitter when compared to virtual switching technologies such as Open vSwitch. + +SR-IOV is supported by multiple network interface cards (NICs) provided by many networking vendors, including Intel, Cisco, Mellanox, Broadcom, QLogic, and others. + +The following NICs have been tested with the Platform9 PMO 3.11 release: + +* Mellanox ConnectX-4 Lx EN +* Mellanox ConnectX-5 EN +* Intel X520 +* Intel X540-T2 +* Broadcom NetXtreme II (BCM57810 / HP 533FLR-T) + +## Limitations + +The following are a few of the limitations of SR-IOV: + +* Bonded NICs at the host-level are not recommended/not supported for use with SR-IOV. While active/passive bonding may work in this configuration, LACP/802.3ad is definitely not a supported configuration. +* Virtual Functions are automatically assigned to Neutron ports and are not customizable. +* Instance-level NIC bonding using Virtual Functions is not supported. +* Port security/security groups are not supported. +* VLAN networks are required. Flat (untagged) and overlay networks are not supported. + +## System Prerequisites + +SR-IOV requires the following: + +* BIOS Support (configuration varies by vendor) +* Kernel IOMMU Support +* Kernel IOMMU Passthrough support +* Compatible Network Interface Card (NIC) + +> When SR-IOV capable NICs are used in conjunction with Open vSwitch bridges, you have the option of using an existing provider label, such as **external**, or using a dedicated provider. When sharing a provider network between SR-IOV and non-SR-IOV ports, communication between the ports on the same network is permitted. Using a dedicated provider will require you to call out a second bridge mapping, such as `sriov:br-sriov`, to allow DHCP ports connected to a vSwitch to communicate with the SR-IOV ports. + +### Kernel IOMMU Support + +Using **dmesg**, you can verify if IOMMU is enabled with the following command: + +``` +# dmesg | grep IOMMU +``` + +If you do not see the message ```DMAR: IOMMU enabled```, then proceed with the following steps: + +First, enable IOMMU support in the kernel by modifying the GRUB configuration at **/etc/default/grub**: + +``` +GRUB_CMDLINE_LINUX="... intel_iommu=on" #Intel-based Systems + +GRUB_CMDLINE_LINUX="... amd_iommu=on" #AMD-based Systems +``` + +Next, update GRUB: + +``` +update-grub +``` + +> Once the kernel configuration has been modified, you must reboot for the changes to take effect. + +### IOMMU Passthrough Support + +To enable IOMMU passthrough support in the kernel, please complete the following steps: + +First, enable passthrough support in the kernel by modifying the GRUB configuration at **/etc/default/grub**: + +``` +GRUB_CMDLINE_LINUX="... iommu=pt" +``` + +Then, update GRUB: + +``` +update-grub +``` + +> Once the kernel configuration has been modified, you must reboot for the changes to take effect. + +## Deploying PMO with SR-IOV support using Express + +Using the Platform9 Express tool, operators can deploy PMO with support for SR-IOV. 
The Express tool will perform many of the tasks outlined in the previous sections, including enabling IOMMU and passthrough support in the kernel, as well as implementing a unit file for persisting VFs across reboots. + +> Given the complexity involved in supporting Mellanox NICs, the Express tool will initially only support Intel NICs using the ixgbe driver. This includes the Intel X520, X540, and X550 families. + +The necessary configuration details can be implemented globally using **group_vars**, or on an individual host basis using **host_vars**. Each method is described below. + +### Host Variables + +Compute node-specific configurations can be implemented using what is known as host_vars. Configurations that may vary between hosts include: + +* Network interface name +* Quantity of network interfaces used for SRIOV +* Provider network mappings +* +Using **host_vars**, the following are some variables that can be modified: + +* physical_device_mappings (required) +* neutron_ovs_bridge_mappings (optional) + +In this example, two hosts have different NICs installed that report different names to the operating system. + +``` +root@compute01:~# ip link show +... +6: ens1: mtu 1500 qdisc mq portid 0002c90300ffe511 state UP mode DEFAULT group default qlen 1000 + link/ether 00:02:c9:ff:e5:10 brd ff:ff:ff:ff:ff:ff +7: ens1d1: mtu 1500 qdisc mq portid 0002c90300ffe512 state UP mode DEFAULT group default qlen 1000 + link/ether 00:02:c9:ff:e5:11 brd ff:ff:ff:ff:ff:ff +``` + +``` +root@compute02:~# ip link show +... +3: ens1f0: mtu 1500 qdisc mq state UP mode DEFAULT group default qlen 1000 + link/ether 90:e2:ba:a2:1b:88 brd ff:ff:ff:ff:ff:ff +5: ens1f1: mtu 1500 qdisc mq state UP mode DEFAULT group default qlen 1000 + link/ether 90:e2:ba:a2:1b:89 brd ff:ff:ff:ff:ff:ff +``` + +NIC naming can vary based on the kernel version, NIC driver, and the PCI slot where the card is installed. In this example, the NIC installed in each host is from a different manufacturer and uses a different driver: + +``` +root@compute01:~# ethtool -i ens1 +driver: mlx4_en +version: 4.0-0 +firmware-version: 2.42.5000 +expansion-rom-version: +bus-info: 0000:08:00.0 +supports-statistics: yes +supports-test: yes +supports-eeprom-access: no +supports-register-dump: no +supports-priv-flags: yes +``` + +``` +root@compute02:~# ethtool -i ens1f0 +driver: ixgbe +version: 5.1.0-k +firmware-version: 0x61bd0001 +expansion-rom-version: +bus-info: 0000:08:00.0 +supports-statistics: yes +supports-test: yes +supports-eeprom-access: yes +supports-register-dump: yes +supports-priv-flags: yes +``` + +The **host_vars** for each host can be implemented in a file that corresponds to the host's short name located at **/opt/pf9-express/host_vars/.yml**. In the following example, **compute01** uses a single network interface for SR-IOV, while **compute02** uses two. SR-IOV networks will leverage a new provider label named **sriov**, as shown here: + +``` +--- +# compute01.yml +physical_device_mappings: + - sriov:ens1 +``` + +``` +--- +# compute02.yml +physical_device_mappings: + - sriov:ens1f0 + - sriov:ens1f1 +``` + +> SR-IOV supports VLAN networks only. Flat and overlay networks are not supported. + +### Group Variables + +Group-wide configurations can be implemented using what is known as **group_vars**. 
Configurations that may be consistent between groups include: + +* Network interface name +* Quantity of network interfaces used for SRIOV +* Provider network mappings + +Using **group_vars**, the following are some variables that can be modified: + +* neutron_ovs_bridge_mappings +* physical_device_mappings + +The **group_vars** for the **hypervisors** group can be implemented in a file that corresponds to the group's name located at **/opt/pf9-express/group_vars/.yml**. In the following example, every host in the **hypervisors** group has the same NIC installed in the same slot, so the naming convention is consistent across all hosts. A second provider bridge mapping has been established that will allow non-SR-IOV capable ports, such as DHCP, to connect to a vSwitch and communicate with SR-IOV ports: + +``` +--- +# hypervisors.yml +... +neutron_ovs_bridge_mappings: "external:br-pf9, sriov:br-sriov" +physical_device_mappings: + - sriov:ens1f0 + - sriov:ens1f1 +... +``` + +> Host vars take precedence over group vars. If a small number of hosts vary from the greater group, feel free to implement the respective **host_vars** files accordingly. + +### Inventory File + +To enable support for SR-IOV on a host, the inventory must be modified according so that SR-IOV related tasks are executed. One method of enabling support for a host is to add the **sriov=on** variable to an individual host in the **hypervisors** group, as shown here: + +``` +[hypervisors] +compute01 ansible_host=10.50.0.197 vm_console_ip=10.50.0.197 ha_cluster_ip=10.50.0.197 tunnel_ip=10.50.0.197 dhcp=on snat=on sriov=on +compute02 ansible_host=10.50.0.196 vm_console_ip=10.50.0.196 tunnel_ip=10.50.0.196 dhcp=on snat=on sriov=on +``` + +SR-IOV can be enabled group-wide by modifying the respective **group_vars** file, as shown here: + +``` +--- +# hypervisors.yml +... +#################### +# SRIOV +#################### +sriov: "on" +... +``` + +Lastly, SR-IOV can be enabled via the respective **host_vars** file, as shown here: + +``` +--- +# compute01.yml### +sriov: "on" +physical_device_mappings: + - sriov:ens1 +``` + +### Installation +Once the respective configuration is in place, install PMO with Express using some variation of the following: + +``` +# ./pf9-express -a pmo +``` diff --git a/host_vars/compute01.yml.example b/host_vars/compute01.yml.example new file mode 100644 index 00000000..7c7e3246 --- /dev/null +++ b/host_vars/compute01.yml.example @@ -0,0 +1,3 @@ +--- +physical_device_mappings: + - sriov:ens1 diff --git a/host_vars/compute02.yml.example b/host_vars/compute02.yml.example new file mode 100644 index 00000000..a3c00dbd --- /dev/null +++ b/host_vars/compute02.yml.example @@ -0,0 +1,4 @@ +--- +physical_device_mappings: + - sriov:ens1f0 + - sriov:ens1f1 From a8de4175f9529111d77f85453f27cf9bb2a17123 Mon Sep 17 00:00:00 2001 From: James Denton Date: Thu, 25 Jul 2019 17:54:47 +0000 Subject: [PATCH 03/15] Added logic to handle VF creation and persistence. 
Added additional documentation --- docs/SRIOV.md | 34 +++++++++++-- group_vars/hypervisors.yml | 4 +- host_vars/compute01.yml.example | 2 + host_vars/compute02.yml.example | 3 ++ pf9-express | 2 +- pf9-express.yml | 15 +++++- roles/neutron-prerequisites/tasks/main.yml | 5 +- roles/neutron-sriov/handlers/main.yml | 18 +++++++ roles/neutron-sriov/tasks/main.yml | 50 +++++++++++++++++++ .../tasks/prerequisites-sriov.yml | 29 ++++++++--- .../pre-flight-checks-openstack/vars/main.yml | 3 ++ 11 files changed, 148 insertions(+), 17 deletions(-) create mode 100644 roles/neutron-sriov/handlers/main.yml create mode 100644 roles/neutron-sriov/tasks/main.yml diff --git a/docs/SRIOV.md b/docs/SRIOV.md index 15f69715..81f76951 100644 --- a/docs/SRIOV.md +++ b/docs/SRIOV.md @@ -4,7 +4,7 @@ In PMO version 3.11.x, support for SR-IOV has been introduced. SR-IOV provides i SR-IOV is supported by multiple network interface cards (NICs) provided by many networking vendors, including Intel, Cisco, Mellanox, Broadcom, QLogic, and others. -The following NICs have been tested with the Platform9 PMO 3.11 release: +The following NICs have been tested with the Platform9 PMO 3.11.3 release: * Mellanox ConnectX-4 Lx EN * Mellanox ConnectX-5 EN @@ -12,6 +12,13 @@ The following NICs have been tested with the Platform9 PMO 3.11 release: * Intel X540-T2 * Broadcom NetXtreme II (BCM57810 / HP 533FLR-T) +The following drivers are considered supported: + +* ixgbe +* bnx2x + +> Mellanox cards require additional configuration that is outside the scope of this guide and Platform9 Express. + ## Limitations The following are a few of the limitations of SR-IOV: @@ -92,10 +99,12 @@ Compute node-specific configurations can be implemented using what is known as h * Network interface name * Quantity of network interfaces used for SRIOV * Provider network mappings -* +* Number of VFs per interface + Using **host_vars**, the following are some variables that can be modified: * physical_device_mappings (required) +* sriov_numvfs (required) * neutron_ovs_bridge_mappings (optional) In this example, two hosts have different NICs installed that report different names to the operating system. @@ -148,13 +157,15 @@ supports-register-dump: yes supports-priv-flags: yes ``` -The **host_vars** for each host can be implemented in a file that corresponds to the host's short name located at **/opt/pf9-express/host_vars/.yml**. In the following example, **compute01** uses a single network interface for SR-IOV, while **compute02** uses two. SR-IOV networks will leverage a new provider label named **sriov**, as shown here: +The **host_vars** for each host can be implemented in a file that corresponds to the host's short name located at **/opt/pf9-express/host_vars/.yml**. In the following example, **compute01** uses a single network interface for SR-IOV, while **compute02** uses two. SR-IOV networks will leverage a new provider label named **sriov** and 8 VFs per interface, as shown here: ``` --- # compute01.yml physical_device_mappings: - sriov:ens1 +sriov_numvfs: + - ens1:8 ``` ``` @@ -163,6 +174,9 @@ physical_device_mappings: physical_device_mappings: - sriov:ens1f0 - sriov:ens1f1 +sriov_numvfs: + - ens1f0:8 + - ens1f1:8 ``` > SR-IOV supports VLAN networks only. Flat and overlay networks are not supported. @@ -172,12 +186,13 @@ physical_device_mappings: Group-wide configurations can be implemented using what is known as **group_vars**. 
Configurations that may be consistent between groups include: * Network interface name -* Quantity of network interfaces used for SRIOV +* Number of VFs per interface * Provider network mappings Using **group_vars**, the following are some variables that can be modified: * neutron_ovs_bridge_mappings +* sriov_numvfs * physical_device_mappings The **group_vars** for the **hypervisors** group can be implemented in a file that corresponds to the group's name located at **/opt/pf9-express/group_vars/.yml**. In the following example, every host in the **hypervisors** group has the same NIC installed in the same slot, so the naming convention is consistent across all hosts. A second provider bridge mapping has been established that will allow non-SR-IOV capable ports, such as DHCP, to connect to a vSwitch and communicate with SR-IOV ports: @@ -190,6 +205,9 @@ neutron_ovs_bridge_mappings: "external:br-pf9, sriov:br-sriov" physical_device_mappings: - sriov:ens1f0 - sriov:ens1f1 +sriov_numvfs: + - ens1f0:8 + - ens1f1:8 ... ``` @@ -222,7 +240,7 @@ Lastly, SR-IOV can be enabled via the respective **host_vars** file, as shown he ``` --- -# compute01.yml### +# compute01.yml sriov: "on" physical_device_mappings: - sriov:ens1 @@ -234,3 +252,9 @@ Once the respective configuration is in place, install PMO with Express using so ``` # ./pf9-express -a pmo ``` + +To refresh VFs, run **pf9-express** with the **refresh-sriov** tag: + +``` +# ./pf9-express -t refresh-sriov hypervisors +``` diff --git a/group_vars/hypervisors.yml b/group_vars/hypervisors.yml index f4f0ed0c..bccbeac4 100644 --- a/group_vars/hypervisors.yml +++ b/group_vars/hypervisors.yml @@ -11,8 +11,8 @@ glance: "off" multipath: False nova_instances_path: /opt/pf9/data/instances/ neutron_ovs_allow_dhcp_vms: "False" -neutron_ovs_bridge_name: "br-pf9, br-sriov" -neutron_ovs_bridge_mappings: "external:br-pf9, sriov:br-sriov" +#neutron_ovs_bridge_name: "br-pf9,br-sriov" +neutron_ovs_bridge_mappings: "external:br-pf9" ceilometer_customize: False ceilometer_cpu_interval: 600 diff --git a/host_vars/compute01.yml.example b/host_vars/compute01.yml.example index 7c7e3246..d82d6132 100644 --- a/host_vars/compute01.yml.example +++ b/host_vars/compute01.yml.example @@ -1,3 +1,5 @@ --- physical_device_mappings: - sriov:ens1 +sriov_numvfs: + - ens1:8 diff --git a/host_vars/compute02.yml.example b/host_vars/compute02.yml.example index a3c00dbd..ffc867ee 100644 --- a/host_vars/compute02.yml.example +++ b/host_vars/compute02.yml.example @@ -2,3 +2,6 @@ physical_device_mappings: - sriov:ens1f0 - sriov:ens1f1 +sriov_numvfs: + - ens1f0:8 + - ens1f1:8 diff --git a/pf9-express b/pf9-express index 6cdaaf70..fb32388d 100755 --- a/pf9-express +++ b/pf9-express @@ -514,7 +514,7 @@ while [ $# -gt 0 ]; do tags=${2} for tag in $(echo ${tags} | sed -e 's/,/ /g'); do case ${tag} in - live-migration|image-import) + live-migration|image-import|refresh-sriov) ;; *) assert "invalid tag : '${tag}'" diff --git a/pf9-express.yml b/pf9-express.yml index cc54a24f..9f0f119c 100644 --- a/pf9-express.yml +++ b/pf9-express.yml @@ -174,4 +174,17 @@ - k8s_worker become: true roles: - - post-hook \ No newline at end of file + - post-hook + +# Run SR-IOV role +- hosts: + - hypervisors + become: true + tasks: + - import_role: + name: neutron-sriov + when: + - sriov == "on" + - ansible_virtualization_role == "host" + tags: + - refresh-sriov diff --git a/roles/neutron-prerequisites/tasks/main.yml b/roles/neutron-prerequisites/tasks/main.yml index 457f7256..04379cbd 100644 --- 
a/roles/neutron-prerequisites/tasks/main.yml +++ b/roles/neutron-prerequisites/tasks/main.yml @@ -19,8 +19,9 @@ - name: Create required OVS bridges openvswitch_bridge: - bridge: "{{ item }}" + bridge: "{{ item.split(':')[1] }}" fail_mode: secure state: present - with_items: "{{ neutron_ovs_bridge_name.split(',') }}" + with_items: "{{ neutron_ovs_bridge_mappings.split(',') }}" +# with_items: "{{ neutron_ovs_bridge_name.split(',') }}" diff --git a/roles/neutron-sriov/handlers/main.yml b/roles/neutron-sriov/handlers/main.yml new file mode 100644 index 00000000..3a9cee75 --- /dev/null +++ b/roles/neutron-sriov/handlers/main.yml @@ -0,0 +1,18 @@ +--- +- name: Restart sysfsutils + systemd: + name: sysfsutils.service + state: restarted + listen: restart_sysfsutils + +- name: Restart pf9-ostackhost + systemd: + name: pf9-ostackhost.service + state: restarted + listen: restart_ostackhost + +- name: Restart pf9-sriov-agent + systemd: + name: pf9-neutron-sriov-agent.service + state: restarted + listen: restart_neutronsriovagent diff --git a/roles/neutron-sriov/tasks/main.yml b/roles/neutron-sriov/tasks/main.yml new file mode 100644 index 00000000..7582e7aa --- /dev/null +++ b/roles/neutron-sriov/tasks/main.yml @@ -0,0 +1,50 @@ +--- +# SR-IOV virtual functions get reset at boot unless commands exist in +# rc.local (deprecated), a systemctl unit file, or sysfs.conf. We set it +# up in sysfs. + +# Get current VF count in running sysfs +- name: Get current VF count for interface + slurp: + path: "/sys/class/net/{{ item.split(':')[0] }}/device/sriov_numvfs" + register: slurp_vfs + with_items: "{{ sriov_numvfs }}" + +- set_fact: + current_vfs: "{{ current_vfs|default({}) | combine({item.item.split(':')[0]:item.content | b64decode | replace('\n', '')}) }}" + with_items: "{{ slurp_vfs.results }}" + +# Remove entries in sysfs when count changed. This WILL break connectivity +# for instances using VFs on the interface until the instance is shutoff +# or hard rebooted! +- name: Remove existing entries for interface from sysfs + lineinfile: + path: /etc/sysfs.conf + state: absent + regexp: "^class\\/net\\/{{ item.split(':')[0] }}\\/device\\/sriov_numvfs = .*" + with_items: "{{ sriov_numvfs }}" + when: current_vfs[item.split(':')[0]] != item.split(':')[1] + +- name: Set VFs to 0 to work around I/O error when count is changed + lineinfile: + path: /etc/sysfs.conf + line: "class/net/{{ item.split(':')[0] }}/device/sriov_numvfs = 0" + create: yes + with_items: "{{ sriov_numvfs }}" + +- name: Add VFs to sysfs.conf + lineinfile: + path: /etc/sysfs.conf + insertafter: "^class\\/net\\/{{ item.split(':')[0] }}\\/device\\/sriov_numvfs = 0" + line: "class/net/{{ item.split(':')[0] }}/device/sriov_numvfs = {{ item.split(':')[1] }}" + create: yes + with_items: "{{ sriov_numvfs }}" + register: sysfs_vfs + notify: + - restart_sysfsutils + - restart_ostackhost + - restart_neutronsriovagent + +- debug: + msg: "ALERT - VFs on {{ ansible_hostname }} changed. Instances using SR-IOV ports must be shutdown or hard rebooted for interfaces to be reconnected." + when: sysfs_vfs.changed diff --git a/roles/pre-flight-checks-openstack/tasks/prerequisites-sriov.yml b/roles/pre-flight-checks-openstack/tasks/prerequisites-sriov.yml index 96af5bd9..a9d2d7a4 100644 --- a/roles/pre-flight-checks-openstack/tasks/prerequisites-sriov.yml +++ b/roles/pre-flight-checks-openstack/tasks/prerequisites-sriov.yml @@ -1,4 +1,8 @@ --- +# There are prerequisites for SR-IOV support, including IOMMU enabled, +# passthrough, and driver support. 
Many drivers support SR-IOV, but some +# require out-of-kernel drivers (ie. Mellanox) and some don't support sysfs. + - name: Fail on incompatible CPU architecture fail: msg: "Detected {{ cpu_vendor }} CPU not supported! Must be {{ supported_cpus }}." @@ -11,8 +15,6 @@ # We need to # - Check to see if IOMMU is already enabled. If not, let's check grub (and break out iommu/pt) -# - Check for SRIOV compatibility (via NIC) -# - Update grub and reboot if necessary. Wait for reboot. - name: Check GRUB defaults and enable IOMMU if necessary lineinfile: @@ -28,8 +30,23 @@ - name: Fail if IOMMU is not enabled fail: - msg: | - IOMMU is not currently enabled in the kernel but has been configured. - Please reboot the host and rerun Express. - Refer to https://platform9.com/knowledge/KB12345 + msg: + - "IOMMU is not currently enabled in the kernel but has been configured. Please reboot the host and rerun Express. Refer to https://platform9.com/knowledge/KB12345 for assistance." when: iommus.examined < 1 + +# Warn if NIC driver is not supported +- name: Determine driver bound to NICs + find: + paths: "/sys/class/net/{{ item.split(':')[1] }}/device/driver/module/drivers" + file_type: link + register: find_result + with_items: "{{ physical_device_mappings }}" + +- set_fact: + nic_driver: "{{ (item.path | basename).split(':')[1] }}" + with_items: "{{ find_result.results[0].files }}" + +- debug: + msg: + - "The {{ nic_driver }} NIC driver is not currently supported by Platform9. Refer to https://platform9.com/knowledge/KB12345 for assistance." + when: nic_driver not in supported_nic_drivers diff --git a/roles/pre-flight-checks-openstack/vars/main.yml b/roles/pre-flight-checks-openstack/vars/main.yml index 1d93b9c4..3e21d7cf 100644 --- a/roles/pre-flight-checks-openstack/vars/main.yml +++ b/roles/pre-flight-checks-openstack/vars/main.yml @@ -5,3 +5,6 @@ iommu_kernel_cmds: '{{ cpu_vendor }}_iommu=on iommu=pt' supported_cpus: - intel - amd +supported_nic_drivers: + - ixgbe + - bnx2x From 95e7f34ed83150bccea8c7dd6bc8929588272b8e Mon Sep 17 00:00:00 2001 From: James Denton Date: Thu, 18 Jul 2019 13:03:55 +0000 Subject: [PATCH 04/15] Initial commit for SRIOV support with Express --- group_vars/hypervisors.yml | 12 +++++-- pf9-express.yml | 9 ++++- roles/map-role/tasks/main.yml | 9 +++++ .../templates/pf9-neutron-ovs-agent.j2 | 8 +++-- .../templates/pf9-neutron-sriov-agent.j2 | 5 +++ .../templates/pf9-ostackhost-neutron.j2 | 5 ++- roles/neutron-prerequisites/tasks/main.yml | 9 ++--- .../tasks/main.yml | 9 +++++ .../tasks/prerequisites-sriov.yml | 35 +++++++++++++++++++ .../pre-flight-checks-openstack/vars/main.yml | 7 ++++ 10 files changed, 96 insertions(+), 12 deletions(-) create mode 100644 roles/map-role/templates/pf9-neutron-sriov-agent.j2 create mode 100644 roles/pre-flight-checks-openstack/tasks/prerequisites-sriov.yml create mode 100644 roles/pre-flight-checks-openstack/vars/main.yml diff --git a/group_vars/hypervisors.yml b/group_vars/hypervisors.yml index 2942f3ec..f4f0ed0c 100644 --- a/group_vars/hypervisors.yml +++ b/group_vars/hypervisors.yml @@ -11,7 +11,15 @@ glance: "off" multipath: False nova_instances_path: /opt/pf9/data/instances/ neutron_ovs_allow_dhcp_vms: "False" -neutron_ovs_bridge_name: "br-pf9" -neutron_ovs_bridge_mappings: "external:br-pf9" +neutron_ovs_bridge_name: "br-pf9, br-sriov" +neutron_ovs_bridge_mappings: "external:br-pf9, sriov:br-sriov" ceilometer_customize: False ceilometer_cpu_interval: 600 + +#################### +# SRIOV +#################### + +sriov: "on" 
+physical_device_mappings: "" +pci_passthrough_whitelist: "{}" diff --git a/pf9-express.yml b/pf9-express.yml index 805beb7c..eec58e03 100644 --- a/pf9-express.yml +++ b/pf9-express.yml @@ -85,11 +85,17 @@ - { role: "bond-config", when: manage_network == True } - { role: "pf9-auth", when: manage_network == True and ansible_distribution == "Ubuntu" } -# OpenStack Hypervisor Nodes +# OpenStack Hypervisor Nodes - Pre-Flight Checks +# Runs pre-checks and prompts users to reboot if necessary - hosts: hypervisors + any_errors_fatal: true become: true roles: - pre-flight-checks-openstack + +- hosts: hypervisors + become: true + roles: - common - ntp - pf9-hostagent @@ -103,6 +109,7 @@ - { role: "map-role", rolename: "pf9-neutron-l3-agent", when: autoreg == "on" } - { role: "map-role", rolename: "pf9-neutron-metadata-agent", when: autoreg == "on" } - { role: "map-role", rolename: "pf9-neutron-dhcp-agent", when: autoreg == "on" and dhcp == "on" } + - { role: "map-role", rolename: "pf9-neutron-sriov-agent", when: sriov == "on" } - { role: "wait-for-convergence", when: autoreg == "on" } - { role: "multipath", when: multipath == True } - { role: "enable-nested-virt", when: nested_virt == True } diff --git a/roles/map-role/tasks/main.yml b/roles/map-role/tasks/main.yml index cc07206a..ffb0a179 100644 --- a/roles/map-role/tasks/main.yml +++ b/roles/map-role/tasks/main.yml @@ -132,6 +132,15 @@ shell: "cat /tmp/keystone-token.txt" register: api_token +# JD DEBUG +- name: print out role json for debugging + debug: + msg: "{{ role_json }}" + +#- name: fail! +# fail: +# when: 1==1 + - name: "Assigning Role - {{rolename}}" uri: url: "https://{{ctrl_ip}}/resmgr/v1/hosts/{{host_id.stdout.strip()}}/roles/{{rolename}}" diff --git a/roles/map-role/templates/pf9-neutron-ovs-agent.j2 b/roles/map-role/templates/pf9-neutron-ovs-agent.j2 index cf495870..3d1be73e 100644 --- a/roles/map-role/templates/pf9-neutron-ovs-agent.j2 +++ b/roles/map-role/templates/pf9-neutron-ovs-agent.j2 @@ -3,9 +3,13 @@ "bridge_mappings": "{{neutron_ovs_bridge_mappings}}", "enable_distributed_routing": "{{neutron_ovs_enable_distributed_routing}}", "enable_tunneling": "{{neutron_ovs_enable_tunneling}}", -{% if neutron_tunnel_types is defined %} "local_ip": "{{tunnel_ip}}", +{% if neutron_tunnel_types is defined %} + "local_ip": "{{tunnel_ip}}", +{% endif %} +{% if sriov == "on" %} + "extensions": "fdb", + "shared_physical_device_mappings": "{% for mapping in physical_device_mappings %}{{ mapping }}{% if not loop.last %},{% endif %}{% endfor %}", {% endif %} "net_type": "{{neutron_ovs_net_type}}"{% if neutron_tunnel_types is defined %}, "tunnel_types": "{{neutron_tunnel_types}}"{%- endif %} - } diff --git a/roles/map-role/templates/pf9-neutron-sriov-agent.j2 b/roles/map-role/templates/pf9-neutron-sriov-agent.j2 new file mode 100644 index 00000000..4bf23932 --- /dev/null +++ b/roles/map-role/templates/pf9-neutron-sriov-agent.j2 @@ -0,0 +1,5 @@ +{ +{% if sriov == "on" %} + "physical_device_mappings": "{% for mapping in physical_device_mappings %}{{ mapping }}{% if not loop.last %},{% endif %}{% endfor %}" +{% endif %} +} diff --git a/roles/map-role/templates/pf9-ostackhost-neutron.j2 b/roles/map-role/templates/pf9-ostackhost-neutron.j2 index d682c455..473934f4 100644 --- a/roles/map-role/templates/pf9-ostackhost-neutron.j2 +++ b/roles/map-role/templates/pf9-ostackhost-neutron.j2 @@ -1,5 +1,8 @@ { "cluster_ip": "{{ha_cluster_ip}}", "instances_path": "{{nova_instances_path}}", - "novncproxy_base_url": "{{neutron_novncproxy_base_url}}" + 
"novncproxy_base_url": "{{neutron_novncproxy_base_url}}", +{% if sriov == "on" %} + "pci_passthrough_whitelist": "[{% for physical_device_mapping in physical_device_mappings %}{% set provider,devname = physical_device_mapping.split(':') %}{\"physical_network\":\"{{ provider }}\", \"devname\":\"{{ devname }}\"}{% if not loop.last %},{% endif %}{% endfor %}]" +{% endif %} } diff --git a/roles/neutron-prerequisites/tasks/main.yml b/roles/neutron-prerequisites/tasks/main.yml index 76695c8b..457f7256 100644 --- a/roles/neutron-prerequisites/tasks/main.yml +++ b/roles/neutron-prerequisites/tasks/main.yml @@ -17,13 +17,10 @@ - include: ubuntu.yml when: ansible_distribution == "Ubuntu" -- name: check if OVS bridge already exists - shell: "ifconfig -a | grep ^{{neutron_ovs_bridge_name}} > /dev/null 2>&1; if [ $? -eq 0 ]; then echo 'exists'; else echo 'not-exist'; fi" - register: ovs_bridge_check - - name: Create required OVS bridges openvswitch_bridge: - bridge: "{{neutron_ovs_bridge_name}}" + bridge: "{{ item }}" + fail_mode: secure state: present - when: ovs_bridge_check.stdout.strip() == "not-exist" + with_items: "{{ neutron_ovs_bridge_name.split(',') }}" diff --git a/roles/pre-flight-checks-openstack/tasks/main.yml b/roles/pre-flight-checks-openstack/tasks/main.yml index 51ef3e8e..f02d851c 100644 --- a/roles/pre-flight-checks-openstack/tasks/main.yml +++ b/roles/pre-flight-checks-openstack/tasks/main.yml @@ -20,6 +20,15 @@ - kvm_result is defined and kvm_result.stat.exists == False - inventory_hostname in groups['hypervisors'] +################################################## +## Include tasks that verify individual components +################################################## + +- include_tasks: prerequisites-sriov.yml + when: + - sriov == "on" + - ansible_virtualization_role == "host" + ########################################################################################### ## Verify DVR is on, step 1 in the UI is complete, import vars, and test credentials ########################################################################################### diff --git a/roles/pre-flight-checks-openstack/tasks/prerequisites-sriov.yml b/roles/pre-flight-checks-openstack/tasks/prerequisites-sriov.yml new file mode 100644 index 00000000..96af5bd9 --- /dev/null +++ b/roles/pre-flight-checks-openstack/tasks/prerequisites-sriov.yml @@ -0,0 +1,35 @@ +--- +- name: Fail on incompatible CPU architecture + fail: + msg: "Detected {{ cpu_vendor }} CPU not supported! Must be {{ supported_cpus }}." + when: + - cpu_vendor not in supported_cpus + +- name: Register IOMMU DMARs + find: paths=/sys/class/iommu file_type=directory patterns="*" + register: iommus + +# We need to +# - Check to see if IOMMU is already enabled. If not, let's check grub (and break out iommu/pt) +# - Check for SRIOV compatibility (via NIC) +# - Update grub and reboot if necessary. Wait for reboot. + +- name: Check GRUB defaults and enable IOMMU if necessary + lineinfile: + path: /etc/default/grub + backrefs: true + regexp: '^GRUB_CMDLINE_LINUX="((?!.*{{ iommu_kernel_cmds }}).*)"$' + line: 'GRUB_CMDLINE_LINUX="\1 {{ iommu_kernel_cmds }}"' + backup: yes + register: grub + +- name: Update GRUB config + command: update-grub + +- name: Fail if IOMMU is not enabled + fail: + msg: | + IOMMU is not currently enabled in the kernel but has been configured. + Please reboot the host and rerun Express. 
+ Refer to https://platform9.com/knowledge/KB12345 + when: iommus.examined < 1 diff --git a/roles/pre-flight-checks-openstack/vars/main.yml b/roles/pre-flight-checks-openstack/vars/main.yml new file mode 100644 index 00000000..1d93b9c4 --- /dev/null +++ b/roles/pre-flight-checks-openstack/vars/main.yml @@ -0,0 +1,7 @@ +--- +# SR-IOV support requires Intel or AMD CPUs +cpu_vendor: "{{ ansible_facts['processor'][1] | lower | regex_replace('(authentic)|(genuine)', '') }}" +iommu_kernel_cmds: '{{ cpu_vendor }}_iommu=on iommu=pt' +supported_cpus: + - intel + - amd From d629209ba2b55c1f4af09feb36b8d18a6d3db224 Mon Sep 17 00:00:00 2001 From: James Denton Date: Thu, 18 Jul 2019 17:29:54 +0000 Subject: [PATCH 05/15] Updated documentation --- README.md | 141 +++++++++++++------ docs/SRIOV.md | 236 ++++++++++++++++++++++++++++++++ host_vars/compute01.yml.example | 3 + host_vars/compute02.yml.example | 4 + 4 files changed, 345 insertions(+), 39 deletions(-) create mode 100644 docs/SRIOV.md create mode 100644 host_vars/compute01.yml.example create mode 100644 host_vars/compute02.yml.example diff --git a/README.md b/README.md index 43bb599a..712dee46 100644 --- a/README.md +++ b/README.md @@ -1,68 +1,104 @@ # Platform9 Express -Platform9 Express (pf9-express) is a Customer Success developed tool for bringing hosts under management by a Platform9 management plane. It can bring a host to the point where it shows up in the Clarity UI as a host waiting to be authorized, or it can (optionally) perform Platform9 role deployments for both OpenStack and Kubernetes. Platform9 Express includes a CLI and can be installed on a CentOS or Ubuntu control host. + +Platform9 Express (**pf9-express**) is a Customer Success developed tool for bringing hosts under management by a Platform9 management plane. It can bring a host to the point where it shows up in the Clarity UI as a host waiting to be authorized, or it can (optionally) perform Platform9 role deployments for both OpenStack and Kubernetes. Platform9 Express includes a CLI and can be installed on a CentOS or Ubuntu control host. + +### Table of Contents + +- [Prerequisites](#prerequisites) +- [Installing Express on Control Host](#installation) +- [Configuring Access to the Management Plane](#configure-access-to-the-management-plane-cli-only) +- [Install Prerequisite Packages on Control Host](#install-prerequisite-packages) +- [Configuring the Inventory](#configuring-the-inventory-cli-only) +- [CSV Import](#csv-import) +- [Running Platform9 Express](#running-platform9-express) + +#### Advanced Topics + +- [Overriding Variables](#overriding-inventory-variables) +- [Using SR-IOV](docs/SRIOV.md) ## Prerequisites + Platform9 Express must be installed on a control host with IP connectivity to the hosts to be brought under management. CentOS 7.4+, Ubuntu 16.04, or Ubuntu 18.04 are supported on the control host. Before installing Platform9 Express, you'll need administrator credentials for the Platform9 management plane. If a proxy is required for HTTP/HTTPS traffic, you'll need the URL for the proxy. +> There are strict requirements for hosts whose software is deployed by Platform9 Express. Please refer to your Customer Success team for further details. + ## Installation + Perform the following steps to install Platform9 Express: -1. Login as root (or a user with sudo access) on the host that you plan to install Platform9 Express on. +1. Login as **root** (or a user with sudo access) on the host that you plan to install Platform9 Express on. + +2. Install **git** -2. 
Install git ``` yum install git # CentOS apt update && apt install git # Ubuntu ``` -3. Clone the Platform9 Express repository. +3. Clone the Platform9 Express repository. ``` git clone https://github.com/platform9/express.git /opt/pf9-express ``` -NOTE: In this example, the installation directory is /opt/pf9-express, but any directory can be used. + +> In this example, the installation directory is **/opt/pf9-express**, but any directory can be used. ## Configure Access to the Management Plane (CLI Only) -To configure the Platform9 Express CLI to communicate with the Platform9 management plane, run the following command (a sample session is included): + +To configure the Platform9 Express CLI to communicate with the Platform9 management plane, run the following command: + +``` +./pf9-express -s +``` + +Example: ``` # ./pf9-express -s NOTE: to enter a NULL value for prompt, enter '-' - + PF9 Management Plane URL [https://company.platform9.net]: --> accepted: https://company.platform9.net - + Admin Username [user@company.com]: --> accepted: user@company.com - + Admin Password [********]: --> accepted: ******** - + Region [Sunnyvale]: --> accepted: Sunnyvale - + Tenant [service]: --> accepted: service - + Manage Hostname [true false] [false]: --> accepted: false - + Manage DNS Resolver [true false] [false]: --> accepted: false - + DNS Resolver 1 [8.8.8.8]: --> accepted: 8.8.8.8 - + DNS Resolver 2 [8.8.4.4]: --> accepted: 8.8.4.4 - + Proxy URL: --> accepted: - ``` ## Install Prerequisite Packages -To install prerequisite packages on the Platform9 Express control host, run the following command (a sample session is included): + +To install prerequisite packages on the Platform9 Express control host, run the following command: + +``` +./pf9-express -i +``` + +Example: ``` # ./pf9-express -i @@ -70,13 +106,20 @@ To install prerequisite packages on the Platform9 Express control host, run the --> Validating package dependencies: epel-release ntp nginx gcc python-devel python2-pip bc shade docker-py ansible ``` -## Configuration Inventory (CLI Only) -Platform9 Express uses Ansible to execute commands on the hosts to be taken under management. In order to configure Ansible to run remote commands on the managed hosts, the Ansible Inventory file must be configured. This file is located in /opt/pf9-express/inventory/hosts. +## Configuring the Inventory (CLI Only) + +Platform9 Express uses Ansible to execute commands on the hosts to be taken under management. In order to configure Ansible to run remote commands on the managed hosts, the Ansible Inventory file must be configured. This file is located in **/opt/pf9-express/inventory/hosts**. -NOTE: A sample template is installed in the previous command ("./pf9-express -s"). A breakdown of the Inventory File is below: +> Platform9 Express supports Ansible's `group_vars` and `host_vars` methods of defining variables. + +A sample template is installed in the setup command (**./pf9-express -s**). A breakdown of the inventory file is below: ## Sample Inventory File Part 1 - Authentication Portion -This is where you enter the credentials for your control host to log into the target VM hosts to be managed by the Platform9 management plane (through either a password or SSH key, comment out any password lines if using SSH authentication and vice versa as needed) + +This is where you enter the credentials for your control host to log into the target hosts to be managed by the Platform9 management plane. 
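For illustration only, a key-based variant of the authentication block might look like the following sketch (placeholder values, not defaults shipped with Express):

```
ansible_user=centos
ansible_ssh_private_key_file=~/.ssh/id_rsa
#ansible_sudo_pass=winterwonderland
#ansible_ssh_pass=winterwonderland
```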
+ +> When using password authentication, comment out `ansible_ssh_private_key_file`. When using a private key, comment out `ansible_sudo_pass`. + ``` ## ## Ansible Inventory @@ -90,7 +133,9 @@ ansible_ssh_pass=winterwonderland ``` ## Sample Inventory File Part 2 - Network Portion -This is where you can configure optional network settings to create a bond with single or multiple interfaces. + +This is where you can configure optional network settings to create a bond with single or multiple interfaces. + ``` ################################################################################################ ## Optional Settings @@ -112,7 +157,9 @@ cv01 bond_members='["eth1","eth2"]' bond_sub_interfaces='[{"vlanid":"100","ip":" ``` ## Sample Inventory File Part 3 - OpenStack Portion + You can configure the OpenStack hosts and their pertinent roles (Hypervisor, Image Host, Storage Host, DNS Host) + ``` ################################################################################################ ## OpenStack Groups @@ -131,7 +178,7 @@ cinder hv01 ansible_host=10.0.0.11 vm_console_ip=10.0.0.11 ha_cluster_ip=10.0.1.11 tunnel_ip=10.0.2.11 dhcp=on snat=on hv02 ansible_host=10.0.0.12 vm_console_ip=10.0.0.12 tunnel_ip=10.0.2.12 dhcp=on snat=on hv03 ansible_host=10.0.0.13 vm_console_ip=10.0.0.13 tunnel_ip=10.0.2.13 -hv04 ansible_host=10.0.0.14 +hv04 ansible_host=10.0.0.14 ## global variables defined in group_vars/glance.yml ## note: if the following variables are not defined, the value of ansible_host will be inherited @@ -153,7 +200,9 @@ hv02 cinder_ip=10.0.4.14 pvs=["/dev/sdb","/dev/sdc","/dev/sdd","/dev/sde"] ``` ## Sample Inventory File Part 4 - Kubernetes Portion -This is where you can configure your Kubernetes cluster members under their own roles (either master or worker). For a worker, you can optionally add it into a running cluster using the "cluster_uuid" variable. For any new workers, you can omit this variable assignment. + +This is where you can configure your Kubernetes cluster members under their own roles (either master or worker). For a worker, you can optionally add it into a running cluster using the **cluster_uuid** variable. For any new workers, you can omit this variable assignment. + ``` ################################################################################################ ## Kubernetes Groups @@ -176,9 +225,11 @@ cv05 ansible_host=10.0.0.19 cluster_uuid=7273706d-afd5-44ea-8fbf-901ceb6bef27 ``` ## CSV Import -Instead of manually configuring the inventory file, you can use the '-f ' option to auto-configure it from a CSV definition file. + +Instead of manually configuring the inventory file, you can use the **-f ** option to auto-configure it from a CSV definition file. 
Here's a sample CSV definition file: + ``` hostname,username,key,ip,dhcp,snat,glance,glance-public,nic1,nic2,mgmtvlan,mgmtip,mgmtnetmask,Storagevlan,storageip,storagenetmask,tunnelvlan,tunnelip,tunnelnetmask fake01,centos,~/.ssh/id_rsa,172.16.7.182,TRUE,TRUE,TRUE,TRUE,ens160,,243,172.16.243.11,255.255.255.0,244,172.16.244.11,255.255.255.0,245,172.16.245.11,255.255.255.0 @@ -186,18 +237,22 @@ fake02,ubuntu,~/.ssh/id_rsa,172.16.7.47,TRUE,FALSE,FALSE,FALSE,ens192,,243,172.1 ``` ## Controlling UID/GID for the Platform9 Host Agent -If you want to control the UID and GID values for the Platform9 service account (pf9/pf9group), set the following inventory variables: + +If you want to control the UID and GID values for the Platform9 service account (pf9:pf9group), set the following inventory variables: + * pf9_uid * pf9_gid If these variables are not defined, the Host Agent Installer will allow the system to auto-assign the UID and GID. -NOTE: This feature is not idempotent. If the 'pf9' user had not been created yet, Platform9 Express will create the 'pf9' user and 'pf9group' group based on the values of pf9_uid and pf9_gid. If the 'pf9' user already exists, Platform9 Express will skip the user/group management section; it will not attempt to alter the UID/GID settings. +> This feature is not idempotent. If the **pf9** user had not been created yet, Platform9 Express will create the **pf9** user and **pf9group** group based on the values of **pf9_uid** and **pf9_gid**. If the **pf9** user already exists, Platform9 Express will skip the user/group management section; it will not attempt to alter the UID/GID settings. ## Running Platform9 Express -The basic syntax for starting Platform9 Express includes a target (host group, individual host, comma-delimited list of hosts, or "all" to run all groups) and an optional flag ('-a') that instructs it to perform role deployment. + +The basic syntax for starting Platform9 Express includes a target (host group, individual host, comma-delimited list of hosts, or "all" to run all groups) and an optional flag (**-a**) that instructs it to perform role deployment. Here's an example of invoking Platform9 Express against a number of hosts without registering them automatically to the management plane: + ``` # ./pf9-express hv01,hv02,hv03 ################################################################ @@ -207,13 +262,15 @@ Here's an example of invoking Platform9 Express against a number of hosts withou --> Validating package dependencies: epel-release ntp nginx gcc python-devel python2-pip bc shade docker-py ansible setupd --> Updating setupd libraries: pf9_master_setup.py pf9_utils.py pf9_mgmt_setup.py attach-node add-cluster --> ansible_version = 2.5 - + [Executing: ansible-playbook ./pf9-express.yml] . . . 
``` -Here's an example of invoking Platform9 Express against a single host group (host groups are either "pmo" for OpenStack and "pmk" for Kubernetes), performing role deployments (based on metadata defined in /opt/pf9-express/inventory/hosts), and registering them automatically to the management plane + +Here's an example of invoking Platform9 Express against a single host group (host groups are either "pmo" for OpenStack and "pmk" for Kubernetes), performing role deployments (based on metadata defined in **/opt/pf9-express/inventory/hosts**), and registering them automatically to the management plane + ``` # ./pf9-express -a pmk ################################################################ @@ -223,13 +280,15 @@ Here's an example of invoking Platform9 Express against a single host group (hos --> Validating package dependencies: epel-release ntp nginx gcc python-devel python2-pip bc shade docker-py ansible setupd --> Updating setupd libraries: pf9_master_setup.py pf9_utils.py pf9_mgmt_setup.py attach-node add-cluster --> ansible_version = 2.5 - + [Executing: ansible-playbook ./pf9-express.yml] . . . ``` -Here's an example of invoking Platform9 Express against all host groups and performing role deployments (based on metadata defined in /opt/pf9-express/inventory/hosts): + +Here's an example of invoking Platform9 Express against all host groups and performing role deployments (based on metadata defined in **/opt/pf9-express/inventory/hosts**): + ``` # ./pf9-express -a all ################################################################ @@ -239,19 +298,21 @@ Here's an example of invoking Platform9 Express against all host groups and perf --> Validating package dependencies: epel-release ntp nginx gcc python-devel python2-pip bc shade docker-py ansible setupd --> Updating setupd libraries: pf9_master_setup.py pf9_utils.py pf9_mgmt_setup.py attach-node add-cluster --> ansible_version = 2.5 - + [Executing: ansible-playbook ./pf9-express.yml] . . . ``` + Here's the usage statement showing all command-line options: + ``` # ./pf9-express Usage: ./pf9-express [Args] - + Args (Optional): - + -a|--autoRegister : auto-register host with management plane -i|--installPrereqs : install pre-requisites and exit -s|--setup : run setup and exit @@ -264,20 +325,22 @@ Args (Optional): -h|--help : display this message ``` -## Managing Multiple Cloud Management Regions (DUs) -If you have more than one Platform9 region to manage, you can create a configuration file for each one (using pf9-express.conf as a template) and start pf9-express with the '-c' flag: + +If you have more than one Platform9 region to manage, you can create a configuration file for each one (using pf9-express.conf as a template) and start **pf9-express** with the **-c** flag: ``` ./pf9-express -c ~/pf9-site1.conf -a hv01 ``` ## Overriding Inventory Variables -If you want to override an Ansible variable defined in Inventory or dynamically within playbooks, you can invoke pf9-express with the '-e' flag: + +If you want to override an Ansible variable defined in Inventory or dynamically within playbooks, you can invoke **pf9-express** with the **-e** flag: ``` ./pf9-express -c ~/pf9-express.conf -a -e "proxy_url=https://proxy1.platform9.net" hv01 ``` -NOTE: Variables passed as extra-vars have the highest precedence. + +> Variables passed as extra-vars have the highest precedence. 
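For instance, because extra-vars win over every other variable source, a hypothetical invocation could flip the SR-IOV toggle for a single run without editing **group_vars** or **host_vars** (illustrative only):

```
./pf9-express -a -e "sriov=on" hv01
```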
## License diff --git a/docs/SRIOV.md b/docs/SRIOV.md new file mode 100644 index 00000000..15f69715 --- /dev/null +++ b/docs/SRIOV.md @@ -0,0 +1,236 @@ +# Using SR-IOV with Platform9 Express + +In PMO version 3.11.x, support for SR-IOV has been introduced. SR-IOV provides increased network performance including higher throughput, lower latency, and lower jitter when compared to virtual switching technologies such as Open vSwitch. + +SR-IOV is supported by multiple network interface cards (NICs) provided by many networking vendors, including Intel, Cisco, Mellanox, Broadcom, QLogic, and others. + +The following NICs have been tested with the Platform9 PMO 3.11 release: + +* Mellanox ConnectX-4 Lx EN +* Mellanox ConnectX-5 EN +* Intel X520 +* Intel X540-T2 +* Broadcom NetXtreme II (BCM57810 / HP 533FLR-T) + +## Limitations + +The following are a few of the limitations of SR-IOV: + +* Bonded NICs at the host-level are not recommended/not supported for use with SR-IOV. While active/passive bonding may work in this configuration, LACP/802.3ad is definitely not a supported configuration. +* Virtual Functions are automatically assigned to Neutron ports and are not customizable. +* Instance-level NIC bonding using Virtual Functions is not supported. +* Port security/security groups are not supported. +* VLAN networks are required. Flat (untagged) and overlay networks are not supported. + +## System Prerequisites + +SR-IOV requires the following: + +* BIOS Support (configuration varies by vendor) +* Kernel IOMMU Support +* Kernel IOMMU Passthrough support +* Compatible Network Interface Card (NIC) + +> When SR-IOV capable NICs are used in conjunction with Open vSwitch bridges, you have the option of using an existing provider label, such as **external**, or using a dedicated provider. When sharing a provider network between SR-IOV and non-SR-IOV ports, communication between the ports on the same network is permitted. Using a dedicated provider will require you to call out a second bridge mapping, such as `sriov:br-sriov`, to allow DHCP ports connected to a vSwitch to communicate with the SR-IOV ports. + +### Kernel IOMMU Support + +Using **dmesg**, you can verify if IOMMU is enabled with the following command: + +``` +# dmesg | grep IOMMU +``` + +If you do not see the message ```DMAR: IOMMU enabled```, then proceed with the following steps: + +First, enable IOMMU support in the kernel by modifying the GRUB configuration at **/etc/default/grub**: + +``` +GRUB_CMDLINE_LINUX="... intel_iommu=on" #Intel-based Systems + +GRUB_CMDLINE_LINUX="... amd_iommu=on" #AMD-based Systems +``` + +Next, update GRUB: + +``` +update-grub +``` + +> Once the kernel configuration has been modified, you must reboot for the changes to take effect. + +### IOMMU Passthrough Support + +To enable IOMMU passthrough support in the kernel, please complete the following steps: + +First, enable passthrough support in the kernel by modifying the GRUB configuration at **/etc/default/grub**: + +``` +GRUB_CMDLINE_LINUX="... iommu=pt" +``` + +Then, update GRUB: + +``` +update-grub +``` + +> Once the kernel configuration has been modified, you must reboot for the changes to take effect. + +## Deploying PMO with SR-IOV support using Express + +Using the Platform9 Express tool, operators can deploy PMO with support for SR-IOV. 
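Before handing the hosts over to Express, IOMMU and NIC SR-IOV support can be sanity-checked with a few commands (illustrative; **ens1f0** is a placeholder interface name):

```
# IOMMU active in the running kernel?
dmesg | grep -e DMAR -e IOMMU
ls /sys/class/iommu/

# Does the NIC expose virtual functions? (a non-zero value means SR-IOV capable)
cat /sys/class/net/ens1f0/device/sriov_totalvfs
```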
The Express tool will perform many of the tasks outlined in the previous sections, including enabling IOMMU and passthrough support in the kernel, as well as implementing a unit file for persisting VFs across reboots. + +> Given the complexity involved in supporting Mellanox NICs, the Express tool will initially only support Intel NICs using the ixgbe driver. This includes the Intel X520, X540, and X550 families. + +The necessary configuration details can be implemented globally using **group_vars**, or on an individual host basis using **host_vars**. Each method is described below. + +### Host Variables + +Compute node-specific configurations can be implemented using what is known as host_vars. Configurations that may vary between hosts include: + +* Network interface name +* Quantity of network interfaces used for SRIOV +* Provider network mappings +* +Using **host_vars**, the following are some variables that can be modified: + +* physical_device_mappings (required) +* neutron_ovs_bridge_mappings (optional) + +In this example, two hosts have different NICs installed that report different names to the operating system. + +``` +root@compute01:~# ip link show +... +6: ens1: mtu 1500 qdisc mq portid 0002c90300ffe511 state UP mode DEFAULT group default qlen 1000 + link/ether 00:02:c9:ff:e5:10 brd ff:ff:ff:ff:ff:ff +7: ens1d1: mtu 1500 qdisc mq portid 0002c90300ffe512 state UP mode DEFAULT group default qlen 1000 + link/ether 00:02:c9:ff:e5:11 brd ff:ff:ff:ff:ff:ff +``` + +``` +root@compute02:~# ip link show +... +3: ens1f0: mtu 1500 qdisc mq state UP mode DEFAULT group default qlen 1000 + link/ether 90:e2:ba:a2:1b:88 brd ff:ff:ff:ff:ff:ff +5: ens1f1: mtu 1500 qdisc mq state UP mode DEFAULT group default qlen 1000 + link/ether 90:e2:ba:a2:1b:89 brd ff:ff:ff:ff:ff:ff +``` + +NIC naming can vary based on the kernel version, NIC driver, and the PCI slot where the card is installed. In this example, the NIC installed in each host is from a different manufacturer and uses a different driver: + +``` +root@compute01:~# ethtool -i ens1 +driver: mlx4_en +version: 4.0-0 +firmware-version: 2.42.5000 +expansion-rom-version: +bus-info: 0000:08:00.0 +supports-statistics: yes +supports-test: yes +supports-eeprom-access: no +supports-register-dump: no +supports-priv-flags: yes +``` + +``` +root@compute02:~# ethtool -i ens1f0 +driver: ixgbe +version: 5.1.0-k +firmware-version: 0x61bd0001 +expansion-rom-version: +bus-info: 0000:08:00.0 +supports-statistics: yes +supports-test: yes +supports-eeprom-access: yes +supports-register-dump: yes +supports-priv-flags: yes +``` + +The **host_vars** for each host can be implemented in a file that corresponds to the host's short name located at **/opt/pf9-express/host_vars/.yml**. In the following example, **compute01** uses a single network interface for SR-IOV, while **compute02** uses two. SR-IOV networks will leverage a new provider label named **sriov**, as shown here: + +``` +--- +# compute01.yml +physical_device_mappings: + - sriov:ens1 +``` + +``` +--- +# compute02.yml +physical_device_mappings: + - sriov:ens1f0 + - sriov:ens1f1 +``` + +> SR-IOV supports VLAN networks only. Flat and overlay networks are not supported. + +### Group Variables + +Group-wide configurations can be implemented using what is known as **group_vars**. 
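As an aside, whether these values are supplied per host or per group, they ultimately feed the role templates added earlier in this series; for the **compute02** values above they would render to roughly the following (shown for illustration only):

```
physical_device_mappings = sriov:ens1f0,sriov:ens1f1
pci_passthrough_whitelist = [{"physical_network":"sriov", "devname":"ens1f0"},{"physical_network":"sriov", "devname":"ens1f1"}]
```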
Configurations that may be consistent between groups include: + +* Network interface name +* Quantity of network interfaces used for SRIOV +* Provider network mappings + +Using **group_vars**, the following are some variables that can be modified: + +* neutron_ovs_bridge_mappings +* physical_device_mappings + +The **group_vars** for the **hypervisors** group can be implemented in a file that corresponds to the group's name located at **/opt/pf9-express/group_vars/.yml**. In the following example, every host in the **hypervisors** group has the same NIC installed in the same slot, so the naming convention is consistent across all hosts. A second provider bridge mapping has been established that will allow non-SR-IOV capable ports, such as DHCP, to connect to a vSwitch and communicate with SR-IOV ports: + +``` +--- +# hypervisors.yml +... +neutron_ovs_bridge_mappings: "external:br-pf9, sriov:br-sriov" +physical_device_mappings: + - sriov:ens1f0 + - sriov:ens1f1 +... +``` + +> Host vars take precedence over group vars. If a small number of hosts vary from the greater group, feel free to implement the respective **host_vars** files accordingly. + +### Inventory File + +To enable support for SR-IOV on a host, the inventory must be modified according so that SR-IOV related tasks are executed. One method of enabling support for a host is to add the **sriov=on** variable to an individual host in the **hypervisors** group, as shown here: + +``` +[hypervisors] +compute01 ansible_host=10.50.0.197 vm_console_ip=10.50.0.197 ha_cluster_ip=10.50.0.197 tunnel_ip=10.50.0.197 dhcp=on snat=on sriov=on +compute02 ansible_host=10.50.0.196 vm_console_ip=10.50.0.196 tunnel_ip=10.50.0.196 dhcp=on snat=on sriov=on +``` + +SR-IOV can be enabled group-wide by modifying the respective **group_vars** file, as shown here: + +``` +--- +# hypervisors.yml +... +#################### +# SRIOV +#################### +sriov: "on" +... +``` + +Lastly, SR-IOV can be enabled via the respective **host_vars** file, as shown here: + +``` +--- +# compute01.yml### +sriov: "on" +physical_device_mappings: + - sriov:ens1 +``` + +### Installation +Once the respective configuration is in place, install PMO with Express using some variation of the following: + +``` +# ./pf9-express -a pmo +``` diff --git a/host_vars/compute01.yml.example b/host_vars/compute01.yml.example new file mode 100644 index 00000000..7c7e3246 --- /dev/null +++ b/host_vars/compute01.yml.example @@ -0,0 +1,3 @@ +--- +physical_device_mappings: + - sriov:ens1 diff --git a/host_vars/compute02.yml.example b/host_vars/compute02.yml.example new file mode 100644 index 00000000..a3c00dbd --- /dev/null +++ b/host_vars/compute02.yml.example @@ -0,0 +1,4 @@ +--- +physical_device_mappings: + - sriov:ens1f0 + - sriov:ens1f1 From 1fdb1922e4f6d2b30bc92fd95f0d51283f6a7ea0 Mon Sep 17 00:00:00 2001 From: James Denton Date: Thu, 25 Jul 2019 17:54:47 +0000 Subject: [PATCH 06/15] Added logic to handle VF creation and persistence. 
Added additional documentation --- docs/SRIOV.md | 34 +++++++++++-- group_vars/hypervisors.yml | 4 +- host_vars/compute01.yml.example | 2 + host_vars/compute02.yml.example | 3 ++ pf9-express | 2 +- pf9-express.yml | 13 +++++ roles/neutron-prerequisites/tasks/main.yml | 5 +- roles/neutron-sriov/handlers/main.yml | 18 +++++++ roles/neutron-sriov/tasks/main.yml | 50 +++++++++++++++++++ .../tasks/prerequisites-sriov.yml | 29 ++++++++--- .../pre-flight-checks-openstack/vars/main.yml | 3 ++ 11 files changed, 147 insertions(+), 16 deletions(-) create mode 100644 roles/neutron-sriov/handlers/main.yml create mode 100644 roles/neutron-sriov/tasks/main.yml diff --git a/docs/SRIOV.md b/docs/SRIOV.md index 15f69715..81f76951 100644 --- a/docs/SRIOV.md +++ b/docs/SRIOV.md @@ -4,7 +4,7 @@ In PMO version 3.11.x, support for SR-IOV has been introduced. SR-IOV provides i SR-IOV is supported by multiple network interface cards (NICs) provided by many networking vendors, including Intel, Cisco, Mellanox, Broadcom, QLogic, and others. -The following NICs have been tested with the Platform9 PMO 3.11 release: +The following NICs have been tested with the Platform9 PMO 3.11.3 release: * Mellanox ConnectX-4 Lx EN * Mellanox ConnectX-5 EN @@ -12,6 +12,13 @@ The following NICs have been tested with the Platform9 PMO 3.11 release: * Intel X540-T2 * Broadcom NetXtreme II (BCM57810 / HP 533FLR-T) +The following drivers are considered supported: + +* ixgbe +* bnx2x + +> Mellanox cards require additional configuration that is outside the scope of this guide and Platform9 Express. + ## Limitations The following are a few of the limitations of SR-IOV: @@ -92,10 +99,12 @@ Compute node-specific configurations can be implemented using what is known as h * Network interface name * Quantity of network interfaces used for SRIOV * Provider network mappings -* +* Number of VFs per interface + Using **host_vars**, the following are some variables that can be modified: * physical_device_mappings (required) +* sriov_numvfs (required) * neutron_ovs_bridge_mappings (optional) In this example, two hosts have different NICs installed that report different names to the operating system. @@ -148,13 +157,15 @@ supports-register-dump: yes supports-priv-flags: yes ``` -The **host_vars** for each host can be implemented in a file that corresponds to the host's short name located at **/opt/pf9-express/host_vars/.yml**. In the following example, **compute01** uses a single network interface for SR-IOV, while **compute02** uses two. SR-IOV networks will leverage a new provider label named **sriov**, as shown here: +The **host_vars** for each host can be implemented in a file that corresponds to the host's short name located at **/opt/pf9-express/host_vars/.yml**. In the following example, **compute01** uses a single network interface for SR-IOV, while **compute02** uses two. SR-IOV networks will leverage a new provider label named **sriov** and 8 VFs per interface, as shown here: ``` --- # compute01.yml physical_device_mappings: - sriov:ens1 +sriov_numvfs: + - ens1:8 ``` ``` @@ -163,6 +174,9 @@ physical_device_mappings: physical_device_mappings: - sriov:ens1f0 - sriov:ens1f1 +sriov_numvfs: + - ens1f0:8 + - ens1f1:8 ``` > SR-IOV supports VLAN networks only. Flat and overlay networks are not supported. @@ -172,12 +186,13 @@ physical_device_mappings: Group-wide configurations can be implemented using what is known as **group_vars**. 
Configurations that may be consistent between groups include: * Network interface name -* Quantity of network interfaces used for SRIOV +* Number of VFs per interface * Provider network mappings Using **group_vars**, the following are some variables that can be modified: * neutron_ovs_bridge_mappings +* sriov_numvfs * physical_device_mappings The **group_vars** for the **hypervisors** group can be implemented in a file that corresponds to the group's name located at **/opt/pf9-express/group_vars/.yml**. In the following example, every host in the **hypervisors** group has the same NIC installed in the same slot, so the naming convention is consistent across all hosts. A second provider bridge mapping has been established that will allow non-SR-IOV capable ports, such as DHCP, to connect to a vSwitch and communicate with SR-IOV ports: @@ -190,6 +205,9 @@ neutron_ovs_bridge_mappings: "external:br-pf9, sriov:br-sriov" physical_device_mappings: - sriov:ens1f0 - sriov:ens1f1 +sriov_numvfs: + - ens1f0:8 + - ens1f1:8 ... ``` @@ -222,7 +240,7 @@ Lastly, SR-IOV can be enabled via the respective **host_vars** file, as shown he ``` --- -# compute01.yml### +# compute01.yml sriov: "on" physical_device_mappings: - sriov:ens1 @@ -234,3 +252,9 @@ Once the respective configuration is in place, install PMO with Express using so ``` # ./pf9-express -a pmo ``` + +To refresh VFs, run **pf9-express** with the **refresh-sriov** tag: + +``` +# ./pf9-express -t refresh-sriov hypervisors +``` diff --git a/group_vars/hypervisors.yml b/group_vars/hypervisors.yml index f4f0ed0c..bccbeac4 100644 --- a/group_vars/hypervisors.yml +++ b/group_vars/hypervisors.yml @@ -11,8 +11,8 @@ glance: "off" multipath: False nova_instances_path: /opt/pf9/data/instances/ neutron_ovs_allow_dhcp_vms: "False" -neutron_ovs_bridge_name: "br-pf9, br-sriov" -neutron_ovs_bridge_mappings: "external:br-pf9, sriov:br-sriov" +#neutron_ovs_bridge_name: "br-pf9,br-sriov" +neutron_ovs_bridge_mappings: "external:br-pf9" ceilometer_customize: False ceilometer_cpu_interval: 600 diff --git a/host_vars/compute01.yml.example b/host_vars/compute01.yml.example index 7c7e3246..d82d6132 100644 --- a/host_vars/compute01.yml.example +++ b/host_vars/compute01.yml.example @@ -1,3 +1,5 @@ --- physical_device_mappings: - sriov:ens1 +sriov_numvfs: + - ens1:8 diff --git a/host_vars/compute02.yml.example b/host_vars/compute02.yml.example index a3c00dbd..ffc867ee 100644 --- a/host_vars/compute02.yml.example +++ b/host_vars/compute02.yml.example @@ -2,3 +2,6 @@ physical_device_mappings: - sriov:ens1f0 - sriov:ens1f1 +sriov_numvfs: + - ens1f0:8 + - ens1f1:8 diff --git a/pf9-express b/pf9-express index 6cdaaf70..fb32388d 100755 --- a/pf9-express +++ b/pf9-express @@ -514,7 +514,7 @@ while [ $# -gt 0 ]; do tags=${2} for tag in $(echo ${tags} | sed -e 's/,/ /g'); do case ${tag} in - live-migration|image-import) + live-migration|image-import|refresh-sriov) ;; *) assert "invalid tag : '${tag}'" diff --git a/pf9-express.yml b/pf9-express.yml index eec58e03..b0756358 100644 --- a/pf9-express.yml +++ b/pf9-express.yml @@ -175,3 +175,16 @@ become: true roles: - post-hook + +# Run SR-IOV role +- hosts: + - hypervisors + become: true + tasks: + - import_role: + name: neutron-sriov + when: + - sriov == "on" + - ansible_virtualization_role == "host" + tags: + - refresh-sriov diff --git a/roles/neutron-prerequisites/tasks/main.yml b/roles/neutron-prerequisites/tasks/main.yml index 457f7256..04379cbd 100644 --- a/roles/neutron-prerequisites/tasks/main.yml +++ 
b/roles/neutron-prerequisites/tasks/main.yml @@ -19,8 +19,9 @@ - name: Create required OVS bridges openvswitch_bridge: - bridge: "{{ item }}" + bridge: "{{ item.split(':')[1] }}" fail_mode: secure state: present - with_items: "{{ neutron_ovs_bridge_name.split(',') }}" + with_items: "{{ neutron_ovs_bridge_mappings.split(',') }}" +# with_items: "{{ neutron_ovs_bridge_name.split(',') }}" diff --git a/roles/neutron-sriov/handlers/main.yml b/roles/neutron-sriov/handlers/main.yml new file mode 100644 index 00000000..3a9cee75 --- /dev/null +++ b/roles/neutron-sriov/handlers/main.yml @@ -0,0 +1,18 @@ +--- +- name: Restart sysfsutils + systemd: + name: sysfsutils.service + state: restarted + listen: restart_sysfsutils + +- name: Restart pf9-ostackhost + systemd: + name: pf9-ostackhost.service + state: restarted + listen: restart_ostackhost + +- name: Restart pf9-sriov-agent + systemd: + name: pf9-neutron-sriov-agent.service + state: restarted + listen: restart_neutronsriovagent diff --git a/roles/neutron-sriov/tasks/main.yml b/roles/neutron-sriov/tasks/main.yml new file mode 100644 index 00000000..7582e7aa --- /dev/null +++ b/roles/neutron-sriov/tasks/main.yml @@ -0,0 +1,50 @@ +--- +# SR-IOV virtual functions get reset at boot unless commands exist in +# rc.local (deprecated), a systemctl unit file, or sysfs.conf. We set it +# up in sysfs. + +# Get current VF count in running sysfs +- name: Get current VF count for interface + slurp: + path: "/sys/class/net/{{ item.split(':')[0] }}/device/sriov_numvfs" + register: slurp_vfs + with_items: "{{ sriov_numvfs }}" + +- set_fact: + current_vfs: "{{ current_vfs|default({}) | combine({item.item.split(':')[0]:item.content | b64decode | replace('\n', '')}) }}" + with_items: "{{ slurp_vfs.results }}" + +# Remove entries in sysfs when count changed. This WILL break connectivity +# for instances using VFs on the interface until the instance is shutoff +# or hard rebooted! +- name: Remove existing entries for interface from sysfs + lineinfile: + path: /etc/sysfs.conf + state: absent + regexp: "^class\\/net\\/{{ item.split(':')[0] }}\\/device\\/sriov_numvfs = .*" + with_items: "{{ sriov_numvfs }}" + when: current_vfs[item.split(':')[0]] != item.split(':')[1] + +- name: Set VFs to 0 to work around I/O error when count is changed + lineinfile: + path: /etc/sysfs.conf + line: "class/net/{{ item.split(':')[0] }}/device/sriov_numvfs = 0" + create: yes + with_items: "{{ sriov_numvfs }}" + +- name: Add VFs to sysfs.conf + lineinfile: + path: /etc/sysfs.conf + insertafter: "^class\\/net\\/{{ item.split(':')[0] }}\\/device\\/sriov_numvfs = 0" + line: "class/net/{{ item.split(':')[0] }}/device/sriov_numvfs = {{ item.split(':')[1] }}" + create: yes + with_items: "{{ sriov_numvfs }}" + register: sysfs_vfs + notify: + - restart_sysfsutils + - restart_ostackhost + - restart_neutronsriovagent + +- debug: + msg: "ALERT - VFs on {{ ansible_hostname }} changed. Instances using SR-IOV ports must be shutdown or hard rebooted for interfaces to be reconnected." + when: sysfs_vfs.changed diff --git a/roles/pre-flight-checks-openstack/tasks/prerequisites-sriov.yml b/roles/pre-flight-checks-openstack/tasks/prerequisites-sriov.yml index 96af5bd9..a9d2d7a4 100644 --- a/roles/pre-flight-checks-openstack/tasks/prerequisites-sriov.yml +++ b/roles/pre-flight-checks-openstack/tasks/prerequisites-sriov.yml @@ -1,4 +1,8 @@ --- +# There are prerequisites for SR-IOV support, including IOMMU enabled, +# passthrough, and driver support. 
Many drivers support SR-IOV, but some +# require out-of-kernel drivers (ie. Mellanox) and some don't support sysfs. + - name: Fail on incompatible CPU architecture fail: msg: "Detected {{ cpu_vendor }} CPU not supported! Must be {{ supported_cpus }}." @@ -11,8 +15,6 @@ # We need to # - Check to see if IOMMU is already enabled. If not, let's check grub (and break out iommu/pt) -# - Check for SRIOV compatibility (via NIC) -# - Update grub and reboot if necessary. Wait for reboot. - name: Check GRUB defaults and enable IOMMU if necessary lineinfile: @@ -28,8 +30,23 @@ - name: Fail if IOMMU is not enabled fail: - msg: | - IOMMU is not currently enabled in the kernel but has been configured. - Please reboot the host and rerun Express. - Refer to https://platform9.com/knowledge/KB12345 + msg: + - "IOMMU is not currently enabled in the kernel but has been configured. Please reboot the host and rerun Express. Refer to https://platform9.com/knowledge/KB12345 for assistance." when: iommus.examined < 1 + +# Warn if NIC driver is not supported +- name: Determine driver bound to NICs + find: + paths: "/sys/class/net/{{ item.split(':')[1] }}/device/driver/module/drivers" + file_type: link + register: find_result + with_items: "{{ physical_device_mappings }}" + +- set_fact: + nic_driver: "{{ (item.path | basename).split(':')[1] }}" + with_items: "{{ find_result.results[0].files }}" + +- debug: + msg: + - "The {{ nic_driver }} NIC driver is not currently supported by Platform9. Refer to https://platform9.com/knowledge/KB12345 for assistance." + when: nic_driver not in supported_nic_drivers diff --git a/roles/pre-flight-checks-openstack/vars/main.yml b/roles/pre-flight-checks-openstack/vars/main.yml index 1d93b9c4..3e21d7cf 100644 --- a/roles/pre-flight-checks-openstack/vars/main.yml +++ b/roles/pre-flight-checks-openstack/vars/main.yml @@ -5,3 +5,6 @@ iommu_kernel_cmds: '{{ cpu_vendor }}_iommu=on iommu=pt' supported_cpus: - intel - amd +supported_nic_drivers: + - ixgbe + - bnx2x From bbb8bffc8b9e886908774dd89b3e4572391165fb Mon Sep 17 00:00:00 2001 From: James Denton Date: Thu, 15 Aug 2019 14:29:22 +0000 Subject: [PATCH 07/15] Updated GRUB commands for CentOS/RHEL --- .../tasks/prerequisites-sriov.yml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/roles/pre-flight-checks-openstack/tasks/prerequisites-sriov.yml b/roles/pre-flight-checks-openstack/tasks/prerequisites-sriov.yml index a9d2d7a4..395db913 100644 --- a/roles/pre-flight-checks-openstack/tasks/prerequisites-sriov.yml +++ b/roles/pre-flight-checks-openstack/tasks/prerequisites-sriov.yml @@ -25,8 +25,15 @@ backup: yes register: grub -- name: Update GRUB config +- name: Update GRUB config (Ubuntu) command: update-grub + when: + - ansible_distribution == 'Ubuntu' + +- name: Update GRUB config (CentOS) + command: grub2-mkconfig -o /boot/grub2/grub.cfg + when: + - ansible_distribution == 'CentOS' or ansible_distribution == 'Red Hat Enterprise Linux' - name: Fail if IOMMU is not enabled fail: From 80b498cda190f93cc27ea5c78d8f70b913badda7 Mon Sep 17 00:00:00 2001 From: James Denton Date: Fri, 16 Aug 2019 21:15:41 +0000 Subject: [PATCH 08/15] Use systemd rather than sysfs for compatibility with CentOS and Ubuntu --- roles/neutron-sriov/handlers/main.yml | 8 ++-- roles/neutron-sriov/tasks/main.yml | 67 +++++++++++---------------- 2 files changed, 30 insertions(+), 45 deletions(-) diff --git a/roles/neutron-sriov/handlers/main.yml b/roles/neutron-sriov/handlers/main.yml index 3a9cee75..18e889ae 100644 --- 
a/roles/neutron-sriov/handlers/main.yml +++ b/roles/neutron-sriov/handlers/main.yml @@ -1,9 +1,9 @@ --- -- name: Restart sysfsutils +- name: Reload Platform9 VF Manager systemd: - name: sysfsutils.service - state: restarted - listen: restart_sysfsutils + name: pf9-sriov-vf-manager.service + state: reloaded + listen: restart_vf_manager - name: Restart pf9-ostackhost systemd: diff --git a/roles/neutron-sriov/tasks/main.yml b/roles/neutron-sriov/tasks/main.yml index 7582e7aa..5a5af4cf 100644 --- a/roles/neutron-sriov/tasks/main.yml +++ b/roles/neutron-sriov/tasks/main.yml @@ -1,50 +1,35 @@ --- # SR-IOV virtual functions get reset at boot unless commands exist in -# rc.local (deprecated), a systemctl unit file, or sysfs.conf. We set it -# up in sysfs. +# rc.local (deprecated), a systemctl unit file, or sysfs.conf. We set up +# up a custome systemctl unit/service. -# Get current VF count in running sysfs -- name: Get current VF count for interface - slurp: - path: "/sys/class/net/{{ item.split(':')[0] }}/device/sriov_numvfs" - register: slurp_vfs - with_items: "{{ sriov_numvfs }}" +- name: Drop-in SR-IOV VF Manager Systemd Unit + template: + src: pf9-sriov-vf-manager.service.j2 + dest: /usr/lib/systemd/system/pf9-sriov-vf-manager.service + owner: root + group: root + mode: '0644' -- set_fact: - current_vfs: "{{ current_vfs|default({}) | combine({item.item.split(':')[0]:item.content | b64decode | replace('\n', '')}) }}" - with_items: "{{ slurp_vfs.results }}" +- name: Force systemd to reread config + systemd: + daemon_reload: yes -# Remove entries in sysfs when count changed. This WILL break connectivity -# for instances using VFs on the interface until the instance is shutoff -# or hard rebooted! -- name: Remove existing entries for interface from sysfs - lineinfile: - path: /etc/sysfs.conf - state: absent - regexp: "^class\\/net\\/{{ item.split(':')[0] }}\\/device\\/sriov_numvfs = .*" - with_items: "{{ sriov_numvfs }}" - when: current_vfs[item.split(':')[0]] != item.split(':')[1] +- name: Enable SR-IOV VF Manager Systemd Service + systemd: + enabled: yes + name: pf9-sriov-vf-manager.service -- name: Set VFs to 0 to work around I/O error when count is changed - lineinfile: - path: /etc/sysfs.conf - line: "class/net/{{ item.split(':')[0] }}/device/sriov_numvfs = 0" - create: yes - with_items: "{{ sriov_numvfs }}" - -- name: Add VFs to sysfs.conf - lineinfile: - path: /etc/sysfs.conf - insertafter: "^class\\/net\\/{{ item.split(':')[0] }}\\/device\\/sriov_numvfs = 0" - line: "class/net/{{ item.split(':')[0] }}/device/sriov_numvfs = {{ item.split(':')[1] }}" - create: yes - with_items: "{{ sriov_numvfs }}" - register: sysfs_vfs - notify: - - restart_sysfsutils +- name: Update SR-IOV VF Manager Script + template: + src: pf9-virtual-functions.sh.j2 + dest: /opt/pf9/pf9-virtual-functions.sh + owner: root + group: root + mode: '0755' + notify: + - restart_vf_manager - restart_ostackhost - restart_neutronsriovagent -- debug: - msg: "ALERT - VFs on {{ ansible_hostname }} changed. Instances using SR-IOV ports must be shutdown or hard rebooted for interfaces to be reconnected." 
- when: sysfs_vfs.changed + From 75cd3f165258d20c683593e7eccbc8e5746c637a Mon Sep 17 00:00:00 2001 From: James Denton Date: Fri, 16 Aug 2019 21:23:42 +0000 Subject: [PATCH 09/15] Removed errant debug --- roles/map-role/tasks/main.yml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/roles/map-role/tasks/main.yml b/roles/map-role/tasks/main.yml index ffb0a179..cc07206a 100644 --- a/roles/map-role/tasks/main.yml +++ b/roles/map-role/tasks/main.yml @@ -132,15 +132,6 @@ shell: "cat /tmp/keystone-token.txt" register: api_token -# JD DEBUG -- name: print out role json for debugging - debug: - msg: "{{ role_json }}" - -#- name: fail! -# fail: -# when: 1==1 - - name: "Assigning Role - {{rolename}}" uri: url: "https://{{ctrl_ip}}/resmgr/v1/hosts/{{host_id.stdout.strip()}}/roles/{{rolename}}" From f3d39f01bcb425d0151bc569250dde903177b487 Mon Sep 17 00:00:00 2001 From: James Denton Date: Tue, 20 Aug 2019 19:51:45 +0000 Subject: [PATCH 10/15] Changes per review --- README.md | 2 +- docs/SRIOV.md | 32 ++++++++++--------- group_vars/hypervisors.yml | 10 ++++-- ...compute01.yml.example => hv01.yml.example} | 0 ...compute02.yml.example => hv02.yml.example} | 0 .../templates/pf9-ostackhost-neutron.j2 | 4 +-- roles/neutron-prerequisites/tasks/main.yml | 1 - .../tasks/prerequisites-sriov.yml | 6 ++-- 8 files changed, 31 insertions(+), 24 deletions(-) rename host_vars/{compute01.yml.example => hv01.yml.example} (100%) rename host_vars/{compute02.yml.example => hv02.yml.example} (100%) diff --git a/README.md b/README.md index 712dee46..78b06a44 100644 --- a/README.md +++ b/README.md @@ -118,7 +118,7 @@ A sample template is installed in the setup command (**./pf9-express -s**). A br This is where you enter the credentials for your control host to log into the target hosts to be managed by the Platform9 management plane. -> When using password authentication, comment out `ansible_ssh_private_key_file`. When using a private key, comment out `ansible_sudo_pass`. +> When using password authentication, comment out `ansible_ssh_private_key_file`. When using a private key, comment out `ansible_ssh_pass`. ``` ## diff --git a/docs/SRIOV.md b/docs/SRIOV.md index 81f76951..cb3719ee 100644 --- a/docs/SRIOV.md +++ b/docs/SRIOV.md @@ -1,10 +1,10 @@ # Using SR-IOV with Platform9 Express -In PMO version 3.11.x, support for SR-IOV has been introduced. SR-IOV provides increased network performance including higher throughput, lower latency, and lower jitter when compared to virtual switching technologies such as Open vSwitch. +In PMO version 3.11, support for SR-IOV has been introduced. SR-IOV provides increased network performance including higher throughput, lower latency, and lower jitter when compared to virtual switching technologies, such as Open vSwitch. SR-IOV is supported by multiple network interface cards (NICs) provided by many networking vendors, including Intel, Cisco, Mellanox, Broadcom, QLogic, and others. -The following NICs have been tested with the Platform9 PMO 3.11.3 release: +The following NICs have been tested with the Platform9 PMO 3.11 release: * Mellanox ConnectX-4 Lx EN * Mellanox ConnectX-5 EN @@ -23,7 +23,7 @@ The following drivers are considered supported: The following are a few of the limitations of SR-IOV: -* Bonded NICs at the host-level are not recommended/not supported for use with SR-IOV. While active/passive bonding may work in this configuration, LACP/802.3ad is definitely not a supported configuration. 
+* Bonded NICs at the host-level are not recommended/not supported for use with SR-IOV. While active/passive bonding may work in this configuration, LACP/802.3ad is not a supported configuration. * Virtual Functions are automatically assigned to Neutron ports and are not customizable. * Instance-level NIC bonding using Virtual Functions is not supported. * Port security/security groups are not supported. @@ -40,6 +40,8 @@ SR-IOV requires the following: > When SR-IOV capable NICs are used in conjunction with Open vSwitch bridges, you have the option of using an existing provider label, such as **external**, or using a dedicated provider. When sharing a provider network between SR-IOV and non-SR-IOV ports, communication between the ports on the same network is permitted. Using a dedicated provider will require you to call out a second bridge mapping, such as `sriov:br-sriov`, to allow DHCP ports connected to a vSwitch to communicate with the SR-IOV ports. +While BIOS changes necessary for SR-IOV support must be handled manually, Platform9 Express can and will configure the kernel to enable IOMMU and Passthrough support. The following steps document the manual processes necessary to enable IOMMU and Passthrough support in the kernel. + ### Kernel IOMMU Support Using **dmesg**, you can verify if IOMMU is enabled with the following command: @@ -64,7 +66,7 @@ Next, update GRUB: update-grub ``` -> Once the kernel configuration has been modified, you must reboot for the changes to take effect. +> Once the kernel configuration has been modified, you **must** reboot for the changes to take effect. If these changes are implemented using Platform9 Express, the playbook will not proceed without a reboot. ### IOMMU Passthrough Support @@ -82,7 +84,7 @@ Then, update GRUB: update-grub ``` -> Once the kernel configuration has been modified, you must reboot for the changes to take effect. +> Once the kernel configuration has been modified, you **must** reboot for the changes to take effect. If these changes are implemented using Platform9 Express, the playbook will not proceed without a reboot. ## Deploying PMO with SR-IOV support using Express @@ -110,7 +112,7 @@ Using **host_vars**, the following are some variables that can be modified: In this example, two hosts have different NICs installed that report different names to the operating system. ``` -root@compute01:~# ip link show +root@hv01:~# ip link show ... 6: ens1: mtu 1500 qdisc mq portid 0002c90300ffe511 state UP mode DEFAULT group default qlen 1000 link/ether 00:02:c9:ff:e5:10 brd ff:ff:ff:ff:ff:ff @@ -119,7 +121,7 @@ root@compute01:~# ip link show ``` ``` -root@compute02:~# ip link show +root@hv02:~# ip link show ... 3: ens1f0: mtu 1500 qdisc mq state UP mode DEFAULT group default qlen 1000 link/ether 90:e2:ba:a2:1b:88 brd ff:ff:ff:ff:ff:ff @@ -130,7 +132,7 @@ root@compute02:~# ip link show NIC naming can vary based on the kernel version, NIC driver, and the PCI slot where the card is installed. 
In this example, the NIC installed in each host is from a different manufacturer and uses a different driver: ``` -root@compute01:~# ethtool -i ens1 +root@hv01:~# ethtool -i ens1 driver: mlx4_en version: 4.0-0 firmware-version: 2.42.5000 @@ -144,7 +146,7 @@ supports-priv-flags: yes ``` ``` -root@compute02:~# ethtool -i ens1f0 +root@hv02:~# ethtool -i ens1f0 driver: ixgbe version: 5.1.0-k firmware-version: 0x61bd0001 @@ -157,11 +159,11 @@ supports-register-dump: yes supports-priv-flags: yes ``` -The **host_vars** for each host can be implemented in a file that corresponds to the host's short name located at **/opt/pf9-express/host_vars/.yml**. In the following example, **compute01** uses a single network interface for SR-IOV, while **compute02** uses two. SR-IOV networks will leverage a new provider label named **sriov** and 8 VFs per interface, as shown here: +The **host_vars** for each host can be implemented in a file that corresponds to the host's short name located at **/opt/pf9-express/host_vars/.yml**. In the following example, **hv01** uses a single network interface for SR-IOV, while **hv02** uses two. SR-IOV networks will leverage a new provider label named **sriov** and 8 VFs per interface, as shown here: ``` --- -# compute01.yml +# hv01.yml physical_device_mappings: - sriov:ens1 sriov_numvfs: @@ -170,7 +172,7 @@ sriov_numvfs: ``` --- -# compute02.yml +# hv02.yml physical_device_mappings: - sriov:ens1f0 - sriov:ens1f1 @@ -219,8 +221,8 @@ To enable support for SR-IOV on a host, the inventory must be modified according ``` [hypervisors] -compute01 ansible_host=10.50.0.197 vm_console_ip=10.50.0.197 ha_cluster_ip=10.50.0.197 tunnel_ip=10.50.0.197 dhcp=on snat=on sriov=on -compute02 ansible_host=10.50.0.196 vm_console_ip=10.50.0.196 tunnel_ip=10.50.0.196 dhcp=on snat=on sriov=on +hv01 ansible_host=10.50.0.197 vm_console_ip=10.50.0.197 ha_cluster_ip=10.50.0.197 tunnel_ip=10.50.0.197 dhcp=on snat=on sriov=on +hv02 ansible_host=10.50.0.196 vm_console_ip=10.50.0.196 tunnel_ip=10.50.0.196 dhcp=on snat=on sriov=on ``` SR-IOV can be enabled group-wide by modifying the respective **group_vars** file, as shown here: @@ -240,7 +242,7 @@ Lastly, SR-IOV can be enabled via the respective **host_vars** file, as shown he ``` --- -# compute01.yml +# hv01.yml sriov: "on" physical_device_mappings: - sriov:ens1 diff --git a/group_vars/hypervisors.yml b/group_vars/hypervisors.yml index bccbeac4..7ce654f7 100644 --- a/group_vars/hypervisors.yml +++ b/group_vars/hypervisors.yml @@ -11,8 +11,14 @@ glance: "off" multipath: False nova_instances_path: /opt/pf9/data/instances/ neutron_ovs_allow_dhcp_vms: "False" -#neutron_ovs_bridge_name: "br-pf9,br-sriov" + +neutron_ovs_bridge_name: "br-pf9" + +# neutron_ovs_bridge_mappings should be used in conjunction with +# neutron_ovs_bridge_name. Multiple bridge mappings can be specified +# using a comma-separated list. 
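# For example (illustrative, mirroring the SR-IOV guide), a second mapping for
# SR-IOV traffic could be declared as:
#   neutron_ovs_bridge_mappings: "external:br-pf9,sriov:br-sriov"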
neutron_ovs_bridge_mappings: "external:br-pf9" + ceilometer_customize: False ceilometer_cpu_interval: 600 @@ -20,6 +26,6 @@ ceilometer_cpu_interval: 600 # SRIOV #################### -sriov: "on" +sriov: "off" physical_device_mappings: "" pci_passthrough_whitelist: "{}" diff --git a/host_vars/compute01.yml.example b/host_vars/hv01.yml.example similarity index 100% rename from host_vars/compute01.yml.example rename to host_vars/hv01.yml.example diff --git a/host_vars/compute02.yml.example b/host_vars/hv02.yml.example similarity index 100% rename from host_vars/compute02.yml.example rename to host_vars/hv02.yml.example diff --git a/roles/map-role/templates/pf9-ostackhost-neutron.j2 b/roles/map-role/templates/pf9-ostackhost-neutron.j2 index 473934f4..a84b7ff8 100644 --- a/roles/map-role/templates/pf9-ostackhost-neutron.j2 +++ b/roles/map-role/templates/pf9-ostackhost-neutron.j2 @@ -1,8 +1,8 @@ { "cluster_ip": "{{ha_cluster_ip}}", "instances_path": "{{nova_instances_path}}", - "novncproxy_base_url": "{{neutron_novncproxy_base_url}}", {% if sriov == "on" %} - "pci_passthrough_whitelist": "[{% for physical_device_mapping in physical_device_mappings %}{% set provider,devname = physical_device_mapping.split(':') %}{\"physical_network\":\"{{ provider }}\", \"devname\":\"{{ devname }}\"}{% if not loop.last %},{% endif %}{% endfor %}]" + "pci_passthrough_whitelist": "[{% for physical_device_mapping in physical_device_mappings %}{% set provider,devname = physical_device_mapping.split(':') %}{\"physical_network\":\"{{ provider }}\", \"devname\":\"{{ devname }}\"}{% if not loop.last %},{% endif %}{% endfor %}]", {% endif %} + "novncproxy_base_url": "{{neutron_novncproxy_base_url}}" } diff --git a/roles/neutron-prerequisites/tasks/main.yml b/roles/neutron-prerequisites/tasks/main.yml index 04379cbd..281151d4 100644 --- a/roles/neutron-prerequisites/tasks/main.yml +++ b/roles/neutron-prerequisites/tasks/main.yml @@ -23,5 +23,4 @@ fail_mode: secure state: present with_items: "{{ neutron_ovs_bridge_mappings.split(',') }}" -# with_items: "{{ neutron_ovs_bridge_name.split(',') }}" diff --git a/roles/pre-flight-checks-openstack/tasks/prerequisites-sriov.yml b/roles/pre-flight-checks-openstack/tasks/prerequisites-sriov.yml index 395db913..b5f55e9a 100644 --- a/roles/pre-flight-checks-openstack/tasks/prerequisites-sriov.yml +++ b/roles/pre-flight-checks-openstack/tasks/prerequisites-sriov.yml @@ -33,12 +33,12 @@ - name: Update GRUB config (CentOS) command: grub2-mkconfig -o /boot/grub2/grub.cfg when: - - ansible_distribution == 'CentOS' or ansible_distribution == 'Red Hat Enterprise Linux' + - ansible_os_family == "RedHat" - name: Fail if IOMMU is not enabled fail: msg: - - "IOMMU is not currently enabled in the kernel but has been configured. Please reboot the host and rerun Express. Refer to https://platform9.com/knowledge/KB12345 for assistance." + - "IOMMU is not currently enabled in the kernel but has been configured. Please reboot the host and rerun Express." when: iommus.examined < 1 # Warn if NIC driver is not supported @@ -55,5 +55,5 @@ - debug: msg: - - "The {{ nic_driver }} NIC driver is not currently supported by Platform9. Refer to https://platform9.com/knowledge/KB12345 for assistance." + - "The {{ nic_driver }} NIC driver is not currently supported by Platform9." 
when: nic_driver not in supported_nic_drivers From ed9d7f3a0c8722989072991ee1472879ba3d9457 Mon Sep 17 00:00:00 2001 From: James Denton Date: Wed, 21 Aug 2019 12:27:15 +0000 Subject: [PATCH 11/15] Updated docs --- docs/SRIOV.md | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/docs/SRIOV.md b/docs/SRIOV.md index cb3719ee..b08a4e59 100644 --- a/docs/SRIOV.md +++ b/docs/SRIOV.md @@ -90,13 +90,13 @@ update-grub Using the Platform9 Express tool, operators can deploy PMO with support for SR-IOV. The Express tool will perform many of the tasks outlined in the previous sections, including enabling IOMMU and passthrough support in the kernel, as well as implementing a unit file for persisting VFs across reboots. -> Given the complexity involved in supporting Mellanox NICs, the Express tool will initially only support Intel NICs using the ixgbe driver. This includes the Intel X520, X540, and X550 families. +> Given the complexity involved in supporting Mellanox NICs, the Express tool will initially only support Intel NICs using the ixgbe and Broadcom NICs using the bnx2x driver. This includes the Intel X520, X540, and X550 families as well as certain Broadcom/QLogic NetXtreme families. The necessary configuration details can be implemented globally using **group_vars**, or on an individual host basis using **host_vars**. Each method is described below. ### Host Variables -Compute node-specific configurations can be implemented using what is known as host_vars. Configurations that may vary between hosts include: +Compute node-specific configurations can be implemented using what is known as **host_vars**. Configurations that may vary between hosts include: * Network interface name * Quantity of network interfaces used for SRIOV @@ -159,13 +159,13 @@ supports-register-dump: yes supports-priv-flags: yes ``` -The **host_vars** for each host can be implemented in a file that corresponds to the host's short name located at **/opt/pf9-express/host_vars/.yml**. In the following example, **hv01** uses a single network interface for SR-IOV, while **hv02** uses two. SR-IOV networks will leverage a new provider label named **sriov** and 8 VFs per interface, as shown here: +The **host_vars** for each host can be implemented in a file that corresponds to the host's short name located at **/opt/pf9-express/host_vars/.yml**. In the following example, **hv01** uses a single network interface for SR-IOV, while **hv02** uses two. 
SR-IOV networks will leverage the default provider label named **external** and 8 VFs per interface, as shown here: ``` --- # hv01.yml physical_device_mappings: - - sriov:ens1 + - external:ens1 sriov_numvfs: - ens1:8 ``` @@ -174,8 +174,8 @@ sriov_numvfs: --- # hv02.yml physical_device_mappings: - - sriov:ens1f0 - - sriov:ens1f1 + - external:ens1f0 + - external:ens1f1 sriov_numvfs: - ens1f0:8 - ens1f1:8 @@ -221,8 +221,8 @@ To enable support for SR-IOV on a host, the inventory must be modified according ``` [hypervisors] -hv01 ansible_host=10.50.0.197 vm_console_ip=10.50.0.197 ha_cluster_ip=10.50.0.197 tunnel_ip=10.50.0.197 dhcp=on snat=on sriov=on -hv02 ansible_host=10.50.0.196 vm_console_ip=10.50.0.196 tunnel_ip=10.50.0.196 dhcp=on snat=on sriov=on +hv01 ansible_host=10.0.0.11 vm_console_ip=10.0.0.11 ha_cluster_ip=10.0.1.11 tunnel_ip=10.0.2.11 dhcp=on snat=on sriov=on +hv02 ansible_host=10.0.0.12 vm_console_ip=10.0.0.12 tunnel_ip=10.0.2.12 dhcp=on snat=on ``` SR-IOV can be enabled group-wide by modifying the respective **group_vars** file, as shown here: @@ -245,18 +245,23 @@ Lastly, SR-IOV can be enabled via the respective **host_vars** file, as shown he # hv01.yml sriov: "on" physical_device_mappings: - - sriov:ens1 + - external:ens1 +... ``` ### Installation -Once the respective configuration is in place, install PMO with Express using some variation of the following: +Once the respective configuration is in place, install PMO with Express: ``` # ./pf9-express -a pmo ``` +When changing the number of VFs for a given interfaces, the host operating system must set VFs to 0 before setting the new value. Platform9 Express can refresh the VFs when the **refresh-sriov** tag is used. + To refresh VFs, run **pf9-express** with the **refresh-sriov** tag: ``` # ./pf9-express -t refresh-sriov hypervisors ``` + +> When VFs are refreshed, any VM attached to a VF must be shutdown/started or issued a hard reboot to restore network connectivity. From 8d8aaa2d065a126d6732974ad6d1eb41c7a3528b Mon Sep 17 00:00:00 2001 From: James Denton Date: Wed, 21 Aug 2019 15:32:55 +0000 Subject: [PATCH 12/15] Added a reboot step when modifying grub and default timers --- docs/SRIOV.md | 8 +++++ .../tasks/prerequisites-sriov.yml | 36 ++++++++++++++++++- 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/docs/SRIOV.md b/docs/SRIOV.md index b08a4e59..a2cd8314 100644 --- a/docs/SRIOV.md +++ b/docs/SRIOV.md @@ -102,12 +102,15 @@ Compute node-specific configurations can be implemented using what is known as * * Quantity of network interfaces used for SRIOV * Provider network mappings * Number of VFs per interface +* Auto reboot and wait time Using **host_vars**, the following are some variables that can be modified: * physical_device_mappings (required) * sriov_numvfs (required) * neutron_ovs_bridge_mappings (optional) +* grub_reboot (optional - defaults to true) +* reboot_wait (optional - defaults to 900 seconds) In this example, two hosts have different NICs installed that report different names to the operating system. 
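As an editorial aside (illustrative values, not part of this patch), the automatic reboot behavior introduced here can be tuned or disabled group-wide:

```
---
# hypervisors.yml
grub_reboot: false   # fail with a prompt to reboot manually instead of rebooting the host
reboot_wait: 600     # seconds to wait for hosts to return when grub_reboot is true
```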
diff --git a/roles/pre-flight-checks-openstack/tasks/prerequisites-sriov.yml b/roles/pre-flight-checks-openstack/tasks/prerequisites-sriov.yml
index b5f55e9a..2eaa4df7 100644
--- a/roles/pre-flight-checks-openstack/tasks/prerequisites-sriov.yml
+++ b/roles/pre-flight-checks-openstack/tasks/prerequisites-sriov.yml
@@ -35,11 +35,45 @@
   when:
     - ansible_os_family == "RedHat"
 
+# If grub_reboot is not enabled, user will be prompted to reboot the host(s) manually
 - name: Fail if IOMMU is not enabled
+  fail:
+    msg:
+      - "IOMMU is not currently enabled in the kernel but has been configured. Please reboot the host manually and rerun Express."
+  when:
+    - iommus.examined < 1
+    - not grub_reboot
+
+# If grub_reboot is enabled, Express will reboot the host(s) and wait for their return
+- name: Reboot to enable IOMMU
+  reboot:
+    msg: "Platform9 - Host is being rebooted!"
+    reboot_timeout: "{{ reboot_wait }}"
+    post_reboot_delay: 30
+    test_command: uname
+  when:
+    - iommus.examined < 1
+    - grub_reboot
+  register: reboot_results
+
+- debug:
+    msg: "Reboot results: {{ reboot_results | default('N/A') }}"
+
+- set_fact:
+    rebooted: "{{ reboot_results.rebooted | default('false') }}"
+
+# Recheck IOMMUs
+- name: Register IOMMU DMARs (post-reboot)
+  find: paths=/sys/class/iommu file_type=directory patterns="*"
+  register: iommu_recheck
+
+# If IOMMU is not enabled post-reboot, bail out.
+- name: Fail hard if IOMMU is not enabled
   fail:
     msg:
       - "IOMMU is not currently enabled in the kernel but has been configured. Please reboot the host and rerun Express."
-  when: iommus.examined < 1
+  when:
+    - iommu_recheck.examined < 1
 
 # Warn if NIC driver is not supported
 - name: Determine driver bound to NICs
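The next patch re-runs **pf9-auth** because the cached Keystone token is written to /tmp and may not survive the reboot added above. Whether a given host clears /tmp at boot can be spot-checked as follows; mount layout and tmpfiles policy vary by distribution, so treat this as illustrative:

```
# If /tmp is a tmpfs mount, files such as /tmp/keystone-token.txt are lost on reboot
findmnt -T /tmp

# systemd tmpfiles policy may also purge /tmp at boot on some distributions
grep -rs '/tmp' /usr/lib/tmpfiles.d/ /etc/tmpfiles.d/
```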
From 8bac59beccf01785c11a15582a1739c4b948453f Mon Sep 17 00:00:00 2001
From: James Denton
Date: Wed, 21 Aug 2019 16:57:32 +0000
Subject: [PATCH 13/15] re-auth due to losing /tmp post-reboot

---
 roles/pre-flight-checks-openstack/tasks/main.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/roles/pre-flight-checks-openstack/tasks/main.yml b/roles/pre-flight-checks-openstack/tasks/main.yml
index f02d851c..a229126a 100644
--- a/roles/pre-flight-checks-openstack/tasks/main.yml
+++ b/roles/pre-flight-checks-openstack/tasks/main.yml
@@ -33,6 +33,10 @@
 ## Verify DVR is on, step 1 in the UI is complete, import vars, and test credentials
 ###########################################################################################
 
+- name: Re-obtain token (post-reboot)
+  import_role:
+    name: pf9-auth
+
 - name: read api token
   shell: "cat /tmp/keystone-token.txt"
   register: api_token

From fb9a2aff43ec77fda206741e3e07171142e84a21 Mon Sep 17 00:00:00 2001
From: James Denton
Date: Wed, 21 Aug 2019 23:23:02 +0000
Subject: [PATCH 14/15] Add defaults

---
 roles/pre-flight-checks-openstack/defaults/main.yml | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 roles/pre-flight-checks-openstack/defaults/main.yml

diff --git a/roles/pre-flight-checks-openstack/defaults/main.yml b/roles/pre-flight-checks-openstack/defaults/main.yml
new file mode 100644
index 00000000..2dd11314
--- /dev/null
+++ b/roles/pre-flight-checks-openstack/defaults/main.yml
@@ -0,0 +1,3 @@
+---
+grub_reboot: true
+reboot_wait: 900
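Because grub_reboot and reboot_wait are role defaults, they sit at the bottom of Ansible's variable precedence and can be overridden in **group_vars**, **host_vars**, or as extra-vars for a one-off run. The invocation below assumes the playbook is run directly with ansible-playbook against an inventory file; it is an illustration, not the documented pf9-express workflow:

```
# Extra-vars take precedence over role defaults, group_vars, and host_vars
ansible-playbook -i inventory pf9-express.yml -l hypervisors -e grub_reboot=false -e reboot_wait=1200
```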
From 37e308206c491a96f00bf23604a0956c2586c7f9 Mon Sep 17 00:00:00 2001
From: James Denton
Date: Wed, 21 Aug 2019 23:29:09 +0000
Subject: [PATCH 15/15] Add VF Manager Jinja

---
 .../templates/pf9-sriov-vf-manager.service.j2 | 15 +++++
 .../templates/pf9-virtual-functions.sh.j2     | 61 +++++++++++++++++++
 2 files changed, 76 insertions(+)
 create mode 100644 roles/neutron-sriov/templates/pf9-sriov-vf-manager.service.j2
 create mode 100644 roles/neutron-sriov/templates/pf9-virtual-functions.sh.j2

diff --git a/roles/neutron-sriov/templates/pf9-sriov-vf-manager.service.j2 b/roles/neutron-sriov/templates/pf9-sriov-vf-manager.service.j2
new file mode 100644
index 00000000..4f5e9e71
--- /dev/null
+++ b/roles/neutron-sriov/templates/pf9-sriov-vf-manager.service.j2
@@ -0,0 +1,15 @@
+[Unit]
+Description=Platform9 SR-IOV Virtual Function Manager
+Wants=network-online.target
+After=network-online.target
+
+[Service]
+Type=oneshot
+RemainAfterExit=yes
+ExecStart=/opt/pf9/pf9-virtual-functions.sh start
+ExecStop=/opt/pf9/pf9-virtual-functions.sh stop
+ExecReload=/opt/pf9/pf9-virtual-functions.sh reload
+StandardOutput=journal
+
+[Install]
+WantedBy=multi-user.target
diff --git a/roles/neutron-sriov/templates/pf9-virtual-functions.sh.j2 b/roles/neutron-sriov/templates/pf9-virtual-functions.sh.j2
new file mode 100644
index 00000000..1bafa27f
--- /dev/null
+++ b/roles/neutron-sriov/templates/pf9-virtual-functions.sh.j2
@@ -0,0 +1,61 @@
+#!/bin/bash
+# Copyright 2019 Platform9 Systems Inc.
+# All Rights Reserved.
+
+##
+# This script implements SR-IOV virtual functions based on variables defined
+# within Platform9 Express.
+#
+# DO NOT EDIT THIS FILE MANUALLY!
+##
+
+# VF Array
+declare -A vfs
+
+{% for vf in sriov_numvfs %}
+vfs[{{ vf.split(':')[0] }}]={{ vf.split(':')[1] }}
+{% endfor %}
+
+function get_vf_count {
+    vf_count=$(</sys/class/net/$1/device/sriov_numvfs)
+}
+
+function unset_vf {
+    echo 0 > /sys/class/net/$1/device/sriov_numvfs
+}
+
+function set_vf {
+    echo $2 > /sys/class/net/$1/device/sriov_numvfs
+}
+
+function start {
+    for vf in "${!vfs[@]}"
+    do
+        echo "Enabling ${vfs[$vf]} virtual functions on $vf."
+        set_vf $vf ${vfs[$vf]}
+    done
+}
+
+function stop {
+    for vf in "${!vfs[@]}"
+    do
+        echo "Removing virtual functions from $vf."
+        unset_vf $vf
+    done
+}
+
+function reload {
+    for vf in "${!vfs[@]}"
+    do
+        get_vf_count $vf
+        if [ "$vf_count" -ne "${vfs[$vf]}" ]; then
+            echo "Virtual function count has changed. Resetting virtual functions on $vf. Affected VMs must be power cycled to restore connectivity!"
+            unset_vf $vf
+            set_vf $vf ${vfs[$vf]}
+        else
+            echo "Virtual function count on $vf has not changed. Ignoring service reload."
+        fi
+    done
+}
+
+"$@"
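Assuming the **neutron-sriov** role renders the two templates above to /opt/pf9/pf9-virtual-functions.sh and installs the unit as pf9-sriov-vf-manager.service (the role tasks themselves are not part of this excerpt), the service would be managed roughly as follows:

```
# Create the configured VFs now and persist them across reboots
systemctl daemon-reload
systemctl enable --now pf9-sriov-vf-manager.service

# After sriov_numvfs changes and the script has been re-templated, reload only resets
# interfaces whose current VF count differs from the configured value
systemctl reload pf9-sriov-vf-manager.service

# The script can also be invoked directly; the first argument names the function to run
/opt/pf9/pf9-virtual-functions.sh start
```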