Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lustreinfiniband #289

Open
wants to merge 43 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 41 commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
09642fe
- lustre-ipoib - This is a created implementation of Lustre using IP …
May 11, 2020
92ac2c0
- lustre_ipoib_nvmedrives - This is a created implementation of Lustr…
May 22, 2020
cf36ae9
- lustre-ipoib - This is a created implementation of Lustre using ip …
Jun 4, 2020
35d4839
Update config.json
chadnar2 Jun 27, 2020
fe4684e
changes to config.json rebootlustre.sh to remove extra unnecessary lines
Jun 29, 2020
39b4fa6
changes to lustre_rdma_nvmedrives config.json and rebootlustre.sh to …
Jun 29, 2020
541a254
changes to lustre_rdma_nvmedrives config.json and rebootlustre.sh to …
Jun 29, 2020
47d2af7
Merge branch 'lustreinfiniband' of github.com:Azure/azurehpc into lus…
Jun 29, 2020
81ad86a
Modification to config.json and rebootlustre.sh to remove unnecessary…
Jun 29, 2020
f37b167
Modifications to config.json files to clean up location variable. Add…
Jun 30, 2020
01fc4b6
Update readme.md
chadnar2 Jun 30, 2020
db68123
Update readme.md
chadnar2 Jun 30, 2020
b4e7866
Delete removeMOFED.sh
chadnar2 Jun 30, 2020
bcbcfa6
Delete oldreboot
chadnar2 Jun 30, 2020
b5fc4aa
Delete deploy_config.json
chadnar2 Jul 1, 2020
5a1079d
Delete hpcadmin_id_rsa
chadnar2 Jul 1, 2020
c9830a9
Delete hpcadmin_id_rsa.pub
chadnar2 Jul 1, 2020
a995af9
modifications of lustre_rdma_avs
Jul 1, 2020
292893f
Merge branch 'lustreinfiniband' of github.com:Azure/azurehpc into lus…
Jul 1, 2020
616553c
Delete waitforreboot.sh
chadnar2 Jul 1, 2020
f2c31c1
Delete waitforreboot.sh
chadnar2 Jul 1, 2020
9aba5d2
Update config.json
chadnar2 Jul 1, 2020
fd81754
change of lustre_rdma_nvmedrives config.json to use existing wait.sh
Jul 1, 2020
21080cb
change of lustre_rdma_nvmedrives config.json to use existing wait.sh
Jul 1, 2020
3ed6c06
Moving all lustre rdma/ipoib work to experimental folder for further …
Jul 2, 2020
f1bc9a2
Remove duplicated directory for lustre_ipoib
Jul 2, 2020
9aa90d4
Remove lustre_rdma_nvmedrives from examples
Jul 2, 2020
103f3f7
cleanup work for lustre_rdma_avs from examples folder
Jul 4, 2020
5f91e16
remove temp files
xpillons Jul 7, 2020
a49ed49
error fixed in config.json for lustre_ipoib_nvmedrives
Jul 7, 2020
b228270
Merge branch 'lustreinfiniband' of github.com:Azure/azurehpc into lus…
Jul 7, 2020
bdf2054
change in config.json for lustre_ipoib to fix PBS
Jul 7, 2020
7fe7aa9
The headnode cannot communicate with Lustre nodes over Infiband anywa…
Jul 9, 2020
51f305c
added pipeline
xpillons Jul 10, 2020
80922da
Merge branch 'master' into lustreinfiniband
xpillons Jul 10, 2020
ab1ad3c
fix image
xpillons Jul 10, 2020
0b03083
use plain CentOS Image
xpillons Jul 10, 2020
0f3279b
use the lfsrepo from the shared scripts dir
xpillons Jul 10, 2020
f0a8c9b
Need to have a diffrent repo than standard for the Lustre kernel
Jul 10, 2020
5cb4cc0
Need to have a diffrent repo than standard for the Lustre kernel
Jul 10, 2020
a7473c9
Update config.json
chadnar2 Jul 10, 2020
d685b87
Changed headnode in lustre_rdma_avs to Standard_D8s_v3
Jul 14, 2020
2ecfe62
Merge branch 'lustreinfiniband' of github.com:Azure/azurehpc into lus…
Jul 14, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
221 changes: 221 additions & 0 deletions experimental/lustre_ipoib/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
{
"location": "variables.location",
"resource_group": "variables.resource_group",
"install_from": "headnode",
"admin_user": "hpcadmin",
"vnet": {
"name": "hpcvnet",
"address_prefix": "10.2.0.0/20",
"subnets": {
"compute": "10.2.0.0/22",
"storage": "10.2.4.0/24"
}
},
"variables": {
"location": "<NOT-SET>",
"resource_group": "<NOT-SET>",
"image": "OpenLogic:CentOS-HPC:7.6:latest",
"lustreimage": "OpenLogic:CentOS-HPC:7.6:latest",
"drivenum": 4,
"ossnum": 4,
"low_priority": true,
"storage_account": "<NOT-SET>",
"storage_key": "sakey.{{variables.storage_account}}",
"storage_container": "<NOT-SET>",
"log_analytics_lfs_name": "<NOT-SET>",
"la_resourcegroup": "<NOT-SET>",
"la_name": "<NOT-SET>",
"log_analytics_workspace": "laworkspace.{{variables.la_resourcegroup}}.{{variables.la_name}}",
"log_analytics_key": "lakey.{{variables.la_resourcegroup}}.{{variables.la_name}}",
"lustre_version": "2.10",
"lustre_mount": "/lustre"
},
"resources": {
"headnode": {
"type": "vm",
"vm_type": "Standard_HC44rs",
"accelerated_networking": false,
"public_ip": true,
"image": "variables.image",
"subnet": "compute",
"tags": [
"disable-selinux",
"cndefault",
"localuser",
"pbsserver",
"loginnode",
"nfsserver"
]
},
"lustre": {
"type": "vmss",
"vm_type": "Standard_HC44rs",
"instances": "9",
"accelerated_networking": false,
"image": "variables.lustreimage",
"subnet": "storage",
"tags": [
"cndefault",
"lustre[0:5]",
"osses[1:5]",
"lfsrepo",
"lfsclient[5:9]",
"localuser",
"pbsclient[5:9]",
"nfsclient",
"disable-selinux",
"lfsloganalytics"
]
}
},
"install": [
{
"script": "disable-selinux.sh",
"tag": "disable-selinux",
"sudo": true
},
{
"script": "cndefault.sh",
"tag": "cndefault",
"sudo": true
},
{
"script": "nfsserver.sh",
"tag": "nfsserver",
"sudo": true
},
{
"script": "nfsclient.sh",
"args": [
"$(<hostlists/tags/nfsserver)"
],
"tag": "nfsclient",
"sudo": true
},
{
"script": "localuser.sh",
"args": [
"$(<hostlists/tags/nfsserver)"
],
"tag": "localuser",
"sudo": true
},
{
"type": "local_script",
"script": "installdrives.sh",
"args": [
"variables.resource_group",
"$(<hostlists/tags/lustre)",
"variables.ossnum",
"variables.drivenum"
]
},
{
"script": "lfsrepo.sh",
"tag": "lfsrepo",
"args": [
"variables.lustre_version"
],
"sudo": true
},
{
"script": "lfspkgs.sh",
"tag": "lustre",
"sudo": true
},
{
"script": "create_raid0.sh",
"args": [
"/dev/md10",
"/dev/sd[c-f]"
],
"tag": "osses",
"sudo": true
},
{
"script": "lfsmaster.sh",
"tag": "lustre",
"args": [
"/dev/sdb"
],
"sudo": true
},
{
"script": "lfsoss.sh",
"args": [
"$(head -n1 hostlists/tags/lustre)",
"/dev/md10"
],
"tag": "lustre",
"sudo": true
},
{
"script": "lfshsm.sh",
"args": [
"$(head -n1 hostlists/tags/lustre)",
"variables.storage_account",
"variables.storage_key",
"variables.storage_container",
"variables.lustre_version"
],
"tag": "lustre",
"sudo": true
},
{
"script": "lfsclient.sh",
"args": [
"$(head -n1 hostlists/tags/lustre)",
"variables.lustre_mount"
],
"tag": "lfsclient",
"sudo": true
},
{
"script": "lfsimport.sh",
"args": [
"variables.storage_account",
"variables.storage_key",
"variables.storage_container",
"variables.lustre_mount",
"variables.lustre_version"
],
"tag": "lfsazimport",
"sudo": true
},
{
"script": "lfsloganalytics.sh",
"args": [
"variables.log_analytics_lfs_name",
"variables.log_analytics_workspace",
"variables.log_analytics_key"
],
"tag": "lfsloganalytics",
"sudo": true
},
{
"script": "pbsdownload.sh",
"tag": "loginnode",
"sudo": false
},
{
"script": "pbsserver.sh",
"copy": [
"pbspro_19.1.1.centos7/pbspro-server-19.1.1-0.x86_64.rpm"
],
"tag": "pbsserver",
"sudo": false
},
{
"script": "pbsclient.sh",
"args": [
"$(<hostlists/tags/pbsserver)"
],
"copy": [
"pbspro_19.1.1.centos7/pbspro-execution-19.1.1-0.x86_64.rpm"
],
"tag": "pbsclient",
"sudo": false
}

]
}
37 changes: 37 additions & 0 deletions experimental/lustre_ipoib/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Lustre Infiniband

Visualisation: [config.json](https://azurehpc.azureedge.net/?o=https://raw.githubusercontent.com/Azure/azurehpc/master/experimental/lustre_ipoib/config.json)

This is a deployment of Lustre using IP over InfiniBand (IPoIB) with 4 Lustre Object Storage Servers and 4 Lustre clients. The size of the system may be modified via config.json. A second effort, to run Lustre on Azure with native RDMA, is underway and should be completed shortly. The Object Storage Servers are designed to run a raid0 group using 1TB drives. This value can easily be changed inside installdrives.sh.

Please note that installdrives.sh does take some time to complete due to it having to work with only part of a virtual machine scale set (VMSS).

This deployment will only function using the Python based AzureHPC (not the BASH libexec).

Resources:

* Head node (headnode)
* Compute nodes (compute)
* Lustre
* Management/Meta-data server (lfsmds) - identified by first lustre server
* Object storage servers (lfsoss) - identified by next 4 lustre servers
* Hierarchical storage management nodes (lfshsm)
* Lustre clients exporting with samba (lfssmb) - identified by last 4 lustre servers

> Note: The HC nodes are used for the cluster, although this node type may be easily changed by use of the vm_type variable for lustre inside config.json.

The configuration file requires the following variables to be set:

| Variable | Description |
|-------------------------|----------------------------------------------|
| resource_group | The resource group for the project |
| storage_account | The storage account for HSM |
| storage_key | The storage key for HSM |
| storage_container | The container to use for HSM |
| log_analytics_lfs_name | The lustre filesystem name for Log Analytics |
| la_resourcegroup | The resource group for Log Analytics |
| la_name | The Log Analytics Workspace name |

> Note: you can remove Log Analytics and/or HSM from the config file if not required.

> Note: Key Vault should be used for the keys to keep them out of the config files.
33 changes: 33 additions & 0 deletions experimental/lustre_ipoib/scripts/installdrives.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/bin/bash
# Create managed data disks and attach them to instances of the "lustre"
# virtual machine scale set so each OSS node can build a raid0 group.
#
# Args:
#   $1 - resource group name
#   $2 - space-separated list of lustre VM hostnames (one word per VM)
#   $3 - number of OSS nodes
#   $4 - number of drives per OSS node
groupname=$1
vmlist=$2
ossnum=$3
drivenum=$4

# create the drives first before attaching to the vmss
drivecount=$((drivenum * ossnum))

# fan out one 'az disk create' per drive in the background
for ((num=1; num<=drivecount; num++)); do
    az disk create -g "$groupname" -n "lustredrive$num" --size-gb 1024 &
done

# Wait for every background 'az disk create' to finish. A fixed
# 'sleep 60' raced against slow ARM provisioning; 'wait' blocks until
# all disks actually exist.
wait

# Now use the created drives
index=0
lustrecnt=1

# Extract the VMSS instance ids from the CLI output; the grep/awk/sed
# pipeline is kept as-is (it strips NIC entries and trailing quotes).
idlisttmp=$(az vmss list-instances --resource-group "$groupname" --name lustre | grep providers/Microsoft.Compute/virtualMachineScaleSets/lustre/virtualMachines | awk -F "virtualMachines/" '{print $2}' | sed '/networkInterfaces/d' | sed 's/["].*$//')

# intentional word-splitting: one instance id per array element
idlist=($idlisttmp)

# intentional word-splitting of $vmlist: one hostname per iteration
for vmname in ${vmlist[@]}; do
    ((index++))
    # NOTE(review): index is incremented before the test, so this guard
    # is always true; the net effect is that attachment starts at
    # idlist[1] (idlist[0] — presumably the MDS — gets no data disks).
    # TODO confirm that skipping the first instance is intended.
    if ((index > 0)); then
        for ((diskid=1; diskid<=drivenum; diskid++)); do
            az vmss disk attach --vmss-name lustre --disk "lustredrive${lustrecnt}" --sku Premium_LRS --instance-id "${idlist[$index]}" --resource-group "$groupname"
            ((lustrecnt++))
        done
    fi
done

44 changes: 44 additions & 0 deletions experimental/lustre_ipoib/scripts/lfsclient.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/bin/bash
# Install the Lustre client packages on this node and mount the Lustre
# filesystem served by the master, reached via its IPoIB (ib0) address.
#
# arg: $1 = lfsserver (master hostname)
# arg: $2 = mount point (default: /lustre)
# arg: $3 = lustre version (optional; default empty preserves the
#           previous behavior where $lustre_version was unset)
master=$1
lfs_mount=${2:-/lustre}
lustre_version=${3:-}

# -p: do not fail if the directory already exists
mkdir -p ~/.ssh

# reuse the shared hpcuser key material so we can ssh to the master
cp -r /share/home/hpcuser/.ssh ~/

# Ask the master for its ib0 address; the unquoted echo below is
# intentional — it flattens the multi-line 'ip address show' output so
# awk can split on the first 'inet' token.
capture=$(ssh "hpcuser@$master" "sudo ip address show dev ib0")
masterib=$(echo $capture | awk -F 'inet' '{print $2}' | cut -d / -f 1)

if rpm -q lustre; then

    # if the server packages are installed only the client kmod is needed
    # for 2.10 and nothing extra is needed for 2.12
    if [ "$lustre_version" = "2.10" ]; then

        if ! rpm -q kmod-lustre-client; then
            yum -y install kmod-lustre-client
        fi

    fi

else

    # install the client RPMs if not already installed
    if ! rpm -q lustre-client kmod-lustre-client; then
        yum -y install lustre-client kmod-lustre-client
    fi
    weak-modules --add-kernel "$(uname -r)"

fi

# NOTE(review): this writes an EMPTY modprobe config — presumably to
# clear any stale lnet options so the default tcp network is used over
# IPoIB. TODO confirm no "options lnet networks=..." line is needed here.
cat >/etc/modprobe.d/lustre.conf<<EOF
EOF
modprobe lnet
modprobe lustre

mkdir -p "$lfs_mount"
echo "${masterib}@tcp:/LustreFS $lfs_mount lustre flock,defaults,_netdev 0 0" >> /etc/fstab
mount -a
# world-writable so any compute user can create files on Lustre
chmod 777 "$lfs_mount"
32 changes: 32 additions & 0 deletions experimental/lustre_ipoib/scripts/lfsmaster.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/bin/bash
# Format and mount the combined Lustre MGS/MDT on the FIRST node of the
# scale set (PSSH_NODENUM == 0) and configure lnet to run over IPoIB.
#
# arg: $1 = device (e.g. L=/dev/sdb Lv2=/dev/nvme0n1)
device=$1

# this will only install MDS on first node in a scaleset
echo "pssh_nodenum is $PSSH_NODENUM"

# reuse the shared hpcuser key material
cp -r /share/home/hpcuser/.ssh ~/

# route lnet over the IPoIB interface (tcp transport on ib0)
cat >/etc/modprobe.d/lustre.conf<<EOF
options lnet networks="tcp(ib0)"
EOF

if [ "$PSSH_NODENUM" = "0" ]; then

    # combined MGS + MDT, ldiskfs backend; --reformat wipes any prior fs
    mkfs.lustre --fsname=LustreFS --mgs --mdt --mountfsoptions="user_xattr,errors=remount-ro" --backfstype=ldiskfs --reformat "$device" --index 0

    # -p: idempotent if the mount point already exists
    mkdir -p /mnt/mgsmds
    echo "$device /mnt/mgsmds lustre noatime,nodiratime,nobarrier 0 2" >> /etc/fstab
    mount -a

    # set up hsm (-P makes the first two settings persistent)
    lctl set_param -P mdt.*-MDT0000.hsm_control=enabled
    lctl set_param -P mdt.*-MDT0000.hsm.default_archive_id=1
    lctl set_param mdt.*-MDT0000.hsm.max_requests=128

    # allow any user and group ids to write
    lctl set_param mdt.*-MDT0000.identity_upcall=NONE

fi
Loading