Skip to content

Commit

Permalink
Add parallel version of MiniKube/local compatible deployment for expe…
Browse files Browse the repository at this point in the history
…rimenting.
  • Loading branch information
JMGaljaard committed Sep 25, 2023
1 parent fc7fe32 commit b29d79a
Show file tree
Hide file tree
Showing 6 changed files with 269 additions and 0 deletions.
57 changes: 57 additions & 0 deletions terraform/terraform-dependencies-local/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Install the Volcano gang-scheduler Helm chart using the chart's default values.
# Release name, chart, version and namespace are read from
# var.vulcano_scheduler_information; the chart repository URL is read from
# var.vulcano_scheduler_repo_url.
resource "helm_release" "volcano_scheduler" {
  repository = var.vulcano_scheduler_repo_url
  chart      = var.vulcano_scheduler_information.chart_name
  version    = var.vulcano_scheduler_information.version
  name       = var.vulcano_scheduler_information.release_name

  # Ask Helm to create the target namespace as part of the release.
  create_namespace = true
  namespace        = var.vulcano_scheduler_information.namespace
}

# Build a patched overlay of the upstream standalone training-operator
# manifests, pinned to the git ref given by var.kubeflow_version. The patch
# adds gang-scheduling support to the operator.
data "kustomization_overlay" "training_operator" {
  # Upstream base pulled from the kubeflow/manifests repository.
  resources = ["github.com/kubeflow/manifests.git/apps/training-operator/upstream/overlays/standalone?ref=${var.kubeflow_version}"]

  # Volcano patch, targeted at the training-operator Deployment in the
  # kubeflow namespace only.
  patches {
    target {
      name      = "training-operator"
      namespace = "kubeflow"
      kind      = "Deployment"
    }
    path = "patches/training-operator-patch.yaml"
  }
}

# Apply every manifest produced by the training-operator overlay as its own
# Terraform resource (one instance per overlay id).
resource "kustomization_resource" "training_operator" {
  for_each = data.kustomization_overlay.training_operator.ids
  manifest = data.kustomization_overlay.training_operator.manifests[each.value]

  # The patched training operator is configured to use the Volcano scheduler,
  # so the Volcano Helm release has to be installed before these manifests.
  depends_on = [helm_release.volcano_scheduler]
}

# Install the NFS provisioner Helm chart. Chart values are rendered from
# values.nfs.yaml.tpl in this module, filled in from
# var.nfs_provider_information.
resource "helm_release" "nfs_client_provisioner" {
  repository = var.nfs_provisioner_repo_url
  chart      = var.nfs_provider_information.chart_name
  name       = var.nfs_provider_information.release_name

  create_namespace = true
  namespace        = var.nfs_provider_information.namespace

  values = [
    templatefile(
      "${path.module}/values.nfs.yaml.tpl",
      {
        image_repository = var.nfs_provider_information.image_repository
        image_tag        = var.nfs_provider_information.image_tag
        pull_policy      = var.nfs_provider_information.pull_policy
        nfs_size         = var.nfs_provider_information.storage_size
        # NOTE(review): nfs_server_path is passed to the template but does not
        # appear to be referenced in values.nfs.yaml.tpl - confirm whether it
        # is still needed.
        nfs_server_path = var.nfs_provider_information.server_path
      }
    )
  ]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Patch for the Deployment named training-operator: sets the command of the
# training-operator container to /manager with the
# --gang-scheduler-name=volcano flag.
apiVersion: apps/v1
kind: Deployment
metadata:
name: training-operator
spec:
template:
spec:
containers:
- name: training-operator
command:
- /manager
- --gang-scheduler-name=volcano
18 changes: 18 additions & 0 deletions terraform/terraform-dependencies-local/providers.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# The Kustomization (Kustomize) provider needs a kubeconfig file; its location
# is taken from var.kubernetes_config_path.
provider "kustomization" {
  kubeconfig_path = var.kubernetes_config_path
}

# kubectl provider, reading the kubeconfig at var.kubernetes_config_path.
provider "kubectl" {
  config_path = var.kubernetes_config_path
}

# Kubernetes provider, reading the same kubeconfig as the other providers in
# this file.
provider "kubernetes" {
  # Input variables must be referenced with the `var.` prefix; a bare
  # `kubernetes_config_path` is not a valid reference and fails validation.
  config_path = var.kubernetes_config_path
}

# Helm provider; its nested kubernetes settings point at the kubeconfig given
# by var.kubernetes_config_path.
provider "helm" {
  kubernetes { config_path = var.kubernetes_config_path }
}
98 changes: 98 additions & 0 deletions terraform/terraform-dependencies-local/values.nfs.yaml.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# Default values for nfs-provisioner.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
#
# NOTE: this file is a template. The dollar-brace placeholders below are
# substituted when the file is rendered; they are not literal YAML values.

replicaCount: 1

# imagePullSecrets:

# Container image coordinates; all three fields are template placeholders.
image:
repository: ${image_repository}
tag: ${image_tag}
pullPolicy: ${pull_policy}


# For a list of available arguments
# Please see https://github.com/kubernetes-incubator/external-storage/blob/master/nfs/docs/deployment.md#arguments
# No extra arguments are set; the entry below is an example left commented out.
extraArgs: {}
# device-based-fsids: false

# Service settings: a ClusterIP service with a fixed port number for each of
# the port keys listed below. The corresponding *NodePort overrides are left
# commented out, and no external IPs are configured.
service:
type: ClusterIP

nfsPort: 2049
nlockmgrPort: 32803
mountdPort: 20048
rquotadPort: 875
rpcbindPort: 111
statdPort: 662
# nfsNodePort:
# nlockmgrNodePort:
# mountdNodePort:
# rquotadNodePort:
# rpcbindNodePort:
# statdNodePort:

externalIPs: []

# Persistence settings for the provisioner's own backing volume.
persistence:
enabled: true

## Persistent Volume Storage Class
## If defined, storageClassName: <storageClass>
## If set to "-", storageClassName: "", which disables dynamic provisioning
## If undefined (the default) or set to null, no storageClassName spec is
## set, choosing the default provisioner. (gp2 on AWS, standard on
## GKE, AWS & OpenStack)
##
# NOTE(review): confirm that the chart version in use reads the key
# `storageClassName` here; some chart versions may expect `storageClass`
# instead, in which case this value would be ignored - TODO verify.
storageClassName: "standard"
accessMode: ReadWriteOnce
# Requested volume size; template placeholder filled in at render time.
size: ${nfs_size}

## For creating the StorageClass automatically:
# A StorageClass named "nfs" is created; it is not marked as the default class.
storageClass:
create: true

## Set a provisioner name. If unset, a name will be generated.
# provisionerName:

## Set StorageClass as the default StorageClass
## Ignored if storageClass.create is false
defaultClass: false

## Set a StorageClass name
## Ignored if storageClass.create is false
name: nfs

# set to null to prevent expansion
allowVolumeExpansion: true
## StorageClass parameters
parameters: {}

# Mount options for the class; vers=3 presumably selects NFS protocol
# version 3 - confirm against the NFS client documentation.
mountOptions:
- vers=3

## ReclaimPolicy field of the class, which can be either Delete or Retain
reclaimPolicy: Delete

## For RBAC support:
rbac:
create: true

## Ignored if rbac.create is true
##
# rbac.create is true above, so per the comment this value is ignored.
serviceAccountName: default

# No resource requests/limits are set; the block below is a commented-out example.
resources: {}
# limits:
# cpu: 100m
# memory: 128Mi
# requests:
# cpu: 100m
# memory: 128Mi

# No scheduling constraints are configured (all three are empty).
nodeSelector: {}

tolerations: []

affinity: {}
61 changes: 61 additions & 0 deletions terraform/terraform-dependencies-local/variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Location of the kubeconfig file shared by all providers in this module.
variable "kubernetes_config_path" {
  # Explicit type, consistent with the other string inputs of this module.
  type        = string
  description = "Path of Kubernetes configuration file (change for non-default kubectl setup)"
  default     = "~/.kube/config"
}

# Git ref of the kubeflow/manifests repository from which the
# training-operator overlay is fetched.
variable "kubeflow_version" {
  description = "Kubeflow (PyTorch) Training Operator to install."
  default     = "v1.5.0"
  type        = string
}

# Settings for the NFS provisioner Helm release and for the values template
# rendered for it.
variable "nfs_provider_information" {
  description = "Helm release settings (release name, chart, namespace) plus image and storage parameters for the NFS server provisioner."

  type = object({
    # Helm release settings.
    release_name = string
    chart_name   = string
    namespace    = string

    # Values handed to the chart's values template.
    server_path      = string
    image_repository = string
    image_tag        = string
    pull_policy      = string
    storage_size     = string
  })

  default = {
    release_name = "nfs-server"
    chart_name   = "nfs-server-provisioner"
    namespace    = "test"

    server_path      = "/mnt/kubernetes"
    image_repository = "k8s.gcr.io/sig-storage/nfs-provisioner"
    image_tag        = "v3.0.0"
    pull_policy      = "IfNotPresent"
    storage_size     = "50Gi"
  }
}

# Helm chart repository from which the NFS provisioner chart is installed.
variable "nfs_provisioner_repo_url" {
  type        = string
  default     = "https://charts.helm.sh/stable"
  description = "Repository URL to locate the utilized helm charts"
}

# Settings for the Volcano scheduler Helm release.
# (The variable name keeps its existing "vulcano" spelling because other
# files reference it.)
variable "vulcano_scheduler_information" {
  description = "Helm release settings (release name, chart, namespace, chart version) for the Volcano scheduler."

  type = object({
    release_name = string
    chart_name   = string
    namespace    = string
    version      = string
  })

  default = {
    release_name = "volcano"
    chart_name   = "volcano"
    namespace    = "volcano-system"
    version      = "v1.8.0"
  }
}

# Helm chart repository from which the Volcano scheduler chart is installed.
variable "vulcano_scheduler_repo_url" {
  type        = string
  default     = "https://volcano-sh.github.io/helm-charts"
  description = "Repository URL to locate the utilized helm charts for Vulcano Scheduler Plugin."
}

23 changes: 23 additions & 0 deletions terraform/terraform-dependencies-local/versions.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Terraform CLI and provider requirements for this module.
terraform {
  # Any 1.x CLI release from 1.1 onwards.
  required_version = "~> 1.1"

  required_providers {
    kustomization = {
      source  = "kbst/kustomization"
      version = ">= 0.7.0"
    }

    kubectl = {
      source  = "gavinbunney/kubectl"
      version = ">= 1.13.1"
    }

    kubernetes = {
      source  = "hashicorp/kubernetes"
      version = ">= 1.13.1"
    }

    helm = {
      source = "hashicorp/helm"
      # Stay on the 2.x series: the provider configuration in this module uses
      # the nested `kubernetes { ... }` block syntax, which the 3.x series
      # replaced with an attribute. Without a constraint, a fresh
      # `terraform init` would select the newest release.
      version = ">= 2.0.0, < 3.0.0"
    }
  }
}

0 comments on commit b29d79a

Please sign in to comment.