From 8daf9b34ace23600b5a2158a7bcc34b271fb4d28 Mon Sep 17 00:00:00 2001
From: Thanh Nguyen
Date: Fri, 31 May 2024 10:10:20 -0500
Subject: [PATCH] multi pods spark

---
 kube/services/spark/spark-deploy.yaml  | 325 +++++++++++++++++++++----
 kube/services/spark/spark-service.yaml | 113 ++++++---
 2 files changed, 348 insertions(+), 90 deletions(-)

diff --git a/kube/services/spark/spark-deploy.yaml b/kube/services/spark/spark-deploy.yaml
index b280cecf07..e34ead38bd 100644
--- a/kube/services/spark/spark-deploy.yaml
+++ b/kube/services/spark/spark-deploy.yaml
@@ -1,10 +1,106 @@
+---
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: namenode-deployment
+spec:
+  serviceName: "namenode"
+  replicas: 1
+  selector:
+    matchLabels:
+      app: namenode
+  template:
+    metadata:
+      labels:
+        app: namenode
+    spec:
+      containers:
+        - name: namenode
+          image: "quay.io/cdis/namenode:3.3.0"
+          ports:
+            - containerPort: 9000
+          envFrom:
+            - configMapRef:
+                name: hadoop-spark-config
+          volumeMounts:
+            - name: namenode-pv-storage
+              mountPath: /hadoop/dfs/name
+          command: ["/bin/bash" ]
+          args:
+            - "-c"
+            - |
+              ssh server sudo /etc/init.d/ssh start
+              update-ca-certificates
+              hdfs namenode -format
+              hdfs dfsadmin -safemode leave
+              hdfs dfs -mkdir /result
+              hdfs dfs -mkdir /jars
+              hdfs dfs -mkdir /archive
+  volumeClaimTemplates:
+    - metadata:
+        name: namenode-pv-storage
+      spec:
+        accessModes: ["ReadWriteOnce"]
+        storageClassName: "standard"
+        resources:
+          requests:
+            storage: 10Gi
+
+---
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: datanode-deployment
+spec:
+  serviceName: "datanode"
+  replicas: 1
+  selector:
+    matchLabels:
+      app: datanode
+  template:
+    metadata:
+      labels:
+        app: datanode
+    spec:
+      containers:
+        - name: datanode
+          image: "quay.io/cdis/datanode:3.3.0"
+          ports:
+            - containerPort: 50010
+          envFrom:
+            - configMapRef:
+                name: hadoop-spark-config
+          volumeMounts:
+            - name: datanode-pv-storage
+              mountPath: /hadoop/dfs/data
+          command: ["/bin/bash" ]
+          args:
+            - "-c"
+            - |
+              ssh server sudo /etc/init.d/ssh start
+              update-ca-certificates
+              hdfs namenode -format
+              hdfs dfsadmin -safemode leave
+              hdfs dfs -mkdir /result
+              hdfs dfs -mkdir /jars
+              hdfs dfs -mkdir /archive
+  volumeClaimTemplates:
+    - metadata:
+        name: datanode-pv-storage
+      spec:
+        accessModes: ["ReadWriteOnce"]
+        storageClassName: "standard"
+        resources:
+          requests:
+            storage: 100Gi
+
+---
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: spark-deployment
-  annotations:
-    gen3.io/network-ingress: "tube"
+  name: resourcemanager-deployment
 spec:
+  replicas: 1
   selector:
     # Only select pods based on the 'app' label
     matchLabels:
@@ -51,54 +147,177 @@ spec:
                 values:
                 - ONDEMAND
       automountServiceAccountToken: false
-      volumes:
       containers:
-        - name: gen3-spark
-          GEN3_SPARK_IMAGE
-          ports:
-            - containerPort: 22
-            - containerPort: 9000
-            - containerPort: 8030
-            - containerPort: 8031
-            - containerPort: 8032
-            - containerPort: 7077
-          livenessProbe:
-            tcpSocket:
-              port: 9000
-            initialDelaySeconds: 10
-            periodSeconds: 30
-          env:
-            - name: DICTIONARY_URL
-              valueFrom:
-                configMapKeyRef:
-                  name: manifest-global
-                  key: dictionary_url
-            - name: HADOOP_URL
-              value: hdfs://0.0.0.0:9000
-            - name: HADOOP_HOST
-              value: 0.0.0.0
-          volumeMounts:
-          imagePullPolicy: Always
-          resources:
-            requests:
-              cpu: 3
-              memory: 4Gi
-          command: ["/bin/bash" ]
-          args:
-            - "-c"
-            - |
-              # get /usr/local/share/ca-certificates/cdis-ca.crt into system bundle
-              ssh server sudo /etc/init.d/ssh start
-              update-ca-certificates
-              python run_config.py
-              hdfs namenode -format
-              hdfs --daemon start namenode
-              hdfs --daemon start datanode
-              yarn --daemon start resourcemanager
-              yarn --daemon start nodemanager
-              hdfs dfsadmin -safemode leave
-              hdfs dfs -mkdir /result
-              hdfs dfs -mkdir /jars
-              hdfs dfs -mkdir /archive
-              /spark/sbin/start-all.sh
-              while true; do sleep 5; done
+        - name: resourcemanager
+          image: "quay.io/cdis/resourcemanager:3.3.0"
+          ports:
+            - containerPort: 8088
+          envFrom:
+            - configMapRef:
+                name: hadoop-spark-config
+
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: nodemanager-deployment
+spec:
+  replicas: 2 # Adjust based on your needs
+  selector:
+    matchLabels:
+      app: nodemanager
+  template:
+    metadata:
+      labels:
+        app: nodemanager
+    spec:
+      affinity:
+        podAntiAffinity:
+          preferredDuringSchedulingIgnoredDuringExecution:
+            - weight: 25
+              podAffinityTerm:
+                labelSelector:
+                  matchExpressions:
+                    - key: app
+                      operator: In
+                      values:
+                        - spark
+                topologyKey: "kubernetes.io/hostname"
+        nodeAffinity:
+          preferredDuringSchedulingIgnoredDuringExecution:
+            - weight: 100
+              preference:
+                matchExpressions:
+                  - key: karpenter.sh/capacity-type
+                    operator: In
+                    values:
+                      - on-demand
+            - weight: 99
+              preference:
+                matchExpressions:
+                  - key: eks.amazonaws.com/capacityType
+                    operator: In
+                    values:
+                      - ONDEMAND
+      automountServiceAccountToken: false
+      containers:
+        - name: nodemanager
+          image: "quay.io/cdis/nodemanager:3.3.0"
+          ports:
+            - containerPort: 8042
+          envFrom:
+            - configMapRef:
+                name: hadoop-spark-config
+
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: spark-master-deployment
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: spark-master
+  template:
+    metadata:
+      labels:
+        app: spark-master
+    spec:
+      affinity:
+        podAntiAffinity:
+          preferredDuringSchedulingIgnoredDuringExecution:
+            - weight: 25
+              podAffinityTerm:
+                labelSelector:
+                  matchExpressions:
+                    - key: app
+                      operator: In
+                      values:
+                        - spark
+                topologyKey: "kubernetes.io/hostname"
+        nodeAffinity:
+          preferredDuringSchedulingIgnoredDuringExecution:
+            - weight: 100
+              preference:
+                matchExpressions:
+                  - key: karpenter.sh/capacity-type
+                    operator: In
+                    values:
+                      - on-demand
+            - weight: 99
+              preference:
+                matchExpressions:
+                  - key: eks.amazonaws.com/capacityType
+                    operator: In
+                    values:
+                      - ONDEMAND
+      automountServiceAccountToken: false
+      containers:
+        - name: spark-master
+          image: "quay.io/cdis/spark-master:3.3.0-hadoop3.3"
+          ports:
+            - containerPort: 7077
+            - containerPort: 8080
+          envFrom:
+            - configMapRef:
+                name: hadoop-spark-config
+
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: spark-worker-deployment
+spec:
+  replicas: 2 # Adjust based on your needs
+  selector:
+    matchLabels:
+      app: spark-worker
+  template:
+    metadata:
+      labels:
+        app: spark-worker
+    spec:
+      affinity:
+        podAntiAffinity:
+          preferredDuringSchedulingIgnoredDuringExecution:
+            - weight: 25
+              podAffinityTerm:
+                labelSelector:
+                  matchExpressions:
+                    - key: app
+                      operator: In
+                      values:
+                        - spark
+                topologyKey: "kubernetes.io/hostname"
+        nodeAffinity:
+          preferredDuringSchedulingIgnoredDuringExecution:
+            - weight: 100
+              preference:
+                matchExpressions:
+                  - key: karpenter.sh/capacity-type
+                    operator: In
+                    values:
+                      - on-demand
+            - weight: 99
+              preference:
+                matchExpressions:
+                  - key: eks.amazonaws.com/capacityType
+                    operator: In
+                    values:
+                      - ONDEMAND
+      automountServiceAccountToken: false
+      containers:
+        - name: spark-worker
+          image: quay.io/cdis/spark-worker:3.3.0-hadoop3.3
+          ports:
+            - containerPort: 8081
+          envFrom:
+            - configMapRef:
+                name: hadoop-spark-config
+          command: ["/bin/bash" ]
+          args:
+            - "-c"
+            - |
+              ssh server sudo /etc/init.d/ssh start
+              update-ca-certificates
diff --git a/kube/services/spark/spark-service.yaml b/kube/services/spark/spark-service.yaml
index 4279be50f3..7e2c5797ad 100644
--- a/kube/services/spark/spark-service.yaml
+++ b/kube/services/spark/spark-service.yaml
@@ -1,44 +1,83 @@
+apiVersion: v1
 kind: Service
+metadata:
+  name: namenode
+spec:
+  ports:
+    - port: 9000
+      targetPort: 9000
+  clusterIP: None
+  selector:
+    app: namenode
+
+---
+
 apiVersion: v1
+kind: Service
 metadata:
-  name: spark-service
+  name: datanode
 spec:
+  ports:
+    - port: 50010
+      targetPort: 50010
+  clusterIP: None
   selector:
-    app: spark
+    app: datanode
+
+---
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: resourcemanager
+spec:
   ports:
-    - protocol: TCP
-      port: 80
-      targetPort: 80
-      name: http
-      nodePort: null
-    - protocol: TCP
-      port: 9000
-      targetPort: 9000
-      name: hdfs
-      nodePort: null
-    - protocol: TCP
-      port: 8030
-      targetPort: 8030
-      name: yarn-scheduler
-      nodePort: null
-    - protocol: TCP
-      port: 8031
-      targetPort: 8031
-      name: yarn-resource-tracker
-      nodePort: null
-    - protocol: TCP
-      port: 8032
-      targetPort: 8032
-      name: yarn-address-manager
-      nodePort: null
-    - protocol: TCP
-      port: 22
-      targetPort: 22
-      name: ssl
-      nodePort: null
-    - protocol: TCP
-      port: 7077
+    - port: 8088
+      targetPort: 8088
+  clusterIP: None
+  selector:
+    app: resourcemanager
+
+---
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: nodemanager
+spec:
+  ports:
+    - port: 8042
+      targetPort: 8042
+  clusterIP: None
+  selector:
+    app: nodemanager
+
+---
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: spark-master
+spec:
+  ports:
+    - port: 7077
       targetPort: 7077
-      name: spark-master
-      nodePort: null
-  type: ClusterIP
+    - port: 8080
+      targetPort: 8080
+  clusterIP: None
+  selector:
+    app: spark-master
+
+---
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: spark-worker
+spec:
+  ports:
+    - port: 8081
+      targetPort: 8081
+  clusterIP: None
+  selector:
+    app: spark-worker
\ No newline at end of file
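
Note (outside the patch itself): every workload above pulls its environment from a ConfigMap named hadoop-spark-config via envFrom, but that ConfigMap is not created by this patch. A minimal sketch of what it might contain, assuming the quay.io/cdis images consume the same HADOOP_URL/HADOOP_HOST variables the removed gen3-spark container exported; the exact keys these images expect are an assumption, not confirmed by the patch:

apiVersion: v1
kind: ConfigMap
metadata:
  name: hadoop-spark-config
data:
  # Assumed keys: the old single-pod deployment set HADOOP_URL and HADOOP_HOST as env vars;
  # with the components split into separate pods they would point at the namenode Service
  # rather than 0.0.0.0.
  HADOOP_URL: hdfs://namenode:9000
  HADOOP_HOST: namenode

Because the new Services are headless (clusterIP: None), the names namenode, datanode, resourcemanager, nodemanager, spark-master, and spark-worker resolve directly to pod IPs inside the namespace, which is what a value like hdfs://namenode:9000 relies on. Something like kubectl apply -f kube/services/spark/ would then create the Services and workloads alongside a ConfigMap defined this way.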