# job.yaml
---
# Headless Service (clusterIP: None) that selects every pod carrying the
# label cray.nnf.container.workflow1="true". Its name matches the
# `subdomain` set on the Job pod templates in this file, which is what lets
# each pod be resolved as
# <hostname>.blake-container.nnf-system.svc.<cluster-domain>.
apiVersion: v1
kind: Service
metadata:
  name: blake-container
  namespace: nnf-system
spec:
  # "None" makes the Service headless: no virtual IP is allocated.
  clusterIP: None
  selector:
    cray.nnf.container.workflow1: "true"
  # No ports are declared; the block below is kept as a disabled example.
  # ports:
  #   - protocol: TCP
  #     port: 80
  #     targetPort: 9376
---
# Disabled: NodePort Service exposing the blake-worker2 Job pod.
# apiVersion: v1
# kind: Service
# metadata:
#   name: blake-nodeport-worker2
#   namespace: nnf-system
# spec:
#   type: NodePort
#   selector:
#     job-name: blake-worker2
#   ports:
#     # By default and for convenience, the `targetPort` is set to the same
#     # value as the `port` field.
#     - port: 9376
#       # By default and for convenience, the Kubernetes control plane will
#       # allocate a port from a range (default: 30000-32767).
#       # nodePort: 30007
# ---
# Job that runs a single python:latest container serving HTTP on port 9376
# (python3 -m http.server 9376), pinned to the node whose hostname label is
# "kind-worker2".
apiVersion: batch/v1
kind: Job
metadata:
  name: blake-worker2
  namespace: nnf-system
spec:
  # Optional Job-level knobs, currently disabled:
  # backoffLimit: 0
  # activeDeadlineSeconds: 0
  # ttlSecondsAfterFinished: 10
  # completions: 0
  # parallelism: 1
  template:
    metadata:
      labels:
        # Matched by the selector of the blake-container Service.
        cray.nnf.container.workflow1: "true"
    spec:
      # restartPolicy: OnFailure
      restartPolicy: Never
      # hostname + subdomain (subdomain == the headless Service name) give
      # the pod a stable DNS name under that Service.
      hostname: blake-worker2
      subdomain: blake-container
      containers:
        - name: blake
          image: python:latest
          env:
            # Not read by the active command below.
            - name: SCR_EXIT
              value: "0"
          command:
            # Disabled alternative: shell script that sleeps, then exits 1.
            # - /bin/sh
            # - -c
            # - |
            #   echo "starting SCR..."
            #   sleep 300
            #   x=$(($RANDOM % 2))
            #   echo "exiting SCR: $x"
            #   # exit $x
            #   exit 1
            - python3
          args:
            - "-m"
            - http.server
            - "9376"
          ports:
            - name: test
              # hostPort also binds 9376 on the node itself.
              hostPort: 9376
              containerPort: 9376
      tolerations:
        # Tolerates the cray.nnf.node=true:NoSchedule taint.
        - effect: NoSchedule
          key: cray.nnf.node
          operator: Equal
          value: "true"
      nodeSelector:
        kubernetes.io/hostname: "kind-worker2"
        # kubernetes.io/hostname: "rabbit-node-1"
---
# Job that runs a single python:latest container serving HTTP on port 9376
# (python3 -m http.server 9376), pinned to the node whose hostname label is
# "kind-worker3", with the node directory /tmp/123 mounted at /mnt/test.
apiVersion: batch/v1
kind: Job
metadata:
  name: blake-worker3
  namespace: nnf-system
spec:
  # Optional Job-level knobs, currently disabled:
  # backoffLimit: 0
  # activeDeadlineSeconds: 0
  # ttlSecondsAfterFinished: 10
  # completions: 0
  # parallelism: 1
  template:
    metadata:
      labels:
        # Matched by the selector of the blake-container Service.
        cray.nnf.container.workflow1: "true"
    spec:
      # restartPolicy: OnFailure
      restartPolicy: Never
      # hostname + subdomain (subdomain == the headless Service name) give
      # the pod a stable DNS name under that Service.
      hostname: blake-worker3
      subdomain: blake-container
      containers:
        - name: blake
          image: python:latest
          env:
            # Not read by the active command below.
            - name: SCR_EXIT
              value: "0"
          command:
            # Disabled alternative: shell script that sleeps, then exits 1.
            # - /bin/sh
            # - -c
            # - |
            #   echo "starting SCR..."
            #   sleep 300
            #   x=$(($RANDOM % 2))
            #   echo "exiting SCR: $x"
            #   # exit $x
            #   exit 1
            - python3
          args:
            - "-m"
            - http.server
            - "9376"
          ports:
            - name: test
              # hostPort also binds 9376 on the node itself.
              hostPort: 9376
              containerPort: 9376
          # Disabled alternative command/args: fetch from the Service.
          # NOTE(review): no Service named "blake-container-service" is
          # defined in this file (the headless Service is "blake-container");
          # confirm the hostname before re-enabling.
          # - wget
          # args:
          # - "-O-"
          # - blake-container-service.nnf-system.svc.cluster.local
          volumeMounts:
            - name: "test"
              mountPath: "/mnt/test"
      tolerations:
        # Tolerates the cray.nnf.node=true:NoSchedule taint.
        - effect: NoSchedule
          key: cray.nnf.node
          operator: Equal
          value: "true"
      nodeSelector:
        kubernetes.io/hostname: "kind-worker3"
      volumes:
        - name: "test"
          hostPath:
            path: "/tmp/123"
            # type "Directory" requires the path to already exist on the
            # node; it is not created automatically.
            type: Directory