From a2d36ea82cb70affbc292e1178421a3e62f224e7 Mon Sep 17 00:00:00 2001 From: EarthlingDavey <15802017+EarthlingDavey@users.noreply.github.com> Date: Tue, 1 Oct 2024 11:52:01 +0100 Subject: [PATCH 1/3] Add phase 1 of production deployment --- deploy/production/deployment.tpl.yml | 64 +++++++++++++++++++++++++++- deploy/production/hpa.yml | 47 ++++++++++++++++++++ deploy/production/pod-monitor.yml | 13 ++++++ deploy/production/service.yml | 12 ++++++ deploy/staging/ingress.tpl.yml | 8 +--- 5 files changed, 136 insertions(+), 8 deletions(-) create mode 100644 deploy/production/hpa.yml create mode 100644 deploy/production/pod-monitor.yml create mode 100644 deploy/production/service.yml diff --git a/deploy/production/deployment.tpl.yml b/deploy/production/deployment.tpl.yml index bf45bfb86..9b2266ff7 100644 --- a/deploy/production/deployment.tpl.yml +++ b/deploy/production/deployment.tpl.yml @@ -30,8 +30,16 @@ spec: containers: - name: nginx image: ${ECR_URL}:${IMAGE_TAG_NGINX} + resources: + limits: + cpu: 500m + memory: 250Mi + requests: + cpu: 50m + memory: 100Mi ports: - containerPort: 8080 + name: http volumeMounts: - name: uploads mountPath: /var/www/html/public/app/uploads @@ -39,6 +47,14 @@ spec: mountPath: /sock securityContext: runAsUser: 101 + readinessProbe: + httpGet: + path: /readiness + port: 8080 + livenessProbe: + httpGet: + path: /liveness + port: 8080 env: - name: IPS_FORMATTED valueFrom: @@ -48,11 +64,27 @@ spec: - name: cron image: ${ECR_URL}:${IMAGE_TAG_CRON} + resources: + limits: + cpu: 50m + memory: 30Mi + requests: + cpu: 1m + memory: 12Mi securityContext: runAsUser: 3001 - name: fpm image: ${ECR_URL}:${IMAGE_TAG_FPM} + resources: + limits: + # If a pod exceeds its CPU limit, Kubernetes will simply throttle the pod. + cpu: "4" + # If a pod exceeds its memory limit, Kubernetes will kill the pod. 
+ memory: 2000Mi + requests: + cpu: 500m + memory: 600Mi volumeMounts: - name: uploads mountPath: /var/www/html/public/app/uploads @@ -60,6 +92,26 @@ spec: mountPath: /sock securityContext: runAsUser: 101 + # Check frequently during startup, so that scaling up can happen as fast as possible. + startupProbe: + exec: + command: + - /usr/local/bin/fpm-health/fpm-readiness.sh + failureThreshold: 20 + periodSeconds: 5 + # Don't route traffic to this pod if the container is not ready. + readinessProbe: + exec: + command: + - /usr/local/bin/fpm-health/fpm-readiness.sh + periodSeconds: 10 + failureThreshold: 1 + # Restart the container if it fails liveness script. + livenessProbe: + exec: + command: + - /usr/local/bin/fpm-health/fpm-liveness.sh + periodSeconds: 10 env: - name: AWS_S3_BUCKET valueFrom: @@ -100,7 +152,17 @@ spec: valueFrom: secretKeyRef: name: basic-auth-secret - key: auth + key: auth + - name: CACHE_HOST + valueFrom: + secretKeyRef: + name: elasticache-output + key: primary_endpoint_address + - name: CACHE_PASSWORD + valueFrom: + secretKeyRef: + name: elasticache-output + key: auth_token envFrom: - configMapRef: name: ${KUBE_NAMESPACE} diff --git a/deploy/production/hpa.yml b/deploy/production/hpa.yml new file mode 100644 index 000000000..48bc8f2f8 --- /dev/null +++ b/deploy/production/hpa.yml @@ -0,0 +1,47 @@ +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: intranet-production + namespace: intranet-production +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: intranet-production + minReplicas: 4 + maxReplicas: 12 + metrics: + # fpm + - type: ContainerResource + containerResource: + name: cpu + container: fpm + target: + type: Utilization + # If request is 500m let's early scale at 250m + # Rely on CPU usage for scaling more than memory usage. + # For scaling down, CPU settles quickly and memory is freed up slowly. 
+ averageUtilization: 50 + - type: ContainerResource + containerResource: + name: memory + container: fpm + target: + type: Utilization + # If request is 600Mi and php max is 384Mi let's scale at 480Mi + averageUtilization: 80 + # nginx + - type: ContainerResource + containerResource: + name: cpu + container: nginx + target: + type: Utilization + averageUtilization: 60 + - type: ContainerResource + containerResource: + name: memory + container: nginx + target: + type: Utilization + averageUtilization: 70 diff --git a/deploy/production/pod-monitor.yml b/deploy/production/pod-monitor.yml new file mode 100644 index 000000000..4dcbf68ef --- /dev/null +++ b/deploy/production/pod-monitor.yml @@ -0,0 +1,13 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PodMonitor +metadata: + name: intranet-production + namespace: intranet-production +spec: + selector: + matchLabels: + app: intranet-production + podMetricsEndpoints: + - port: http + path: "/metrics/fpm" + interval: 15s diff --git a/deploy/production/service.yml b/deploy/production/service.yml new file mode 100644 index 000000000..de3c38bb3 --- /dev/null +++ b/deploy/production/service.yml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: Service +metadata: + name: nginx-service + labels: + app: intranet-production +spec: + ports: + - port: 8080 + name: http + selector: + app: intranet-production diff --git a/deploy/staging/ingress.tpl.yml b/deploy/staging/ingress.tpl.yml index c7d841372..b67294b3b 100644 --- a/deploy/staging/ingress.tpl.yml +++ b/deploy/staging/ingress.tpl.yml @@ -8,13 +8,7 @@ metadata: external-dns.alpha.kubernetes.io/aws-weight: "100" nginx.ingress.kubernetes.io/enable-modsecurity: "true" nginx.ingress.kubernetes.io/modsecurity-snippet: | - SecRuleEngine On - SecDefaultAction "phase:2,pass,log,tag:github_team=central-digital-product-team" - SecDefaultAction "phase:4,pass,log,tag:github_team=central-digital-product-team" - SecRuleRemoveById 949110 - # nginx.ingress.kubernetes.io/auth-type: basic - # 
nginx.ingress.kubernetes.io/auth-secret: basic-auth-secret - # nginx.ingress.kubernetes.io/auth-realm: 'Staging User | Authentication Required' + ${MODSEC_CONFIG} nginx.ingress.kubernetes.io/server-snippet: | location = /health { auth_basic off; From 40afa2bbb3929ac5301d533c628cbc517eda3669 Mon Sep 17 00:00:00 2001 From: EarthlingDavey <15802017+EarthlingDavey@users.noreply.github.com> Date: Tue, 1 Oct 2024 11:53:30 +0100 Subject: [PATCH 2/3] Create ingress.tpl.yml --- deploy/production/ingress.tpl.yml | 1 + 1 file changed, 1 insertion(+) create mode 100644 deploy/production/ingress.tpl.yml diff --git a/deploy/production/ingress.tpl.yml b/deploy/production/ingress.tpl.yml new file mode 100644 index 000000000..1989c04c6 --- /dev/null +++ b/deploy/production/ingress.tpl.yml @@ -0,0 +1 @@ +# Intentionally empty file. \ No newline at end of file From 896bc988ce522ca79ed1c39b56e1db4dc53896b9 Mon Sep 17 00:00:00 2001 From: EarthlingDavey <15802017+EarthlingDavey@users.noreply.github.com> Date: Tue, 1 Oct 2024 13:37:55 +0100 Subject: [PATCH 3/3] Move definitions to cloud-platform-environments --- deploy/demo/alerts.yml | 30 ------------------- deploy/demo/network-policy.yml | 16 ----------- deploy/demo/pod-monitor.yml | 13 --------- deploy/demo/service-monitor.yml | 13 --------- deploy/development/alerts.yml | 30 ------------------- deploy/development/network-policy.yml | 16 ----------- deploy/development/pod-monitor.yml | 13 --------- deploy/development/service-monitor.yml | 13 --------- deploy/production/pod-monitor.yml | 13 --------- deploy/staging/alerts.yml | 40 -------------------------- deploy/staging/network-policy.yml | 31 -------------------- deploy/staging/pod-monitor.yml | 13 --------- deploy/staging/service-monitor.yml | 13 --------- 13 files changed, 254 deletions(-) delete mode 100644 deploy/demo/alerts.yml delete mode 100644 deploy/demo/network-policy.yml delete mode 100644 deploy/demo/pod-monitor.yml delete mode 100644 deploy/demo/service-monitor.yml 
delete mode 100644 deploy/development/alerts.yml delete mode 100644 deploy/development/network-policy.yml delete mode 100644 deploy/development/pod-monitor.yml delete mode 100644 deploy/development/service-monitor.yml delete mode 100644 deploy/production/pod-monitor.yml delete mode 100644 deploy/staging/alerts.yml delete mode 100644 deploy/staging/network-policy.yml delete mode 100644 deploy/staging/pod-monitor.yml delete mode 100644 deploy/staging/service-monitor.yml diff --git a/deploy/demo/alerts.yml b/deploy/demo/alerts.yml deleted file mode 100644 index 5a0c81f5e..000000000 --- a/deploy/demo/alerts.yml +++ /dev/null @@ -1,30 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - namespace: intranet-demo - labels: - role: alert-rules - name: prometheus-custom-rules-intranet-demo -spec: - groups: - - name: application-rules - rules: - - alert: ServiceInsufficientAccessPolicy - expr: http_status_code_wp_home{namespace="intranet-demo"} != 401 - for: 1m - labels: - severity: intranet-demo - annotations: - message: Namespace {{ $labels.namespace }} (homepage) is returning an unexpected status code {{ printf "%0.0f" $value}}. - runbook_url: https://dsdmoj.atlassian.net/wiki/spaces/CDPT/pages/5124292758/Alerts+runbooks#ServiceInsufficientAccessPolicy - dashboard_url: https://grafana.live.cloud-platform.service.justice.gov.uk/d/bdwyqxz07sxkwg/intranet-service?orgId=1&var-namespace=intranet-demo - - - alert: ServiceAbsentAccessPolicy - expr: absent(http_status_code_wp_home{namespace="intranet-demo"}) == 1 - for: 1m - labels: - severity: intranet-demo - annotations: - message: Namespace {{ $labels.namespace }} (homepage) is not returning a status code. 
- runbook_url: https://dsdmoj.atlassian.net/wiki/spaces/CDPT/pages/5124292758/Alerts+runbooks#ServiceAbsentAccessPolicy - dashboard_url: https://grafana.live.cloud-platform.service.justice.gov.uk/d/bdwyqxz07sxkwg/intranet-service?orgId=1&var-namespace=intranet-demo diff --git a/deploy/demo/network-policy.yml b/deploy/demo/network-policy.yml deleted file mode 100644 index 589fbeb62..000000000 --- a/deploy/demo/network-policy.yml +++ /dev/null @@ -1,16 +0,0 @@ -kind: NetworkPolicy -apiVersion: networking.k8s.io/v1 -metadata: - name: allow-prometheus-scraping - namespace: intranet-demo -spec: - podSelector: - matchLabels: - app: intranet-demo - policyTypes: - - Ingress - ingress: - - from: - - namespaceSelector: - matchLabels: - component: monitoring diff --git a/deploy/demo/pod-monitor.yml b/deploy/demo/pod-monitor.yml deleted file mode 100644 index be45910e2..000000000 --- a/deploy/demo/pod-monitor.yml +++ /dev/null @@ -1,13 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: PodMonitor -metadata: - name: intranet-demo - namespace: intranet-demo -spec: - selector: - matchLabels: - app: intranet-demo - podMetricsEndpoints: - - port: http - path: "/metrics/fpm" - interval: 15s diff --git a/deploy/demo/service-monitor.yml b/deploy/demo/service-monitor.yml deleted file mode 100644 index 28abfb6d9..000000000 --- a/deploy/demo/service-monitor.yml +++ /dev/null @@ -1,13 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: intranet-demo - namespace: intranet-demo -spec: - selector: - matchLabels: - app: intranet-demo - endpoints: - - port: http - interval: 15s - path: /metrics/service diff --git a/deploy/development/alerts.yml b/deploy/development/alerts.yml deleted file mode 100644 index 35ee65bd0..000000000 --- a/deploy/development/alerts.yml +++ /dev/null @@ -1,30 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - namespace: intranet-dev - labels: - role: alert-rules - name: 
prometheus-custom-rules-intranet-dev -spec: - groups: - - name: application-rules - rules: - - alert: ServiceInsufficientAccessPolicy - expr: http_status_code_wp_home{namespace="intranet-dev"} != 401 - for: 1m - labels: - severity: intranet-dev - annotations: - message: Namespace {{ $labels.namespace }} (homepage) is returning an unexpected status code {{ printf "%0.0f" $value}}. - runbook_url: https://dsdmoj.atlassian.net/wiki/spaces/CDPT/pages/5124292758/Alerts+runbooks#ServiceInsufficientAccessPolicy - dashboard_url: https://grafana.live.cloud-platform.service.justice.gov.uk/d/bdwyqxz07sxkwg/intranet-service?orgId=1 - - - alert: ServiceAbsentAccessPolicy - expr: absent(http_status_code_wp_home{namespace="intranet-dev"}) == 1 - for: 1m - labels: - severity: intranet-dev - annotations: - message: Namespace {{ $labels.namespace }} (homepage) is not returning a status code. - runbook_url: https://dsdmoj.atlassian.net/wiki/spaces/CDPT/pages/5124292758/Alerts+runbooks#ServiceAbsentAccessPolicy - dashboard_url: https://grafana.live.cloud-platform.service.justice.gov.uk/d/bdwyqxz07sxkwg/intranet-service?orgId=1 diff --git a/deploy/development/network-policy.yml b/deploy/development/network-policy.yml deleted file mode 100644 index d81d2ce2e..000000000 --- a/deploy/development/network-policy.yml +++ /dev/null @@ -1,16 +0,0 @@ -kind: NetworkPolicy -apiVersion: networking.k8s.io/v1 -metadata: - name: allow-prometheus-scraping - namespace: intranet-dev -spec: - podSelector: - matchLabels: - app: intranet-dev - policyTypes: - - Ingress - ingress: - - from: - - namespaceSelector: - matchLabels: - component: monitoring diff --git a/deploy/development/pod-monitor.yml b/deploy/development/pod-monitor.yml deleted file mode 100644 index 2f8b7c179..000000000 --- a/deploy/development/pod-monitor.yml +++ /dev/null @@ -1,13 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: PodMonitor -metadata: - name: intranet-dev - namespace: intranet-dev -spec: - selector: - matchLabels: - app: 
intranet-dev - podMetricsEndpoints: - - port: http - path: "/metrics/fpm" - interval: 15s diff --git a/deploy/development/service-monitor.yml b/deploy/development/service-monitor.yml deleted file mode 100644 index 7e7bac8a0..000000000 --- a/deploy/development/service-monitor.yml +++ /dev/null @@ -1,13 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: intranet-dev - namespace: intranet-dev -spec: - selector: - matchLabels: - app: intranet-dev - endpoints: - - port: http - interval: 15s - path: /metrics/service diff --git a/deploy/production/pod-monitor.yml b/deploy/production/pod-monitor.yml deleted file mode 100644 index 4dcbf68ef..000000000 --- a/deploy/production/pod-monitor.yml +++ /dev/null @@ -1,13 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: PodMonitor -metadata: - name: intranet-production - namespace: intranet-production -spec: - selector: - matchLabels: - app: intranet-production - podMetricsEndpoints: - - port: http - path: "/metrics/fpm" - interval: 15s diff --git a/deploy/staging/alerts.yml b/deploy/staging/alerts.yml deleted file mode 100644 index 2241d2bfd..000000000 --- a/deploy/staging/alerts.yml +++ /dev/null @@ -1,40 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - namespace: intranet-staging - labels: - role: alert-rules - name: prometheus-custom-rules-intranet-staging -spec: - groups: - - name: application-rules - rules: - - alert: ServiceInsufficientAccessPolicy - expr: http_status_code_wp_home{namespace="intranet-staging"} != 401 - for: 1m - labels: - severity: intranet-staging - annotations: - message: Namespace {{ $labels.namespace }} (homepage) is returning an unexpected status code {{ printf "%0.0f" $value}}. 
- runbook_url: https://dsdmoj.atlassian.net/wiki/spaces/CDPT/pages/5124292758/Alerts+runbooks#ServiceInsufficientAccessPolicy - dashboard_url: https://grafana.live.cloud-platform.service.justice.gov.uk/d/bdwyqxz07sxkwg/intranet-service?orgId=1&var-namespace=intranet-staging - - - alert: ServiceAbsentAccessPolicy - expr: absent(http_status_code_wp_home{namespace="intranet-staging"}) == 1 - for: 1m - labels: - severity: intranet-staging - annotations: - message: Namespace {{ $labels.namespace }} (homepage) is not returning a status code. - runbook_url: https://dsdmoj.atlassian.net/wiki/spaces/CDPT/pages/5124292758/Alerts+runbooks#ServiceAbsentAccessPolicy - dashboard_url: https://grafana.live.cloud-platform.service.justice.gov.uk/d/bdwyqxz07sxkwg/intranet-service?orgId=1&var-namespace=intranet-staging - - - alert: ServiceInsufficientHeaderHandling - expr: http_status_code_invalid_header{namespace="intranet-staging"} != 400 - for: 1m - labels: - severity: intranet-staging - annotations: - message: Namespace {{ $labels.namespace }} (invalid header) is returning an unexpected status code {{ printf "%0.0f" $value}}. 
- runbook_url: https://dsdmoj.atlassian.net/wiki/spaces/CDPT/pages/5124292758/Alerts+runbooks#ServiceInsufficientHeaderHandling - dashboard_url: https://grafana.live.cloud-platform.service.justice.gov.uk/d/bdwyqxz07sxkwg/intranet-service?orgId=1&var-namespace=intranet-staging diff --git a/deploy/staging/network-policy.yml b/deploy/staging/network-policy.yml deleted file mode 100644 index 671c35526..000000000 --- a/deploy/staging/network-policy.yml +++ /dev/null @@ -1,31 +0,0 @@ -kind: NetworkPolicy -apiVersion: networking.k8s.io/v1 -metadata: - name: allow-prometheus-scraping - namespace: intranet-staging -spec: - podSelector: - matchLabels: - app: intranet-staging - policyTypes: - - Ingress - ingress: - - from: - - namespaceSelector: - matchLabels: - component: monitoring ---- -kind: NetworkPolicy -apiVersion: networking.k8s.io/v1 -metadata: - name: allow-stability-inspector-dev - namespace: intranet-staging -spec: - podSelector: {} - policyTypes: - - Ingress - ingress: - - from: - - namespaceSelector: - matchLabels: - cloud-platform.justice.gov.uk/namespace: "stability-inspector-dev" diff --git a/deploy/staging/pod-monitor.yml b/deploy/staging/pod-monitor.yml deleted file mode 100644 index 6b36e4eaf..000000000 --- a/deploy/staging/pod-monitor.yml +++ /dev/null @@ -1,13 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: PodMonitor -metadata: - name: intranet-staging - namespace: intranet-staging -spec: - selector: - matchLabels: - app: intranet-staging - podMetricsEndpoints: - - port: http - path: "/metrics/fpm" - interval: 15s diff --git a/deploy/staging/service-monitor.yml b/deploy/staging/service-monitor.yml deleted file mode 100644 index be71dfd02..000000000 --- a/deploy/staging/service-monitor.yml +++ /dev/null @@ -1,13 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: intranet-staging - namespace: intranet-staging -spec: - selector: - matchLabels: - app: intranet-staging - endpoints: - - port: http - interval: 15s - 
path: /metrics/service