diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 82034495d3..c3a384baa9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,11 +1,11 @@ repos: - repo: git@github.com:Yelp/detect-secrets - rev: v1.4.0 + rev: v1.5.0 hooks: - id: detect-secrets args: ['--baseline', '.secrets.baseline'] - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v2.5.0 + rev: v4.6.0 hooks: - id: no-commit-to-branch args: [--branch, develop, --branch, master, --pattern, release/.*] diff --git a/.secrets.baseline b/.secrets.baseline index 0c4eba0a80..ededd2dff7 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -1,5 +1,5 @@ { - "version": "1.4.0", + "version": "1.5.0", "plugins_used": [ { "name": "ArtifactoryDetector" @@ -26,6 +26,9 @@ { "name": "GitHubTokenDetector" }, + { + "name": "GitLabTokenDetector" + }, { "name": "HexHighEntropyString", "limit": 3.0 @@ -36,6 +39,9 @@ { "name": "IbmCosHmacDetector" }, + { + "name": "IPPublicDetector" + }, { "name": "JwtTokenDetector" }, @@ -49,9 +55,15 @@ { "name": "NpmDetector" }, + { + "name": "OpenAIDetector" + }, { "name": "PrivateKeyDetector" }, + { + "name": "PypiTokenDetector" + }, { "name": "SendGridDetector" }, @@ -67,6 +79,9 @@ { "name": "StripeDetector" }, + { + "name": "TelegramBotTokenDetector" + }, { "name": "TwilioKeyDetector" } @@ -246,6 +261,15 @@ "line_number": 154 } ], + "files/lambda/test-security_alerts.py": [ + { + "type": "AWS Access Key", + "filename": "files/lambda/test-security_alerts.py", + "hashed_secret": "4e041fbfd5dd5918d3d5e968f5f739f815ae92da", + "is_verified": false, + "line_number": 5 + } + ], "files/scripts/psql-fips-fix.sh": [ { "type": "Secret Keyword", @@ -640,78 +664,6 @@ "line_number": 25 } ], - "gen3/test/terraformTest.sh": [ - { - "type": "Secret Keyword", - "filename": "gen3/test/terraformTest.sh", - "hashed_secret": "6b44a330b450ee550c081410c6b705dfeaa105ce", - "is_verified": false, - "line_number": 156 - }, - { - "type": "Secret Keyword", - "filename": "gen3/test/terraformTest.sh", - "hashed_secret": "d869db7fe62fb07c25a0403ecaea55031744b5fb", - "is_verified": false, - "line_number": 163 - }, - { - "type": "Base64 High Entropy String", - "filename": "gen3/test/terraformTest.sh", - "hashed_secret": "1cc07dccfdf640eb0e403e490a873a5536759009", - "is_verified": false, - "line_number": 172 - }, - { - "type": "Secret Keyword", - "filename": "gen3/test/terraformTest.sh", - "hashed_secret": "1cc07dccfdf640eb0e403e490a873a5536759009", - "is_verified": false, - "line_number": 172 - }, - { - "type": "Base64 High Entropy String", - "filename": "gen3/test/terraformTest.sh", - "hashed_secret": "185a71a740ef6b9b21c84e6eaa47b89c7de181ef", - "is_verified": false, - "line_number": 175 - }, - { - "type": "Secret Keyword", - "filename": "gen3/test/terraformTest.sh", - "hashed_secret": "185a71a740ef6b9b21c84e6eaa47b89c7de181ef", - "is_verified": false, - "line_number": 175 - }, - { - "type": "Secret Keyword", - "filename": "gen3/test/terraformTest.sh", - "hashed_secret": "212e1d3823c8c9af9e4c0c172164ee292b9a6768", - "is_verified": false, - "line_number": 311 - }, - { - "type": "Secret Keyword", - "filename": "gen3/test/terraformTest.sh", - "hashed_secret": "cb80dbb67a1a5bdf4957eea1473789f1c65357c6", - "is_verified": false, - "line_number": 312 - }, - { - "type": "Secret Keyword", - "filename": "gen3/test/terraformTest.sh", - "hashed_secret": "5f35c25f4bf588b5fad46e249fcd9221f5257ce4", - "is_verified": false, - "line_number": 313 - }, - { - "type": "Secret Keyword", - "filename": 
"gen3/test/terraformTest.sh", - "hashed_secret": "5308421b43dde5775f1993bd25a8163070d65598", - "is_verified": false, - "line_number": 314 - } - ], "kube/services/access-backend/access-backend-deploy.yaml": [ { "type": "Secret Keyword", @@ -745,49 +697,49 @@ "filename": "kube/services/arborist/arborist-deploy.yaml", "hashed_secret": "dbd5f43594a152b52261c8e21520a3989823fe55", "is_verified": false, - "line_number": 64 + "line_number": 65 }, { "type": "Secret Keyword", "filename": "kube/services/arborist/arborist-deploy.yaml", "hashed_secret": "1c062eaac9e6fa0766377d3cfc3e4a88982fecdb", "is_verified": false, - "line_number": 67 + "line_number": 68 }, { "type": "Secret Keyword", "filename": "kube/services/arborist/arborist-deploy.yaml", "hashed_secret": "694cfd0a009a42055e975de9111b2f3c6e8a3634", "is_verified": false, - "line_number": 70 + "line_number": 71 }, { "type": "Secret Keyword", "filename": "kube/services/arborist/arborist-deploy.yaml", "hashed_secret": "4b09a441cef18c75560f6c3caeafc96f2163c3fd", "is_verified": false, - "line_number": 77 + "line_number": 78 }, { "type": "Secret Keyword", "filename": "kube/services/arborist/arborist-deploy.yaml", "hashed_secret": "7e7478a28dcc3695a083b66b47243b050c813e2d", "is_verified": false, - "line_number": 80 + "line_number": 81 }, { "type": "Secret Keyword", "filename": "kube/services/arborist/arborist-deploy.yaml", "hashed_secret": "2f57bb00fcb93481c2be444e3e9f322b6cb5fadb", "is_verified": false, - "line_number": 83 + "line_number": 84 }, { "type": "Secret Keyword", "filename": "kube/services/arborist/arborist-deploy.yaml", "hashed_secret": "ea73fcfdaa415890d5fde24d3b2245671be32f73", "is_verified": false, - "line_number": 86 + "line_number": 87 } ], "kube/services/argo/workflows/fence-usersync-wf.yaml": [ @@ -858,7 +810,7 @@ "filename": "kube/services/audit-service/audit-service-deploy.yaml", "hashed_secret": "42cde1c58c36d8bb5804a076e55ac6ec07ef99fc", "is_verified": false, - "line_number": 64 + "line_number": 65 } ], "kube/services/aws-es-proxy/aws-es-proxy-deploy.yaml": [ @@ -867,7 +819,7 @@ "filename": "kube/services/aws-es-proxy/aws-es-proxy-deploy.yaml", "hashed_secret": "7f834ccb442433fc12ec9532f75c3a4b6a748d4c", "is_verified": false, - "line_number": 46 + "line_number": 47 } ], "kube/services/cedar-wrapper/cedar-wrapper-deploy.yaml": [ @@ -965,7 +917,7 @@ "filename": "kube/services/dicom-server/dicom-server-deploy.yaml", "hashed_secret": "706168ac2565a93cceffe2202ac45d3d31c075fb", "is_verified": false, - "line_number": 40 + "line_number": 41 } ], "kube/services/fence/fence-canary-deploy.yaml": [ @@ -1039,63 +991,63 @@ "filename": "kube/services/fence/fence-deploy.yaml", "hashed_secret": "dbd5f43594a152b52261c8e21520a3989823fe55", "is_verified": false, - "line_number": 71 + "line_number": 72 }, { "type": "Secret Keyword", "filename": "kube/services/fence/fence-deploy.yaml", "hashed_secret": "1c062eaac9e6fa0766377d3cfc3e4a88982fecdb", "is_verified": false, - "line_number": 74 + "line_number": 75 }, { "type": "Secret Keyword", "filename": "kube/services/fence/fence-deploy.yaml", "hashed_secret": "694cfd0a009a42055e975de9111b2f3c6e8a3634", "is_verified": false, - "line_number": 77 + "line_number": 78 }, { "type": "Secret Keyword", "filename": "kube/services/fence/fence-deploy.yaml", "hashed_secret": "4b09a441cef18c75560f6c3caeafc96f2163c3fd", "is_verified": false, - "line_number": 87 + "line_number": 88 }, { "type": "Secret Keyword", "filename": "kube/services/fence/fence-deploy.yaml", "hashed_secret": 
"7e7478a28dcc3695a083b66b47243b050c813e2d", "is_verified": false, - "line_number": 90 + "line_number": 91 }, { "type": "Secret Keyword", "filename": "kube/services/fence/fence-deploy.yaml", "hashed_secret": "2f57bb00fcb93481c2be444e3e9f322b6cb5fadb", "is_verified": false, - "line_number": 93 + "line_number": 94 }, { "type": "Secret Keyword", "filename": "kube/services/fence/fence-deploy.yaml", "hashed_secret": "98f5a68541a6d981bf5825f23dffe6a0b150e457", "is_verified": false, - "line_number": 96 + "line_number": 97 }, { "type": "Secret Keyword", "filename": "kube/services/fence/fence-deploy.yaml", "hashed_secret": "0849046cdafcdb17f5a4bf5c528430d5e04ad295", "is_verified": false, - "line_number": 99 + "line_number": 100 }, { "type": "Secret Keyword", "filename": "kube/services/fence/fence-deploy.yaml", "hashed_secret": "9ce05cf6168d15dfe02aac9ca9e0712c19c9436d", "is_verified": false, - "line_number": 102 + "line_number": 103 } ], "kube/services/fenceshib/fenceshib-canary-deploy.yaml": [ @@ -1241,28 +1193,28 @@ "filename": "kube/services/frontend-framework/frontend-framework-deploy.yaml", "hashed_secret": "9ce05cf6168d15dfe02aac9ca9e0712c19c9436d", "is_verified": false, - "line_number": 54 + "line_number": 55 }, { "type": "Secret Keyword", "filename": "kube/services/frontend-framework/frontend-framework-deploy.yaml", "hashed_secret": "6607b403f74e62246fc6a3c938feffc5a34a7e49", "is_verified": false, - "line_number": 57 + "line_number": 58 }, { "type": "Secret Keyword", "filename": "kube/services/frontend-framework/frontend-framework-deploy.yaml", "hashed_secret": "4b0bb3e58651fe56ee23e59aa6a3cb96dc61ddd2", "is_verified": false, - "line_number": 60 + "line_number": 61 }, { "type": "Secret Keyword", "filename": "kube/services/frontend-framework/frontend-framework-deploy.yaml", "hashed_secret": "e3c7565314f404e3883929f003c65a02a80366e9", "is_verified": false, - "line_number": 66 + "line_number": 67 } ], "kube/services/frontend-framework/frontend-framework-root-deploy.yaml": [ @@ -1271,28 +1223,28 @@ "filename": "kube/services/frontend-framework/frontend-framework-root-deploy.yaml", "hashed_secret": "9ce05cf6168d15dfe02aac9ca9e0712c19c9436d", "is_verified": false, - "line_number": 54 + "line_number": 55 }, { "type": "Secret Keyword", "filename": "kube/services/frontend-framework/frontend-framework-root-deploy.yaml", "hashed_secret": "6607b403f74e62246fc6a3c938feffc5a34a7e49", "is_verified": false, - "line_number": 57 + "line_number": 58 }, { "type": "Secret Keyword", "filename": "kube/services/frontend-framework/frontend-framework-root-deploy.yaml", "hashed_secret": "4b0bb3e58651fe56ee23e59aa6a3cb96dc61ddd2", "is_verified": false, - "line_number": 60 + "line_number": 61 }, { "type": "Secret Keyword", "filename": "kube/services/frontend-framework/frontend-framework-root-deploy.yaml", "hashed_secret": "e3c7565314f404e3883929f003c65a02a80366e9", "is_verified": false, - "line_number": 66 + "line_number": 67 } ], "kube/services/gdcapi/gdcapi-deploy.yaml": [ @@ -1398,14 +1350,14 @@ "filename": "kube/services/guppy/guppy-deploy.yaml", "hashed_secret": "0db22b31c9add2d3c76743c0ac6fbc99bb8b4761", "is_verified": false, - "line_number": 65 + "line_number": 66 }, { "type": "Secret Keyword", "filename": "kube/services/guppy/guppy-deploy.yaml", "hashed_secret": "9ce05cf6168d15dfe02aac9ca9e0712c19c9436d", "is_verified": false, - "line_number": 68 + "line_number": 69 } ], "kube/services/indexd/indexd-canary-deploy.yaml": [ @@ -1444,28 +1396,28 @@ "filename": "kube/services/indexd/indexd-deploy.yaml", 
"hashed_secret": "0b701c1fabb6ba47a7d47d455e3696d207014bd3", "is_verified": false, - "line_number": 63 + "line_number": 64 }, { "type": "Secret Keyword", "filename": "kube/services/indexd/indexd-deploy.yaml", "hashed_secret": "aee98a99696237d70b6854ee4c2d9e42bc696039", "is_verified": false, - "line_number": 66 + "line_number": 67 }, { "type": "Secret Keyword", "filename": "kube/services/indexd/indexd-deploy.yaml", "hashed_secret": "bdecca54d39013d43d3b7f05f2927eaa7df375dc", "is_verified": false, - "line_number": 72 + "line_number": 73 }, { "type": "Secret Keyword", "filename": "kube/services/indexd/indexd-deploy.yaml", "hashed_secret": "9ce05cf6168d15dfe02aac9ca9e0712c19c9436d", "is_verified": false, - "line_number": 75 + "line_number": 76 } ], "kube/services/jenkins-ci-worker/jenkins-ci-worker-deploy.yaml": [ @@ -1506,14 +1458,14 @@ "filename": "kube/services/jenkins/jenkins-deploy.yaml", "hashed_secret": "c937b6fbb346a51ef679dd02ac5c4863e02bfdbf", "is_verified": false, - "line_number": 157 + "line_number": 144 }, { "type": "Secret Keyword", "filename": "kube/services/jenkins/jenkins-deploy.yaml", "hashed_secret": "9ce05cf6168d15dfe02aac9ca9e0712c19c9436d", "is_verified": false, - "line_number": 160 + "line_number": 147 } ], "kube/services/jenkins2-ci-worker/jenkins2-ci-worker-deploy.yaml": [ @@ -1554,14 +1506,14 @@ "filename": "kube/services/jenkins2/jenkins2-deploy.yaml", "hashed_secret": "c937b6fbb346a51ef679dd02ac5c4863e02bfdbf", "is_verified": false, - "line_number": 153 + "line_number": 140 }, { "type": "Secret Keyword", "filename": "kube/services/jenkins2/jenkins2-deploy.yaml", "hashed_secret": "9ce05cf6168d15dfe02aac9ca9e0712c19c9436d", "is_verified": false, - "line_number": 156 + "line_number": 143 } ], "kube/services/jobs/arborist-rm-expired-access-cronjob.yaml": [ @@ -2870,21 +2822,21 @@ "filename": "kube/services/manifestservice/manifestservice-deploy.yaml", "hashed_secret": "3da2c49c267b6c58401bbf05e379b38d20434f78", "is_verified": false, - "line_number": 61 + "line_number": 62 }, { "type": "Secret Keyword", "filename": "kube/services/manifestservice/manifestservice-deploy.yaml", "hashed_secret": "469e0c2b1a67aa94955bae023ddc727be31581a7", "is_verified": false, - "line_number": 64 + "line_number": 65 }, { "type": "Secret Keyword", "filename": "kube/services/manifestservice/manifestservice-deploy.yaml", "hashed_secret": "9ce05cf6168d15dfe02aac9ca9e0712c19c9436d", "is_verified": false, - "line_number": 67 + "line_number": 68 } ], "kube/services/metadata/metadata-deploy.yaml": [ @@ -2893,14 +2845,14 @@ "filename": "kube/services/metadata/metadata-deploy.yaml", "hashed_secret": "e14f65c8ca7f3b27a0f0f5463569954841e162c9", "is_verified": false, - "line_number": 61 + "line_number": 62 }, { "type": "Secret Keyword", "filename": "kube/services/metadata/metadata-deploy.yaml", "hashed_secret": "c27babf45eb0ed87329e69c7d47dba611e859c5d", "is_verified": false, - "line_number": 66 + "line_number": 67 } ], "kube/services/monitoring/grafana-values.yaml": [ @@ -2982,28 +2934,28 @@ "filename": "kube/services/peregrine/peregrine-deploy.yaml", "hashed_secret": "6131c35d7eebdbc17a314bef8aac75b87323cff3", "is_verified": false, - "line_number": 67 + "line_number": 68 }, { "type": "Secret Keyword", "filename": "kube/services/peregrine/peregrine-deploy.yaml", "hashed_secret": "ca253d1c9dece2da0d6fb24ded7bdb849a475966", "is_verified": false, - "line_number": 70 + "line_number": 71 }, { "type": "Secret Keyword", "filename": "kube/services/peregrine/peregrine-deploy.yaml", "hashed_secret": 
"990a3202b5c94aa5e5997e7dc1a218e457f8b8ec", "is_verified": false, - "line_number": 76 + "line_number": 77 }, { "type": "Secret Keyword", "filename": "kube/services/peregrine/peregrine-deploy.yaml", "hashed_secret": "9ce05cf6168d15dfe02aac9ca9e0712c19c9436d", "is_verified": false, - "line_number": 79 + "line_number": 80 } ], "kube/services/pidgin/pidgin-deploy.yaml": [ @@ -3028,28 +2980,28 @@ "filename": "kube/services/portal/portal-deploy.yaml", "hashed_secret": "9ce05cf6168d15dfe02aac9ca9e0712c19c9436d", "is_verified": false, - "line_number": 55 + "line_number": 56 }, { "type": "Secret Keyword", "filename": "kube/services/portal/portal-deploy.yaml", "hashed_secret": "5c5a8e158ad2d8544f73cd5422072d414f497faa", "is_verified": false, - "line_number": 58 + "line_number": 59 }, { "type": "Secret Keyword", "filename": "kube/services/portal/portal-deploy.yaml", "hashed_secret": "619551216e129bbc5322678abf9c9210c0327cfb", "is_verified": false, - "line_number": 61 + "line_number": 62 }, { "type": "Secret Keyword", "filename": "kube/services/portal/portal-deploy.yaml", "hashed_secret": "e3c7565314f404e3883929f003c65a02a80366e9", "is_verified": false, - "line_number": 67 + "line_number": 68 } ], "kube/services/portal/portal-root-deploy.yaml": [ @@ -3058,28 +3010,28 @@ "filename": "kube/services/portal/portal-root-deploy.yaml", "hashed_secret": "9ce05cf6168d15dfe02aac9ca9e0712c19c9436d", "is_verified": false, - "line_number": 55 + "line_number": 56 }, { "type": "Secret Keyword", "filename": "kube/services/portal/portal-root-deploy.yaml", "hashed_secret": "5c5a8e158ad2d8544f73cd5422072d414f497faa", "is_verified": false, - "line_number": 58 + "line_number": 59 }, { "type": "Secret Keyword", "filename": "kube/services/portal/portal-root-deploy.yaml", "hashed_secret": "619551216e129bbc5322678abf9c9210c0327cfb", "is_verified": false, - "line_number": 61 + "line_number": 62 }, { "type": "Secret Keyword", "filename": "kube/services/portal/portal-root-deploy.yaml", "hashed_secret": "e3c7565314f404e3883929f003c65a02a80366e9", "is_verified": false, - "line_number": 67 + "line_number": 68 } ], "kube/services/presigned-url-fence/presigned-url-fence-deploy.yaml": [ @@ -3171,7 +3123,7 @@ "filename": "kube/services/requestor/requestor-deploy.yaml", "hashed_secret": "15debe4170aa5b89858d939f4c0644307ae7789b", "is_verified": false, - "line_number": 61 + "line_number": 62 } ], "kube/services/revproxy/gen3.nginx.conf/indexd-service.conf": [ @@ -3223,21 +3175,21 @@ "filename": "kube/services/revproxy/revproxy-deploy.yaml", "hashed_secret": "c7a87a61893a647e29289845cb51e61afb06800b", "is_verified": false, - "line_number": 74 + "line_number": 75 }, { "type": "Secret Keyword", "filename": "kube/services/revproxy/revproxy-deploy.yaml", "hashed_secret": "b3a4e2dea4c1fae8c58a07a84065b73b3a2d831c", "is_verified": false, - "line_number": 77 + "line_number": 78 }, { "type": "Secret Keyword", "filename": "kube/services/revproxy/revproxy-deploy.yaml", "hashed_secret": "9ce05cf6168d15dfe02aac9ca9e0712c19c9436d", "is_verified": false, - "line_number": 80 + "line_number": 81 } ], "kube/services/sftp/sftp-deploy.yaml": [ @@ -3285,28 +3237,28 @@ "filename": "kube/services/sheepdog/sheepdog-deploy.yaml", "hashed_secret": "ec9c944c51e87322de8d22e3ca9e2be1ad8fee0d", "is_verified": false, - "line_number": 63 + "line_number": 64 }, { "type": "Secret Keyword", "filename": "kube/services/sheepdog/sheepdog-deploy.yaml", "hashed_secret": "79496491225eda4a7be9fcddee2825c85b1535cc", "is_verified": false, - "line_number": 66 + "line_number": 67 }, 
{ "type": "Secret Keyword", "filename": "kube/services/sheepdog/sheepdog-deploy.yaml", "hashed_secret": "e43756046ad1763d6946575fed0e05130a154bd2", "is_verified": false, - "line_number": 72 + "line_number": 73 }, { "type": "Secret Keyword", "filename": "kube/services/sheepdog/sheepdog-deploy.yaml", "hashed_secret": "9ce05cf6168d15dfe02aac9ca9e0712c19c9436d", "is_verified": false, - "line_number": 75 + "line_number": 76 } ], "kube/services/shiny/shiny-deploy.yaml": [ @@ -3324,7 +3276,7 @@ "filename": "kube/services/ssjdispatcher/ssjdispatcher-deploy.yaml", "hashed_secret": "7f932449df74fc78573fea502df8a484aef3f69d", "is_verified": false, - "line_number": 61 + "line_number": 62 } ], "kube/services/superset/superset-deploy.yaml": [ @@ -3415,7 +3367,7 @@ "filename": "kube/services/wts/wts-deploy.yaml", "hashed_secret": "5de687ae886f19c3cb68d4980e3f2e77cca3db9e", "is_verified": false, - "line_number": 65 + "line_number": 66 } ], "packer/buildAll.sh": [ @@ -3737,5 +3689,5 @@ } ] }, - "generated_at": "2024-03-07T21:26:14Z" + "generated_at": "2024-08-27T21:36:15Z" } diff --git a/Docker/jenkins/Jenkins-CI-Worker/Dockerfile b/Docker/jenkins/Jenkins-CI-Worker/Dockerfile index 6eeb8f4fd6..9401e6a4bd 100644 --- a/Docker/jenkins/Jenkins-CI-Worker/Dockerfile +++ b/Docker/jenkins/Jenkins-CI-Worker/Dockerfile @@ -83,21 +83,21 @@ RUN curl -fsSL https://www.postgresql.org/media/keys/ACCC4CF8.asc| gpg --dearmor apt-get install -y postgresql-client-13 # Copy sh script responsible for installing Python -COPY install-python3.8.sh /root/tmp/install-python3.8.sh +COPY install-python3.9.sh /root/tmp/install-python3.9.sh -# Run the script responsible for installing Python 3.8.0 and link it to /usr/bin/python -RUN chmod +x /root/tmp/install-python3.8.sh; sync && \ - bash /root/tmp/install-python3.8.sh && \ - rm -rf /root/tmp/install-python3.8.sh && \ +# Run the script responsible for installing Python 3.9.19 and link it to /usr/bin/python +RUN chmod +x /root/tmp/install-python3.9.sh; sync && \ + bash /root/tmp/install-python3.9.sh && \ + rm -rf /root/tmp/install-python3.9.sh && \ unlink /usr/bin/python3 && \ - ln -s /usr/local/bin/python3.8 /usr/bin/python3 + ln -s /usr/local/bin/python3.9 /usr/bin/python3 # Fix shebang for lsb_release -RUN sed -i 's/python3/python3.8/' /usr/bin/lsb_release && \ - sed -i 's/python3/python3.8/' /usr/bin/add-apt-repository +RUN sed -i 's/python3/python3.9/' /usr/bin/lsb_release && \ + sed -i 's/python3/python3.9/' /usr/bin/add-apt-repository # install aws cli, poetry, pytest, etc. 
-RUN set -xe && python3.8 -m pip install --upgrade pip setuptools && python3.8 -m pip install awscli --upgrade && python3.8 -m pip install pytest --upgrade && python3.8 -m pip install poetry && python3.8 -m pip install PyYAML --upgrade && python3.8 -m pip install lxml --upgrade && python3.8 -m pip install yq --upgrade && python3.8 -m pip install datadog --upgrade +RUN set -xe && python3.9 -m pip install --upgrade pip setuptools && python3.9 -m pip install awscli --upgrade && python3.9 -m pip install pytest --upgrade && python3.9 -m pip install poetry && python3.9 -m pip install PyYAML --upgrade && python3.9 -m pip install lxml --upgrade && python3.9 -m pip install yq --upgrade && python3.9 -m pip install datadog --upgrade # install terraform RUN curl -o /tmp/terraform.zip https://releases.hashicorp.com/terraform/0.11.15/terraform_0.11.15_linux_amd64.zip \ diff --git a/Docker/jenkins/Jenkins-CI-Worker/install-python3.8.sh b/Docker/jenkins/Jenkins-CI-Worker/install-python3.8.sh deleted file mode 100755 index a01d59420b..0000000000 --- a/Docker/jenkins/Jenkins-CI-Worker/install-python3.8.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash -wget https://www.python.org/ftp/python/3.8.0/Python-3.8.0.tar.xz -tar xf Python-3.8.0.tar.xz -rm Python-3.8.0.tar.xz -cd Python-3.8.0 -./configure -make -make altinstall diff --git a/Docker/jenkins/Jenkins-CI-Worker/install-python3.9.sh b/Docker/jenkins/Jenkins-CI-Worker/install-python3.9.sh new file mode 100755 index 0000000000..88b7596ae8 --- /dev/null +++ b/Docker/jenkins/Jenkins-CI-Worker/install-python3.9.sh @@ -0,0 +1,8 @@ +#!/bin/bash +wget https://www.python.org/ftp/python/3.9.19/Python-3.9.19.tar.xz +tar xf Python-3.9.19.tar.xz +rm Python-3.9.19.tar.xz +cd Python-3.9.19 +./configure +make +make altinstall diff --git a/Docker/jenkins/Jenkins/Dockerfile b/Docker/jenkins/Jenkins/Dockerfile index 04ebe5864a..aae48e7b7a 100644 --- a/Docker/jenkins/Jenkins/Dockerfile +++ b/Docker/jenkins/Jenkins/Dockerfile @@ -1,4 +1,4 @@ -FROM jenkins/jenkins:2.426.3-lts-jdk21 +FROM jenkins/jenkins:2.462.1-lts-jdk21 USER root @@ -68,21 +68,21 @@ RUN DISTRO="$(lsb_release -c -s)" \ && rm -rf /var/lib/apt/lists/* # Copy sh script responsible for installing Python -COPY install-python3.8.sh /root/tmp/install-python3.8.sh +COPY install-python3.9.sh /root/tmp/install-python3.9.sh -# Run the script responsible for installing Python 3.8.0 and link it to /usr/bin/python -RUN chmod +x /root/tmp/install-python3.8.sh; sync && \ - ./root/tmp/install-python3.8.sh && \ - rm -rf /root/tmp/install-python3.8.sh && \ +# Run the script responsible for installing Python 3.9.19 and link it to /usr/bin/python +RUN chmod +x /root/tmp/install-python3.9.sh; sync && \ + ./root/tmp/install-python3.9.sh && \ + rm -rf /root/tmp/install-python3.9.sh && \ unlink /usr/bin/python3 && \ - ln -s /Python-3.8.0/python /usr/bin/python3 + ln -s /Python-3.9.19/python /usr/bin/python3 # Fix shebang for lsb_release -RUN sed -i 's/python3/python3.8/' /usr/bin/lsb_release && \ - sed -i 's/python3/python3.8/' /usr/bin/add-apt-repository +RUN sed -i 's/python3/python3.9/' /usr/bin/lsb_release && \ + sed -i 's/python3/python3.9/' /usr/bin/add-apt-repository # install aws cli, poetry, pytest, etc. 
-RUN set -xe && python3 -m pip install --upgrade pip && python3 -m pip install awscli --upgrade && python3 -m pip install pytest --upgrade && python3 -m pip install poetry && python3 -m pip install PyYAML --upgrade && python3 -m pip install lxml --upgrade && python3 -m pip install yq --upgrade +RUN set -xe && python3.9 -m pip install --upgrade pip && python3.9 -m pip install awscli --upgrade && python3.9 -m pip install pytest --upgrade && python3.9 -m pip install poetry && python3.9 -m pip install PyYAML --upgrade && python3.9 -m pip install lxml --upgrade && python3.9 -m pip install yq --upgrade # install chrome (supports headless mode) RUN set -xe \ diff --git a/Docker/jenkins/Jenkins/install-python3.8.sh b/Docker/jenkins/Jenkins/install-python3.8.sh deleted file mode 100755 index df21c66e58..0000000000 --- a/Docker/jenkins/Jenkins/install-python3.8.sh +++ /dev/null @@ -1,7 +0,0 @@ -wget https://www.python.org/ftp/python/3.8.0/Python-3.8.0.tar.xz -tar xf Python-3.8.0.tar.xz -rm Python-3.8.0.tar.xz -cd Python-3.8.0 -./configure -make -make altinstall diff --git a/Docker/jenkins/Jenkins/install-python3.9.sh b/Docker/jenkins/Jenkins/install-python3.9.sh new file mode 100755 index 0000000000..83d7f17cd9 --- /dev/null +++ b/Docker/jenkins/Jenkins/install-python3.9.sh @@ -0,0 +1,7 @@ +wget https://www.python.org/ftp/python/3.9.19/Python-3.9.19.tar.xz +tar xf Python-3.9.19.tar.xz +rm Python-3.9.19.tar.xz +cd Python-3.9.19 +./configure +make +make altinstall diff --git a/Docker/jenkins/Jenkins2/Dockerfile b/Docker/jenkins/Jenkins2/Dockerfile index e6b73bc76d..c4bf93dfab 100644 --- a/Docker/jenkins/Jenkins2/Dockerfile +++ b/Docker/jenkins/Jenkins2/Dockerfile @@ -1,4 +1,4 @@ -FROM jenkins/jenkins:2.426.3-lts-jdk21 +FROM jenkins/jenkins:2.462.1-lts-jdk21 USER root @@ -69,21 +69,21 @@ RUN DISTRO="$(lsb_release -c -s)" \ && rm -rf /var/lib/apt/lists/* # Copy sh script responsible for installing Python -COPY install-python3.8.sh /root/tmp/install-python3.8.sh +COPY install-python3.9.sh /root/tmp/install-python3.9.sh -# Run the script responsible for installing Python 3.8.0 and link it to /usr/bin/python -RUN chmod +x /root/tmp/install-python3.8.sh; sync && \ - ./root/tmp/install-python3.8.sh && \ - rm -rf /root/tmp/install-python3.8.sh && \ +# Run the script responsible for installing Python 3.9.19 and link it to /usr/bin/python +RUN chmod +x /root/tmp/install-python3.9.sh; sync && \ + ./root/tmp/install-python3.9.sh && \ + rm -rf /root/tmp/install-python3.9.sh && \ unlink /usr/bin/python3 && \ - ln -s /Python-3.8.0/python /usr/bin/python3 + ln -s /Python-3.9.19/python /usr/bin/python3 # Fix shebang for lsb_release -RUN sed -i 's/python3/python3.5/' /usr/bin/lsb_release && \ - sed -i 's/python3/python3.5/' /usr/bin/add-apt-repository +RUN sed -i 's/python3/python3.9/' /usr/bin/lsb_release && \ + sed -i 's/python3/python3.9/' /usr/bin/add-apt-repository # install aws cli, poetry, pytest, etc. 
-RUN set -xe && python3 -m pip install --upgrade pip && python3 -m pip install awscli --upgrade && python3 -m pip install pytest --upgrade && python3 -m pip install poetry && python3 -m pip install PyYAML --upgrade && python3 -m pip install lxml --upgrade && python3 -m pip install yq --upgrade +RUN set -xe && python3.9 -m pip install --upgrade pip && python3.9 -m pip install awscli --upgrade && python3.9 -m pip install pytest --upgrade && python3.9 -m pip install poetry && python3.9 -m pip install PyYAML --upgrade && python3.9 -m pip install lxml --upgrade && python3.9 -m pip install yq --upgrade # install chrome (supports headless mode) RUN set -xe \ diff --git a/Docker/jenkins/Jenkins2/install-python3.8.sh b/Docker/jenkins/Jenkins2/install-python3.8.sh deleted file mode 100755 index df21c66e58..0000000000 --- a/Docker/jenkins/Jenkins2/install-python3.8.sh +++ /dev/null @@ -1,7 +0,0 @@ -wget https://www.python.org/ftp/python/3.8.0/Python-3.8.0.tar.xz -tar xf Python-3.8.0.tar.xz -rm Python-3.8.0.tar.xz -cd Python-3.8.0 -./configure -make -make altinstall diff --git a/Docker/jenkins/Jenkins2/install-python3.9.sh b/Docker/jenkins/Jenkins2/install-python3.9.sh new file mode 100755 index 0000000000..83d7f17cd9 --- /dev/null +++ b/Docker/jenkins/Jenkins2/install-python3.9.sh @@ -0,0 +1,7 @@ +wget https://www.python.org/ftp/python/3.9.19/Python-3.9.19.tar.xz +tar xf Python-3.9.19.tar.xz +rm Python-3.9.19.tar.xz +cd Python-3.9.19 +./configure +make +make altinstall diff --git a/doc/dbbackup.md b/doc/dbbackup.md new file mode 100644 index 0000000000..9e21f2bde3 --- /dev/null +++ b/doc/dbbackup.md @@ -0,0 +1,52 @@ +# TL;DR + +This script facilitates the management of database backup and restore within the Gen3 environment. It can establish policies, service accounts, roles, and S3 buckets. Depending on the command provided, it can initiate a database dump, perform a restore, migrate databases to a new RDS instance on Aurora, or clone databases to an RDS Aurora instance. + +## Usage + +```sh +gen3 dbbackup [dump|restore|va-dump|create-sa|migrate-to-aurora|copy-to-aurora] +``` + +### Commands + +#### dump + +Initiates a database dump and pushes it to an S3 bucket, creating the essential AWS resources if they are absent. The dump operation is intended to be executed from the namespace/commons that requires the backup. + +```sh +gen3 dbbackup dump +``` + +#### restore + +Initiates a database restore from an S3 bucket, creating the essential AWS resources if they are absent. The restore operation is meant to be executed in the target namespace where the backup needs to be restored. + +```sh +gen3 dbbackup restore +``` + +#### create-sa + +Creates the necessary service account and roles for DB copy. + +```sh +gen3 dbbackup create-sa +``` + +#### migrate-to-aurora + +Triggers a service account creation and a job to migrate a Gen3 commons to an AWS RDS Aurora instance. + +```sh +gen3 dbbackup migrate-to-aurora +``` + +#### copy-to-aurora + +Triggers a service account creation and a job to copy the databases Indexd, Sheepdog & Metadata to new databases within an RDS Aurora cluster from another namespace in the same RDS cluster. The source namespace must be provided, and the job should be run at the destination, not at the source. 
+ +```sh +gen3 dbbackup copy-to-aurora +``` + diff --git a/files/openvpn_management_scripts/install_ovpn.sh b/files/openvpn_management_scripts/install_ovpn.sh index 4250d2ca2d..180d0274c5 100644 --- a/files/openvpn_management_scripts/install_ovpn.sh +++ b/files/openvpn_management_scripts/install_ovpn.sh @@ -17,7 +17,7 @@ COUNTRY="US" STATE="IL" CITY="Chicago" ORG="CDIS" -EMAIL='support\@datacommons.io' +EMAIL='support\@gen3.org' KEY_EXPIRE=365 diff --git a/files/scripts/config-update.sh b/files/scripts/config-update.sh new file mode 100644 index 0000000000..55938d4920 --- /dev/null +++ b/files/scripts/config-update.sh @@ -0,0 +1,298 @@ +#!/bin/bash + +# Script Name: config-update.sh +# Description: This script updates the gen3 config files for various services based on information +# provided in a migration file migration.txt. It updates JSON configuration files and other related files +# with new database host, username, and database name. The script also verifies the updates +# to ensure they are applied correctly. + +# Ensure the GEN3_HOME variable is set to the correct path +if [[ -z "$GEN3_HOME" ]]; then + echo "GEN3_HOME is not set. Please set it to the path of your Gen3 installation." + exit 1 +fi + +# Check if jq is installed +if ! command -v jq &> /dev/null; then + echo "jq could not be found. Please install jq to run this script." + exit 1 +fi + +source "${GEN3_HOME}/gen3/lib/utils.sh" +gen3_load "gen3/lib/kube-setup-init" + +# Backup the $HOME/Gen3Secrets directory +backup_dir="$HOME/Gen3Secrets-$(date +%Y%m%d%H%M%S)" +cp -r "$HOME/Gen3Secrets" "$backup_dir" +echo "Backup of Gen3Secrets created at $backup_dir" + +# Function to update JSON file +update_json_config() { + local file_path=$1 + local service=$2 + local db_host=$3 + local db_username=$4 + local db_database=$5 + + echo "Updating JSON config for service: $service" + echo "File path: $file_path" + echo "db_host: $db_host" + echo "db_username: $db_username" + echo "db_database: $db_database" + + if [[ -f $file_path ]]; then + local tmp_file + tmp_file=$(mktemp) + + if [[ $service == "fence" || $service == "userapi" ]]; then + jq --arg db_host "$db_host" --arg db_username "$db_username" --arg db_database "$db_database" \ + '(.fence.db_host = $db_host) | (.fence.db_username = $db_username) | (.fence.db_database = $db_database) | + (.fence.fence_database = $db_database) | + (.userapi.db_host = $db_host) | (.userapi.db_username = $db_username) | (.userapi.db_database = $db_database) | + (.userapi.fence_database = $db_database) | + (.sheepdog.fence_host = $db_host) | (.sheepdog.fence_username = $db_username) | (.sheepdog.fence_database = $db_database) | + (.gdcapi.fence_host = $db_host) | (.gdcapi.fence_username = $db_username) | (.gdcapi.fence_database = $db_database) | + (.peregrine.fence_host = $db_host) | (.peregrine.fence_username = $db_username) | (.peregrine.fence_database = $db_database)' \ + "$file_path" > "$tmp_file" && mv "$tmp_file" "$file_path" + + # Verify the update + local updated_host updated_username updated_database + updated_host=$(jq -r '.fence.db_host' "$file_path") + updated_username=$(jq -r '.fence.db_username' "$file_path") + updated_database=$(jq -r '.fence.db_database' "$file_path") + if [[ "$updated_host" == "$db_host" && "$updated_username" == "$db_username" && "$updated_database" == "$db_database" ]]; then + gen3_log_info "Updated JSON config for service: $service successfully." + else + gen3_log_err "Failed to update JSON config for service: $service." 
+ fi + + elif [[ $service == "sheepdog" || $service == "gdcapi" ]]; then + jq --arg db_host "$db_host" --arg db_username "$db_username" --arg db_database "$db_database" \ + '(.sheepdog.db_host = $db_host) | (.sheepdog.db_username = $db_username) | (.sheepdog.db_database = $db_database) | + (.gdcapi.db_host = $db_host) | (.gdcapi.db_username = $db_username) | (.gdcapi.db_database = $db_database)' \ + "$file_path" > "$tmp_file" && mv "$tmp_file" "$file_path" + + # Verify the update + local updated_host updated_username updated_database + updated_host=$(jq -r '.sheepdog.db_host' "$file_path") + updated_username=$(jq -r '.sheepdog.db_username' "$file_path") + updated_database=$(jq -r '.sheepdog.db_database' "$file_path") + if [[ "$updated_host" == "$db_host" && "$updated_username" == "$db_username" && "$updated_database" == "$db_database" ]]; then + gen3_log_info "Updated JSON config for service: $service successfully." + else + gen3_log_err "Failed to update JSON config for service: $service." + fi + + elif [[ $service == "indexd" ]]; then + jq --arg db_host "$db_host" --arg db_username "$db_username" --arg db_database "$db_database" \ + '(.indexd.db_host = $db_host) | (.indexd.db_username = $db_username) | (.indexd.db_database = $db_database)' \ + "$file_path" > "$tmp_file" && mv "$tmp_file" "$file_path" + + # Verify the update + local updated_host updated_username updated_database + updated_host=$(jq -r '.indexd.db_host' "$file_path") + updated_username=$(jq -r '.indexd.db_username' "$file_path") + updated_database=$(jq -r '.indexd.db_database' "$file_path") + if [[ "$updated_host" == "$db_host" && "$updated_username" == "$db_username" && "$updated_database" == "$db_database" ]]; then + gen3_log_info "Updated JSON config for service: $service successfully." + else + gen3_log_err "Failed to update JSON config for service: $service." + fi + + elif [[ $service == "peregrine" ]]; then + jq --arg db_host "$db_host" --arg db_username "$db_username" --arg db_database "$db_database" \ + '(.peregrine.db_host = $db_host) | (.peregrine.db_username = $db_username) | (.peregrine.db_database = $db_database)' \ + "$file_path" > "$tmp_file" && mv "$tmp_file" "$file_path" + + # Verify the update + local updated_host updated_username updated_database + updated_host=$(jq -r '.peregrine.db_host' "$file_path") + updated_username=$(jq -r '.peregrine.db_username' "$file_path") + updated_database=$(jq -r '.peregrine.db_database' "$file_path") + if [[ "$updated_host" == "$db_host" && "$updated_username" == "$db_username" && "$updated_database" == "$db_database" ]]; then + gen3_log_info "Updated JSON config for service: $service successfully." + else + gen3_log_err "Failed to update JSON config for service: $service." + fi + + else + jq --arg db_host "$db_host" --arg db_username "$db_username" --arg db_database "$db_database" \ + '(.db_host = $db_host) | (.db_username = $db_username) | (.db_database = $db_database)' \ + "$file_path" > "$tmp_file" && mv "$tmp_file" "$file_path" + + # Verify the update + local updated_host updated_username updated_database + updated_host=$(jq -r '.db_host' "$file_path") + updated_username=$(jq -r '.db_username' "$file_path") + updated_database=$(jq -r '.db_database' "$file_path") + if [[ "$updated_host" == "$db_host" && "$updated_username" == "$db_username" && "$updated_database" == "$db_database" ]]; then + gen3_log_info "Updated JSON config for service: $service successfully." + else + gen3_log_err "Failed to update JSON config for service: $service." 
+ fi + fi + else + echo "File $file_path does not exist." + fi +} + +# Function to update other files +update_other_files() { + local file_path=$1 + local db_host=$2 + local db_username=$3 + local db_database=$4 + + echo "Updating other files at $file_path" + echo "db_host: $db_host" + echo "db_username: $db_username" + echo "db_database: $db_database" + + if [[ -f $file_path ]]; then + if [[ "$file_path" == *".env" ]]; then + sed -i "s|DB_HOST=.*|DB_HOST=$db_host|" "$file_path" + sed -i "s|DB_USER=.*|DB_USER=$db_username|" "$file_path" + sed -i "s|DB_DATABASE=.*|DB_DATABASE=$db_database|" "$file_path" + + # Verify the update + local updated_host updated_username updated_database + updated_host=$(grep 'DB_HOST=' "$file_path" | cut -d'=' -f2) + updated_username=$(grep 'DB_USER=' "$file_path" | cut -d'=' -f2) + updated_database=$(grep 'DB_DATABASE=' "$file_path" | cut -d'=' -f2) + else + sed -i "s|DB_HOST:.*|DB_HOST: $db_host|" "$file_path" + sed -i "s|DB_USER:.*|DB_USER: $db_username|" "$file_path" + sed -i "s|DB_DATABASE:.*|DB_DATABASE: $db_database|" "$file_path" + + # Verify the update + local updated_host updated_username updated_database + updated_host=$(grep 'DB_HOST:' "$file_path" | cut -d':' -f2 | xargs) + updated_username=$(grep 'DB_USER:' "$file_path" | cut -d':' -f2 | xargs) + updated_database=$(grep 'DB_DATABASE:' "$file_path" | cut -d':' -f2 | xargs) + fi + + if [[ "$updated_host" == "$db_host" && "$updated_username" == "$db_username" && "$updated_database" == "$db_database" ]]; then + gen3_log_info "Updated file at $file_path successfully." + else + gen3_log_err "Failed to update file at $file_path." + fi + else + echo "File $file_path does not exist." + fi +} + +# Function to update fence-config.yaml +update_fence_config() { + local creds_json_path="$HOME/Gen3Secrets/creds.json" + local file_path=$1 + local db_host=$2 + local db_username=$3 + local db_database=$4 + + echo "Updating fence-config.yaml at $file_path" + echo "db_host: $db_host" + echo "db_username: $db_username" + echo "db_database: $db_database" + + if [[ -f $file_path ]]; then + local current_password + current_password=$(jq -r '.fence.db_password' "$creds_json_path") + + sed -i "s|DB: postgresql://.*:.*@.*:5432/.*|DB: postgresql://$db_username:$current_password@$db_host:5432/$db_database|" "$file_path" + + # Verify the update + local updated_entry + updated_entry=$(grep 'DB: postgresql://' "$file_path") + if [[ "$updated_entry" == *"$db_host"* && "$updated_entry" == *"$db_username"* && "$updated_entry" == *"$db_database"* ]]; then + gen3_log_info "Updated fence-config.yaml at $file_path successfully." + else + gen3_log_err "Failed to update fence-config.yaml at $file_path." + fi + else + echo "File $file_path does not exist." 
+ fi +} + +# Function to parse the migration file and apply updates +parse_and_update() { + local migration_file=$1 + local creds_json_path="$HOME/Gen3Secrets/creds.json" + local namespace + namespace=$(gen3 db namespace) + local new_db_host + new_db_host=$(grep "INFO" "$migration_file" | awk '{print $8}') + + gen3_log_info "New db_host identified: $new_db_host" + while read -r line; do + if [[ $line == Source_Database* || $line == User* ]]; then + echo "Processing line: $line" + + IFS=' ' read -r -a parts <<< "$line" + local db_host="$new_db_host" + local db_username + local db_database + + if [[ $line == Source_Database* ]]; then + db_username="${parts[9]}" + echo "db_username='${parts[9]}'" + db_database="${parts[7]}" + echo "db_database='${parts[7]}'" + elif [[ $line == User* ]]; then + db_username="${parts[1]}" + echo "db_username='${parts[1]}'" + db_database="${parts[7]}" + echo "db_database='${parts[7]}'" + else + continue + fi + + # Extract the service name from db_username + if [[ $db_username =~ ^([a-zA-Z]+)_user_ ]]; then + local service="${BASH_REMATCH[1]}" + else + echo "Skipping line: $line due to improper db_username format" + continue + fi + + gen3_log_info "Updating service: $service with db_username: $db_username and db_database: $db_database" + + # Update specific config files for each service + case $service in + arborist) + update_json_config "$HOME/Gen3Secrets/g3auto/arborist/dbcreds.json" "$service" "$db_host" "$db_username" "$db_database" + ;; + audit) + update_json_config "$HOME/Gen3Secrets/g3auto/audit/dbcreds.json" "$service" "$db_host" "$db_username" "$db_database" + update_other_files "$HOME/Gen3Secrets/g3auto/audit/audit-service-config.yaml" "$db_host" "$db_username" "$db_database" + ;; + metadata) + update_json_config "$HOME/Gen3Secrets/g3auto/metadata/dbcreds.json" "$service" "$db_host" "$db_username" "$db_database" + update_other_files "$HOME/Gen3Secrets/g3auto/metadata/metadata.env" "$db_host" "$db_username" "$db_database" + ;; + ohdsi) + update_json_config "$HOME/Gen3Secrets/g3auto/ohdsi/dbcreds.json" "$service" "$db_host" "$db_username" "$db_database" + ;; + orthanc) + update_json_config "$HOME/Gen3Secrets/g3auto/orthanc/dbcreds.json" "$service" "$db_host" "$db_username" "$db_database" + ;; + requestor) + update_json_config "$HOME/Gen3Secrets/g3auto/requestor/dbcreds.json" "$service" "$db_host" "$db_username" "$db_database" + update_other_files "$HOME/Gen3Secrets/g3auto/requestor/requestor-config.yaml" "$db_host" "$db_username" "$db_database" + ;; + wts) + update_json_config "$HOME/Gen3Secrets/g3auto/wts/dbcreds.json" "$service" "$db_host" "$db_username" "$db_database" + ;; + fence) + update_fence_config "$HOME/Gen3Secrets/apis_configs/fence-config.yaml" "$db_host" "$db_username" "$db_database" + update_json_config "$creds_json_path" "$service" "$db_host" "$db_username" "$db_database" + ;; + sheepdog | peregrine | indexd) + update_json_config "$creds_json_path" "$service" "$db_host" "$db_username" "$db_database" + ;; + esac + fi + done < "$migration_file" +} + +# Run the script +parse_and_update "migration.txt" diff --git a/files/scripts/ecr-access-job.md b/files/scripts/ecr-access-job.md index 9659b186b9..5f8dff7670 100644 --- a/files/scripts/ecr-access-job.md +++ b/files/scripts/ecr-access-job.md @@ -59,7 +59,7 @@ Trust policy (allows Acct2): } ``` -- Policy in the account (Acct2) that contains the DynamoDB table (created automatically by `kube-setup-ecr-access-job.sh`): +- Policy in the account (Acct2) that contains the DynamoDB table (created 
automatically by `kube-setup-ecr-access-cronjob.sh`): ``` { "Version": "2012-10-17", diff --git a/files/scripts/psql-fips-fix.sh b/files/scripts/psql-fips-fix.sh index fcbb6e20cc..8cb0ed0494 100644 --- a/files/scripts/psql-fips-fix.sh +++ b/files/scripts/psql-fips-fix.sh @@ -16,7 +16,7 @@ for name in indexd fence sheepdog peregrine; do update_pass $name $username $password done -for name in wts metadata gearbox audit arborist access-backend argo_db atlas argo thor; do +for name in wts metadata gearbox audit arborist access-backend argo_db requestor atlas ohdsi argo thor; do if [[ ! -z $(gen3 secrets decode $name-g3auto dbcreds.json) ]]; then username=$(gen3 secrets decode $name-g3auto dbcreds.json | jq -r .db_username) password=$(gen3 secrets decode $name-g3auto dbcreds.json | jq -r .db_password) diff --git a/files/squid_whitelist/web_whitelist b/files/squid_whitelist/web_whitelist index e32c7f483a..b0759ba32e 100644 --- a/files/squid_whitelist/web_whitelist +++ b/files/squid_whitelist/web_whitelist @@ -14,6 +14,7 @@ clinicaltrials.gov charts.bitnami.com ctds-planx.atlassian.net data.cityofchicago.org +data.stage.qdr.org dataguids.org api.login.yahoo.com apt.kubernetes.io diff --git a/flavors/vpn_nlb_central/vpnvm.sh b/flavors/vpn_nlb_central/vpnvm.sh index 879488eabf..5489622314 100644 --- a/flavors/vpn_nlb_central/vpnvm.sh +++ b/flavors/vpn_nlb_central/vpnvm.sh @@ -102,7 +102,7 @@ export FQDN="$SERVERNAME.planx-pla.net"; export cloud="$CLOUDNAME"; export SERVE #export FQDN="raryatestvpnv1.planx-pla.net"; export cloud="planxvpn1"; export SERVER_PEM="/root/server.pem"; bash /root/openvpn_management_scripts/install_ovpn.sh -#export FQDN="raryatestvpnv1.planx-pla.net"; export cloud="planxvpn"; export EMAIL="support@datacommons.io"; export SERVER_PEM="/root/server.pem"; export VPN_SUBNET="192.168.192.0/20"; export VM_SUBNET="10.128.0.0/20"; bash install_ovpn.sh +#export FQDN="raryatestvpnv1.planx-pla.net"; export cloud="planxvpn"; export EMAIL="support@gen3.org"; export SERVER_PEM="/root/server.pem"; export VPN_SUBNET="192.168.192.0/20"; export VM_SUBNET="10.128.0.0/20"; bash install_ovpn.sh ### need to install lighttpd @@ -174,4 +174,4 @@ sudo chmod 755 /etc/init.d/awslogs sudo systemctl enable awslogs sudo systemctl restart awslogs -echo "Install is completed" \ No newline at end of file +echo "Install is completed" diff --git a/flavors/vpn_nlb_central/vpnvm_new.sh b/flavors/vpn_nlb_central/vpnvm_new.sh index 00f8306fc9..6276726947 100644 --- a/flavors/vpn_nlb_central/vpnvm_new.sh +++ b/flavors/vpn_nlb_central/vpnvm_new.sh @@ -32,7 +32,7 @@ COUNTRY="US" STATE="IL" CITY="Chicago" ORG="CTDS" -EMAIL='support\@datacommons.io' +EMAIL='support\@gen3.org' KEY_EXPIRE=365 #OpenVPN diff --git a/flavors/vpn_nlb_central/vpnvm_ubuntu18.sh b/flavors/vpn_nlb_central/vpnvm_ubuntu18.sh index af5efdfaf3..e2f8210ea8 100644 --- a/flavors/vpn_nlb_central/vpnvm_ubuntu18.sh +++ b/flavors/vpn_nlb_central/vpnvm_ubuntu18.sh @@ -28,7 +28,7 @@ COUNTRY="US" STATE="IL" CITY="Chicago" ORG="CTDS" -EMAIL='support\@datacommons.io' +EMAIL='support\@gen3.org' KEY_EXPIRE=365 #OpenVPN diff --git a/gen3/bin/awsrole.sh b/gen3/bin/awsrole.sh index dd19ea7a48..b9e9f95149 100644 --- a/gen3/bin/awsrole.sh +++ b/gen3/bin/awsrole.sh @@ -14,7 +14,6 @@ gen3_load "gen3/gen3setup" gen3_awsrole_help() { gen3 help awsrole } - # # Assume-role policy - allows SA's to assume role. 
# NOTE: service-account to role is 1 to 1 @@ -71,7 +70,8 @@ function gen3_awsrole_ar_policy() { "${issuer_url}:aud": "sts.amazonaws.com", "${issuer_url}:sub": [ "system:serviceaccount:*:${serviceAccount}", - "system:serviceaccount:argo:default" + "system:serviceaccount:argo:default", + "system:serviceaccount:argo:argo-argo-workflows-server" ] } } diff --git a/gen3/bin/dbbackup.sh b/gen3/bin/dbbackup.sh index eb9611a907..df0139d3bb 100644 --- a/gen3/bin/dbbackup.sh +++ b/gen3/bin/dbbackup.sh @@ -1,37 +1,32 @@ #!/bin/bash #################################################################################################### -# Script: dbdump.sh +# Script: dbbackup.sh # # Description: # This script facilitates the management of database backups within the gen3 environment. It is -# equipped to establish policies, service accounts, roles, and S3 buckets. Depending on the -# command provided, it will either initiate a database dump or perform a restore. +# equipped to establish policies, service accounts, roles, and S3 buckets. Depending on the +# command provided, it will either initiate a database dump, perform a restore, migrate to Aurora, +# or copy to Aurora. # # Usage: -# gen3 dbbackup [dump|restore] +# gen3 dbbackup [dump|restore|va-dump|create-sa|migrate-to-aurora|copy-to-aurora|encrypt|setup-cron ] # -# dump - Initiates a database dump, creating the essential AWS resources if they are absent. -# The dump operation is intended to be executed from the namespace/commons that requires -# the backup. -# restore - Initiates a database restore, creating the essential AWS resources if they are absent. -# The restore operation is meant to be executed in the target namespace, where the backup -# needs to be restored. -# -# Notes: -# This script extensively utilizes the AWS CLI and the gen3 CLI. Proper functioning demands a -# configured gen3 environment and the availability of the necessary CLI tools. +# dump - Initiates a database dump, creating the essential AWS resources if they are absent. +# The dump operation is intended to be executed from the namespace/commons that requires +# the backup. +# restore - Initiates a database restore, creating the essential AWS resources if they are absent. +# The restore operation is meant to be executed in the target namespace, where the backup +# needs to be restored. +# va-dump - Runs a va-testing DB dump. +# create-sa - Creates the necessary service account and roles for DB copy. +# migrate-to-aurora - Triggers a service account creation and a job to migrate a Gen3 commons to an AWS RDS Aurora instance. +# copy-to-aurora - Triggers a service account creation and a job to copy the databases Indexd, Sheepdog & Metadata to new databases within an RDS Aurora cluster. The source_namespace must be provided. The job should be run at the destination, not at the source. +# encrypt - Perform encrypted backup. +# setup-cron - Set up a cronjob for encrypted backup. 
# #################################################################################################### -# Exit on error -#set -e - -# Print commands before executing -#set -x - -#trap 'echo "Error at Line $LINENO"' ERR - source "${GEN3_HOME}/gen3/lib/utils.sh" gen3_load "gen3/lib/kube-setup-init" @@ -40,21 +35,36 @@ account_id=$(aws sts get-caller-identity --query "Account" --output text) vpc_name="$(gen3 api environment)" namespace="$(gen3 db namespace)" sa_name="dbbackup-sa" -bucket_name="gen3-db-backups-${account_id}" +bucket_name_encrypted="gen3-db-backups-encrypted-${account_id}" +kms_key_alias="alias/gen3-db-backups-kms-key" + +cluster_arn=$(kubectl config current-context) +eks_cluster=$(echo "$cluster_arn" | awk -F'/' '{print $2}') -gen3_log_info "policy_name: $policy_name" gen3_log_info "account_id: $account_id" gen3_log_info "vpc_name: $vpc_name" gen3_log_info "namespace: $namespace" gen3_log_info "sa_name: $sa_name" -gen3_log_info "bucket_name: $bucket_name" +gen3_log_info "bucket_name_encrypted: $bucket_name_encrypted" +gen3_log_info "kms_key_alias: $kms_key_alias" +gen3_log_info "eks_cluster: $eks_cluster" +# Create or get the KMS key +create_or_get_kms_key() { + kms_key_id=$(aws kms list-aliases --query "Aliases[?AliasName=='$kms_key_alias'].TargetKeyId" --output text) + if [ -z "$kms_key_id" ]; then + gen3_log_info "Creating new KMS key with alias $kms_key_alias" + kms_key_id=$(aws kms create-key --query "KeyMetadata.KeyId" --output text) + aws kms create-alias --alias-name $kms_key_alias --target-key-id $kms_key_id + else + gen3_log_info "KMS key with alias $kms_key_alias already exists" + fi + kms_key_arn=$(aws kms describe-key --key-id $kms_key_id --query "KeyMetadata.Arn" --output text) +} # Create an S3 access policy if it doesn't exist create_policy() { - # Check if policy exists if ! 
aws iam list-policies --query "Policies[?PolicyName == '$policy_name'] | [0].Arn" --output text | grep -q "arn:aws:iam"; then - # Create the S3 access policy - policy document access_policy=$(cat <<-EOM { "Version": "2012-10-17", @@ -69,15 +79,14 @@ create_policy() { "s3:DeleteObject" ], "Resource": [ - "arn:aws:s3:::gen3-db-backups-*" + "arn:aws:s3:::gen3-db-backups-*", + "arn:aws:s3:::gen3-db-backups-encrypted-*" ] } ] } EOM ) - - # Create the S3 access policy from the policy document policy_arn=$(aws iam create-policy --policy-name "$policy_name" --policy-document "$access_policy" --query "Policy.Arn" --output text) gen3_log_info "policy_arn: $policy_arn" else @@ -87,20 +96,12 @@ EOM fi } - # Create or update the Service Account and its corresponding IAM Role create_service_account_and_role() { - cluster_arn=$(kubectl config current-context) - eks_cluster=$(echo "$cluster_arn" | awk -F'/' '{print $2}') oidc_url=$(aws eks describe-cluster --name $eks_cluster --query 'cluster.identity.oidc.issuer' --output text | sed -e 's/^https:\/\///') role_name="${vpc_name}-${namespace}-${sa_name}-role" role_arn="arn:aws:iam::${account_id}:role/${role_name}" local trust_policy=$(mktemp -p "$XDG_RUNTIME_DIR" "tmp_policy.XXXXXX") - gen3_log_info "trust_policy: $trust_policy" - gen3_log_info "eks_cluster: $eks_cluster" - gen3_log_info "oidc_url: $oidc_url" - gen3_log_info "role_name: $role_name" - cat > ${trust_policy} <&1; then - gen3_log_info "Updating existing role: $role_name" aws iam update-assume-role-policy --role-name $role_name --policy-document "file://$trust_policy" else - gen3_log_info "Creating new role: $role_name" aws iam create-role --role-name $role_name --assume-role-policy-document "file://$trust_policy" fi @@ -144,30 +138,34 @@ EOF if ! kubectl get serviceaccount -n $namespace $sa_name 2>&1; then kubectl create serviceaccount -n $namespace $sa_name fi - # Annotate the KSA with the IAM role ARN - gen3_log_info "Annotating Service Account with IAM role ARN" + # Annotate the KSA with the IAM role ARN kubectl annotate serviceaccount -n ${namespace} ${sa_name} eks.amazonaws.com/role-arn=${role_arn} --overwrite - } -# Create an S3 bucket if it doesn't exist +# Create an S3 bucket with SSE-KMS if it doesn't exist create_s3_bucket() { + local bucket_name=$1 + local kms_key_arn=$2 # Check if bucket already exists if aws s3 ls "s3://$bucket_name" 2>&1 | grep -q 'NoSuchBucket'; then - gen3_log_info "Bucket does not exist, creating..." aws s3 mb "s3://$bucket_name" - else - gen3_log_info "Bucket $bucket_name already exists, skipping bucket creation." + # Enable SSE-KMS encryption on the bucket + aws s3api put-bucket-encryption --bucket $bucket_name --server-side-encryption-configuration '{ + "Rules": [{ + "ApplyServerSideEncryptionByDefault": { + "SSEAlgorithm": "aws:kms", + "KMSMasterKeyID": "'"$kms_key_arn"'" + } + }] + }' fi } - # Function to trigger the database backup job db_dump() { gen3 job run psql-db-prep-dump } - # Function to trigger the database backup restore job db_restore() { gen3 job run psql-db-prep-restore @@ -177,36 +175,214 @@ va_testing_db_dump() { gen3 job run psql-db-dump-va-testing } +# Function to create the psql-db-copy service account and roles +create_db_copy_service_account() { + cat <&1; then + cat <&1; then + cat <" + exit 1 + fi + gen3_log_info "Copying databases within Aurora..." + copy_to_aurora "$2" + ;; + encrypt) + gen3_log_info "Performing encrypted backup..." 
+ check_prerequisites + encrypt_backup + ;; + setup-cron) + gen3_log_info "Setting up cronjob for encrypted backup..." + check_prerequisites + setup_cronjob + ;; *) - echo "Invalid command. Usage: gen3 dbbackup [dump|restore|va-dump]" + echo "Invalid command. Usage: gen3 dbbackup [dump|restore|va-dump|create-sa|migrate-to-aurora|copy-to-aurora|encrypt|setup-cron ]" return 1 ;; esac } -main "$1" +main "$@" diff --git a/gen3/bin/jupyter.sh b/gen3/bin/jupyter.sh index 169ec59dc0..b5c1c5390b 100644 --- a/gen3/bin/jupyter.sh +++ b/gen3/bin/jupyter.sh @@ -210,7 +210,7 @@ gen3_jupyter_idle_pods() { fi # Get the list of idle ambassador clusters from prometheus - local promQuery="sum by (envoy_cluster_name) (rate(envoy_cluster_upstream_rq_total{kubernetes_namespace=\"${namespace}\"}[${ttl}]))" + local promQuery="sum by (envoy_cluster_name) (rate(envoy_cluster_upstream_rq_total{namespace=\"${namespace}\"}[${ttl}]))" local tempClusterFile="$(mktemp "$XDG_RUNTIME_DIR/idle_apps.json_XXXXXX")" gen3 prometheus query "$promQuery" "${tokenKey#none}" | jq -e -r '.data.result[] | { "cluster": .metric.envoy_cluster_name, "rate": .value[1] } | select(.rate == "0")' | tee "$tempClusterFile" 1>&2 if [[ $? != 0 ]]; then @@ -245,7 +245,7 @@ gen3_jupyter_idle_pods() { current_time=$(date +%s) age=$((current_time - pod_creation)) - # potential workspaces to be reaped for inactivity must be at least 60 minutes old + # potential workspaces to be reaped for inactivity must be at least 60 minutes old if ((age >= 3600)); then gen3_log_info "try to kill pod $name in $jnamespace" g3kubectl delete pod --namespace "$jnamespace" "$name" 1>&2 diff --git a/gen3/bin/kube-setup-ambassador.sh b/gen3/bin/kube-setup-ambassador.sh index 5f92af5cc6..06ae1ee569 100644 --- a/gen3/bin/kube-setup-ambassador.sh +++ b/gen3/bin/kube-setup-ambassador.sh @@ -68,11 +68,9 @@ case "$command" in ;; "hatchery") deploy_hatchery_proxy "$@" - gen3 kube-setup-prometheus prometheus ;; *) deploy_hatchery_proxy "$@" deploy_api_gateway "$@" - gen3 kube-setup-prometheus prometheus ;; esac \ No newline at end of file diff --git a/gen3/bin/kube-setup-argo.sh b/gen3/bin/kube-setup-argo.sh index 88af5f3282..1a25a98c82 100644 --- a/gen3/bin/kube-setup-argo.sh +++ b/gen3/bin/kube-setup-argo.sh @@ -188,11 +188,13 @@ EOF roleArn=$(aws iam get-role --role-name "${roleName}" --query 'Role.Arn' --output text) gen3_log_info "Role annotate" g3kubectl annotate serviceaccount default eks.amazonaws.com/role-arn=${roleArn} --overwrite -n $argo_namespace + g3kubectl annotate serviceaccount argo-argo-workflows-server eks.amazonaws.com/role-arn=${roleArn} --overwrite -n $argo_namespace g3kubectl annotate serviceaccount argo eks.amazonaws.com/role-arn=${roleArn} --overwrite -n $nameSpace else gen3 awsrole create $roleName argo $nameSpace -all_namespaces roleArn=$(aws iam get-role --role-name "${roleName}" --query 'Role.Arn' --output text) g3kubectl annotate serviceaccount default eks.amazonaws.com/role-arn=${roleArn} -n $argo_namespace + g3kubectl annotate serviceaccount argo-argo-workflows-server eks.amazonaws.com/role-arn=${roleArn} -n $argo_namespace fi # Grant access within the current namespace to the argo SA in the current namespace @@ -202,6 +204,18 @@ EOF aws iam put-role-policy --role-name ${roleName} --policy-name ${internalBucketPolicy} --policy-document file://$internalBucketPolicyFile || true fi + # Create a secret for the slack webhook + alarm_webhook=$(g3kubectl get cm global -o yaml | yq .data.slack_alarm_webhook | tr -d '"') + + if [ -z "$alarm_webhook" ]; then 
+ gen3_log_err "Please set a slack_alarm_webhook in the 'global' configmap. This is needed to alert for failed workflows." + exit 1 + fi + + g3kubectl -n argo delete secret slack-webhook-secret + g3kubectl -n argo create secret generic "slack-webhook-secret" --from-literal=SLACK_WEBHOOK_URL=$alarm_webhook + + ## if new bucket then do the following # Get the aws keys from secret # Create and attach lifecycle policy diff --git a/gen3/bin/kube-setup-aws-es-proxy.sh b/gen3/bin/kube-setup-aws-es-proxy.sh index 986c5bf05e..3feee143a5 100644 --- a/gen3/bin/kube-setup-aws-es-proxy.sh +++ b/gen3/bin/kube-setup-aws-es-proxy.sh @@ -2,7 +2,7 @@ # # Deploy aws-es-proxy into existing commons # https://github.com/abutaha/aws-es-proxy -# +# source "${GEN3_HOME}/gen3/lib/utils.sh" @@ -11,17 +11,33 @@ gen3_load "gen3/lib/kube-setup-init" # Deploy Datadog with argocd if flag is set in the manifest path manifestPath=$(g3k_manifest_path) es7="$(jq -r ".[\"global\"][\"es7\"]" < "$manifestPath" | tr '[:upper:]' '[:lower:]')" +esDomain="$(jq -r ".[\"global\"][\"esDomain\"]" < "$manifestPath" | tr '[:upper:]' '[:lower:]')" [[ -z "$GEN3_ROLL_ALL" ]] && gen3 kube-setup-secrets if g3kubectl get secrets/aws-es-proxy > /dev/null 2>&1; then envname="$(gen3 api environment)" - - if [ "$es7" = true ]; then + if [ "$esDomain" != "null" ]; then + if ES_ENDPOINT="$(aws es describe-elasticsearch-domains --domain-names ${esDomain} --query "DomainStatusList[*].Endpoints" --output text)" \ + && [[ -n "${ES_ENDPOINT}" && -n "${esDomain}" ]]; then + gen3 roll aws-es-proxy GEN3_ES_ENDPOINT "${ES_ENDPOINT}" + g3kubectl apply -f "${GEN3_HOME}/kube/services/aws-es-proxy/aws-es-proxy-priority-class.yaml" + g3kubectl apply -f "${GEN3_HOME}/kube/services/aws-es-proxy/aws-es-proxy-service.yaml" + gen3_log_info "kube-setup-aws-es-proxy" "The aws-es-proxy service has been deployed onto the k8s cluster." + else + # + # probably running in jenkins or job environment + # try to make sure network policy labels are up to date + # + gen3_log_info "kube-setup-aws-es-proxy" "Not deploying aws-es-proxy, no endpoint to hook it up." + gen3 kube-setup-networkpolicy service aws-es-proxy + g3kubectl patch deployment "aws-es-proxy-deployment" -p '{"spec":{"template":{"metadata":{"labels":{"netvpc":"yes"}}}}}' || true + fi + elif [ "$es7" = true ]; then if ES_ENDPOINT="$(aws es describe-elasticsearch-domains --domain-names ${envname}-gen3-metadata-2 --query "DomainStatusList[*].Endpoints" --output text)" \ && [[ -n "${ES_ENDPOINT}" && -n "${envname}" ]]; then gen3 roll aws-es-proxy GEN3_ES_ENDPOINT "${ES_ENDPOINT}" - g3kubectl apply -f "${GEN3_HOME}/kube/services/aws-es-proxy/aws-es-proxy-priority-class.yaml" + g3kubectl apply -f "${GEN3_HOME}/kube/services/aws-es-proxy/aws-es-proxy-priority-class.yaml" g3kubectl apply -f "${GEN3_HOME}/kube/services/aws-es-proxy/aws-es-proxy-service.yaml" gen3_log_info "kube-setup-aws-es-proxy" "The aws-es-proxy service has been deployed onto the k8s cluster." 
else diff --git a/gen3/bin/kube-setup-cluster-level-resources.sh b/gen3/bin/kube-setup-cluster-level-resources.sh new file mode 100644 index 0000000000..f4349398f6 --- /dev/null +++ b/gen3/bin/kube-setup-cluster-level-resources.sh @@ -0,0 +1,41 @@ +#!/bin/bash +source "${GEN3_HOME}/gen3/lib/utils.sh" +gen3_load "gen3/gen3setup" + +# Set default value for TARGET_REVISION +TARGET_REVISION="master" + +# Ask for TARGET_REVISION +read -p "Please provide a target revision for the cluster resources chart (default is master): " user_target_revision +# If user input is not empty, use it; otherwise, keep the default +TARGET_REVISION=${user_target_revision:-$TARGET_REVISION} + +# Ask for CLUSTER_NAME (no default value) +read -p "Enter the name of the cluster: " CLUSTER_NAME + +# Check if CLUSTER_NAME is provided +if [ -z "$CLUSTER_NAME" ]; then + echo "Error: CLUSTER_NAME cannot be empty." + exit 1 +fi + +# Create a temporary file +temp_file=$(mktemp) + +# Use sed to replace placeholders in the original file +sed -e "s|TARGET_REVISION|$TARGET_REVISION|g" \ + -e "s|CLUSTER_NAME|$CLUSTER_NAME|g" \ + $GEN3_HOME/kube/services/cluster-level-resources/app.yaml > "$temp_file" + +echo "WARNING: Do you have a folder already set up for this environment in gen3-gitops, in the form of /cluster-values/cluster-values.yaml? If not, this will not work." +echo "" +read -n 1 -s -r -p "Press any key to confirm and continue, or Ctrl+C to cancel..." +echo "" + +# Apply the templated file with kubectl +kubectl apply -f "$temp_file" + +# Clean up the temporary file +rm "$temp_file" + +echo "Application has been applied to the cluster." \ No newline at end of file diff --git a/gen3/bin/kube-setup-hatchery.sh b/gen3/bin/kube-setup-hatchery.sh index dadbbd9307..97365677d3 100644 --- a/gen3/bin/kube-setup-hatchery.sh +++ b/gen3/bin/kube-setup-hatchery.sh @@ -175,6 +175,8 @@ $assumeImageBuilderRolePolicyBlock "Action": [ "batch:DescribeComputeEnvironments", "batch:CreateComputeEnvironment", + "batch:UpdateComputeEnvironment", + "batch:ListJobs", "batch:CreateJobQueue", "batch:TagResource", "iam:ListPolicies", @@ -197,10 +199,28 @@ $assumeImageBuilderRolePolicyBlock "iam:CreateInstanceProfile", "iam:AddRoleToInstanceProfile", "iam:PassRole", - "s3:CreateBucket" + "kms:CreateKey", + "kms:CreateAlias", + "kms:DescribeKey", + "kms:TagResource", + "s3:CreateBucket", + "s3:PutEncryptionConfiguration", + "s3:PutBucketPolicy", + "s3:PutLifecycleConfiguration" ], "Resource": "*" }, + { + "Sid": "CreateSlrForNextflowBatchWorkspaces", + "Effect": "Allow", + "Action": "iam:CreateServiceLinkedRole", + "Resource": "arn:aws:iam::*:role/aws-service-role/batch.amazonaws.com/*", + "Condition": { + "StringLike": { + "iam:AWSServiceName": "batch.amazonaws.com" + } + } + }, { "Sid": "PassRoleForNextflowBatchWorkspaces", "Effect": "Allow", diff --git a/gen3/bin/kube-setup-s3-csi-driver.sh b/gen3/bin/kube-setup-s3-csi-driver.sh new file mode 100644 index 0000000000..c93ccf8dd3 --- /dev/null +++ b/gen3/bin/kube-setup-s3-csi-driver.sh @@ -0,0 +1,202 @@ +#!/bin/bash + +#################################################################################################### +# Script: kube-setup-s3-csi-driver.sh +# +# Description: +# This script sets up the Mountpoint for Amazon S3 CSI driver in an EKS cluster. +# It creates necessary IAM policies and roles. 
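+# A hypothetical invocation for reference (sketch only; the bucket argument is
+# optional and, as set below, defaults to gen3-db-backups-encrypted-<account_id>):
+#   gen3 kube-setup-s3-csi-driver my-encrypted-backup-bucket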
+#
+# Usage:
+# gen3 kube-setup-s3-csi-driver [bucket_name]
+#
+####################################################################################################
+
+source "${GEN3_HOME}/gen3/lib/utils.sh"
+gen3_load "gen3/lib/kube-setup-init"
+
+account_id=$(aws sts get-caller-identity --query "Account" --output text)
+vpc_name="$(gen3 api environment)"
+namespace="$(gen3 db namespace)"
+default_bucket_name_encrypted="gen3-db-backups-encrypted-${account_id}"
+bucket_name=${1:-$default_bucket_name_encrypted}
+
+cluster_arn=$(kubectl config current-context)
+eks_cluster=$(echo "$cluster_arn" | awk -F'/' '{print $2}')
+
+gen3_log_info "account_id: $account_id"
+gen3_log_info "vpc_name: $vpc_name"
+gen3_log_info "namespace: $namespace"
+gen3_log_info "bucket_name: $bucket_name"
+gen3_log_info "eks_cluster: $eks_cluster"
+
+# Create policy for Mountpoint for Amazon S3 CSI driver
+create_s3_csi_policy() {
+ policy_name="AmazonS3CSIDriverPolicy-${eks_cluster}"
+ policy_arn=$(aws iam list-policies --query "Policies[?PolicyName == '$policy_name'].[Arn]" --output text)
+ if [ -z "$policy_arn" ]; then
+ cat <<EOF > /tmp/s3-csi-policy-$$.json
+{
+ "Version": "2012-10-17",
+ "Statement": [
+ {
+ "Sid": "MountpointFullBucketAccess",
+ "Effect": "Allow",
+ "Action": [
+ "s3:ListBucket"
+ ],
+ "Resource": [
+ "arn:aws:s3:::${bucket_name}"
+ ]
+ },
+ {
+ "Sid": "MountpointFullObjectAccess",
+ "Effect": "Allow",
+ "Action": [
+ "s3:GetObject",
+ "s3:PutObject",
+ "s3:AbortMultipartUpload",
+ "s3:DeleteObject"
+ ],
+ "Resource": [
+ "arn:aws:s3:::${bucket_name}/*"
+ ]
+ }
+ ]
+}
+EOF
+ policy_arn=$(aws iam create-policy --policy-name "$policy_name" --policy-document file:///tmp/s3-csi-policy-$$.json --query "Policy.Arn" --output text)
+ rm -f /tmp/s3-csi-policy-$$.json
+ fi
+ gen3_log_info "Created or found policy with ARN: $policy_arn"
+ echo $policy_arn
+}
+
+# Create the trust policy for Mountpoint for Amazon S3 CSI driver
+create_s3_csi_trust_policy() {
+ oidc_url=$(aws eks describe-cluster --name $eks_cluster --query 'cluster.identity.oidc.issuer' --output text | sed -e 's/^https:\/\///')
+ trust_policy_file="/tmp/aws-s3-csi-driver-trust-policy-$$.json"
+ cat <<EOF > ${trust_policy_file}
+{
+ "Version": "2012-10-17",
+ "Statement": [
+ {
+ "Effect": "Allow",
+ "Principal": {
+ "Federated": "arn:aws:iam::${account_id}:oidc-provider/${oidc_url}"
+ },
+ "Action": "sts:AssumeRoleWithWebIdentity",
+ "Condition": {
+ "StringLike": {
+ "${oidc_url}:aud": "sts.amazonaws.com",
+ "${oidc_url}:sub": "system:serviceaccount:*:s3-csi-*"
+ }
+ }
+ }
+ ]
+}
+EOF
+}
+
+# Create the IAM role for Mountpoint for Amazon S3 CSI driver
+create_s3_csi_role() {
+ role_name="AmazonEKS_S3_CSI_DriverRole-${eks_cluster}"
+ if ! aws iam get-role --role-name $role_name 2>/dev/null; then
+ aws iam create-role --role-name $role_name --assume-role-policy-document file:///tmp/aws-s3-csi-driver-trust-policy-$$.json
+ rm -f /tmp/aws-s3-csi-driver-trust-policy-$$.json
+ fi
+ gen3_log_info "Created or found role: $role_name"
+ echo $role_name
+}
+
+# Attach the policies to the IAM role
+attach_s3_csi_policies() {
+ role_name=$1
+ policy_arn=$2
+ eks_policy_name="eks-s3-csi-policy-${eks_cluster}"
+ gen3_log_info "Attaching S3 CSI policy with ARN: $policy_arn to role: $role_name"
+ eks_policy_arn=$(aws iam list-policies --query "Policies[?PolicyName == '$eks_policy_name'].Arn" --output text)
+ if [ -z "$eks_policy_arn" ]; then
+ cat <<EOF > /tmp/eks-s3-csi-policy-$$.json
+{
+ "Version": "2012-10-17",
+ "Statement": [
+ {
+ "Effect": "Allow",
+ "Action": [
+ "s3:ListBucket",
+ "s3:GetObject",
+ "s3:PutObject",
+ "s3:DeleteObject"
+ ],
+ "Resource": [
+ "arn:aws:s3:::${bucket_name}",
+ "arn:aws:s3:::${bucket_name}/*"
+ ]
+ },
+ {
+ "Effect": "Allow",
+ "Action": [
+ "kms:Decrypt",
+ "kms:Encrypt",
+ "kms:GenerateDataKey"
+ ],
+ "Resource": "*"
+ },
+ {
+ "Effect": "Allow",
+ "Action": [
+ "eks:DescribeCluster"
+ ],
+ "Resource": "*"
+ }
+ ]
+}
+EOF
+ eks_policy_arn=$(aws iam create-policy --policy-name "$eks_policy_name" --policy-document file:///tmp/eks-s3-csi-policy-$$.json --query "Policy.Arn" --output text)
+ rm -f /tmp/eks-s3-csi-policy-$$.json
+ fi
+ aws iam attach-role-policy --role-name $role_name --policy-arn $policy_arn
+ aws iam attach-role-policy --role-name $role_name --policy-arn $eks_policy_arn
+}
+
+# Create or update the CSI driver and its resources
+setup_csi_driver() {
+ create_s3_csi_policy
+ policy_arn=$(aws iam list-policies --query "Policies[?PolicyName == 'AmazonS3CSIDriverPolicy-${eks_cluster}'].[Arn]" --output text)
+ create_s3_csi_trust_policy
+ create_s3_csi_role
+ role_name="AmazonEKS_S3_CSI_DriverRole-${eks_cluster}"
+ attach_s3_csi_policies $role_name $policy_arn
+
+ # Install CSI driver
+ gen3_log_info "eks cluster name: $eks_cluster"
+
+ # Capture the output of the command and prevent it from exiting the script
+ csi_driver_check=$(aws eks describe-addon --cluster-name $eks_cluster --addon-name aws-mountpoint-s3-csi-driver --query 'addon.addonName' --output text 2>&1 || true)
+
+ if echo "$csi_driver_check" | grep -q "ResourceNotFoundException"; then
+ gen3_log_info "CSI driver not found, installing..."
+ aws eks create-addon --cluster-name $eks_cluster --addon-name aws-mountpoint-s3-csi-driver --service-account-role-arn arn:aws:iam::${account_id}:role/AmazonEKS_S3_CSI_DriverRole-${eks_cluster}
+ csi_status="CREATING"
+ retries=0
+ while [ "$csi_status" != "ACTIVE" ] && [ $retries -lt 12 ]; do
+ gen3_log_info "Waiting for CSI driver to become active... (attempt $((retries+1)))"
+ sleep 10
+ csi_status=$(aws eks describe-addon --cluster-name $eks_cluster --addon-name aws-mountpoint-s3-csi-driver --query 'addon.status' --output text || echo "CREATING")
+ retries=$((retries+1))
+ done
+ if [ "$csi_status" == "ACTIVE" ]; then
+ gen3_log_info "CSI driver successfully installed and active."
+ else
+ gen3_log_err "CSI driver installation failed or not active. Current status: $csi_status"
+ fi
+ elif echo "$csi_driver_check" | grep -q "aws-mountpoint-s3-csi-driver"; then
+ gen3_log_info "CSI driver already exists, skipping installation."
+ else
+ gen3_log_info "Unexpected error occurred: $csi_driver_check"
+ exit 1
+ fi
+}
+
+setup_csi_driver
diff --git a/gen3/bin/prometheus.sh b/gen3/bin/prometheus.sh
index 1d71c6a7a0..d7290451ca 100644
--- a/gen3/bin/prometheus.sh
+++ b/gen3/bin/prometheus.sh
@@ -4,9 +4,7 @@
 source "${GEN3_HOME}/gen3/lib/utils.sh"
 gen3_load "gen3/gen3setup"
-
-#export GEN3_PROMHOST="${GEN3_PROMHOST:-"http://prometheus-server.prometheus.svc.cluster.local"}"
-export GEN3_PROMHOST="${GEN3_PROMHOST:-"http://prometheus-operated.monitoring.svc.cluster.local:9090"}"
+export GEN3_PROMHOST="${GEN3_PROMHOST:-"https://mimir.planx-pla.net"}"
 gen3_prom_help() {
 gen3 help prometheus
@@ -16,11 +14,11 @@ function gen3_prom_curl() {
 local urlBase="$1"
 shift || return 1
 local hostOrKey="${1:-${GEN3_PROMHOST}}"
- local urlPath="api/v1/$urlBase"
+ local urlPath="prometheus/api/v1/$urlBase"
 if [[ "$hostOrKey" =~ ^http ]]; then
 gen3_log_info "fetching $hostOrKey/$urlPath"
- curl -s -H 'Accept: application/json' "$hostOrKey/$urlPath"
+ curl -s -H 'Accept: application/json' -H "X-Scope-OrgID: anonymous" "$hostOrKey/$urlPath"
 else
 gen3 api curl "$urlPath" "$hostOrKey"
 fi
diff --git a/gen3/lib/g3k_manifest.sh b/gen3/lib/g3k_manifest.sh
index ae42e84ba5..d69ef5b99a 100644
--- a/gen3/lib/g3k_manifest.sh
+++ b/gen3/lib/g3k_manifest.sh
@@ -253,8 +253,11 @@ g3k_manifest_filter() {
 kvList+=("$kvLabelKey" "tags.datadoghq.com/version: '$version'")
 done
 environment="$(g3k_config_lookup ".global.environment" "$manifestPath")"
+ hostname="$(g3k_config_lookup ".global.hostname" "$manifestPath")"
 kvEnvKey=$(echo "GEN3_ENV_LABEL" | tr '[:lower:]' '[:upper:]')
+ kvHostKey=$(echo "GEN3_HOSTNAME_LABEL" | tr '[:lower:]' '[:upper:]')
 kvList+=("$kvEnvKey" "tags.datadoghq.com/env: $environment")
+ kvList+=("$kvHostKey" "hostname: $hostname")
 for key in $(g3k_config_lookup '. | keys[]' "$manifestPath"); do
 gen3_log_debug "harvesting key $key"
 for key2 in $(g3k_config_lookup ".[\"${key}\"] "' | to_entries | map(select((.value|type != "array") and (.value|type != "object"))) | map(.key)[]' "$manifestPath" | grep '^[a-zA-Z]'); do
diff --git a/gen3/test/bootstrapTest.sh b/gen3/test/bootstrapTest.sh
index be3241f310..d07512d8b2 100644
--- a/gen3/test/bootstrapTest.sh
+++ b/gen3/test/bootstrapTest.sh
@@ -12,7 +12,7 @@ test_bootstrap_fenceconfig() {
 because $? "secret template exists and is valid yaml: $secretConf"
 [[ -f "$publicConf" ]] && yq -r . < "$secretConf" > /dev/null;
 because $? "public template exists and is valid yaml: $secretConf"
- python3.8 "$GEN3_HOME/apis_configs/yaml_merge.py" "$publicConf" "$secretConf" | yq -r . > /dev/null;
+ python3.9 "$GEN3_HOME/apis_configs/yaml_merge.py" "$publicConf" "$secretConf" | yq -r . > /dev/null;
 because $? "yaml_merge public private should yield valid yaml"
}
diff --git a/gen3/test/fenceStuffTest.sh b/gen3/test/fenceStuffTest.sh
index 09a0eb125e..df250a1ad3 100644
--- a/gen3/test/fenceStuffTest.sh
+++ b/gen3/test/fenceStuffTest.sh
@@ -17,7 +17,7 @@ EOM
 C: 4
 B: 3
EOM
- json3="$(python3.8 "$GEN3_HOME/apis_configs/yaml_merge.py" "$yaml1" "$yaml2")"; because $? "yaml_merge should succeed"
+ json3="$(python3.9 "$GEN3_HOME/apis_configs/yaml_merge.py" "$yaml1" "$yaml2")"; because $? "yaml_merge should succeed"
 [[ "1" == "$(jq -r .A <<<"$json3")" ]]; because $?
".A should be 1" /bin/rm "$yaml1" /bin/rm "$yaml2" diff --git a/kube/services/ambassador/ambassador-deploy.yaml b/kube/services/ambassador/ambassador-deploy.yaml index 8788cef13c..28e6a41fda 100644 --- a/kube/services/ambassador/ambassador-deploy.yaml +++ b/kube/services/ambassador/ambassador-deploy.yaml @@ -24,6 +24,7 @@ spec: netnolimit: "yes" userhelper: "yes" GEN3_DATE_LABEL + GEN3_HOSTNAME_LABEL spec: affinity: podAntiAffinity: diff --git a/kube/services/arborist/arborist-deploy.yaml b/kube/services/arborist/arborist-deploy.yaml index 5deef6ac7b..360c5c04ac 100644 --- a/kube/services/arborist/arborist-deploy.yaml +++ b/kube/services/arborist/arborist-deploy.yaml @@ -24,6 +24,7 @@ spec: # for revproxy authz public: "yes" GEN3_DATE_LABEL + GEN3_HOSTNAME_LABEL spec: affinity: podAntiAffinity: diff --git a/kube/services/argo-events/workflows/configmap.yaml b/kube/services/argo-events/workflows/configmap.yaml index c754c36949..f57ae07d08 100644 --- a/kube/services/argo-events/workflows/configmap.yaml +++ b/kube/services/argo-events/workflows/configmap.yaml @@ -84,11 +84,11 @@ data: purpose: workflow limits: resources: - cpu: 2000 + cpu: 4000 providerRef: name: workflow-WORKFLOW_NAME - # Kill nodes after 30 days to ensure they stay up to date - ttlSecondsUntilExpired: 2592000 + # Kill nodes after 2 days to ensure they stay up to date + ttlSecondsUntilExpired: 172800 ttlSecondsAfterEmpty: 10 nodetemplate.yaml: | @@ -97,6 +97,9 @@ data: metadata: name: workflow-WORKFLOW_NAME spec: + amiSelector: + aws::name: EKS-FIPS* + aws::owners: "143731057154" subnetSelector: karpenter.sh/discovery: ENVIRONMENT securityGroupSelector: @@ -129,22 +132,6 @@ data: sysctl -w fs.inotify.max_user_watches=12000 - sudo yum update -y - sudo yum install -y dracut-fips openssl >> /opt/fips-install.log - sudo dracut -f - # configure grub - sudo /sbin/grubby --update-kernel=ALL --args="fips=1" - - --BOUNDARY - Content-Type: text/cloud-config; charset="us-ascii" - - power_state: - delay: now - mode: reboot - message: Powering off - timeout: 2 - condition: true - --BOUNDARY-- blockDeviceMappings: - deviceName: /dev/xvda diff --git a/kube/services/argo-pod-pending-monitor/application.yaml b/kube/services/argo-pod-pending-monitor/application.yaml new file mode 100644 index 0000000000..9bfc1a7e6d --- /dev/null +++ b/kube/services/argo-pod-pending-monitor/application.yaml @@ -0,0 +1,22 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: argo-pod-pending-monitor-application + namespace: argocd +spec: + destination: + namespace: default + server: https://kubernetes.default.svc + project: default + source: + repoURL: https://github.com/uc-cdis/cloud-automation.git + targetRevision: master + path: kube/services/argo-pod-pending-monitor + directory: + exclude: "application.yaml" + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true diff --git a/kube/services/argo-pod-pending-monitor/argo-pod-pending.yaml b/kube/services/argo-pod-pending-monitor/argo-pod-pending.yaml new file mode 100644 index 0000000000..d3d75a84e1 --- /dev/null +++ b/kube/services/argo-pod-pending-monitor/argo-pod-pending.yaml @@ -0,0 +1,42 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: argo-pod-pending-monitor + namespace: default +spec: + schedule: "*/5 * * * *" + jobTemplate: + spec: + template: + metadata: + labels: + app: gen3job + spec: + serviceAccountName: node-monitor + containers: + - name: kubectl + image: quay.io/cdis/awshelper + env: + # This is the label we want to 
monitor, probably will never need to change
+ - name: NODE_LABEL
+ value: purpose=workflow
+ - name: SLACK_WEBHOOK_URL
+ valueFrom:
+ configMapKeyRef:
+ name: global
+ key: slack_alarm_webhook
+
+ command: ["/bin/bash"]
+ args:
+ - "-c"
+ - |
+ #!/bin/bash
+ pending_pods=$(kubectl get pods -n argo -o json | jq -r '.items[] | select(.status.phase == "Pending") | {name: .metadata.name, creationTimestamp: .metadata.creationTimestamp} | select(((now - (.creationTimestamp | fromdateiso8601)) / 60) > 15) | .name')
+ if [[ ! -z $pending_pods ]]; then
+ echo "Pods $pending_pods have been pending too long, sending an alert"
+ curl -X POST -H 'Content-type: application/json' --data "{\"text\":\"WARNING: Pods \`${pending_pods}\` are older than 15 minutes!\"}" $SLACK_WEBHOOK_URL
+ else
+ echo "All good here!"
+ fi
+ restartPolicy: OnFailure
diff --git a/kube/services/argo-wrapper/argo-wrapper-deploy.yaml b/kube/services/argo-wrapper/argo-wrapper-deploy.yaml
index 89ec29ecce..3b9d1b6a2d 100644
--- a/kube/services/argo-wrapper/argo-wrapper-deploy.yaml
+++ b/kube/services/argo-wrapper/argo-wrapper-deploy.yaml
@@ -24,6 +24,7 @@ spec:
 GEN3_ENV_LABEL
 GEN3_ARGO-WRAPPER_VERSION
 GEN3_DATE_LABEL
+ GEN3_HOSTNAME_LABEL
 spec:
 affinity:
 podAntiAffinity:
diff --git a/kube/services/argo/values.yaml b/kube/services/argo/values.yaml
index 011253e1ed..c1e951773b 100644
--- a/kube/services/argo/values.yaml
+++ b/kube/services/argo/values.yaml
@@ -1,6 +1,5 @@
 controller:
- parallelism: 10
- namespaceParallelism: 5
+ parallelism: 7
 metricsConfig:
 # -- Enables prometheus metrics server
 enabled: true
@@ -62,6 +61,20 @@ controller:
 workflowDefaults:
 spec:
 archiveLogs: true
+ onExit: alert-on-timeout
+ templates:
+ - name: alert-on-timeout
+ script:
+ image: quay.io/cdis/amazonlinux-debug:master
+ command: [sh]
+ envFrom:
+ - secretRef:
+ name: slack-webhook-secret
+ source: |
+ failure_reason=$(echo {{workflow.failures}} | jq 'any(.[]; .message == "Step exceeded its deadline")' )
+ if [ "$failure_reason" = "true" ]; then
+ curl -X POST -H 'Content-type: application/json' --data "{\"text\":\"ALERT: Workflow {{workflow.name}} has been killed due to timeout\"}" "$SLACK_WEBHOOK_URL"
+ fi
 # -- [Node selector]
 nodeSelector:
diff --git a/kube/services/audit-service/audit-service-deploy.yaml b/kube/services/audit-service/audit-service-deploy.yaml
index 935cab408b..b7081a7f51 100644
--- a/kube/services/audit-service/audit-service-deploy.yaml
+++ b/kube/services/audit-service/audit-service-deploy.yaml
@@ -27,6 +27,7 @@ spec:
 # for network policy
 netnolimit: "yes"
 GEN3_DATE_LABEL
+ GEN3_HOSTNAME_LABEL
 spec:
 serviceAccountName: audit-service-sa
 affinity:
diff --git a/kube/services/aws-es-proxy/aws-es-proxy-deploy.yaml b/kube/services/aws-es-proxy/aws-es-proxy-deploy.yaml
index 34f18d9730..c7f72b4d81 100644
--- a/kube/services/aws-es-proxy/aws-es-proxy-deploy.yaml
+++ b/kube/services/aws-es-proxy/aws-es-proxy-deploy.yaml
@@ -21,6 +21,7 @@ spec:
 app: esproxy
 netvpc: "yes"
 GEN3_DATE_LABEL
+ GEN3_HOSTNAME_LABEL
 spec:
 affinity:
 nodeAffinity:
diff --git a/kube/services/cedar-wrapper/cedar-wrapper-deploy.yaml b/kube/services/cedar-wrapper/cedar-wrapper-deploy.yaml
index fa6b741a2a..740e18c91d 100644
--- a/kube/services/cedar-wrapper/cedar-wrapper-deploy.yaml
+++ b/kube/services/cedar-wrapper/cedar-wrapper-deploy.yaml
@@ -97,6 +97,36 @@ spec:
 secretKeyRef:
 name: cedar-g3auto
 key: "cedar_api_key.txt"
+ - name: GEN3_DEBUG
+ GEN3_DEBUG_FLAG|-value: "False"-|
+ - name: DD_ENABLED
+ valueFrom:
+ configMapKeyRef:
+ name:
manifest-global + key: dd_enabled + optional: true + - name: DD_ENV + valueFrom: + fieldRef: + fieldPath: metadata.labels['tags.datadoghq.com/env'] + - name: DD_SERVICE + valueFrom: + fieldRef: + fieldPath: metadata.labels['tags.datadoghq.com/service'] + - name: DD_VERSION + valueFrom: + fieldRef: + fieldPath: metadata.labels['tags.datadoghq.com/version'] + - name: DD_LOGS_INJECTION + value: "true" + - name: DD_PROFILING_ENABLED + value: "true" + - name: DD_TRACE_SAMPLE_RATE + value: "1" + - name: DD_AGENT_HOST + valueFrom: + fieldRef: + fieldPath: status.hostIP volumeMounts: - name: "ca-volume" readOnly: true diff --git a/kube/services/cluster-level-resources/app.yaml b/kube/services/cluster-level-resources/app.yaml new file mode 100644 index 0000000000..95a2ed4c42 --- /dev/null +++ b/kube/services/cluster-level-resources/app.yaml @@ -0,0 +1,21 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: cluster-level-resources + namespace: argocd +spec: + project: default + destination: + namespace: argocd + server: https://kubernetes.default.svc + source: + repoURL: https://github.com/uc-cdis/gen3-gitops.git + targetRevision: TARGET_REVISION + path: cluster-level-resources + helm: + valueFiles: + - ../CLUSTER_NAME/cluster-values/cluster-values.yaml + releaseName: cluster-level-resources + syncPolicy: + automated: + selfHeal: true diff --git a/kube/services/dicom-server/dicom-server-deploy.yaml b/kube/services/dicom-server/dicom-server-deploy.yaml index 43bd90e5da..58040e6d45 100644 --- a/kube/services/dicom-server/dicom-server-deploy.yaml +++ b/kube/services/dicom-server/dicom-server-deploy.yaml @@ -16,6 +16,7 @@ spec: release: production public: "yes" GEN3_DATE_LABEL + GEN3_HOSTNAME_LABEL spec: affinity: nodeAffinity: diff --git a/kube/services/dicom-viewer/dicom-viewer-deploy.yaml b/kube/services/dicom-viewer/dicom-viewer-deploy.yaml index 9df6fbc930..7cd9b6bbe0 100644 --- a/kube/services/dicom-viewer/dicom-viewer-deploy.yaml +++ b/kube/services/dicom-viewer/dicom-viewer-deploy.yaml @@ -16,6 +16,7 @@ spec: release: production public: "yes" GEN3_DATE_LABEL + GEN3_HOSTNAME_LABEL spec: affinity: nodeAffinity: diff --git a/kube/services/fence/fence-deploy.yaml b/kube/services/fence/fence-deploy.yaml index 9524315d98..cf03036df4 100644 --- a/kube/services/fence/fence-deploy.yaml +++ b/kube/services/fence/fence-deploy.yaml @@ -17,6 +17,9 @@ spec: maxUnavailable: 0 template: metadata: + annotations: + prometheus.io/scrape: "true" + prometheus.io/path: /metrics labels: app: fence release: production @@ -28,6 +31,7 @@ spec: userhelper: "yes" tags.datadoghq.com/service: "fence" GEN3_ENV_LABEL + GEN3_HOSTNAME_LABEL GEN3_FENCE_VERSION GEN3_DATE_LABEL spec: diff --git a/kube/services/frontend-framework/frontend-framework-deploy.yaml b/kube/services/frontend-framework/frontend-framework-deploy.yaml index f0da277dce..7acc9f745c 100644 --- a/kube/services/frontend-framework/frontend-framework-deploy.yaml +++ b/kube/services/frontend-framework/frontend-framework-deploy.yaml @@ -18,6 +18,7 @@ spec: app: frontend-framework public: "yes" GEN3_DATE_LABEL + GEN3_HOSTNAME_LABEL spec: affinity: podAntiAffinity: diff --git a/kube/services/frontend-framework/frontend-framework-root-deploy.yaml b/kube/services/frontend-framework/frontend-framework-root-deploy.yaml index 8cad981c8b..15ca3d6a11 100644 --- a/kube/services/frontend-framework/frontend-framework-root-deploy.yaml +++ b/kube/services/frontend-framework/frontend-framework-root-deploy.yaml @@ -18,6 +18,7 @@ spec: app: frontend-framework 
public: "yes" GEN3_DATE_LABEL + GEN3_HOSTNAME_LABEL spec: affinity: podAntiAffinity: diff --git a/kube/services/guppy/guppy-deploy.yaml b/kube/services/guppy/guppy-deploy.yaml index c3e8d121c4..1dc6c7da00 100644 --- a/kube/services/guppy/guppy-deploy.yaml +++ b/kube/services/guppy/guppy-deploy.yaml @@ -23,6 +23,7 @@ spec: GEN3_GUPPY_VERSION GEN3_ENV_LABEL GEN3_DATE_LABEL + GEN3_HOSTNAME_LABEL spec: affinity: podAntiAffinity: diff --git a/kube/services/hatchery/hatchery-deploy.yaml b/kube/services/hatchery/hatchery-deploy.yaml index f7de81d79b..80e64a5821 100644 --- a/kube/services/hatchery/hatchery-deploy.yaml +++ b/kube/services/hatchery/hatchery-deploy.yaml @@ -24,6 +24,7 @@ spec: GEN3_HATCHERY_VERSION GEN3_ENV_LABEL GEN3_DATE_LABEL + GEN3_HOSTNAME_LABEL spec: affinity: podAntiAffinity: diff --git a/kube/services/indexd/indexd-deploy.yaml b/kube/services/indexd/indexd-deploy.yaml index 2390790587..af60e9b4a3 100644 --- a/kube/services/indexd/indexd-deploy.yaml +++ b/kube/services/indexd/indexd-deploy.yaml @@ -27,6 +27,7 @@ spec: GEN3_ENV_LABEL GEN3_INDEXD_VERSION GEN3_DATE_LABEL + GEN3_HOSTNAME_LABEL spec: affinity: podAntiAffinity: diff --git a/kube/services/jenkins/jenkins-deploy.yaml b/kube/services/jenkins/jenkins-deploy.yaml index 954e996f21..596c726a0f 100644 --- a/kube/services/jenkins/jenkins-deploy.yaml +++ b/kube/services/jenkins/jenkins-deploy.yaml @@ -135,19 +135,6 @@ spec: subPath: "ca.pem" - name: dockersock mountPath: "/var/run/docker.sock" - - name: selenium - image: selenium/standalone-chrome:3.14 - ports: - - containerPort: 4444 - readinessProbe: - httpGet: - path: /wd/hub/sessions - port: 4444 - readinessProbe: - httpGet: - path: /wd/hub/sessions - port: 4444 - imagePullPolicy: Always volumes: - name: datadir persistentVolumeClaim: diff --git a/kube/services/jenkins2/jenkins2-deploy.yaml b/kube/services/jenkins2/jenkins2-deploy.yaml index 08365f811b..c54464b006 100644 --- a/kube/services/jenkins2/jenkins2-deploy.yaml +++ b/kube/services/jenkins2/jenkins2-deploy.yaml @@ -131,19 +131,6 @@ spec: subPath: "ca.pem" - name: dockersock mountPath: "/var/run/docker.sock" - - name: selenium - image: selenium/standalone-chrome:3.14 - ports: - - containerPort: 4444 - readinessProbe: - httpGet: - path: /wd/hub/sessions - port: 4444 - readinessProbe: - httpGet: - path: /wd/hub/sessions - port: 4444 - imagePullPolicy: Always volumes: - name: datadir persistentVolumeClaim: diff --git a/kube/services/jobs/hatchery-reaper-job.yaml b/kube/services/jobs/hatchery-reaper-job.yaml index 77d249e372..4f54752c98 100644 --- a/kube/services/jobs/hatchery-reaper-job.yaml +++ b/kube/services/jobs/hatchery-reaper-job.yaml @@ -110,7 +110,7 @@ spec: done # legacy reaper code - gen3_log_info "Running legacy reaper job (based on local cluster/ prometheus)" + gen3_log_info "Running legacy reaper job (based on Mimir)" if appList="$(gen3 jupyter idle none "$(gen3 db namespace)" kill)" && [[ -n "$appList" && -n "$slackWebHook" && "$slackWebHook" != "None" ]]; then curl -X POST --data-urlencode "payload={\"text\": \"hatchery-reaper in $gen3Hostname: \n\`\`\`\n${appList}\n\`\`\`\"}" "${slackWebHook}" fi diff --git a/kube/services/jobs/psql-db-aurora-migration-job.yaml b/kube/services/jobs/psql-db-aurora-migration-job.yaml new file mode 100644 index 0000000000..ca81c37e85 --- /dev/null +++ b/kube/services/jobs/psql-db-aurora-migration-job.yaml @@ -0,0 +1,219 @@ +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: psql-db-aurora-migration +spec: + template: + metadata: + labels: + app: gen3job + spec: + 
affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: karpenter.sh/capacity-type + operator: In + values: + - on-demand + - weight: 99 + preference: + matchExpressions: + - key: eks.amazonaws.com/capacityType + operator: In + values: + - ONDEMAND + serviceAccountName: psql-db-copy-sa + containers: + - name: pgdump + image: quay.io/cdis/awshelper:master + imagePullPolicy: Always + env: + - name: gen3Env + valueFrom: + configMapKeyRef: + name: global + key: environment + - name: JENKINS_HOME + value: "devterm" + - name: GEN3_HOME + value: /home/ubuntu/cloud-automation + command: [ "/bin/bash" ] + args: + - "-c" + - | + # This job migrates (takes backup and restores) the databases in a Gen3 instance to an Aurora RDS cluster. + # Requirements: + # 1. Aurora server credentials should be present in the Gen3Secrets/creds.json with name 'aurora'. + # 2. Ensure that `gen3 psql aurora` and `gen3 secrets decode aurora-creds` work as expected. + # 3. The job needs the "psql-db-copy-sa" service account with the necessary permissions to read secrets from all relevant namespaces. + + source "${GEN3_HOME}/gen3/lib/utils.sh" + gen3_load "gen3/gen3setup" + namespace=$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace) + default_databases=($(echo -e "$(gen3 db services)" | sort -r)) + date_str=$(date -u +%y%m%d_%H%M%S) + databases=("${default_databases[@]}") + gen3_log_info "databases: ${databases[@]}" + + # Initialize sheepdog_db_name and failed_migrations variables + sheepdog_db_name="" + failed_migrations="" + + # find Aurora Server credentials + aurora_host_name=$(gen3 secrets decode aurora-creds creds.json | jq -r '.db_host') + aurora_master_username=$(gen3 secrets decode aurora-creds creds.json | jq -r '.db_username') + aurora_master_password=$(gen3 secrets decode aurora-creds creds.json | jq -r '.db_password') + aurora_master_database=$(gen3 secrets decode aurora-creds creds.json | jq -r '.db_database') + + gen3_log_info "Aurora Creds: \n aurora_host_name: $aurora_host_name \n aurora_master_username: $aurora_master_username \n aurora_master_database: $aurora_master_database" + + # Verify important variables are present + if [ -z "$aurora_host_name" ] || [ -z "$aurora_master_username" ] || [ -z "$aurora_master_password" ] || [ -z "$aurora_master_database" ]; then + gen3_log_err "Aurora credentials are missing. Exiting." + exit 1 + fi + + new_resources="" + + # Function to truncate to 63 characters + function truncate_identifier() { + local identifier=$1 + if [ ${#identifier} -gt 63 ]; then + echo "${identifier:0:63}" + else + echo "$identifier" + fi + } + + # Function to create a database with retry logic + function create_database_with_retry() { + local db_name=$1 + local retries=5 + local wait_time=10 + for i in $(seq 1 $retries); do + PGPASSWORD=${db_password} psql -h $aurora_host_name -U "$db_user" -d postgres -c "CREATE DATABASE $db_name" + if [ $? -eq 0 ]; then + return 0 + fi + gen3_log_err "Failed to create database $db_name. Retrying in $wait_time seconds..." + sleep $wait_time + done + return 1 + } + + # Looping through each service to: + # - Extract the database credentials. + # - Check if the user already exists, if not, create the user. + # - Grant required privileges. + # - Create the database (except for peregrine). + # - Backup and restore the database on the Aurora Cluster. 
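+ # For reference, each decoded creds secret below is expected to carry the keys
+ # db_host, db_username, db_password, and db_database, e.g. (illustrative values only):
+ #   {"db_host": "db.internal", "db_username": "fence_user", "db_password": "...", "db_database": "fence"}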
+ for database in "${databases[@]}"; do + for secret_name in "${database}-creds creds.json" "$database-g3auto dbcreds.json"; do + creds=$(gen3 secrets decode $secret_name 2>/dev/null) + if [ $? -eq 0 ] && [ ! -z "$creds" ]; then + db_hostname=$(echo $creds | jq -r .db_host) + db_username=$(echo $creds | jq -r .db_username) + db_password=$(echo $creds | jq -r .db_password) + db_database=$(echo $creds | jq -r .db_database) + gen3_log_info "Extracting service credentials for $database from $secret_name: \n db_hostname: $db_hostname \n db_username: $db_username \n db_database: $db_database \n" + break + fi + done + + if [ -z "$db_hostname" ] || [ -z "$db_username" ] || [ -z "$db_password" ] || [ -z "$db_database" ]; then + gen3_log_err "Failed to extract database credentials for $database" + failed_migrations="${failed_migrations}\nDatabase: $database, Error: Failed to extract credentials" + continue + fi + + # Check source database accessibility + PGPASSWORD=${db_password} pg_isready -h $db_hostname -U "$db_username" -d "$db_database" + if [ $? -ne 0 ]; then + gen3_log_err "Cannot connect to source database $db_database at $db_hostname. Skipping database $database." + failed_migrations="${failed_migrations}\nDatabase: $database, Error: Cannot connect to source database at $db_hostname" + continue + fi + + # Define db_user and db_name variables with replaced hyphens + db_user="$(echo $database | tr '-' '_')_user_$(echo $namespace | tr '-' '_')" + db_name="$(echo $database | tr '-' '_')_$(echo $namespace | tr '-' '_')_${date_str}" + + # Truncate identifiers if necessary + db_user=$(truncate_identifier $db_user) + db_name=$(truncate_identifier $db_name) + + # Try to connect to the Aurora database with the extracted credentials. + # If the connection is successful, it means the user already exists. + # If not, create the user. + + PGPASSWORD=${db_password} psql -h $aurora_host_name -U "$db_user" -d postgres -c "\q" + if [ $? -eq 0 ]; then + gen3_log_info "User $db_user, password already exists" + else + gen3 psql aurora -c "SET password_encryption = 'scram-sha-256';CREATE USER \"$db_user\" WITH PASSWORD '$db_password' CREATEDB" + if [ $? -ne 0 ]; then + gen3_log_err "Failed to create user for $database" + failed_migrations="${failed_migrations}\nDatabase: $database, Error: Failed to create user" + continue + else + gen3_log_info "Database user $db_user created successfully" + fi + fi + + if [ "$database" != "peregrine" ]; then + # Create the database with a unique name by appending namespace and date. + create_database_with_retry $db_name + if [ $? -ne 0 ]; then + gen3_log_err "Failed to create database for $database" + failed_migrations="${failed_migrations}\nDatabase: $database, Error: Failed to create database" + continue + else + gen3_log_info "Database $db_name created successfully" + if [ "$database" == "sheepdog" ]; then + sheepdog_db_name=$db_name + fi + fi + + # Backup the current database and restore it to the newly created database. 
+ if gen3 db backup $database | PGPASSWORD=${db_password} psql -h $aurora_host_name -U "$db_user" -d "$db_name"; then + gen3_log_info "Database $database restored successfully to $db_name" + new_resources="${new_resources}\nSource_Database: $db_database Source_Host: $db_hostname Source_User: $db_username Restored_Database: $db_name User: $db_user" + else + gen3_log_err "Failed to backup and restore database for $database" + failed_migrations="${failed_migrations}\nDatabase: $database, Error: Failed to backup and restore database" + fi + fi + + if [ "$database" == "peregrine" ]; then + if [ -n "$sheepdog_db_name" ]; then + gen3 psql aurora -d "$sheepdog_db_name" -c "GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO \"$db_user\"" + if [ $? -ne 0 ]; then + gen3_log_err "Failed to grant access to sheepdog tables for peregrine user" + failed_migrations="${failed_migrations}\nDatabase: $database, Error: Failed to grant access to sheepdog tables for peregrine user" + continue + else + gen3_log_info "Access to sheepdog tables granted successfully for peregrine user" + new_resources="${new_resources}\nUser: $db_user with access to sheepdog database $sheepdog_db_name" + fi + else + gen3_log_err "Sheepdog database not found for granting permissions to peregrine user" + failed_migrations="${failed_migrations}\nDatabase: $database, Error: Sheepdog database not found for granting permissions" + fi + fi + done + + # Logging the newly created resources + gen3_log_info "New resources created on $aurora_host_name\n$new_resources" + + # Logging the failed migrations + if [ -n "$failed_migrations" ]; then + gen3_log_info "Failed migrations:\n$failed_migrations" + fi + + # Sleep for 600 seconds to allow the user to check the logs + sleep 600 + restartPolicy: Never diff --git a/kube/services/jobs/psql-db-backup-encrypt-job.yaml b/kube/services/jobs/psql-db-backup-encrypt-job.yaml new file mode 100644 index 0000000000..914b81ffa5 --- /dev/null +++ b/kube/services/jobs/psql-db-backup-encrypt-job.yaml @@ -0,0 +1,224 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: psql-db-backup-encrypt +spec: + template: + metadata: + labels: + app: gen3job + spec: + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: karpenter.sh/capacity-type + operator: In + values: + - on-demand + - weight: 99 + preference: + matchExpressions: + - key: eks.amazonaws.com/capacityType + operator: In + values: + - ONDEMAND + serviceAccountName: dbencrypt-sa + containers: + - name: pgdump + image: quay.io/cdis/awshelper:master + imagePullPolicy: Always + env: + - name: gen3Env + valueFrom: + configMapKeyRef: + name: global + key: environment + - name: JENKINS_HOME + value: "devterm" + - name: GEN3_HOME + value: /home/ubuntu/cloud-automation + command: [ "/bin/bash" ] + args: + - "-c" + - | + #!/bin/bash + + # This script takes backup of Gen3 Service databases, encrypts it, and moves it to an encrypted S3 bucket. + # Requirements: + # 1. PGP public key must be available as a variable in the script. + # 2. The job needs the necessary permissions to read secrets, config maps from the target namespace. 
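+ # Note: restoring one of these backups requires the matching PGP private key;
+ # a hypothetical sketch (file names are illustrative):
+ #   gpg --import pe-private.key
+ #   gpg --output indexd.sql --decrypt indexd.sql.gpg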
+ + source "${GEN3_HOME}/gen3/lib/utils.sh" + gen3_load "gen3/gen3setup" + + # Fetch necessary information + namespace=$(gen3 api namespace) + environment=$(gen3 api environment) + hostname=$(gen3 api hostname) + default_databases=($(gen3 db services)) + date_str=$(date -u +%y%m%d_%H%M%S) + databases=("${default_databases[@]}") + gen3_log_info "Databases to be backed up: ${databases[@]}" + + # Define backup directory structure + BACKUP_DIR="/home/ubuntu/backup" + HOST_DIR="${BACKUP_DIR}/${hostname}" + ENV_NS_DIR="${HOST_DIR}/${environment}-${namespace}" + DATE_DIR="${ENV_NS_DIR}/${date_str}" + mkdir -p "${DATE_DIR}" + + # PGP public key + PUBLIC_KEY="-----BEGIN PGP PUBLIC KEY BLOCK----- + + mQINBGar5esBEADFHTpT8IzB5Vn77Ied9O1MlsEkn+k/Qbn1giEZia+BiGSGfJqD + ebJn3B/6NeUqyfq55ADw9oNNXw+LcTZrRtZeOv8kq+mfdJ64e1Qnv9i0l6e+LXbq + An3wUvQy+djtTIpQDIdtk0UyYQcNsxkdaqjbYzbNr33mbEjD4JfsOt7qkFJRLG26 + Mc8GEJxYfK0PYX8P54LuU+jc2bq/O9VK13YJ7WYYhrRBsoAej2aRr+3KELrACCeE + RZ8G0XPBhAI96FE6dcohoVo1+m3mXTR6BBtqAIslc0tWyqk5S5YPrGsYeogOl+yq + HyVildf25/ZLFHEnfnyOYAx5ghKRisKRx8bJ2esbSVSryvnpeOMtA57Wba3y+cFn + 5W5YG+MqLG+tqWFIRMs+zLeYnZtP/F2Qdc+5CgT0rEtPI8OpilaB+GiPlRjgDM4m + mbv1XABJvho7uWco1yASrBDsaDQKgkWpVnyIETZOP+FWpK7LJvUz9l/aoSMUK9iQ + Ko1SggewM4fCyBeoSso7aZ75xQK+XkRyFeyd2DqotT/e2ZgIt//TzQ9LF61SVq+Q + hYKJsTxFedAK6Q1C5sHzzG+fFbOTrQ71vgOtKh7eT8quM9sAsCXw4YMGS2v0mSyB + kiJllrz6I54pKiXs2iXYQZLs6hDNDHH0/uEjOVGsG9y/vAdVuRr39VbVGQARAQAB + tCtQbGF0Zm9ybSBFbmdpbmVlcmluZyA8cGVAY3Rkcy51Y2hpY2Fnby5lZHU+iQJO + BBMBCgA4FiEEkqaslDgj+ReG0CykPBvbSP+i50gFAmar5esCGy8FCwkIBwIGFQoJ + CAsCBBYCAwECHgECF4AACgkQPBvbSP+i50gm7xAAwCvhBeEESHUbwd27r8YyOY1r + ZEELagJSaCMUA5W7C780y2RTxVHJ7XmVVEwMCCXnZ0u7G+GZH3k6jHuIRrYwPGgY + ehjAwmLU3uRTQDnbGtrWpLlgFZtqHSQO09sstiuuYYEniIGTt3/yGDMPsuqgguPN + pCT4IARoke3RdHAu5LQRZKaN1U0a/8qWIckCCOWLY8vkzjo/5IKoJQhel4eN3Zwn + 4qokIbDU9En+9De//rniIPMJFn26mQc9NIBW0uy6J2tNG/y3wJba3MNWL+WdCznE + yaFsTTGVzfdyCI3+HWq+fjCnrTQeYcsfPTbifpyaVdb6+FDj1yhY+hlJzLmDCMgP + OT3MD9NyWgBxuB2XqPOjo5RtA8uh3avNljRYC9k0bvuawNpGSZu7LKd2w79jAknm + Vh6TJ4+WHWx9vAozrwQ+O+8RmD0CPojsj4OQHb9lVTDd++6D7pq9o8yrBaZNCv9l + /gXk+f/3D19v0iYTlJF4OlGJyiTRfpJ27lq5Z0AuSm0SO/sc5O2tOV4fYDKUHnn9 + G+kw9+ZAdRpNS4x3W6j3sC3/Y5kKhD2fpyycHUfm2n0j2mGmXN1kQ28NU0mhJttB + OZazdgeijPXqN7+DM64iwKz9fSamc09FK7JTDgb64oAA0Py29bT9WLAMdYTNhFrE + agGOzCqb4TEjHoDIa9u5Ag0EZqvl6wEQAN1eAl7+ttfCd3NicrzwUHczsCyRyqde + HCjWRPhQ5rQ8kAmvt/b1SD/cTZM8nhLGOUBNEq9cK9ZuOS71AYvxKG30yYz0VakX + VDcHO0iAxSXqSKK1jrr9x4qqU1jw7Phy80WsVv7yA/vAsOug5ikqwAFVIEkSAltu + wk48xLqSeP0+g9fJGzEi78p+itjkhz9n+vgQEZN8OuI6yShlA2mB5Sq9ekvs4mHC + BvAFNBhTgK2szl6GUOBmoREnqf7hId7IhmhirzZxdI3v7yMrGMB0aH3++JdNHA7x + SeYN8B41RAH61kwz7JEoh4sVdfppYF7xx94numfX4YTftQGYvLIgbW4WzoE3BKAl + LSV3+1mERp06QM5zdH8zBwGRiM/ob/x+g2htyqYMG+6M1ZjMgrrNjsP5Zy80k//F + LBok3inKLNalM28WwtYdoXNnsYTOo3+UzIjtl1hfZoYgbn6LuiL0Oewga7QrOZ/P + UCZOwPdL2TgKDOqt7usdHso5i4139BOu6quBBp7ouqFSKFbWoOdffik/g0f+5UPw + +nEBN0JfpN6ACA1P6p/GzHkfYcOflumFjkpFFhB4PvHxpdBSH7T90ec+a/9XGImL + EIoeKMpCl3+yayd9u8JzLCZVYo2rgTnp/DoqoGPzv5W7DR709sAtSbxcuA4Klbzu + t9Xc9DKc6in/ABEBAAGJBGwEGAEKACAWIQSSpqyUOCP5F4bQLKQ8G9tI/6LnSAUC + Zqvl6wIbLgJACRA8G9tI/6LnSMF0IAQZAQoAHRYhBEubwQz2su3GAKUEIgZh6MFg + Klj0BQJmq+XrAAoJEAZh6MFgKlj0iHoP/0vEZkRVCkNnWQPUuq9c1tqc+9dX9VJh + Mx6EYh8tcuAbilEZpAsWT0qasM6Y87BO56X/suG+2agZfLkz9o60uBPlcHkCuttW + vrAmXaVSXWv8EEvDaaGTymSM0cEDEd+4hIaFobbeOm6FQfdp57yAI4QGDmK1bzkE + fG6bejHkI0DubR4kumHXlMiDgSLeOdUh0IbsDWl8+3wcpocNtIy8Q2V+hCuRW5l1 + Ag4I6P2qadpPlbbV4mxQzOCfn/Y2vHmpXL7FJBaCTgiYgT+uyFj91b9tbYcsVFW5 + 
2vuXWpVFrDNhMzRS8Fa3GXoM3SQn9cKMDgSp9X0lyDrj8DnGjG/0o+zHB4VnC3jz + Lms56al3t0lBuG9unz0e3sFCwvwUeYIjnqU1ViosZvz3u7TrpsMdsuKHISs7ck2j + rLNbi97/vdRjTARpQCNAN5V6YIjvx53OWSMJscGvGpOGlM9GbSy1a4eZ2vKbNelN + TQDWWao6nfInvbewG2OhZrx9MzajJvF1YD85O6LpDkIFCyZLb3rjKUWtEduQrJMe + ztj/hHhl+702EXWPxHFaYySfatcAutrB+n9Z7l96gzLqt8trrsopEYNLH9rmNesL + DrDwRjN5C0W3hGIhq03kR0tq/hQeZfhvREKDzGCITi1wef41ZUSG7dkGWT7n+WCw + 1IQ6DzzALDAyzH4QAKrQ4NCM+39sV+NPi+eyAIJ94P+cerhMPZh0LEdzHlX+DSUv + UoRAHuIml2VBe9cnwgD0tHXdxvjg3XLDwufvCfOu06jEmnEHpsokd/3qYj5dJ3Nd + Q4HvLQVKCnEvtM5uOoUZYxkGxobhH8ah18eC5/YmA95V3fiYF/Jg96I//Zbq/BZY + lTO5NjQzutNrrnEsr5BDbHDbURLZ58iixWLtYIVI04FRuu2UDZa9bNvjEQuwZos3 + nzHxmJeluo91HbW+FdRFByehrAOfUhkb04xJKEBXjhOqdUeSezIGhp88pb+yhV+w + WNSsxK+uOJ9Pr1Sjz3/pr9nopVFF1kqY8iE3GYgiYpu3p2A1zGUxlaoHQCZ/aT08 + whGzEsGkgQGOGX3pseKaYIVbxMNbfRGsJCKjdukQbuy5Gz/ffAm8vvf7JfPWmVUO + G+zU9L9ZIHZKlQ76PQTA1mEWa3akU6vVScDbNUiObCNZPQJdj6V6HpVAlo/sOXOt + 1RaIB2Oz5ViwAOJFYxO/PomcXiMOThnkF7op8R2I4cVoYlKnxK0VUoluNX9fiH5D + aI9PgmA2NVbQ/LqP+rP3hLbFSlh0nXjt4NxCbE14ApSslsoEaqilNgtL9UcIzkBE + 3lNYclZLeQk5SLPsohmsXoYJ6W8G1XopvZ/cG417GQ4N7FOr9VRBXimRX71O + =/4uP + -----END PGP PUBLIC KEY BLOCK-----" + + # Import the public key + echo "$PUBLIC_KEY" | gpg --import + + # Function to encrypt a database with PGP public key + function encrypt_database() { + local db_name=$1 + gpg --yes --trust-model always --output "${DATE_DIR}/${db_name}.sql.gpg" --encrypt --recipient pe@ctds.uchicago.edu "${DATE_DIR}/${db_name}.sql" + + if [ $? -eq 0 ]; then + rm "${DATE_DIR}/${db_name}.sql" + gen3_log_info "Successfully encrypted and removed the original file for database $db_name. \n" + return 0 + fi + gen3_log_err "Failed to encrypt database $db_name.\n" + return 1 + } + + # Loop through each service to back up and encrypt the database + for database in "${databases[@]}"; do + for secret_name in "${database}-creds creds.json" "$database-g3auto dbcreds.json"; do + creds=$(gen3 secrets decode $secret_name 2>/dev/null) + # Extracting service credentials + if [ $? -eq 0 ] && [ ! -z "$creds" ]; then + db_hostname=$(echo $creds | jq -r .db_host) + db_username=$(echo $creds | jq -r .db_username) + db_password=$(echo $creds | jq -r .db_password) + db_database=$(echo $creds | jq -r .db_database) + gen3_log_info "Extracting service credentials for $database from $secret_name:\n db_hostname: $db_hostname\n db_username: $db_username\n db_database: $db_database\n" + break + fi + done + + # Verify credentials are extracted + if [ -z "$db_hostname" ] || [ -z "$db_username" ] || [ -z "$db_password" ] || [ -z "$db_database" ]; then + gen3_log_err "Failed to extract database credentials for $database" + failed_backups="${failed_backups}\nDatabase: $database, Error: Failed to extract credentials" + continue + fi + + # Check database accessibility + PGPASSWORD=${db_password} pg_isready -h $db_hostname -U "$db_username" -d "$db_database" + if [ $? -ne 0 ]; then + gen3_log_err "Cannot connect to source database $db_database at $db_hostname. Skipping database $database." 
+ failed_backups="${failed_backups}\nDatabase: $database, Error: Cannot connect to source database at $db_hostname" + continue + fi + + if [ "$database" != "peregrine" ]; then + # Backup the current database + if PGPASSWORD=${db_password} pg_dump -h $db_hostname -U "$db_username" -d "$db_database" > "${DATE_DIR}/${db_database}.sql"; then + gen3_log_info "Database $database backed up to ${DATE_DIR}/${db_database}.sql" + if encrypt_database "$db_database"; then + backedup_databases="${backedup_databases}\nDatabase: $db_database" + else + failed_backups="${failed_backups}\nDatabase: $database, Error: Failed to encrypt database" + fi + else + gen3_log_err "Failed to backup $database" + failed_backups="${failed_backups}\nDatabase: $database, Error: Failed to backup database" + fi + fi + done + + # Logging the successful backups + if [ -n "$backedup_databases" ]; then + gen3_log_info "Successfully backed up and encrypted databases:\n$backedup_databases" + fi + + # Logging the failed backups + if [ -n "$failed_backups" ]; then + gen3_log_info "Failed backups:\n$failed_backups" + fi + + # Sleep for 600 seconds to allow the user to check the logs + sleep 600 + volumeMounts: + - mountPath: "/home/ubuntu/backup" + name: s3-volume + securityContext: + runAsUser: 1000 + runAsGroup: 1000 + volumes: + - name: s3-volume + persistentVolumeClaim: + claimName: s3-pvc-db-backups + restartPolicy: Never + diff --git a/kube/services/jobs/psql-db-copy-aurora-job.yaml b/kube/services/jobs/psql-db-copy-aurora-job.yaml new file mode 100644 index 0000000000..8fd6e899aa --- /dev/null +++ b/kube/services/jobs/psql-db-copy-aurora-job.yaml @@ -0,0 +1,193 @@ +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: psql-db-copy-aurora +spec: + template: + metadata: + labels: + app: gen3job + spec: + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: karpenter.sh/capacity-type + operator: In + values: + - on-demand + - weight: 99 + preference: + matchExpressions: + - key: eks.amazonaws.com/capacityType + operator: In + values: + - ONDEMAND + serviceAccountName: psql-db-copy-sa + containers: + - name: pgdump + image: quay.io/cdis/awshelper:master + imagePullPolicy: Always + env: + - name: gen3Env + valueFrom: + configMapKeyRef: + name: global + key: environment + - name: JENKINS_HOME + value: "devterm" + - name: GEN3_HOME + value: /home/ubuntu/cloud-automation + - name: SOURCE_NAMESPACE + GEN3_SOURCE_NAMESPACE|-value: "staging"-| # Default value, should be overwritten by the environment variable + command: [ "/bin/bash" ] + args: + - "-c" + - | + # This script copies specified databases from a source namespace to the current namespace on the same Aurora RDS instance. + # + # This script requires the following to work properly: + # + # 1. Aurora server credentials must be present in the Gen3Secrets/creds.json file. + # These credentials should be present as a Kubernetes secret named "aurora-creds". + # This secret should contain the keys: db_host, db_username, db_password, and db_database. + # + # 2. The "gen3 psql aurora" command should be available to connect to the Aurora server. + # + # 3. The "gen3 secrets decode aurora-creds creds.json" command should work, allowing the script to decode the necessary secrets. + # + # 4. The source and the destination databases should be on the same Aurora instance. + # + # 5. The ServiceAccount, roles, and role binding must be set up using the script psql-db-copy-aurora-sa.yaml. 
+ # The psql-db-copy-aurora-sa.yaml script is configured for the default namespace. + # Modify the namespace as needed before applying it where the script will run. + # These can be created by executing the command: + # kubectl apply -f ${GEN3_HOME}/kube/services/jobs/psql-db-copy-aurora-sa.yaml + # + # How to run the script: + # gen3 job run psql-db-copy-aurora -v SOURCE_NAMESPACE + # + + source "${GEN3_HOME}/gen3/lib/utils.sh" + gen3_load "gen3/gen3setup" + namespace=$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace) + date_str=$(date -u +%y%m%d_%H%M%S) + # Define the default databases to be copied + databases=( "indexd" "sheepdog" "metadata") + gen3_log_info "databases to be processed: ${databases[@]}" + source_namespace=$SOURCE_NAMESPACE + gen3_log_info "Source Namespace: $source_namespace" + + # find Aurora Server credentials + aurora_host_name=$(gen3 secrets decode aurora-creds creds.json | jq -r '.db_host') + aurora_master_username=$(gen3 secrets decode aurora-creds creds.json | jq -r '.db_username') + aurora_master_password=$(gen3 secrets decode aurora-creds creds.json | jq -r '.db_password') + aurora_database=$(gen3 secrets decode aurora-creds creds.json | jq -r '.db_database') + + # Verify important variables are present + if [ -z "$aurora_host_name" ] || [ -z "$aurora_master_username" ] || [ -z "$aurora_master_password" ] || [ -z "$aurora_database" ]; then + gen3_log_err "Aurora credentials are missing. Exiting." + exit 1 + fi + + # Function to truncate to 63 characters + function truncate_identifier() { + local identifier=$1 + if [ ${#identifier} -gt 63 ]; then + echo "${identifier:0:63}" + else + echo "$identifier" + fi + } + + # Function to decode Kubernetes secrets + function secrets_decode() { + local namespace=$1 + local secret=$2 + local key=$3 + local secrets_value + + secrets_value=$(kubectl get secret -n $namespace $secret -o json 2>/dev/null | jq -r --arg key "$key" '.data[$key]' | base64 --decode --ignore-garbage 2>/dev/null) + if [ $? -ne 0 ] || [ -z "$secrets_value" ]; then + echo "Secret $secret in namespace $namespace not found or failed to decode" >&2 + return 1 + else + echo "$secrets_value" + fi + } + + # Array to hold the names of newly created databases + new_databases=() + + # Looping through each database + for database in "${databases[@]}"; do + source_creds="" + creds="" + + # Try to get the source and destination credentials with the "-g3auto" suffix and key "dbcreds.json" + source_creds=$(secrets_decode $source_namespace ${database}-g3auto dbcreds.json) + if [ $? -ne 0 ]; then + source_creds="" + fi + creds=$(secrets_decode $namespace ${database}-g3auto dbcreds.json) + if [ $? -ne 0 ]; then + creds="" + fi + + # If the "-g3auto" suffix didn't work for both source_creds and creds, try with the suffix "creds" and key "creds.json" + if [ -z "$source_creds" ] && [ -z "$creds" ]; then + source_creds=$(secrets_decode $source_namespace ${database}-creds creds.json) + if [ $? -ne 0 ]; then + source_creds="" + fi + creds=$(secrets_decode $namespace ${database}-creds creds.json) + if [ $? 
-ne 0 ]; then + creds="" + fi + fi + + # If we still couldn't get the credentials, log an error and continue to the next database + if [ -z "$source_creds" ] || [ -z "$creds" ]; then + gen3_log_err "Failed to extract database credentials for $database" + continue + fi + + source_db_database=$(echo $source_creds | jq -r .db_database) + db_username=$(echo $creds | jq -r .db_username) + db_database=$(echo $creds | jq -r .db_database) + + if [ -z "$source_db_database" ] || [ -z "$db_username" ] || [ -z "$db_database" ]; then + gen3_log_err "One or more required credentials are missing for $database. Skipping." + continue + fi + target_db=$(truncate_identifier $(echo "${database}_${namespace}_${date_str}" | tr '-' '_')) + gen3_log_info "Processing database: $database" + gen3_log_info "Source DB: $source_db_database, Username: $db_username, Current DB: $db_database, Target DB: $target_db" + + # DB commands + gen3 psql aurora -c "GRANT $db_username TO $aurora_master_username" + gen3 psql aurora -c "SELECT pg_terminate_backend(pg_stat_activity.pid) FROM pg_stat_activity WHERE pg_stat_activity.datname = '$source_db_database' AND pid <> pg_backend_pid()" + gen3 psql aurora -c "CREATE DATABASE $target_db WITH TEMPLATE $source_db_database OWNER $db_username" + pg_command="DO \$\$ DECLARE tbl record; BEGIN FOR tbl IN (SELECT table_schema || '.' || table_name AS full_table_name FROM information_schema.tables WHERE table_schema = 'public') LOOP EXECUTE 'ALTER TABLE ' || tbl.full_table_name || ' OWNER TO $db_username;'; END LOOP; END \$\$;" + PGPASSWORD=${aurora_master_password} psql -h $aurora_host_name -U $aurora_master_username -d "$target_db" -c "$pg_command" + if [ $? -eq 0 ]; then + gen3_log_info "Successfully processed $database" + new_databases+=("$target_db") + else + gen3_log_err "Failed to process $database" + fi + done + + gen3_log_info "Job Completed" + + # Print the list of newly created databases + gen3_log_info "Newly created Database Names::" + for new_db in "${new_databases[@]}"; do + gen3_log_info "$new_db" + done + + sleep 600 + restartPolicy: Never diff --git a/kube/services/jobs/psql-db-copy-aurora-sa.yaml b/kube/services/jobs/psql-db-copy-aurora-sa.yaml new file mode 100644 index 0000000000..e6977a187f --- /dev/null +++ b/kube/services/jobs/psql-db-copy-aurora-sa.yaml @@ -0,0 +1,30 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: psql-db-copy-sa + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: psql-db-copy-role +rules: +- apiGroups: [""] + resources: ["secrets"] + verbs: ["get", "watch", "list"] + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: psql-db-copy-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: psql-db-copy-role +subjects: +- kind: ServiceAccount + name: psql-db-copy-sa + namespace: default # Ensure this references the correct namespace + diff --git a/kube/services/karpenter/nodeTemplateDefault.yaml b/kube/services/karpenter/nodeTemplateDefault.yaml index 6ba8b3a0f7..fbb7831351 100644 --- a/kube/services/karpenter/nodeTemplateDefault.yaml +++ b/kube/services/karpenter/nodeTemplateDefault.yaml @@ -3,6 +3,9 @@ kind: AWSNodeTemplate metadata: name: default spec: + amiSelector: + aws::name: EKS-FIPS* + aws::owners: "143731057154" subnetSelector: karpenter.sh/discovery: VPC_NAME securityGroupSelector: @@ -32,30 +35,12 @@ spec: sysctl -w fs.inotify.max_user_watches=12000 - sudo yum update -y - sudo yum install -y dracut-fips openssl >> 
/opt/fips-install.log
- sudo dracut -f
- # configure grub
- sudo /sbin/grubby --update-kernel=ALL --args="fips=1"
-
 # --BOUNDARY
 # Content-Type: text/cloud-config; charset="us-ascii"
 # mounts:
 # - ['fstype': 'bpf', 'mountpoint': '/sys/fs/bpf', 'opts': 'rw,relatime']
- --BOUNDARY
-
- Content-Type: text/cloud-config; charset="us-ascii"
-
- power_state:
- delay: now
- mode: reboot
- message: Powering off
- timeout: 2
- condition: true
-
- --BOUNDARY--
 blockDeviceMappings:
 - deviceName: /dev/xvda
diff --git a/kube/services/karpenter/provisionerDefault.yaml b/kube/services/karpenter/provisionerDefault.yaml
index ac08284ce1..f92a5e383e 100644
--- a/kube/services/karpenter/provisionerDefault.yaml
+++ b/kube/services/karpenter/provisionerDefault.yaml
@@ -11,14 +11,14 @@ spec:
 - key: kubernetes.io/arch
 operator: In
 values:
- - amd64
+ - amd64
 - key: karpenter.k8s.aws/instance-category
 operator: In
 values:
- - c
- - m
- - r
- - t
+ - c
+ - m
+ - r
+ - t
 # Set a limit of 1000 vcpus
 limits:
 resources:
@@ -30,6 +30,4 @@ spec:
 consolidation:
 enabled: true
- # Kill nodes after 30 days to ensure they stay up to date
- ttlSecondsUntilExpired: 2592000
-
-
+ # Kill nodes after 7 days to ensure they stay up to date
+ ttlSecondsUntilExpired: 604800
diff --git a/kube/services/manifestservice/manifestservice-deploy.yaml b/kube/services/manifestservice/manifestservice-deploy.yaml
index 0966f24803..3551cfa66c 100644
--- a/kube/services/manifestservice/manifestservice-deploy.yaml
+++ b/kube/services/manifestservice/manifestservice-deploy.yaml
@@ -24,6 +24,7 @@ spec:
 userhelper: "yes"
 netvpc: "yes"
 GEN3_DATE_LABEL
+ GEN3_HOSTNAME_LABEL
 spec:
 serviceAccountName: manifestservice-sa
 affinity:
diff --git a/kube/services/metadata/metadata-deploy.yaml b/kube/services/metadata/metadata-deploy.yaml
index 72986e7958..68a83078e6 100644
--- a/kube/services/metadata/metadata-deploy.yaml
+++ b/kube/services/metadata/metadata-deploy.yaml
@@ -25,6 +25,7 @@ spec:
 # for network policy
 netnolimit: "yes"
 GEN3_DATE_LABEL
+ GEN3_HOSTNAME_LABEL
 spec:
 affinity:
 podAntiAffinity:
diff --git a/kube/services/node-monitors/argo-monitors/argo-node-age.yaml b/kube/services/node-monitors/argo-monitors/argo-node-age.yaml
index 890495ee00..7a60a32ce4 100644
--- a/kube/services/node-monitors/argo-monitors/argo-node-age.yaml
+++ b/kube/services/node-monitors/argo-monitors/argo-node-age.yaml
@@ -5,8 +5,11 @@ metadata:
 namespace: default
spec:
 schedule: "*/5 * * * *"
+ successfulJobsHistoryLimit: 3
+ failedJobsHistoryLimit: 1
 jobTemplate:
 spec:
+ backoffLimit: 4
 template:
 metadata:
 labels:
@@ -27,7 +30,7 @@ spec:
 valueFrom:
 configMapKeyRef:
 name: global
- key: slack_webhook
+ key: slack_alarm_webhook
 command: ["/bin/bash"]
 args:
@@ -55,4 +58,4 @@ spec:
 curl -X POST -H 'Content-type: application/json' --data "{\"text\":\"WARNING: Node \`${NODE_NAME}\` is older than 3 hours!\"}" $SLACK_WEBHOOK_URL
 fi
 done
- restartPolicy: OnFailure
\ No newline at end of file
+ restartPolicy: OnFailure
diff --git a/kube/services/node-monitors/fenceshib-jenkins-test.yaml b/kube/services/node-monitors/fenceshib-jenkins-test.yaml
index e9e27af983..deaf26b3e2 100644
--- a/kube/services/node-monitors/fenceshib-jenkins-test.yaml
+++ b/kube/services/node-monitors/fenceshib-jenkins-test.yaml
@@ -5,8 +5,11 @@ metadata:
 namespace: default
spec:
 schedule: "0 */4 * * *"
+ successfulJobsHistoryLimit: 3
+ failedJobsHistoryLimit: 1
 jobTemplate:
 spec:
+ backoffLimit: 4
 template:
 metadata:
 labels:
diff --git a/kube/services/node-monitors/node-not-ready.yaml b/kube/services/node-monitors/node-not-ready.yaml
index 500832fc34..709dfc79e4
---
diff --git a/kube/services/ohdsi-atlas/ohdsi-atlas-config-local.yaml b/kube/services/ohdsi-atlas/ohdsi-atlas-config-local.yaml
index 6b724eb2d7..7c686df913 100644
--- a/kube/services/ohdsi-atlas/ohdsi-atlas-config-local.yaml
+++ b/kube/services/ohdsi-atlas/ohdsi-atlas-config-local.yaml
@@ -29,6 +29,7 @@ data:
       configLocal.cohortComparisonResultsEnabled = false;
       configLocal.userAuthenticationEnabled = true;
       configLocal.plpResultsEnabled = false;
+      configLocal.refreshTokenThreshold = 1000 * 60 * 4; // refresh auth token if it will expire within 4 minutes
       return configLocal;
     });
 
diff --git a/kube/services/ohdsi-webapi/ohdsi-webapi-config.yaml b/kube/services/ohdsi-webapi/ohdsi-webapi-config.yaml
index 8eb01ec08f..a5d0972eb8 100644
--- a/kube/services/ohdsi-webapi/ohdsi-webapi-config.yaml
+++ b/kube/services/ohdsi-webapi/ohdsi-webapi-config.yaml
@@ -22,7 +22,7 @@ stringData:
   security_cors_enabled: "true"
   security_origin: "*"
 
-  security_token_expiration: "43200"
+  security_token_expiration: "900"
 
   security_ssl_enabled: "false"
   security_provider: AtlasRegularSecurity
diff --git a/kube/services/peregrine/peregrine-deploy.yaml b/kube/services/peregrine/peregrine-deploy.yaml
index 20bba64adf..6467fe3251 100644
--- a/kube/services/peregrine/peregrine-deploy.yaml
+++ b/kube/services/peregrine/peregrine-deploy.yaml
@@ -29,6 +29,7 @@ spec:
         GEN3_ENV_LABEL
         GEN3_PEREGRINE_VERSION
         GEN3_DATE_LABEL
+        GEN3_HOSTNAME_LABEL
     spec:
       affinity:
         podAntiAffinity:
diff --git a/kube/services/portal/portal-deploy.yaml b/kube/services/portal/portal-deploy.yaml
index 742f1b71cb..fb1aa2d6f1 100644
--- a/kube/services/portal/portal-deploy.yaml
+++ b/kube/services/portal/portal-deploy.yaml
@@ -19,6 +19,7 @@ spec:
         app: portal
         public: "yes"
         GEN3_DATE_LABEL
+        GEN3_HOSTNAME_LABEL
     spec:
       affinity:
         podAntiAffinity:
diff --git a/kube/services/portal/portal-root-deploy.yaml b/kube/services/portal/portal-root-deploy.yaml
index f639a1e150..28ac8fc310 100644
--- a/kube/services/portal/portal-root-deploy.yaml
+++ b/kube/services/portal/portal-root-deploy.yaml
@@ -19,6 +19,7 @@ spec:
         app: portal
         public: "yes"
         GEN3_DATE_LABEL
+        GEN3_HOSTNAME_LABEL
     spec:
       affinity:
         podAntiAffinity:
diff --git a/kube/services/presigned-url-fence/presigned-url-fence-deploy.yaml b/kube/services/presigned-url-fence/presigned-url-fence-deploy.yaml
index 45e6daaeac..375f424ed8 100644
--- a/kube/services/presigned-url-fence/presigned-url-fence-deploy.yaml
+++ b/kube/services/presigned-url-fence/presigned-url-fence-deploy.yaml
@@ -30,6 +30,10 @@ spec:
         GEN3_ENV_LABEL
         GEN3_FENCE_VERSION
         GEN3_DATE_LABEL
+        GEN3_HOSTNAME_LABEL
+      annotations:
+        prometheus.io/scrape: "true"
+        prometheus.io/path: /metrics
     spec:
       serviceAccountName: fence-sa
       affinity:
diff --git a/kube/services/requestor/requestor-deploy.yaml b/kube/services/requestor/requestor-deploy.yaml
index fb5ce173f7..2ed8866384 100644
--- a/kube/services/requestor/requestor-deploy.yaml
+++ b/kube/services/requestor/requestor-deploy.yaml
@@ -25,6 +25,7 @@ spec:
         # for network policy
         netnolimit: "yes"
         GEN3_DATE_LABEL
+        GEN3_HOSTNAME_LABEL
     spec:
       affinity:
         podAntiAffinity:
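The WebAPI token lifetime drops from 12 hours (43200 s) to 15 minutes (900 s), and Atlas is told to refresh any token that will expire within 4 minutes. A quick arithmetic check of how those two settings interact, with the values copied from the configs above:

#!/bin/bash
# security_token_expiration is in seconds; refreshTokenThreshold is in milliseconds.
token_expiration_s=900
refresh_threshold_ms=$((1000 * 60 * 4))

if [ $((refresh_threshold_ms / 1000)) -lt "$token_expiration_s" ]; then
  echo "ok: Atlas refreshes $((refresh_threshold_ms / 1000))s before a ${token_expiration_s}s token expires"
else
  echo "error: refresh threshold exceeds token lifetime, so every request would trigger a refresh" >&2
fi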
diff --git a/kube/services/revproxy/revproxy-deploy.yaml b/kube/services/revproxy/revproxy-deploy.yaml
index 7ea798b778..80fd582e0b 100644
--- a/kube/services/revproxy/revproxy-deploy.yaml
+++ b/kube/services/revproxy/revproxy-deploy.yaml
@@ -23,6 +23,7 @@ spec:
         userhelper: "yes"
         internet: "yes"
         GEN3_DATE_LABEL
+        GEN3_HOSTNAME_LABEL
     spec:
       affinity:
         podAntiAffinity:
diff --git a/kube/services/sheepdog/sheepdog-deploy.yaml b/kube/services/sheepdog/sheepdog-deploy.yaml
index a260c87411..2f476d0f0a 100644
--- a/kube/services/sheepdog/sheepdog-deploy.yaml
+++ b/kube/services/sheepdog/sheepdog-deploy.yaml
@@ -27,6 +27,7 @@ spec:
         GEN3_ENV_LABEL
         GEN3_SHEEPDOG_VERSION
         GEN3_DATE_LABEL
+        GEN3_HOSTNAME_LABEL
     spec:
       affinity:
         podAntiAffinity:
diff --git a/kube/services/sower/sower-deploy.yaml b/kube/services/sower/sower-deploy.yaml
index b66739d068..2c4e5f610c 100644
--- a/kube/services/sower/sower-deploy.yaml
+++ b/kube/services/sower/sower-deploy.yaml
@@ -22,6 +22,7 @@ spec:
         public: "yes"
         netnolimit: "yes"
         GEN3_DATE_LABEL
+        GEN3_HOSTNAME_LABEL
     spec:
       affinity:
         podAntiAffinity:
diff --git a/kube/services/ssjdispatcher/ssjdispatcher-deploy.yaml b/kube/services/ssjdispatcher/ssjdispatcher-deploy.yaml
index 554c60cb5e..990f583cb0 100644
--- a/kube/services/ssjdispatcher/ssjdispatcher-deploy.yaml
+++ b/kube/services/ssjdispatcher/ssjdispatcher-deploy.yaml
@@ -21,6 +21,7 @@ spec:
         netnolimit: "yes"
         public: "yes"
         GEN3_DATE_LABEL
+        GEN3_HOSTNAME_LABEL
     spec:
       serviceAccountName: ssjdispatcher-service-account
       securityContext:
diff --git a/kube/services/workflow-age-monitor/argo-workflow-age.yaml b/kube/services/workflow-age-monitor/argo-workflow-age.yaml
index 0d0c29115b..52910ad4a1 100644
--- a/kube/services/workflow-age-monitor/argo-workflow-age.yaml
+++ b/kube/services/workflow-age-monitor/argo-workflow-age.yaml
@@ -24,7 +24,7 @@ spec:
               valueFrom:
                 configMapKeyRef:
                   name: global
-                  key: slack_webhook
+                  key: slack_alarm_webhook
 
           command: ["/bin/bash"]
           args:
@@ -32,24 +32,30 @@ spec:
            - |
               #!/bin/bash
               # Get all workflows with specific label and check their age
-              kubectl get workflows --all-namespaces -o json | jq -c '.items[] | {name: .metadata.name, creationTimestamp: .metadata.creationTimestamp}' | while read workflow_info; do
+              kubectl get workflows --all-namespaces -o json | jq -c '.items[] | {name: .metadata.name, startedTimestamp: .status.startedAt}' | while read workflow_info; do
                 WORKFLOW_NAME=$(echo $workflow_info | jq -r '.name')
-                CREATION_TIMESTAMP=$(echo $workflow_info | jq -r '.creationTimestamp')
+                STARTED_TIMESTAMP=$(echo $workflow_info | jq -r '.startedTimestamp')
 
-                # Convert creation timestamp to Unix Epoch time
-                CREATION_EPOCH=$(date -d "$CREATION_TIMESTAMP" +%s)
+                echo "Checking workflow $WORKFLOW_NAME"
+                echo "$STARTED_TIMESTAMP"
 
-                # Get current Unix Epoch time
-                CURRENT_EPOCH=$(date +%s)
+                if [ "$STARTED_TIMESTAMP" != "null" ]; then
+                  echo "Workflow $WORKFLOW_NAME started at $STARTED_TIMESTAMP"
+                  # Convert the started timestamp to Unix Epoch time
+                  CREATION_EPOCH=$(date -d "$STARTED_TIMESTAMP" +%s)
 
-                # Calculate workflow age in seconds
-                WORKFLOW_AGE=$(($CURRENT_EPOCH - $CREATION_EPOCH))
+                  # Get current Unix Epoch time
+                  CURRENT_EPOCH=$(date +%s)
 
-                # Check if workflow age is greater than threshold
-                if [ "$WORKFLOW_AGE" -gt "$THRESHOLD_TIME" ]; then
-                  echo "Workflow $WORKFLOW_NAME has been running for over $THRESHOLD_TIME seconds, sending an alert"
-                  # Send alert to Slack
-                  curl -X POST -H 'Content-type: application/json' --data "{\"text\":\"WARNING: Workflow \`${WORKFLOW_NAME}\` has been running longer than $THRESHOLD_TIME seconds\"}" $SLACK_WEBHOOK_URL
+                  # Calculate workflow age in seconds
+                  WORKFLOW_AGE=$(($CURRENT_EPOCH - $CREATION_EPOCH))
+
+                  # Check if workflow age is greater than threshold
+                  if [ "$WORKFLOW_AGE" -gt "$THRESHOLD_TIME" ]; then
+                    echo "Workflow $WORKFLOW_NAME has been running for over $THRESHOLD_TIME seconds, sending an alert"
+                    # Send alert to Slack
+                    curl -X POST -H 'Content-type: application/json' --data "{\"text\":\"WARNING: Workflow \`${WORKFLOW_NAME}\` has been running longer than $THRESHOLD_TIME seconds\"}" $SLACK_WEBHOOK_URL
+                  fi
                 fi
               done
           restartPolicy: OnFailure
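The rewritten monitor keys off .status.startedAt instead of the creation timestamp, so workflows that are queued but not yet running no longer trigger false alerts. A standalone sketch of the age check it performs, assuming GNU date; the timestamp below is a hypothetical startedAt value, not one from a real workflow:

#!/bin/bash
STARTED_TIMESTAMP="2024-05-01T12:00:00Z"  # hypothetical .status.startedAt
THRESHOLD_TIME=86400                      # alert once a workflow is a day old

# Argo reports startedAt as null until the workflow actually starts, so null is skipped.
if [ "$STARTED_TIMESTAMP" != "null" ]; then
  STARTED_EPOCH=$(date -d "$STARTED_TIMESTAMP" +%s)
  WORKFLOW_AGE=$(( $(date +%s) - STARTED_EPOCH ))
  if [ "$WORKFLOW_AGE" -gt "$THRESHOLD_TIME" ]; then
    echo "workflow is ${WORKFLOW_AGE}s old, over the ${THRESHOLD_TIME}s threshold"
  fi
fi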
diff --git a/kube/services/wts/wts-deploy.yaml b/kube/services/wts/wts-deploy.yaml
index e54a9cfc4f..06f43fe01c 100644
--- a/kube/services/wts/wts-deploy.yaml
+++ b/kube/services/wts/wts-deploy.yaml
@@ -29,6 +29,7 @@ spec:
         GEN3_DATE_LABEL
         GEN3_WTS_VERSION
         GEN3_ENV_LABEL
+        GEN3_HOSTNAME_LABEL
     spec:
       affinity:
         podAntiAffinity:
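GEN3_HOSTNAME_LABEL, added to each deployment above, is a placeholder that the gen3 manifest tooling substitutes at roll time, like GEN3_DATE_LABEL before it. One way to spot-check the rendered labels after a roll; the grep pattern is a guess at the rendered key, not a value taken from this diff:

#!/bin/bash
# Print each deployment with its pod-template labels and look for a hostname label.
kubectl get deployments -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.template.metadata.labels}{"\n"}{end}' | grep -i hostname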