Please check back later.
-
+
diff --git a/files/scripts/ecr-access-job-requirements.txt b/files/scripts/ecr-access-job-requirements.txt
new file mode 100644
index 000000000..bb6d4b847
--- /dev/null
+++ b/files/scripts/ecr-access-job-requirements.txt
@@ -0,0 +1 @@
+boto3<2
diff --git a/files/scripts/ecr-access-job.md b/files/scripts/ecr-access-job.md
new file mode 100644
index 000000000..9659b186b
--- /dev/null
+++ b/files/scripts/ecr-access-job.md
@@ -0,0 +1,85 @@
+# ecr-access-job
+
+### How to run
+
+Configure `global.ecr-access-job-role-arn` to the ARN of the `EcrRepoPolicyUpdateRole` role (described below) in the `manifest.json` file.
+
+Run `gen3 kube-setup-ecr-access-cronjob` to set up the ECR access cronjob.
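+
+For reference, a minimal `manifest.json` fragment might look like the following (a sketch only: the account ID is a placeholder for the account that contains the ECR repositories, and other `global` settings are omitted):
+
+```
+{
+  "global": {
+    "ecr-access-job-role-arn": "arn:aws:iam::<Acct1 ID>:role/EcrRepoPolicyUpdateRole"
+  }
+}
+```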
+
+### What does it do?
+
+The job runs the `ecr-access-job.py` script.
+
+This script updates the configuration of ECR repositories so that users can access the repositories that were created for them.
+
+It queries a DynamoDB table which has the following (simplified) structure:
+| user_id | workspace_type | account_id |
+| ------------------ | -------------------- | ---------- |
+| user1@username.com | Direct Pay | 123456 |
+| user2@username.com | Direct Pay | 789012 |
+| user1@username.com | Other workspace type | |
+
+and then allows each AWS account to access the appropriate ECR repositories. The users' ECR repositories are based on their username as stored in the table. For example, `user1@username.com`'s ECR repository is assumed to be `nextflow-approved/user1-40username-2ecom`.
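+
+The escaping rule is the one implemented by the `escapism()` function in `ecr-access-job.py` (included below): any character outside `a-z0-9` is replaced by `-` followed by its two-digit hex code. A minimal sketch of the same mapping:
+
+```
+# Illustration only - mirrors the escaping rule used by ecr-access-job.py:
+# any character outside [a-z0-9] becomes "-" plus its two-digit hex code.
+def escape_username(username: str) -> str:
+    safe = "abcdefghijklmnopqrstuvwxyz0123456789"
+    return "".join(c if c in safe else "-{0:02x}".format(ord(c)) for c in username)
+
+assert escape_username("user1@username.com") == "user1-40username-2ecom"  # "@" -> "-40", "." -> "-2e"
+```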
+
+### Access needed
+
+- "EcrRepoPolicyUpdateRole" role in the account (Acct1) that contains the ECR repositories:
+
+**Note:** `kube-setup-ecr-access-cronjob.sh` assumes this role already exists.
+
+Permissions:
+```
+{
+ "Version": "2012-10-17",
+ "Statement": [
+ {
+ "Sid": "UpdateEcrRepoPolicy",
+ "Effect": "Allow",
+ "Action": "ecr:SetRepositoryPolicy",
+ "Resource": "arn:aws:ecr:us-east-1::repository/nextflow-approved/*"
+ }
+ ]
+}
+```
+
+Trust policy (allows Acct2):
+```
+{
+ "Version": "2012-10-17",
+ "Statement": [
+ {
+ "Sid": "AllowAssumingRole",
+ "Effect": "Allow",
+ "Principal": {
+ "AWS": "arn:aws:iam:::root"
+ },
+ "Action": "sts:AssumeRole"
+ }
+ ]
+}
+```
+
+- Policy in the account (Acct2) that contains the DynamoDB table (created automatically by `kube-setup-ecr-access-cronjob.sh`):
+```
+{
+ "Version": "2012-10-17",
+ "Statement": [
+ {
+ "Sid": "ReadDynamoDB",
+ "Effect": "Allow",
+ "Action": [
+ "dynamodb:Scan"
+ ],
+ "Resource": "arn:aws:dynamodb:::table/
"
+ },
+ {
+ "Sid": "AssumeEcrRole",
+ "Effect": "Allow",
+ "Action": [
+ "sts:AssumeRole"
+ ],
+ "Resource": "arn:aws:iam:::role/"
+ }
+ ]
+}
+```
diff --git a/files/scripts/ecr-access-job.py b/files/scripts/ecr-access-job.py
new file mode 100644
index 000000000..828d94c96
--- /dev/null
+++ b/files/scripts/ecr-access-job.py
@@ -0,0 +1,177 @@
+"""
+See documentation at https://github.com/uc-cdis/cloud-automation/blob/master/files/scripts/ecr-access-job.md
+"""
+
+from decimal import Decimal
+import json
+import os
+from typing import List
+import uuid
+
+import boto3
+from boto3.dynamodb.conditions import Attr
+
+
+REGION = "us-east-1"
+
+# for local testing. in production, use a service account instead of a key.
+MAIN_ACCOUNT_CREDS = {"key_id": os.environ.get("KEY_ID"), "key_secret": os.environ.get("KEY_SECRET")}
+
+
+def escapism(string: str) -> str:
+ """
+ This is a direct translation of Hatchery's `escapism` golang function to python.
+ We need to escape the username in the same way it's escaped by Hatchery's `escapism` function because
+ special chars cannot be used in an ECR repo name, and so that the ECR repo generated here matches the
+ name expected by Hatchery.
+ """
+ safeBytes = "abcdefghijklmnopqrstuvwxyz0123456789"
+ escaped = ""
+ for v in string:
+ if v not in safeBytes:
+ hexCode = "{0:02x}".format(ord(v))
+ escaped += "-" + hexCode
+ else:
+ escaped += v
+ return escaped
+
+
+def get_configs() -> (str, str):
+ table_name = os.environ.get("PAY_MODELS_DYNAMODB_TABLE")
+ if not table_name:
+ raise Exception("Missing 'PAY_MODELS_DYNAMODB_TABLE' environment variable")
+
+ ecr_role_arn = os.environ.get("ECR_ACCESS_JOB_ARN")
+ if not ecr_role_arn:
+ raise Exception("Missing 'ECR_ACCESS_JOB_ARN' environment variable")
+
+ return table_name, ecr_role_arn
+
+
+def query_usernames_and_account_ids(table_name: str) -> List[dict]:
+ """
+ Returns:
+ List[dict]: [ { "user_id": "user1@username.com", "account_id": "123456" } ]
+ """
+ if MAIN_ACCOUNT_CREDS["key_id"]:
+ session = boto3.Session(
+ aws_access_key_id=MAIN_ACCOUNT_CREDS["key_id"],
+ aws_secret_access_key=MAIN_ACCOUNT_CREDS["key_secret"],
+ )
+ else:
+ session = boto3.Session()
+ dynamodb = session.resource("dynamodb", region_name=REGION)
+ table = dynamodb.Table(table_name)
+
+ # get usernames and AWS account IDs from DynamoDB
+ queried_keys = ["user_id", "account_id"]
+ filter_expr = Attr("workspace_type").eq("Direct Pay")
+ proj = ", ".join("#" + key for key in queried_keys)
+ expr = {"#" + key: key for key in queried_keys}
+ response = table.scan(
+ FilterExpression=filter_expr,
+ ProjectionExpression=proj,
+ ExpressionAttributeNames=expr,
+ )
+ assert response.get("ResponseMetadata", {}).get("HTTPStatusCode") == 200, response
+ items = response["Items"]
+ # if the response is paginated, get the rest of the items
+ while response["Count"] > 0:
+ if "LastEvaluatedKey" not in response:
+ break
+ response = table.scan(
+ FilterExpression=filter_expr,
+ ProjectionExpression=proj,
+ ExpressionAttributeNames=expr,
+ ExclusiveStartKey=response["LastEvaluatedKey"],
+ )
+ assert (
+ response.get("ResponseMetadata", {}).get("HTTPStatusCode") == 200
+ ), response
+ items.extend(response["Items"])
+
+ return items
+
+
+def update_access_in_ecr(repo_to_account_ids: List[dict], ecr_role_arn: str) -> None:
+ # get access to ECR in the account that contains the ECR repos
+ if MAIN_ACCOUNT_CREDS["key_id"]:
+ sts = boto3.client(
+ "sts",
+ aws_access_key_id=MAIN_ACCOUNT_CREDS["key_id"],
+ aws_secret_access_key=MAIN_ACCOUNT_CREDS["key_secret"],
+ )
+ else:
+ sts = boto3.client("sts")
+ assumed_role = sts.assume_role(
+ RoleArn=ecr_role_arn,
+ DurationSeconds=900, # minimum time for aws assume role as per boto docs
+ RoleSessionName=f"ecr-access-assume-role-{str(uuid.uuid4())[:8]}",
+ )
+ assert "Credentials" in assumed_role, "Unable to assume role"
+ ecr = boto3.client(
+ "ecr",
+ aws_access_key_id=assumed_role["Credentials"]["AccessKeyId"],
+ aws_secret_access_key=assumed_role["Credentials"]["SecretAccessKey"],
+ aws_session_token=assumed_role["Credentials"]["SessionToken"],
+ )
+
+ # for each ECR repo, whitelist the account IDs so users can access the repo
+ for repo, account_ids in repo_to_account_ids.items():
+ print(f"Allowing AWS accounts {account_ids} to use ECR repository '{repo}'")
+ policy = {
+ "Version": "2008-10-17",
+ "Statement": [
+ {
+ "Sid": "AllowCrossAccountPull",
+ "Effect": "Allow",
+ "Principal": {
+ "AWS": [
+ f"arn:aws:iam::{account_id}:root"
+ for account_id in account_ids
+ ]
+ },
+ "Action": [
+ "ecr:BatchCheckLayerAvailability",
+ "ecr:BatchGetImage",
+ "ecr:GetAuthorizationToken",
+ "ecr:GetDownloadUrlForLayer",
+ ],
+ }
+ ],
+ }
+ # Note that this is overwriting the repo policy, not appending to it. This means we can't have 2 dynamodb
+ # tables pointing at the same set of ECR repos: the repos would only allow the accounts in the table for
+ # which the script was run most recently. eg QA and Staging can't use the same ECR repos.
+ # Appending is not possible since this code will eventually rely on Arborist for authorization information
+ # and we'll need to overwrite in order to remove expired access.
+ try:
+ ecr.set_repository_policy(
+ repositoryName=repo,
+ policyText=json.dumps(policy),
+ )
+ except Exception as e:
+ print(f" Unable to update '{repo}'; skipping it: {e}")
+
+
+def main() -> None:
+ table_name, ecr_role_arn = get_configs()
+ items = query_usernames_and_account_ids(table_name)
+
+ # construct mapping: { ECR repo url: [ AWS account IDs with access ] }
+ ecr_repo_prefix = "nextflow-approved"
+ repo_to_account_ids = {
+ f"{ecr_repo_prefix}/{escapism(e['user_id'])}": [e["account_id"]]
+ for e in items
+ if "account_id" in e
+ }
+ print(
+ "Mapping of ECR repository to allowed AWS accounts:\n",
+ json.dumps(repo_to_account_ids, indent=2),
+ )
+
+ update_access_in_ecr(repo_to_account_ids, ecr_role_arn)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/files/scripts/healdata/heal-cedar-data-ingest.py b/files/scripts/healdata/heal-cedar-data-ingest.py
index 1235c6f58..e0c4b3c46 100644
--- a/files/scripts/healdata/heal-cedar-data-ingest.py
+++ b/files/scripts/healdata/heal-cedar-data-ingest.py
@@ -1,15 +1,19 @@
import argparse
+import copy
+import json
import sys
import requests
import pydash
+from uuid import UUID
# Defines how a field in metadata is going to be mapped into a key in filters
FILTER_FIELD_MAPPINGS = {
- "Study Type.study_stage": "Study Type",
- "Data.data_type": "Data Type",
- "Study Type.study_subject_type": "Subject Type",
- "Human Subject Applicability.gender_applicability": "Gender",
- "Human Subject Applicability.age_applicability": "Age"
+ "study_metadata.study_type.study_stage": "Study Type",
+ "study_metadata.data.data_type": "Data Type",
+ "study_metadata.study_type.study_subject_type": "Subject Type",
+ "study_metadata.human_subject_applicability.gender_applicability": "Gender",
+ "study_metadata.human_subject_applicability.age_applicability": "Age",
+ "research_program": "Research Program",
}
# Defines how to handle special cases for values in filters
@@ -21,19 +25,54 @@
"Questionnaire/Survey/Assessment - unvalidated instrument": "Questionnaire/Survey/Assessment",
"Cis Male": "Male",
"Cis Female": "Female",
- "Trans Male": "Female-to-male transsexual",
- "Trans Female": "Male-to-female transsexual",
- "Agender, Non-binary, gender non-conforming": "Other",
- "Gender Queer": "Other",
- "Intersex": "Intersexed",
- "Buisness Development": "Business Development"
+ "Trans Male": "Transgender man/trans man/female-to-male (FTM)",
+ "Female-to-male transsexual": "Transgender man/trans man/female-to-male (FTM)",
+ "Trans Female": "Transgender woman/trans woman/male-to-female (MTF)",
+ "Male-to-female transsexual": "Transgender woman/trans woman/male-to-female (MTF)",
+ "Agender, Non-binary, gender non-conforming": "Genderqueer/gender nonconforming/neither exclusively male nor female",
+ "Gender Queer": "Genderqueer/gender nonconforming/neither exclusively male nor female",
+ "Intersex": "Genderqueer/gender nonconforming/neither exclusively male nor female",
+ "Intersexed": "Genderqueer/gender nonconforming/neither exclusively male nor female",
+ "Buisness Development": "Business Development",
}
# Defines field that we don't want to include in the filters
OMITTED_VALUES_MAPPING = {
- "Human Subject Applicability.gender_applicability": "Not applicable"
+ "study_metadata.human_subject_applicability.gender_applicability": "Not applicable"
}
+# repository links
+REPOSITORY_STUDY_ID_LINK_TEMPLATE = {
+ "NIDDK Central": "https://repository.niddk.nih.gov/studies//",
+ "NIDA Data Share": "https://datashare.nida.nih.gov/study/",
+ "NICHD DASH": "https://dash.nichd.nih.gov/study/",
+ "ICPSR": "https://www.icpsr.umich.edu/web/ICPSR/studies/",
+ "BioSystics-AP": "https://biosystics-ap.com/assays/assaystudy//",
+}
+
+
+def is_valid_uuid(uuid_to_test, version=4):
+ """
+ Check if uuid_to_test is a valid UUID.
+
+ Parameters
+ ----------
+ uuid_to_test : str
+ version : {1, 2, 3, 4}
+
+ Returns
+ -------
+ `True` if uuid_to_test is a valid UUID, otherwise `False`.
+
+ """
+
+ try:
+ uuid_obj = UUID(uuid_to_test, version=version)
+ except ValueError:
+ return False
+ return str(uuid_obj) == uuid_to_test
+
+
def update_filter_metadata(metadata_to_update):
filter_metadata = []
for metadata_field_key, filter_field_key in FILTER_FIELD_MAPPINGS.items():
@@ -45,19 +84,91 @@ def update_filter_metadata(metadata_to_update):
print(filter_field_values)
raise TypeError("Neither a string nor a list")
for filter_field_value in filter_field_values:
- if (metadata_field_key, filter_field_value) in OMITTED_VALUES_MAPPING.items():
+ if (
+ metadata_field_key,
+ filter_field_value,
+ ) in OMITTED_VALUES_MAPPING.items():
continue
if filter_field_value in SPECIAL_VALUE_MAPPINGS:
filter_field_value = SPECIAL_VALUE_MAPPINGS[filter_field_value]
- filter_metadata.append({"key": filter_field_key, "value": filter_field_value})
+ filter_metadata.append(
+ {"key": filter_field_key, "value": filter_field_value}
+ )
filter_metadata = pydash.uniq(filter_metadata)
metadata_to_update["advSearchFilters"] = filter_metadata
+ # Retain these from existing tags
+ save_tags = ["Data Repository"]
+ tags = [tag for tag in metadata_to_update["tags"] if tag["category"] in save_tags]
+ # Add any new tags from advSearchFilters
+ for f in metadata_to_update["advSearchFilters"]:
+ if f["key"] == "Gender":
+ continue
+ tag = {"name": f["value"], "category": f["key"]}
+ if tag not in tags:
+ tags.append(tag)
+ metadata_to_update["tags"] = tags
return metadata_to_update
+
+def get_client_token(client_id: str, client_secret: str):
+ try:
+ token_url = f"http://revproxy-service/user/oauth2/token"
+ headers = {"Content-Type": "application/x-www-form-urlencoded"}
+ params = {"grant_type": "client_credentials"}
+ data = "scope=openid user data"
+
+ token_result = requests.post(
+ token_url,
+ params=params,
+ headers=headers,
+ data=data,
+ auth=(client_id, client_secret),
+ )
+ token = token_result.json()["access_token"]
+ except:
+ raise Exception("Could not get token")
+ return token
+
+
+def get_related_studies(serial_num, guid, hostname):
+ related_study_result = []
+
+ if serial_num:
+ mds = requests.get(
+ f"http://revproxy-service/mds/metadata?nih_reporter.project_num_split.serial_num={serial_num}&data=true&limit=2000"
+ )
+ if mds.status_code == 200:
+ related_study_metadata = mds.json()
+
+ for (
+ related_study_metadata_key,
+ related_study_metadata_value,
+ ) in related_study_metadata.items():
+ if related_study_metadata_key == guid or (
+ related_study_metadata_value["_guid_type"] != "discovery_metadata"
+ and related_study_metadata_value["_guid_type"]
+ != "unregistered_discovery_metadata"
+ ):
+ # do nothing for self, or for archived studies
+ continue
+ title = (
+ related_study_metadata_value.get("gen3_discovery", {})
+ .get("study_metadata", {})
+ .get("minimal_info", {})
+ .get("study_name", "")
+ )
+ link = (
+ f"https://{hostname}/portal/discovery/{related_study_metadata_key}/"
+ )
+ related_study_result.append({"title": title, "link": link})
+ return related_study_result
+
+
parser = argparse.ArgumentParser()
parser.add_argument("--directory", help="CEDAR Directory ID for registering ")
-parser.add_argument("--access_token", help="User access token")
+parser.add_argument("--cedar_client_id", help="The CEDAR client id")
+parser.add_argument("--cedar_client_secret", help="The CEDAR client secret")
parser.add_argument("--hostname", help="Hostname")
@@ -66,95 +177,220 @@ def update_filter_metadata(metadata_to_update):
if not args.directory:
print("Directory ID is required!")
sys.exit(1)
-if not args.access_token:
- print("User access token is required!")
+if not args.cedar_client_id:
+ print("CEDAR client id is required!")
+ sys.exit(1)
+if not args.cedar_client_secret:
+ print("CEDAR client secret is required!")
sys.exit(1)
if not args.hostname:
print("Hostname is required!")
sys.exit(1)
dir_id = args.directory
-access_token = args.access_token
+client_id = args.cedar_client_id
+client_secret = args.cedar_client_secret
hostname = args.hostname
-token_header = {"Authorization": 'bearer ' + access_token}
+print("Getting CEDAR client access token")
+access_token = get_client_token(client_id, client_secret)
+token_header = {"Authorization": "bearer " + access_token}
limit = 10
offset = 0
-# initalize this to be bigger than our inital call so we can go through while loop
+# initialize this to be bigger than our initial call so we can go through while loop
total = 100
-while((limit + offset <= total)):
+if not is_valid_uuid(dir_id):
+ print("Directory ID is not in UUID format!")
+ sys.exit(1)
+
+while limit + offset <= total:
# Get the metadata from cedar to register
print("Querying CEDAR...")
- cedar = requests.get(f"http://revproxy-service/cedar/get-instance-by-directory/{dir_id}?limit={limit}&offset={offset}", headers=token_header)
+ cedar = requests.get(
+ f"http://revproxy-service/cedar/get-instance-by-directory/{dir_id}?limit={limit}&offset={offset}",
+ headers=token_header,
+ )
# If we get metadata back now register with MDS
if cedar.status_code == 200:
metadata_return = cedar.json()
if "metadata" not in metadata_return:
- print("Got 200 from CEDAR wrapper but no metadata in body, something is not right!")
+ print(
+ "Got 200 from CEDAR wrapper but no metadata in body, something is not right!"
+ )
sys.exit(1)
total = metadata_return["metadata"]["totalCount"]
returned_records = len(metadata_return["metadata"]["records"])
print(f"Successfully got {returned_records} record(s) from CEDAR directory")
for cedar_record in metadata_return["metadata"]["records"]:
- if "appl_id" not in cedar_record:
+ # get the appl id from cedar for querying in our MDS
+ cedar_appl_id = pydash.get(
+ cedar_record, "metadata_location.nih_application_id"
+ )
+ if cedar_appl_id is None:
print("This record doesn't have appl_id, skipping...")
continue
- # get the appl id from cedar for querying in our MDS
- cedar_appl_id = str(cedar_record["appl_id"])
-
# Get the metadata record for the nih_application_id
- mds = requests.get(f"http://revproxy-service/mds/metadata?gen3_discovery.appl_id={cedar_appl_id}&data=true")
+ mds = requests.get(
+ f"http://revproxy-service/mds/metadata?gen3_discovery.study_metadata.metadata_location.nih_application_id={cedar_appl_id}&data=true"
+ )
if mds.status_code == 200:
mds_res = mds.json()
# the query result key is the record of the metadata. If it doesn't return anything then our query failed.
if len(list(mds_res.keys())) == 0 or len(list(mds_res.keys())) > 1:
- print("Query returned nothing for ", cedar_appl_id, "appl id")
+ print("Query returned nothing for", cedar_appl_id, "appl id")
continue
# get the key for our mds record
- cedar_record_id = list(mds_res.keys())[0]
+ mds_record_guid = list(mds_res.keys())[0]
- mds_res = mds_res[cedar_record_id]
- mds_cedar_register_data_body = {}
+ mds_res = mds_res[mds_record_guid]
+ mds_cedar_register_data_body = {**mds_res}
mds_discovery_data_body = {}
+ mds_clinical_trials = {}
if mds_res["_guid_type"] == "discovery_metadata":
print("Metadata is already registered. Updating MDS record")
elif mds_res["_guid_type"] == "unregistered_discovery_metadata":
- print("Metadata is has not been registered. Registering it in MDS record")
+ print(
+ "Metadata has not been registered. Registering it in MDS record"
+ )
+ else:
+ print(
+ f"This metadata data record has a special GUID type \"{mds_res['_guid_type']}\" and will be skipped"
+ )
continue
- pydash.merge(mds_discovery_data_body, mds_res["gen3_discovery"], cedar_record)
- mds_discovery_data_body = update_filter_metadata(mds_discovery_data_body)
+ if "clinicaltrials_gov" in cedar_record:
+ mds_clinical_trials = cedar_record["clinicaltrials_gov"]
+ del cedar_record["clinicaltrials_gov"]
+
+ # special handling for this field: its parent will be deleted before we merge the CEDAR and MDS SLMD, to avoid duplicated values
+ cedar_record_other_study_websites = cedar_record.get(
+ "metadata_location", {}
+ ).get("other_study_websites", [])
+ del cedar_record["metadata_location"]
+
+ mds_res["gen3_discovery"]["study_metadata"].update(cedar_record)
+ mds_res["gen3_discovery"]["study_metadata"]["metadata_location"][
+ "other_study_websites"
+ ] = cedar_record_other_study_websites
+
+ # setup citations
+ doi_citation = mds_res["gen3_discovery"]["study_metadata"].get(
+ "doi_citation", ""
+ )
+ mds_res["gen3_discovery"]["study_metadata"]["citation"][
+ "heal_platform_citation"
+ ] = doi_citation
+
+ # setup repository_study_link
+ data_repositories = (
+ mds_res.get("gen3_discovery", {})
+ .get("study_metadata", {})
+ .get("metadata_location", {})
+ .get("data_repositories", [])
+ )
+ repository_citation = "Users must also include a citation to the data as specified by the local repository."
+ repository_citation_additional_text = ' The link to the study page at the local repository can be found in the "Data" tab.'
+ for repository in data_repositories:
+ if (
+ repository["repository_name"]
+ and repository["repository_name"]
+ in REPOSITORY_STUDY_ID_LINK_TEMPLATE
+ and repository["repository_study_ID"]
+ ):
+ repository_study_link = REPOSITORY_STUDY_ID_LINK_TEMPLATE[
+ repository["repository_name"]
+ ].replace("", repository["repository_study_ID"])
+ repository.update(
+ {"repository_study_link": repository_study_link}
+ )
+ if (
+ repository_citation_additional_text
+ not in repository_citation
+ ):
+ repository_citation += repository_citation_additional_text
+ if len(data_repositories):
+ data_repositories[0] = {
+ **data_repositories[0],
+ "repository_citation": repository_citation,
+ }
+
+ mds_res["gen3_discovery"]["study_metadata"]["metadata_location"][
+ "data_repositories"
+ ] = copy.deepcopy(data_repositories)
+
+ # set up related studies
+ serial_num = None
+ try:
+ serial_num = (
+ mds_res.get("nih_reporter", {})
+ .get("project_num_split", {})
+ .get("serial_num", None)
+ )
+ except Exception:
+ print("Unable to get serial number for study")
+
+ if serial_num is None:
+ print("Unable to get serial number for study")
+
+ related_study_result = get_related_studies(
+ serial_num, mds_record_guid, hostname
+ )
+ mds_res["gen3_discovery"]["related_studies"] = copy.deepcopy(related_study_result)
+
+ # merge data from cedar that is not study level metadata into a level higher
+ deleted_keys = []
+ for key, value in mds_res["gen3_discovery"]["study_metadata"].items():
+ if not isinstance(value, dict):
+ mds_res["gen3_discovery"][key] = value
+ deleted_keys.append(key)
+ for key in deleted_keys:
+ del mds_res["gen3_discovery"]["study_metadata"][key]
+
+ mds_discovery_data_body = update_filter_metadata(
+ mds_res["gen3_discovery"]
+ )
+
mds_cedar_register_data_body["gen3_discovery"] = mds_discovery_data_body
+ if mds_clinical_trials:
+ mds_cedar_register_data_body["clinicaltrials_gov"] = {
+ **mds_cedar_register_data_body.get("clinicaltrials_gov", {}),
+ **mds_clinical_trials,
+ }
+
mds_cedar_register_data_body["_guid_type"] = "discovery_metadata"
- print("Metadata is now being registered.")
- mds_put = requests.put(f"http://revproxy-service/mds/metadata/{cedar_record_id}",
+ print(f"Metadata {mds_record_guid} is now being registered.")
+ mds_put = requests.put(
+ f"http://revproxy-service/mds/metadata/{mds_record_guid}",
headers=token_header,
- json = mds_cedar_register_data_body
+ json=mds_cedar_register_data_body,
)
if mds_put.status_code == 200:
- print(f"Successfully registered: {cedar_record_id}")
+ print(f"Successfully registered: {mds_record_guid}")
else:
- print(f"Failed to register: {cedar_record_id}. Might not be MDS admin")
+ print(
+ f"Failed to register: {mds_record_guid}. Might not be MDS admin"
+ )
print(f"Status from MDS: {mds_put.status_code}")
else:
print(f"Failed to get information from MDS: {mds.status_code}")
+ else:
+ print(
+ f"Failed to get information from CEDAR wrapper service: {cedar.status_code}"
+ )
+
if offset + limit == total:
break
offset = offset + limit
if (offset + limit) > total:
- limit = (offset + limit) - total
-
-
-else:
- print(f"Failed to get information from CEDAR wrapper service: {cedar.status_code}")
+ limit = total - offset
diff --git a/files/squid_whitelist/web_whitelist b/files/squid_whitelist/web_whitelist
index c36194765..6896314ab 100644
--- a/files/squid_whitelist/web_whitelist
+++ b/files/squid_whitelist/web_whitelist
@@ -7,14 +7,15 @@ achecker.ca
apache.github.io
api.epigraphdb.org
api.monqcle.com
+awslabs.github.io
biodata-integration-tests.net
marketing.biorender.com
clinicaltrials.gov
+charts.bitnami.com
ctds-planx.atlassian.net
data.cityofchicago.org
dataguids.org
api.login.yahoo.com
-api.snapcraft.io
apt.kubernetes.io
argoproj.github.io
archive.cloudera.com
@@ -33,6 +34,7 @@ cernvm.cern.ch
charts.bitnami.com
charts.helm.sh
cloud.r-project.org
+coredns.github.io
coreos.com
covidstoplight.org
cpan.mirrors.tds.net
@@ -69,11 +71,14 @@ ftp.ussg.iu.edu
fmwww.bc.edu
gcr.io
get.helm.sh
+ghcr.io
git.io
go.googlesource.com
golang.org
gopkg.in
grafana.com
+grafana.github.io
+helm.elastic.co
http.us.debian.org
ifconfig.io
ingress.coralogix.us
@@ -123,6 +128,7 @@ orcid.org
pgp.mit.edu
ppa.launchpad.net
prometheus-community.github.io
+proxy.golang.org
public.ecr.aws
pubmirrors.dal.corespace.com
reflector.westga.edu
@@ -138,8 +144,10 @@ repo.dimenoc.com
repos.mia.quadranet.com
repos.redrockhost.com
repos.sensuapp.org
+repo.vmware.com
repository.cloudera.com
resource.metadatacenter.org
+rmq.n3c.ncats.io
rules.emergingthreats.net
rweb.quant.ku.edu
sa-update.dnswl.org
@@ -158,3 +166,5 @@ www.rabbitmq.com
www.uniprot.org
vpodc.org
yahoo.com
+idp.stage.qdr.org
+stage.qdr.org
\ No newline at end of file
diff --git a/files/squid_whitelist/web_wildcard_whitelist b/files/squid_whitelist/web_wildcard_whitelist
index a8c765814..b71ee76c2 100644
--- a/files/squid_whitelist/web_wildcard_whitelist
+++ b/files/squid_whitelist/web_wildcard_whitelist
@@ -21,6 +21,7 @@
.centos.org
.ceph.com
.chef.io
+.chordshealth.org
.clamav.net
.cloud.google.com
.cloudfront.net
@@ -31,6 +32,7 @@
.data-commons.org
.datadoghq.com
.datastage.io
+.ddog-gov.com
.diseasedatahub.org
.docker.com
.docker.io
@@ -38,6 +40,7 @@
.dph.illinois.gov
.elasticsearch.org
.erlang-solutions.com
+.external-secrets.io
.extjs.com
.fedoraproject.org
.gen3.org
@@ -94,9 +97,12 @@
.sks-keyservers.net
.slack.com
.slack-msgs.com
+.snapcraft.io
+.snapcraftcontent.com
.sourceforge.net
.southsideweekly.com
.theanvil.io
+.tigera.io
.twistlock.com
.ubuntu.com
.ucsc.edu
diff --git a/flavors/squid_auto/squid_running_on_docker.sh b/flavors/squid_auto/squid_running_on_docker.sh
index 05607f304..812a9f738 100644
--- a/flavors/squid_auto/squid_running_on_docker.sh
+++ b/flavors/squid_auto/squid_running_on_docker.sh
@@ -8,6 +8,9 @@ DISTRO=$(awk -F '[="]*' '/^NAME/ { print $2 }' < /etc/os-release)
WORK_USER="ubuntu"
if [[ $DISTRO == "Amazon Linux" ]]; then
WORK_USER="ec2-user"
+ if [[ $(awk -F '[="]*' '/^VERSION_ID/ { print $2 }' < /etc/os-release) == "2023" ]]; then
+ DISTRO="al2023"
+ fi
fi
HOME_FOLDER="/home/${WORK_USER}"
SUB_FOLDER="${HOME_FOLDER}/cloud-automation"
@@ -60,6 +63,8 @@ fi
function install_basics(){
if [[ $DISTRO == "Ubuntu" ]]; then
apt -y install atop
+ elif [[ $DISTRO == "al2023" ]]; then
+ sudo dnf install cronie nc -y
fi
}
@@ -69,10 +74,18 @@ function install_docker(){
# Docker
###############################################################
# Install docker from sources
- curl -fsSL ${DOCKER_DOWNLOAD_URL}/gpg | sudo apt-key add -
- add-apt-repository "deb [arch=amd64] ${DOCKER_DOWNLOAD_URL} $(lsb_release -cs) stable"
- apt update
- apt install -y docker-ce
+ if [[ $DISTRO == "Ubuntu" ]]; then
+ curl -fsSL ${DOCKER_DOWNLOAD_URL}/gpg | sudo apt-key add -
+ add-apt-repository "deb [arch=amd64] ${DOCKER_DOWNLOAD_URL} $(lsb_release -cs) stable"
+ apt update
+ apt install -y docker-ce
+ else
+ sudo yum update -y
+ sudo yum install -y docker
+ # Start and enable Docker service
+ sudo systemctl start docker
+ sudo systemctl enable docker
+ fi
mkdir -p /etc/docker
cp ${SUB_FOLDER}/flavors/squid_auto/startup_configs/docker-daemon.json /etc/docker/daemon.json
chmod -R 0644 /etc/docker
@@ -201,8 +214,10 @@ function install_awslogs {
if [[ $DISTRO == "Ubuntu" ]]; then
wget ${AWSLOGS_DOWNLOAD_URL} -O amazon-cloudwatch-agent.deb
dpkg -i -E ./amazon-cloudwatch-agent.deb
- else
+ elif [[ $DISTRO == "Amazon Linux" ]]; then
sudo yum install amazon-cloudwatch-agent nc -y
+ elif [[ $DISTRO == "al2023" ]]; then
+ sudo dnf install amazon-cloudwatch-agent -y
fi
# Configure the AWS logs
@@ -292,6 +307,19 @@ function main(){
--volume ${SQUID_CACHE_DIR}:${SQUID_CACHE_DIR} \
--volume ${SQUID_CONFIG_DIR}:${SQUID_CONFIG_DIR}:ro \
quay.io/cdis/squid:${SQUID_IMAGE_TAG}
+
+ max_attempts=10
+ attempt_counter=0
+ while [ $attempt_counter -lt $max_attempts ]; do
+ #((attempt_counter++))
+ sleep 10
+ if [[ -z "$(sudo lsof -i:3128)" ]]; then
+ echo "Squid not healthy, restarting."
+ docker restart squid
+ else
+ echo "Squid healthy"
+ fi
+ done
}
main
diff --git a/flavors/squid_auto/startup_configs/squid.conf b/flavors/squid_auto/startup_configs/squid.conf
index 653026200..b1e44810a 100644
--- a/flavors/squid_auto/startup_configs/squid.conf
+++ b/flavors/squid_auto/startup_configs/squid.conf
@@ -56,7 +56,6 @@ http_access deny all
persistent_request_timeout 5 seconds
-cache_dir ufs /var/cache/squid 100 16 256
pid_filename /var/run/squid/squid.pid
# vi:syntax=squid.conf
diff --git a/gen3/bin/awsrole.sh b/gen3/bin/awsrole.sh
index 476e7d003..dd19ea7a4 100644
--- a/gen3/bin/awsrole.sh
+++ b/gen3/bin/awsrole.sh
@@ -20,18 +20,22 @@ gen3_awsrole_help() {
# NOTE: service-account to role is 1 to 1
#
# @param serviceAccount to link to the role
+# @param flag (optional) - specify a flag to use a different trust policy
#
function gen3_awsrole_ar_policy() {
local serviceAccount="$1"
shift || return 1
- if [[ ! -z $1 ]]; then
- local namespace=$1
+ if [[ -z $1 ]] || [[ $1 == -* ]]; then
+ namespace=$(gen3 db namespace)
else
- local namespace=$(gen3 db namespace)
+ namespace=$1
+ shift
fi
local issuer_url
local account_id
local vpc_name
+ local flag=$flag
+
vpc_name="$(gen3 api environment)" || return 1
issuer_url="$(aws eks describe-cluster \
--name ${vpc_name} \
@@ -42,7 +46,42 @@ function gen3_awsrole_ar_policy() {
local provider_arn="arn:aws:iam::${account_id}:oidc-provider/${issuer_url}"
- cat - < config.tfvars
@@ -182,10 +226,14 @@ gen3_awsrole_create() {
gen3_log_err "use: gen3 awsrole create roleName saName"
return 1
fi
- if [[ ! -z $1 ]]; then
- local namespace=$1
+ if [[ -z $1 ]] || [[ $1 == -* ]]; then
+ namespace=$(gen3 db namespace)
else
- local namespace=$(gen3 db namespace)
+ namespace=$1
+ shift
+ fi
+ if [[ ! -z $1 ]]; then
+ flag=$1
fi
# do simple validation of name
local regexp="^[a-z][a-z0-9\-]*$"
@@ -200,6 +248,7 @@ EOF
return 1
fi
+
# check if the name is already used by another entity
local entity_type
entity_type=$(_get_entity_type $rolename)
@@ -216,9 +265,11 @@ EOF
fi
TF_IN_AUTOMATION="true"
- if ! _tfplan_role $rolename $saName $namespace; then
+
+ if ! _tfplan_role $rolename $saName $namespace $flag; then
return 1
fi
+
if ! _tfapply_role $rolename; then
return 1
fi
@@ -367,4 +418,4 @@ gen3_awsrole() {
# Let testsuite source file
if [[ -z "$GEN3_SOURCE_ONLY" ]]; then
gen3_awsrole "$@"
-fi
+fi
\ No newline at end of file
diff --git a/gen3/bin/create-es7-cluster.sh b/gen3/bin/create-es7-cluster.sh
new file mode 100644
index 000000000..553dc2652
--- /dev/null
+++ b/gen3/bin/create-es7-cluster.sh
@@ -0,0 +1,64 @@
+#!/bin/bash
+
+source "${GEN3_HOME}/gen3/lib/utils.sh"
+gen3_load "gen3/gen3setup"
+
+# Save the new and old cluster names to vars
+environment=`gen3 api environment`
+existing_cluster_name="$environment-gen3-metadata"
+new_cluster_name="$environment-gen3-metadata-2"
+
+# Gather existing cluster information
+cluster_info=$(aws es describe-elasticsearch-domain --domain-name "$existing_cluster_name")
+
+# Extract relevant information from the existing cluster
+instance_type=`echo "$cluster_info" | jq -r '.DomainStatus.ElasticsearchClusterConfig.InstanceType'`
+instance_count=`echo "$cluster_info" | jq -r '.DomainStatus.ElasticsearchClusterConfig.InstanceCount'`
+volume_type=`echo "$cluster_info" | jq -r '.DomainStatus.EBSOptions.VolumeType'`
+volume_size=`echo "$cluster_info" | jq -r '.DomainStatus.EBSOptions.VolumeSize'`
+vpc_name=`echo "$cluster_info" | jq -r '.DomainStatus.VPCOptions.VPCId'`
+subnet_ids=`echo "$cluster_info" | jq -r '.DomainStatus.VPCOptions.SubnetIds[]'`
+security_groups=`echo "$cluster_info" | jq -r '.DomainStatus.VPCOptions.SecurityGroupIds[]'`
+access_policies=`echo "$cluster_info" | jq -r '.DomainStatus.AccessPolicies'`
+kms_key_id=`echo "$cluster_info" | jq -r '.DomainStatus.EncryptionAtRestOptions.KmsKeyId'`
+
+# Check if the new Elasticsearch cluster name already exists
+new_cluster=`aws es describe-elasticsearch-domain --domain-name "$new_cluster_name"`
+
+if [ -n "$new_cluster" ]; then
+ echo "Cluster $new_cluster_name already exists"
+else
+ echo "Cluster does not exist- creating..."
+ # Create the new Elasticsearch cluster
+ aws es create-elasticsearch-domain \
+ --domain-name "$new_cluster_name" \
+ --elasticsearch-version "7.10" \
+ --elasticsearch-cluster-config \
+ "InstanceType=$instance_type,InstanceCount=$instance_count" \
+ --ebs-options \
+ "EBSEnabled=true,VolumeType=$volume_type,VolumeSize=$volume_size" \
+ --vpc-options "SubnetIds=${subnet_ids[*]},SecurityGroupIds=${security_groups[*]}" \
+ --access-policies "$access_policies" \
+ --encryption-at-rest-options "Enabled=true,KmsKeyId=$kms_key_id" \
+ --node-to-node-encryption-options "Enabled=true" \
+ > /dev/null 2>&1
+
+ # Wait for the new cluster to be available
+ sleep_duration=60
+ max_retries=10
+ retry_count=0
+
+ while [ $retry_count -lt $max_retries ]; do
+ cluster_status=$(aws es describe-elasticsearch-domain --domain-name "$new_cluster_name" | jq -r '.DomainStatus.Processing')
+ if [ "$cluster_status" != "true" ]; then
+ echo "New cluster is available."
+ break
+ fi
+ sleep $sleep_duration
+ ((retry_count++))
+ done
+
+ if [ $retry_count -eq $max_retries ]; then
+ echo "New cluster creation may still be in progress. Please check the AWS Management Console for the status."
+ fi
+fi
diff --git a/gen3/bin/dbbackup.sh b/gen3/bin/dbbackup.sh
new file mode 100644
index 000000000..eb9611a90
--- /dev/null
+++ b/gen3/bin/dbbackup.sh
@@ -0,0 +1,212 @@
+#!/bin/bash
+
+####################################################################################################
+# Script: dbbackup.sh
+#
+# Description:
+# This script facilitates the management of database backups within the gen3 environment. It is
+# equipped to establish policies, service accounts, roles, and S3 buckets. Depending on the
+# command provided, it will either initiate a database dump or perform a restore.
+#
+# Usage:
+# gen3 dbbackup [dump|restore]
+#
+# dump - Initiates a database dump, creating the essential AWS resources if they are absent.
+# The dump operation is intended to be executed from the namespace/commons that requires
+# the backup.
+# restore - Initiates a database restore, creating the essential AWS resources if they are absent.
+# The restore operation is meant to be executed in the target namespace, where the backup
+# needs to be restored.
+#
+# Notes:
+# This script extensively utilizes the AWS CLI and the gen3 CLI. Proper functioning demands a
+# configured gen3 environment and the availability of the necessary CLI tools.
+#
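+# Examples (assuming a configured gen3 environment):
+#   gen3 dbbackup dump      # run from the namespace/commons whose databases should be backed up
+#   gen3 dbbackup restore   # run from the target namespace where the backup should be restored
+#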
+####################################################################################################
+
+# Exit on error
+#set -e
+
+# Print commands before executing
+#set -x
+
+#trap 'echo "Error at Line $LINENO"' ERR
+
+source "${GEN3_HOME}/gen3/lib/utils.sh"
+gen3_load "gen3/lib/kube-setup-init"
+
+policy_name="bucket_reader_writer_gen3_db_backup"
+account_id=$(aws sts get-caller-identity --query "Account" --output text)
+vpc_name="$(gen3 api environment)"
+namespace="$(gen3 db namespace)"
+sa_name="dbbackup-sa"
+bucket_name="gen3-db-backups-${account_id}"
+
+gen3_log_info "policy_name: $policy_name"
+gen3_log_info "account_id: $account_id"
+gen3_log_info "vpc_name: $vpc_name"
+gen3_log_info "namespace: $namespace"
+gen3_log_info "sa_name: $sa_name"
+gen3_log_info "bucket_name: $bucket_name"
+
+
+# Create an S3 access policy if it doesn't exist
+create_policy() {
+ # Check if policy exists
+ if ! aws iam list-policies --query "Policies[?PolicyName == '$policy_name'] | [0].Arn" --output text | grep -q "arn:aws:iam"; then
+ # Create the S3 access policy - policy document
+ access_policy=$(cat <<-EOM
+{
+ "Version": "2012-10-17",
+ "Statement": [
+ {
+ "Effect": "Allow",
+ "Action": [
+ "s3:CreateBucket",
+ "s3:PutObject",
+ "s3:GetObject",
+ "s3:ListBucket",
+ "s3:DeleteObject"
+ ],
+ "Resource": [
+ "arn:aws:s3:::gen3-db-backups-*"
+ ]
+ }
+ ]
+}
+EOM
+ )
+
+ # Create the S3 access policy from the policy document
+ policy_arn=$(aws iam create-policy --policy-name "$policy_name" --policy-document "$access_policy" --query "Policy.Arn" --output text)
+ gen3_log_info "policy_arn: $policy_arn"
+ else
+ gen3_log_info "Policy $policy_name already exists, skipping policy creation."
+ policy_arn=$(aws iam list-policies --query "Policies[?PolicyName == '$policy_name'] | [0].Arn" --output text | grep "arn:aws:iam" | head -n 1)
+ gen3_log_info "policy_arn: $policy_arn"
+ fi
+}
+
+
+# Create or update the Service Account and its corresponding IAM Role
+create_service_account_and_role() {
+ cluster_arn=$(kubectl config current-context)
+ eks_cluster=$(echo "$cluster_arn" | awk -F'/' '{print $2}')
+ oidc_url=$(aws eks describe-cluster --name $eks_cluster --query 'cluster.identity.oidc.issuer' --output text | sed -e 's/^https:\/\///')
+ role_name="${vpc_name}-${namespace}-${sa_name}-role"
+ role_arn="arn:aws:iam::${account_id}:role/${role_name}"
+ local trust_policy=$(mktemp -p "$XDG_RUNTIME_DIR" "tmp_policy.XXXXXX")
+ gen3_log_info "trust_policy: $trust_policy"
+ gen3_log_info "eks_cluster: $eks_cluster"
+ gen3_log_info "oidc_url: $oidc_url"
+ gen3_log_info "role_name: $role_name"
+
+
+ cat > ${trust_policy} <&1; then
+ gen3_log_info "Updating existing role: $role_name"
+ aws iam update-assume-role-policy --role-name $role_name --policy-document "file://$trust_policy"
+ else
+ gen3_log_info "Creating new role: $role_name"
+ aws iam create-role --role-name $role_name --assume-role-policy-document "file://$trust_policy"
+ fi
+
+ # Attach the policy to the IAM role
+ aws iam attach-role-policy --role-name $role_name --policy-arn $policy_arn
+
+ # Create the Kubernetes service account if it doesn't exist
+ if ! kubectl get serviceaccount -n $namespace $sa_name 2>&1; then
+ kubectl create serviceaccount -n $namespace $sa_name
+ fi
+ # Annotate the KSA with the IAM role ARN
+ gen3_log_info "Annotating Service Account with IAM role ARN"
+ kubectl annotate serviceaccount -n ${namespace} ${sa_name} eks.amazonaws.com/role-arn=${role_arn} --overwrite
+
+}
+
+# Create an S3 bucket if it doesn't exist
+create_s3_bucket() {
+ # Check if bucket already exists
+ if aws s3 ls "s3://$bucket_name" 2>&1 | grep -q 'NoSuchBucket'; then
+ gen3_log_info "Bucket does not exist, creating..."
+ aws s3 mb "s3://$bucket_name"
+ else
+ gen3_log_info "Bucket $bucket_name already exists, skipping bucket creation."
+ fi
+}
+
+
+# Function to trigger the database backup job
+db_dump() {
+ gen3 job run psql-db-prep-dump
+}
+
+
+# Function to trigger the database backup restore job
+db_restore() {
+ gen3 job run psql-db-prep-restore
+}
+
+va_testing_db_dump() {
+ gen3 job run psql-db-dump-va-testing
+}
+
+
+# main function to determine whether dump or restore
+main() {
+ case "$1" in
+ dump)
+ gen3_log_info "Triggering database dump..."
+ create_policy
+ create_service_account_and_role
+ create_s3_bucket
+ db_dump
+ ;;
+ restore)
+ gen3_log_info "Triggering database restore..."
+ create_policy
+ create_service_account_and_role
+ create_s3_bucket
+ db_restore
+ ;;
+ va-dump)
+ gen3_log_info "Running a va-testing DB dump..."
+ create_policy
+ create_service_account_and_role
+ create_s3_bucket
+ va_testing_db_dump
+ ;;
+ *)
+ echo "Invalid command. Usage: gen3 dbbackup [dump|restore|va-dump]"
+ return 1
+ ;;
+ esac
+}
+
+main "$1"
diff --git a/gen3/bin/ecr.sh b/gen3/bin/ecr.sh
index 930202a87..36af791ef 100644
--- a/gen3/bin/ecr.sh
+++ b/gen3/bin/ecr.sh
@@ -32,6 +32,8 @@ accountList=(
205252583234
885078588865
922467707295
+533267425233
+048463324059
)
principalStr=""
diff --git a/gen3/bin/gitops.sh b/gen3/bin/gitops.sh
index 48ba6512c..bc0358499 100644
--- a/gen3/bin/gitops.sh
+++ b/gen3/bin/gitops.sh
@@ -291,9 +291,15 @@ gen3_gitops_sync() {
if g3kubectl get configmap manifest-versions; then
oldJson=$(g3kubectl get configmap manifest-versions -o=json | jq ".data")
fi
- newJson=$(g3k_config_lookup ".versions")
echo "old JSON is: $oldJson"
- echo "new JSON is: $newJson"
+ newJson=$(g3k_config_lookup ".versions")
+ # Make sure the script exits if newJSON contains invalid JSON
+ if [ $? -ne 0 ]; then
+ echo "Error: g3k_config_lookup command failed- invalid JSON"
+ exit 1
+ else
+ echo "new JSON is: $newJson"
+ fi
if [[ -z $newJson ]]; then
echo "Manifest does not have versions section. Unable to get new versions, skipping version update."
elif [[ -z $oldJson ]]; then
@@ -439,8 +445,13 @@ gen3_gitops_sync() {
echo "DRYRUN flag detected, not rolling"
gen3_log_info "dict_roll: $dict_roll; versions_roll: $versions_roll; portal_roll: $portal_roll; etl_roll: $etl_roll; fence_roll: $fence_roll"
else
- if [[ ( "$dict_roll" = true ) || ( "$versions_roll" = true ) || ( "$portal_roll" = true )|| ( "$etl_roll" = true ) || ( "$covid_cronjob_roll" = true ) || ("fence_roll" = true) ]]; then
+ if [[ ( "$dict_roll" = true ) || ( "$versions_roll" = true ) || ( "$portal_roll" = true )|| ( "$etl_roll" = true ) || ( "$covid_cronjob_roll" = true ) || ("$fence_roll" = true) ]]; then
echo "changes detected, rolling"
+ tmpHostname=$(gen3 api hostname)
+ if [[ $slack = true ]]; then
+ curl -X POST --data-urlencode "payload={\"text\": \"Gitops-sync Cron: Changes detected on ${tmpHostname} - rolling...\"}" "${slackWebHook}"
+ fi
+
# run etl job before roll all so guppy can pick up changes
if [[ "$etl_roll" = true ]]; then
gen3 update_config etl-mapping "$(gen3 gitops folder)/etlMapping.yaml"
@@ -466,7 +477,6 @@ gen3_gitops_sync() {
rollRes=$?
# send result to slack
if [[ $slack = true ]]; then
- tmpHostname=$(gen3 api hostname)
resStr="SUCCESS"
color="#1FFF00"
if [[ $rollRes != 0 ]]; then
diff --git a/gen3/bin/healthcheck.sh b/gen3/bin/healthcheck.sh
index 149cb1aaa..b658ff033 100644
--- a/gen3/bin/healthcheck.sh
+++ b/gen3/bin/healthcheck.sh
@@ -137,6 +137,10 @@ gen3_healthcheck() {
internetAccessExplicitProxy=false
fi
+ gen3_log_info "Clearing Evicted pods"
+ sleep 5
+ clear_evicted_pods
+
local healthJson=$(cat - < /dev/null; then
+ gen3 kube-setup-gen3-discovery-ai &
+else
+ gen3_log_info "not deploying gen3-discovery-ai - no manifest entry for '.versions[\"gen3-discovery-ai\"]'"
+fi
+
+if g3k_manifest_lookup '.versions["ohdsi-atlas"]' && g3k_manifest_lookup '.versions["ohdsi-webapi"]' 2> /dev/null; then
+ gen3 kube-setup-ohdsi &
+else
+ gen3_log_info "not deploying OHDSI tools - no manifest entry for '.versions[\"ohdsi-atlas\"]' and '.versions[\"ohdsi-webapi\"]'"
+fi
+
+if g3k_manifest_lookup '.versions["cohort-middleware"]' 2> /dev/null; then
+ gen3 kube-setup-cohort-middleware
+else
+ gen3_log_info "not deploying cohort-middleware - no manifest entry for .versions[\"cohort-middleware\"]"
+fi
+
gen3 kube-setup-revproxy
if [[ "$GEN3_ROLL_FAST" != "true" ]]; then
@@ -262,7 +280,7 @@ if [[ "$GEN3_ROLL_FAST" != "true" ]]; then
else
gen3 kube-setup-autoscaler &
fi
- gen3 kube-setup-kube-dns-autoscaler &
+ #gen3 kube-setup-kube-dns-autoscaler &
gen3 kube-setup-metrics deploy || true
gen3 kube-setup-tiller || true
#
@@ -334,18 +352,6 @@ else
gen3_log_info "not deploying argo-wrapper - no manifest entry for '.versions[\"argo-wrapper\"]'"
fi
-if g3k_manifest_lookup '.versions["cohort-middleware"]' 2> /dev/null; then
- gen3 roll cohort-middleware &
-else
- gen3_log_info "not deploying cohort-middleware - no manifest entry for '.versions[\"cohort-middleware\"]'"
-fi
-
-if g3k_manifest_lookup '.versions["ohdsi-atlas"]' && g3k_manifest_lookup '.versions["ohdsi-webapi"]' 2> /dev/null; then
- gen3 kube-setup-ohdsi &
-else
- gen3_log_info "not deploying OHDSI tools - no manifest entry for '.versions[\"ohdsi-atlas\"]' and '.versions[\"ohdsi-webapi\"]'"
-fi
-
gen3_log_info "enable network policy"
gen3 kube-setup-networkpolicy "enable" || true &
diff --git a/gen3/bin/kube-setup-ambassador.sh b/gen3/bin/kube-setup-ambassador.sh
index 0f4e0be28..5f92af5cc 100644
--- a/gen3/bin/kube-setup-ambassador.sh
+++ b/gen3/bin/kube-setup-ambassador.sh
@@ -25,7 +25,6 @@ deploy_api_gateway() {
return 0
fi
gen3 roll ambassador-gen3
- g3k_kv_filter "${GEN3_HOME}/kube/services/ambassador-gen3/ambassador-gen3-service-elb.yaml" GEN3_ARN "$(g3kubectl get configmap global --output=jsonpath='{.data.revproxy_arn}')" | g3kubectl apply -f -
local luaYamlTemp="$(mktemp "$XDG_RUNTIME_DIR/lua.yaml.XXXXXX")"
cat - > "$luaYamlTemp" < /dev/null 2>&1; then
kubectl create namespace argo-events
fi
+# Check if target configmap exists
+if ! kubectl get configmap environment -n argo-events > /dev/null 2>&1; then
+
+ # Get value from source configmap
+ VALUE=$(kubectl get configmap global -n default -o jsonpath="{.data.environment}")
+
+ # Create target configmap
+ kubectl create configmap environment -n argo-events --from-literal=environment=$VALUE
+
+fi
+
if [[ "$ctxNamespace" == "default" || "$ctxNamespace" == "null" || "$override_namespace" == true ]]; then
if (! helm status argo -n argo-events > /dev/null 2>&1 ) || [[ "$force" == true ]]; then
helm repo add argo https://argoproj.github.io/argo-helm --force-update 2> >(grep -v 'This is insecure' >&2)
helm repo update 2> >(grep -v 'This is insecure' >&2)
- helm upgrade --install argo argo/argo-events -n argo-events --version "2.1.3"
+ helm upgrade --install argo-events argo/argo-events -n argo-events --version "2.1.3"
else
gen3_log_info "argo-events Helm chart already installed. To force reinstall, run with --force"
fi
@@ -46,7 +57,7 @@ if [[ "$ctxNamespace" == "default" || "$ctxNamespace" == "null" || "$override_na
kubectl apply -f ${GEN3_HOME}/kube/services/argo-events/eventbus.yaml
fi
else
- gen3_log_info "Not running in default namespace, will not install argo-events helm chart"
+ gen3_log_info "Not running in default namespace, will not install argo-events helm chart. This behavior can be overwritten with the --override-namespace flag"
fi
if [[ "$create_workflow_resources" == true ]]; then
@@ -57,4 +68,5 @@ if [[ "$create_workflow_resources" == true ]]; then
#Creating rolebindings to allow Argo Events to create jobs, and allow those jobs to manage Karpenter resources
kubectl create rolebinding argo-events-job-admin-binding --role=job-admin --serviceaccount=argo-events:default --namespace=argo-events
kubectl create clusterrolebinding karpenter-admin-binding --clusterrole=karpenter-admin --serviceaccount=argo-events:default
+ kubectl create clusterrolebinding argo-workflows-view-binding --clusterrole=argo-argo-workflows-view --serviceaccount=argo-events:default
fi
\ No newline at end of file
diff --git a/gen3/bin/kube-setup-argo-wrapper.sh b/gen3/bin/kube-setup-argo-wrapper.sh
index 5727a703e..9f7cc52ce 100644
--- a/gen3/bin/kube-setup-argo-wrapper.sh
+++ b/gen3/bin/kube-setup-argo-wrapper.sh
@@ -18,6 +18,26 @@ if [[ -z "$GEN3_SOURCE_ONLY" ]]; then
gen3 roll argo-wrapper
g3kubectl apply -f "${GEN3_HOME}/kube/services/argo-wrapper/argo-wrapper-service.yaml"
+
+
+ if g3k_manifest_lookup .argo.argo_server_service_url 2> /dev/null; then
+ export ARGO_HOST=$(g3k_manifest_lookup .argo.argo_server_service_url)
+ else
+ export ARGO_HOST="http://argo-argo-workflows-server.argo.svc.cluster.local:2746"
+ fi
+
+ if g3k_config_lookup '.argo_namespace' $(g3k_manifest_init)/$(g3k_hostname)/manifests/argo/argo.json 2> /dev/null; then
+ export ARGO_NAMESPACE=$(g3k_config_lookup '.argo_namespace' $(g3k_manifest_init)/$(g3k_hostname)/manifests/argo/argo.json)
+ else
+ export ARGO_NAMESPACE="argo"
+ fi
+
+ envsubst <"${GEN3_HOME}/kube/services/argo-wrapper/config.ini" > /tmp/config.ini
+
+ g3kubectl delete configmap argo-wrapper-namespace-config
+ g3kubectl create configmap argo-wrapper-namespace-config --from-file /tmp/config.ini
+
+ rm /tmp/config.ini
gen3_log_info "the argo-wrapper service has been deployed onto the kubernetes cluster"
-fi
\ No newline at end of file
+fi
diff --git a/gen3/bin/kube-setup-argo.sh b/gen3/bin/kube-setup-argo.sh
index c7243d3da..4c6c55eee 100644
--- a/gen3/bin/kube-setup-argo.sh
+++ b/gen3/bin/kube-setup-argo.sh
@@ -5,10 +5,25 @@ source "${GEN3_HOME}/gen3/lib/utils.sh"
gen3_load "gen3/gen3setup"
gen3_load "gen3/lib/kube-setup-init"
+override_namespace=false
+force=false
+
+for arg in "${@}"; do
+ if [ "$arg" == "--override-namespace" ]; then
+ override_namespace=true
+ elif [ "$arg" == "--force" ]; then
+ force=true
+ else
+ #Print usage info and exit
+ gen3_log_info "Usage: gen3 kube-setup-argo [--override-namespace] [--force]"
+ exit 1
+ fi
+done
ctx="$(g3kubectl config current-context)"
ctxNamespace="$(g3kubectl config view -ojson | jq -r ".contexts | map(select(.name==\"$ctx\")) | .[0] | .context.namespace")"
+argo_namespace=$(g3k_config_lookup '.argo_namespace' $(g3k_manifest_init)/$(g3k_hostname)/manifests/argo/argo.json)
function setup_argo_buckets {
local accountNumber
@@ -28,14 +43,17 @@ function setup_argo_buckets {
# try to come up with a unique but composable bucket name
bucketName="gen3-argo-${accountNumber}-${environment//_/-}"
- userName="gen3-argo-${environment//_/-}-user"
- if [[ ! -z $(g3k_config_lookup '."s3-bucket"' $(g3k_manifest_init)/$(g3k_hostname)/manifests/argo/argo.json) || ! -z $(g3k_config_lookup '.argo."s3-bucket"') ]]; then
- if [[ ! -z $(g3k_config_lookup '."s3-bucket"' $(g3k_manifest_init)/$(g3k_hostname)/manifests/argo/argo.json) ]]; then
+ nameSpace="$(gen3 db namespace)"
+ roleName="gen3-argo-${environment//_/-}-role"
+ bucketPolicy="argo-bucket-policy-${nameSpace}"
+ internalBucketPolicy="argo-internal-bucket-policy-${nameSpace}"
+ if [[ ! -z $(g3k_config_lookup '."downloadable-s3-bucket"' $(g3k_manifest_init)/$(g3k_hostname)/manifests/argo/argo.json) || ! -z $(g3k_config_lookup '.argo."downloadable-s3-bucket"') ]]; then
+ if [[ ! -z $(g3k_config_lookup '."downloadable-s3-bucket"' $(g3k_manifest_init)/$(g3k_hostname)/manifests/argo/argo.json) ]]; then
gen3_log_info "Using S3 bucket found in manifest: ${bucketName}"
- bucketName=$(g3k_config_lookup '."s3-bucket"' $(g3k_manifest_init)/$(g3k_hostname)/manifests/argo/argo.json)
+ bucketName=$(g3k_config_lookup '."downloadable-s3-bucket"' $(g3k_manifest_init)/$(g3k_hostname)/manifests/argo/argo.json)
else
gen3_log_info "Using S3 bucket found in manifest: ${bucketName}"
- bucketName=$(g3k_config_lookup '.argo."s3-bucket"')
+ bucketName=$(g3k_config_lookup '.argo."downloadable-s3-bucket"')
fi
fi
if [[ ! -z $(g3k_config_lookup '."internal-s3-bucket"' $(g3k_manifest_init)/$(g3k_hostname)/manifests/argo/argo.json) || ! -z $(g3k_config_lookup '.argo."internal-s3-bucket"') ]]; then
@@ -114,70 +132,41 @@ EOF
]
}
EOF
- if ! secret="$(g3kubectl get secret argo-s3-creds -n argo 2> /dev/null)"; then
- gen3_log_info "setting up bucket $bucketName"
-
- if aws s3 ls --page-size 1 "s3://${bucketName}" > /dev/null 2>&1; then
- gen3_log_info "${bucketName} s3 bucket already exists"
- # continue on ...
- elif ! aws s3 mb "s3://${bucketName}"; then
- gen3_log_err "failed to create bucket ${bucketName}"
- fi
-
- gen3_log_info "Creating IAM user ${userName}"
- if ! aws iam get-user --user-name ${userName} > /dev/null 2>&1; then
- aws iam create-user --user-name ${userName} || true
- else
- gen3_log_info "IAM user ${userName} already exits.."
- fi
-
- secret=$(aws iam create-access-key --user-name ${userName})
- if ! g3kubectl get namespace argo > /dev/null 2>&1; then
- gen3_log_info "Creating argo namespace"
- g3kubectl create namespace argo || true
- g3kubectl label namespace argo app=argo || true
- g3kubectl create rolebinding argo-admin --clusterrole=admin --serviceaccount=argo:default -n argo || true
- fi
- else
- # Else we want to recreate the argo-s3-creds secret so make a temp file with the current creds and delete argo-s3-creds secret
- gen3_log_info "Argo S3 setup already completed"
- local secretFile="$XDG_RUNTIME_DIR/temp_key_file_$$.json"
- cat > "$secretFile" < /dev/null 2>&1; then
+ gen3_log_info "${bucketName} s3 bucket already exists"
+ # continue on ...
+ elif ! aws s3 mb "s3://${bucketName}"; then
+ gen3_log_err "failed to create bucket ${bucketName}"
fi
-
- gen3_log_info "Creating s3 creds secret in argo namespace"
- if [[ "$ctxNamespace" == "default" || "$ctxNamespace" == "null" ]]; then
- if [[ -z $internalBucketName ]]; then
- g3kubectl delete secret -n argo argo-s3-creds || true
- g3kubectl create secret -n argo generic argo-s3-creds --from-literal=AccessKeyId=$(echo $secret | jq -r .AccessKey.AccessKeyId) --from-literal=SecretAccessKey=$(echo $secret | jq -r .AccessKey.SecretAccessKey) --from-literal=bucketname=${bucketName} || true
- g3kubectl create secret generic argo-s3-creds --from-literal=AccessKeyId=$(echo $secret | jq -r .AccessKey.AccessKeyId) --from-literal=SecretAccessKey=$(echo $secret | jq -r .AccessKey.SecretAccessKey) --from-literal=bucketname=${bucketName} || true
- else
- g3kubectl delete secret -n argo argo-s3-creds || true
- g3kubectl create secret -n argo generic argo-s3-creds --from-literal=AccessKeyId=$(echo $secret | jq -r .AccessKey.AccessKeyId) --from-literal=SecretAccessKey=$(echo $secret | jq -r .AccessKey.SecretAccessKey) --from-literal=bucketname=${bucketName} --from-literal=internalbucketname=${internalBucketName} || true
- g3kubectl create secret generic argo-s3-creds --from-literal=AccessKeyId=$(echo $secret | jq -r .AccessKey.AccessKeyId) --from-literal=SecretAccessKey=$(echo $secret | jq -r .AccessKey.SecretAccessKey) --from-literal=bucketname=${bucketName} || true
- fi
+ if ! g3kubectl get namespace argo > /dev/null 2>&1; then
+ gen3_log_info "Creating argo namespace"
+ g3kubectl create namespace argo || true
+ g3kubectl label namespace argo app=argo || true
+ # Grant admin access within the argo namespace to the default SA in the argo namespace
+ g3kubectl create rolebinding argo-admin --clusterrole=admin --serviceaccount=argo:default -n $argo_namespace || true
+ fi
+ gen3_log_info "Creating IAM role ${roleName}"
+ if aws iam get-role --role-name "${roleName}" > /dev/null 2>&1; then
+ gen3_log_info "IAM role ${roleName} already exists.."
+ roleArn=$(aws iam get-role --role-name "${roleName}" --query 'Role.Arn' --output text)
+ gen3_log_info "Role annotate"
+ g3kubectl annotate serviceaccount default eks.amazonaws.com/role-arn=${roleArn} --overwrite -n $argo_namespace
+ g3kubectl annotate serviceaccount argo eks.amazonaws.com/role-arn=${roleArn} --overwrite -n $nameSpace
else
- g3kubectl create sa argo || true
- # Grant admin access within the current namespace to the argo SA in the current namespace
- g3kubectl create rolebinding argo-admin --clusterrole=admin --serviceaccount=$(gen3 db namespace):argo -n $(gen3 db namespace) || true
- aws iam put-user-policy --user-name ${userName} --policy-name argo-bucket-policy --policy-document file://$policyFile || true
- if [[ -z $internalBucketName ]]; then
- aws iam put-user-policy --user-name ${userName} --policy-name argo-internal-bucket-policy --policy-document file://$internalBucketPolicyFile || true
- g3kubectl create secret generic argo-s3-creds --from-literal=AccessKeyId=$(echo $secret | jq -r .AccessKey.AccessKeyId) --from-literal=SecretAccessKey=$(echo $secret | jq -r .AccessKey.SecretAccessKey) --from-literal=bucketname=${bucketName} || true
- else
- g3kubectl create secret generic argo-s3-creds --from-literal=AccessKeyId=$(echo $secret | jq -r .AccessKey.AccessKeyId) --from-literal=SecretAccessKey=$(echo $secret | jq -r .AccessKey.SecretAccessKey) --from-literal=bucketname=${bucketName} --from-literal=internalbucketname=${internalBucketName} || true
-
- fi
+ gen3 awsrole create $roleName argo $nameSpace -all_namespaces
+ roleArn=$(aws iam get-role --role-name "${roleName}" --query 'Role.Arn' --output text)
+ g3kubectl annotate serviceaccount default eks.amazonaws.com/role-arn=${roleArn} -n $argo_namespace
fi
+ # Grant admin access within the current namespace to the argo SA in the current namespace
+ g3kubectl create rolebinding argo-admin --clusterrole=admin --serviceaccount=$nameSpace:argo -n $nameSpace || true
+ aws iam put-role-policy --role-name ${roleName} --policy-name ${bucketPolicy} --policy-document file://$policyFile || true
+ if [[ -z $internalBucketName ]]; then
+ aws iam put-role-policy --role-name ${roleName} --policy-name ${internalBucketPolicy} --policy-document file://$internalBucketPolicyFile || true
+ fi
## if new bucket then do the following
# Get the aws keys from secret
@@ -189,9 +178,9 @@ EOF
aws s3api put-bucket-lifecycle --bucket ${bucketName} --lifecycle-configuration file://$bucketLifecyclePolicyFile
# Always update the policy, in case manifest buckets change
- aws iam put-user-policy --user-name ${userName} --policy-name argo-bucket-policy --policy-document file://$policyFile
+ aws iam put-role-policy --role-name ${roleName} --policy-name ${bucketPolicy} --policy-document file://$policyFile
if [[ ! -z $internalBucketPolicyFile ]]; then
- aws iam put-user-policy --user-name ${userName} --policy-name argo-internal-bucket-policy --policy-document file://$internalBucketPolicyFile
+ aws iam put-role-policy --role-name ${roleName} --policy-name ${internalBucketPolicy} --policy-document file://$internalBucketPolicyFile
fi
if [[ ! -z $(g3k_config_lookup '.indexd_admin_user' $(g3k_manifest_init)/$(g3k_hostname)/manifests/argo/argo.json) || ! -z $(g3k_config_lookup '.argo.indexd_admin_user') ]]; then
if [[ ! -z $(g3k_config_lookup '.indexd_admin_user' $(g3k_manifest_init)/$(g3k_hostname)/manifests/argo/argo.json) ]]; then
@@ -203,39 +192,53 @@ EOF
for serviceName in indexd; do
secretName="${serviceName}-creds"
# Only delete if secret is found to prevent early exits
- if [[ ! -z $(g3kubectl get secrets -n argo | grep $secretName) ]]; then
- g3kubectl delete secret "$secretName" -n argo > /dev/null 2>&1
+ if [[ ! -z $(g3kubectl get secrets -n $argo_namespace | grep $secretName) ]]; then
+ g3kubectl delete secret "$secretName" -n $argo_namespace > /dev/null 2>&1
fi
done
sleep 1 # I think delete is async - give backend a second to finish
indexdFencePassword=$(cat $(gen3_secrets_folder)/creds.json | jq -r .indexd.user_db.$indexd_admin_user)
- g3kubectl create secret generic "indexd-creds" --from-literal=user=$indexd_admin_user --from-literal=password=$indexdFencePassword -n argo
+ g3kubectl create secret generic "indexd-creds" --from-literal=user=$indexd_admin_user --from-literal=password=$indexdFencePassword -n $argo_namespace
fi
}
function setup_argo_db() {
- if ! secret="$(g3kubectl get secret argo-db-creds -n argo 2> /dev/null)"; then
+ if ! secret="$(g3kubectl get secret argo-db-creds -n $argo_namespace 2> /dev/null)"; then
gen3_log_info "Setting up argo db persistence"
gen3 db setup argo || true
dbCreds=$(gen3 secrets decode argo-g3auto dbcreds.json)
- g3kubectl create secret -n argo generic argo-db-creds --from-literal=db_host=$(echo $dbCreds | jq -r .db_host) --from-literal=db_username=$(echo $dbCreds | jq -r .db_username) --from-literal=db_password=$(echo $dbCreds | jq -r .db_password) --from-literal=db_database=$(echo $dbCreds | jq -r .db_database)
+ g3kubectl create secret -n $argo_namespace generic argo-db-creds --from-literal=db_host=$(echo $dbCreds | jq -r .db_host) --from-literal=db_username=$(echo $dbCreds | jq -r .db_username) --from-literal=db_password=$(echo $dbCreds | jq -r .db_password) --from-literal=db_database=$(echo $dbCreds | jq -r .db_database)
else
gen3_log_info "Argo DB setup already completed"
fi
}
- setup_argo_buckets
+function setup_argo_template_secret() {
+  gen3_log_info "Setting up the argo template values secret"
+ downloadable_bucket_name=$(g3k_config_lookup '."downloadable-s3-bucket"' $(g3k_manifest_init)/$(g3k_hostname)/manifests/argo/argo.json)
+ # Check if the secret already exists
+ if [[ ! -z $(g3kubectl get secret argo-template-values-secret -n $argo_namespace) ]]; then
+ gen3_log_info "Argo template values secret already exists, assuming it's stale and deleting"
+ g3kubectl delete secret argo-template-values-secret -n $argo_namespace
+ fi
+ gen3_log_info "Creating argo template values secret"
+ g3kubectl create secret generic argo-template-values-secret --from-literal=DOWNLOADABLE_BUCKET=$downloadable_bucket_name -n $argo_namespace
+}
+
+setup_argo_buckets
# only do this if we are running in the default namespace
-if [[ "$ctxNamespace" == "default" || "$ctxNamespace" == "null" ]]; then
+if [[ "$ctxNamespace" == "default" || "$ctxNamespace" == "null" || "$override_namespace" == true ]]; then
setup_argo_db
- if (! helm status argo -n argo > /dev/null 2>&1 ) || [[ "$1" == "--force" ]]; then
- DBHOST=$(kubectl get secrets -n argo argo-db-creds -o json | jq -r .data.db_host | base64 -d)
- DBNAME=$(kubectl get secrets -n argo argo-db-creds -o json | jq -r .data.db_database | base64 -d)
- if [[ -z $(kubectl get secrets -n argo argo-s3-creds -o json | jq -r .data.internalbucketname | base64 -d) ]]; then
- BUCKET=$(kubectl get secrets -n argo argo-s3-creds -o json | jq -r .data.bucketname | base64 -d)
+ setup_argo_template_secret
+ if (! helm status argo -n $argo_namespace > /dev/null 2>&1 ) || [[ "$force" == true ]]; then
+ DBHOST=$(kubectl get secrets -n $argo_namespace argo-db-creds -o json | jq -r .data.db_host | base64 -d)
+ DBNAME=$(kubectl get secrets -n $argo_namespace argo-db-creds -o json | jq -r .data.db_database | base64 -d)
+ if [[ -z $internalBucketName ]]; then
+ BUCKET=$bucketName
else
- BUCKET=$(kubectl get secrets -n argo argo-s3-creds -o json | jq -r .data.internalbucketname | base64 -d)
+ BUCKET=$internalBucketName
fi
+
valuesFile="$XDG_RUNTIME_DIR/values_$$.yaml"
valuesTemplate="${GEN3_HOME}/kube/services/argo/values.yaml"
@@ -243,7 +246,7 @@ if [[ "$ctxNamespace" == "default" || "$ctxNamespace" == "null" ]]; then
helm repo add argo https://argoproj.github.io/argo-helm --force-update 2> >(grep -v 'This is insecure' >&2)
helm repo update 2> >(grep -v 'This is insecure' >&2)
- helm upgrade --install argo argo/argo-workflows -n argo -f ${valuesFile} --version 0.29.1
+ helm upgrade --install argo argo/argo-workflows -n $argo_namespace -f ${valuesFile} --version 0.29.1
else
gen3_log_info "kube-setup-argo exiting - argo already deployed, use --force to redeploy"
fi
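
Note: the reworked kube-setup-argo.sh above references `$argo_namespace`, `$nameSpace`, `$force`, and `$override_namespace`, whose initialization sits outside the hunks shown here. A minimal sketch of the kind of argument handling it presumes — the flag names and defaults below are illustrative assumptions, not the actual implementation:

```
# Sketch only: variable names mirror the ones used above, but this parsing is an assumption.
force=false
override_namespace=false
nameSpace="$(gen3 db namespace)"
argo_namespace="argo"
for arg in "$@"; do
  case "$arg" in
    --force) force=true ;;
    --override-namespace)
      override_namespace=true
      argo_namespace="$nameSpace"   # deploy argo into the current namespace instead of "argo"
      ;;
  esac
done
```
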
diff --git a/gen3/bin/kube-setup-cedar-wrapper.sh b/gen3/bin/kube-setup-cedar-wrapper.sh
index 9a899a770..c8f0d03c6 100644
--- a/gen3/bin/kube-setup-cedar-wrapper.sh
+++ b/gen3/bin/kube-setup-cedar-wrapper.sh
@@ -1,6 +1,58 @@
source "${GEN3_HOME}/gen3/lib/utils.sh"
gen3_load "gen3/lib/kube-setup-init"
+create_client_and_secret() {
+ local hostname=$(gen3 api hostname)
+ local client_name="cedar_ingest_client"
+ gen3_log_info "kube-setup-cedar-wrapper" "creating fence ${client_name} for $hostname"
+ # delete any existing fence cedar clients
+ g3kubectl exec -c fence $(gen3 pod fence) -- fence-create client-delete --client ${client_name} > /dev/null 2>&1
+ local secrets=$(g3kubectl exec -c fence $(gen3 pod fence) -- fence-create client-create --client ${client_name} --grant-types client_credentials | tail -1)
+ # secrets looks like ('CLIENT_ID', 'CLIENT_SECRET')
+ if [[ ! $secrets =~ (\'(.*)\', \'(.*)\') ]]; then
+ gen3_log_err "kube-setup-cedar-wrapper" "Failed generating ${client_name}"
+ return 1
+ else
+ local client_id="${BASH_REMATCH[2]}"
+ local client_secret="${BASH_REMATCH[3]}"
+ gen3_log_info "Create cedar-client secrets file"
+ cat - < /dev/null 2>&1; then
+ local have_cedar_client_secret="1"
+ else
+ gen3_log_info "No g3auto cedar-client key present in secret"
+ fi
+
+ local client_name="cedar_ingest_client"
+ local client_list=$(g3kubectl exec -c fence $(gen3 pod fence) -- fence-create client-list)
+  local client_count=$(echo "$client_list" | grep -cE "'name':.*'${client_name}'")
+ gen3_log_info "CEDAR client count = ${client_count}"
+
+ if [[ -z $have_cedar_client_secret ]] || [[ ${client_count} -lt 1 ]]; then
+ gen3_log_info "Creating new cedar-ingest client and secret"
+ local credsPath="$(gen3_secrets_folder)/g3auto/cedar/${cedar_creds_file}"
+ if ! create_client_and_secret > $credsPath; then
+ gen3_log_err "Failed to setup cedar-ingest secret"
+ return 1
+ else
+ gen3 secrets sync
+ gen3 job run usersync
+ fi
+ fi
+}
+
[[ -z "$GEN3_ROLL_ALL" ]] && gen3 kube-setup-secrets
if ! g3kubectl get secrets/cedar-g3auto > /dev/null 2>&1; then
@@ -8,6 +60,9 @@ if ! g3kubectl get secrets/cedar-g3auto > /dev/null 2>&1; then
return 1
fi
+gen3_log_info "Checking cedar-client creds"
+setup_creds
+
if ! gen3 secrets decode cedar-g3auto cedar_api_key.txt > /dev/null 2>&1; then
gen3_log_err "No CEDAR api key present in cedar-g3auto secret, not rolling CEDAR wrapper"
return 1
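
For clarity, the `create_client_and_secret` helper above parses the `('CLIENT_ID', 'CLIENT_SECRET')` tuple printed by `fence-create` with bash's `=~` operator. A stand-alone illustration of that match, using a made-up secrets string:

```
# Hypothetical fence-create output, for illustration only.
secrets="('abc123', 's3cr3t')"
if [[ $secrets =~ (\'(.*)\', \'(.*)\') ]]; then
  echo "client_id=${BASH_REMATCH[2]}"      # abc123
  echo "client_secret=${BASH_REMATCH[3]}"  # s3cr3t
fi
```
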
diff --git a/gen3/bin/kube-setup-cohort-middleware.sh b/gen3/bin/kube-setup-cohort-middleware.sh
index 91b414849..a6a024578 100644
--- a/gen3/bin/kube-setup-cohort-middleware.sh
+++ b/gen3/bin/kube-setup-cohort-middleware.sh
@@ -7,6 +7,10 @@ gen3_load "gen3/lib/kube-setup-init"
setup_secrets() {
gen3_log_info "Deploying secrets for cohort-middleware"
# subshell
+ if [[ -n "$JENKINS_HOME" ]]; then
+ gen3_log_err "skipping secrets setup in non-adminvm environment"
+ return 0
+ fi
(
if ! dbcreds="$(gen3 db creds ohdsi)"; then
@@ -17,7 +21,7 @@ setup_secrets() {
mkdir -p $(gen3_secrets_folder)/g3auto/cohort-middleware
credsFile="$(gen3_secrets_folder)/g3auto/cohort-middleware/development.yaml"
- if [[ (! -f "$credsFile") && -z "$JENKINS_HOME" ]]; then
+ if [[ (! -f "$credsFile") ]]; then
DB_NAME=$(jq -r ".db_database" <<< "$dbcreds")
export DB_NAME
DB_USER=$(jq -r ".db_username" <<< "$dbcreds")
@@ -46,17 +50,17 @@ EOM
fi
gen3 secrets sync "initialize cohort-middleware/development.yaml"
-
- # envsubst <"${GEN3_HOME}/kube/services/cohort-middleware/development.yaml" | g3kubectl create secret generic cohort-middleware-config --from-file=development.yaml=/dev/stdin
)
}
# main --------------------------------------
-setup_secrets
-
-gen3 roll cohort-middleware
-g3kubectl apply -f "${GEN3_HOME}/kube/services/cohort-middleware/cohort-middleware-service.yaml"
-cat < /dev/null 2>&1; then
+ export DICOM_SERVER_URL="/dicom-server"
+ gen3_log_info "attaching ohif viewer to old dicom-server (orthanc w/ aurora)"
+ fi
+
+ if g3k_manifest_lookup .versions["orthanc"] > /dev/null 2>&1; then
+ export DICOM_SERVER_URL="/orthanc"
+ gen3_log_info "attaching ohif viewer to new dicom-server (orthanc w/ s3)"
+ fi
+
+ envsubst <"${GEN3_HOME}/kube/services/ohif-viewer/app-config.js" > "$secretsFolder/app-config.js"
+
gen3 secrets sync 'setup orthanc-s3-g3auto secrets'
}
diff --git a/gen3/bin/kube-setup-ecr-access-cronjob.sh b/gen3/bin/kube-setup-ecr-access-cronjob.sh
new file mode 100644
index 000000000..5c645ad35
--- /dev/null
+++ b/gen3/bin/kube-setup-ecr-access-cronjob.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+
+source "${GEN3_HOME}/gen3/lib/utils.sh"
+gen3_load "gen3/lib/kube-setup-init"
+
+setup_ecr_access_job() {
+ if g3kubectl get configmap manifest-global > /dev/null; then
+ ecrRoleArn=$(g3kubectl get configmap manifest-global -o jsonpath={.data.ecr-access-job-role-arn})
+ fi
+ if [ -z "$ecrRoleArn" ]; then
+ gen3_log_err "Missing 'global.ecr-access-job-role-arn' configuration in manifest.json"
+ return 1
+ fi
+
+ local saName="ecr-access-job-sa"
+ if ! g3kubectl get sa "$saName" > /dev/null 2>&1; then
+ tempFile="ecr-access-job-policy.json"
+ cat - > $tempFile < /dev/null
+ cat ${GEN3_HOME}/kube/services/fluentd/gen3-1.15.3.conf | tee ${fluentdConfigmap} > /dev/null
gen3 update_config fluentd-gen3 "${fluentdConfigmap}"
rm ${fluentdConfigmap}
else
@@ -54,7 +54,7 @@ if [[ "$ctxNamespace" == "default" || "$ctxNamespace" == "null" ]]; then
fi
# We need this serviceaccount to be in the default namespace for the job and cronjob to properly work
g3kubectl apply -f "${GEN3_HOME}/kube/services/fluentd/fluent-jobs-serviceaccount.yaml" -n default
- if [ ${fluentdVersion} == "v1.10.2-debian-cloudwatch-1.0" ];
+ if [ ${fluentdVersion} == "v1.15.3-debian-cloudwatch-1.0" ];
then
(
unset KUBECTL_NAMESPACE
diff --git a/gen3/bin/kube-setup-gen3-discovery-ai.sh b/gen3/bin/kube-setup-gen3-discovery-ai.sh
new file mode 100644
index 000000000..44a472a74
--- /dev/null
+++ b/gen3/bin/kube-setup-gen3-discovery-ai.sh
@@ -0,0 +1,154 @@
+#!/bin/bash
+#
+# Deploy the gen3-discovery-ai service
+#
+
+source "${GEN3_HOME}/gen3/lib/utils.sh"
+gen3_load "gen3/gen3setup"
+
+# NOTE: no db for this service yet, but we'll likely need it in the future
+setup_database() {
+ gen3_log_info "setting up gen3-discovery-ai service ..."
+
+ if g3kubectl describe secret gen3-discovery-ai-g3auto > /dev/null 2>&1; then
+ gen3_log_info "gen3-discovery-ai-g3auto secret already configured"
+ return 0
+ fi
+ if [[ -n "$JENKINS_HOME" || ! -f "$(gen3_secrets_folder)/creds.json" ]]; then
+ gen3_log_err "skipping db setup in non-adminvm environment"
+ return 0
+ fi
+ # Setup .env file that gen3-discovery-ai service consumes
+ if [[ ! -f "$secretsFolder/gen3-discovery-ai.env" || ! -f "$secretsFolder/base64Authz.txt" ]]; then
+ local secretsFolder="$(gen3_secrets_folder)/g3auto/gen3-discovery-ai"
+
+ if [[ ! -f "$secretsFolder/dbcreds.json" ]]; then
+ if ! gen3 db setup gen3-discovery-ai; then
+ gen3_log_err "Failed setting up database for gen3-discovery-ai service"
+ return 1
+ fi
+ fi
+ if [[ ! -f "$secretsFolder/dbcreds.json" ]]; then
+ gen3_log_err "dbcreds not present in Gen3Secrets/"
+ return 1
+ fi
+
+ # go ahead and rotate the password whenever we regen this file
+ local password="$(gen3 random)"
+ cat - > "$secretsFolder/gen3-discovery-ai.env" < "$secretsFolder/base64Authz.txt"
+ fi
+ gen3 secrets sync 'setup gen3-discovery-ai-g3auto secrets'
+}
+
+if ! g3k_manifest_lookup '.versions."gen3-discovery-ai"' 2> /dev/null; then
+ gen3_log_info "kube-setup-gen3-discovery-ai exiting - gen3-discovery-ai service not in manifest"
+ exit 0
+fi
+
+# There's no db for this service *yet*
+#
+# if ! setup_database; then
+# gen3_log_err "kube-setup-gen3-discovery-ai bailing out - database failed setup"
+# exit 1
+# fi
+
+setup_storage() {
+ local saName="gen3-discovery-ai-sa"
+ g3kubectl create sa "$saName" > /dev/null 2>&1 || true
+
+ local secret
+ local secretsFolder="$(gen3_secrets_folder)/g3auto/gen3-discovery-ai"
+
+ secret="$(g3kubectl get secret gen3-discovery-ai-g3auto -o json 2> /dev/null)"
+ local hasStorageCfg
+ hasStorageCfg=$(jq -r '.data | has("storage_config.json")' <<< "$secret")
+
+ if [ "$hasStorageCfg" = "false" ]; then
+ gen3_log_info "setting up storage for gen3-discovery-ai service"
+ #
+ # gen3-discovery-ai-g3auto secret still does not exist
+ # we need to setup an S3 bucket and IAM creds
+ # let's avoid creating multiple buckets for different
+ # deployments to the same k8s cluster (dev, etc)
+ #
+ local bucketName
+ local accountNumber
+ local environment
+
+ if ! accountNumber="$(aws sts get-caller-identity --output text --query 'Account')"; then
+      gen3_log_err "could not determine account number"
+ return 1
+ fi
+
+ gen3_log_info "accountNumber: ${accountNumber}"
+
+ if ! environment="$(g3kubectl get configmap manifest-global -o json | jq -r .data.environment)"; then
+ gen3_log_err "could not determine environment from manifest-global - bailing out of gen3-discovery-ai setup"
+ return 1
+ fi
+
+ gen3_log_info "environment: ${environment}"
+
+ # try to come up with a unique but composable bucket name
+ bucketName="gen3-discovery-ai-${accountNumber}-${environment//_/-}"
+
+ gen3_log_info "bucketName: ${bucketName}"
+
+ if aws s3 ls --page-size 1 "s3://${bucketName}" > /dev/null 2>&1; then
+ gen3_log_info "${bucketName} s3 bucket already exists - probably in use by another namespace - copy the creds from there to $(gen3_secrets_folder)/g3auto/gen3-discovery-ai"
+ # continue on ...
+ elif ! gen3 s3 create "${bucketName}"; then
+ gen3_log_err "maybe failed to create bucket ${bucketName}, but maybe not, because the terraform script is flaky"
+ fi
+
+ local hostname
+ hostname="$(gen3 api hostname)"
+ jq -r -n --arg bucket "${bucketName}" --arg hostname "${hostname}" '.bucket=$bucket | .prefix=$hostname' > "${secretsFolder}/storage_config.json"
+ gen3 secrets sync 'setup gen3-discovery-ai credentials'
+
+ local roleName
+ roleName="$(gen3 api safe-name gen3-discovery-ai)" || return 1
+
+ if ! gen3 awsrole info "$roleName" > /dev/null; then # setup role
+ bucketName="$( (gen3 secrets decode 'gen3-discovery-ai-g3auto' 'storage_config.json' || echo ERROR) | jq -r .bucket)" || return 1
+ gen3 awsrole create "$roleName" "$saName" || return 1
+ gen3 s3 attach-bucket-policy "$bucketName" --read-write --role-name "${roleName}"
+ # try to give the gitops role read/write permissions on the bucket
+ local gitopsRoleName
+ gitopsRoleName="$(gen3 api safe-name gitops)"
+ gen3 s3 attach-bucket-policy "$bucketName" --read-write --role-name "${gitopsRoleName}"
+ fi
+ fi
+
+ return 0
+}
+
+if ! setup_storage; then
+ gen3_log_err "kube-setup-gen3-discovery-ai bailing out - storage failed setup"
+ exit 1
+fi
+
+gen3_log_info "Setup complete, syncing configuration to bucket"
+
+bucketName="$( (gen3 secrets decode 'gen3-discovery-ai-g3auto' 'storage_config.json' || echo ERROR) | jq -r .bucket)" || exit 1
+aws s3 sync "$(dirname $(g3k_manifest_path))/gen3-discovery-ai/knowledge" "s3://$bucketName" --delete
+
+gen3 roll gen3-discovery-ai
+g3kubectl apply -f "${GEN3_HOME}/kube/services/gen3-discovery-ai/gen3-discovery-ai-service.yaml"
+
+if [[ -z "$GEN3_ROLL_ALL" ]]; then
+ gen3 kube-setup-networkpolicy
+ gen3 kube-setup-revproxy
+fi
+
+gen3_log_info "The gen3-discovery-ai service has been deployed onto the kubernetes cluster"
+gen3_log_info "test with: curl https://commons-host/ai"
diff --git a/gen3/bin/kube-setup-hatchery.sh b/gen3/bin/kube-setup-hatchery.sh
index 07172aa1e..bdcff8ed0 100644
--- a/gen3/bin/kube-setup-hatchery.sh
+++ b/gen3/bin/kube-setup-hatchery.sh
@@ -20,11 +20,81 @@ gen3 jupyter j-namespace setup
#
(g3k_kv_filter ${GEN3_HOME}/kube/services/hatchery/serviceaccount.yaml BINDING_ONE "name: hatchery-binding1-$namespace" BINDING_TWO "name: hatchery-binding2-$namespace" CURRENT_NAMESPACE "namespace: $namespace" | g3kubectl apply -f -) || true
+function exists_or_create_gen3_license_table() {
+ # Create dynamodb table for gen3-license if it does not exist.
+ TARGET_TABLE="$1"
+ echo "Checking for dynamoDB table: ${TARGET_TABLE}"
-# cron job to distribute licenses if using Stata workspaces
-if [ "$(g3kubectl get configmaps/manifest-hatchery -o yaml | grep "\"image\": .*stata.*")" ];
-then
- gen3 job cron distribute-licenses '* * * * *'
+ FOUND_TABLE=`aws dynamodb list-tables | jq -r .TableNames | jq -c -r '.[]' | grep $TARGET_TABLE`
+ if [ -n "$FOUND_TABLE" ]; then
+ echo "Target table already exists in dynamoDB: $FOUND_TABLE"
+ else
+ echo "Creating table ${TARGET_TABLE}"
+ GSI=`g3kubectl get configmaps/manifest-hatchery -o json | jq -r '.data."license-user-maps-global-secondary-index"'`
+ if [[ -z "$GSI" || "$GSI" == "null" ]]; then
+ echo "Error: No global-secondary-index in configuration"
+ return 0
+ fi
+ aws dynamodb create-table \
+ --no-cli-pager \
+ --table-name "$TARGET_TABLE" \
+ --attribute-definitions AttributeName=itemId,AttributeType=S \
+ AttributeName=environment,AttributeType=S \
+ AttributeName=isActive,AttributeType=S \
+ --key-schema AttributeName=itemId,KeyType=HASH \
+ AttributeName=environment,KeyType=RANGE \
+ --provisioned-throughput ReadCapacityUnits=5,WriteCapacityUnits=5 \
+ --global-secondary-indexes \
+ "[
+ {
+ \"IndexName\": \"$GSI\",
+ \"KeySchema\": [{\"AttributeName\":\"environment\",\"KeyType\":\"HASH\"},
+ {\"AttributeName\":\"isActive\",\"KeyType\":\"RANGE\"}],
+ \"Projection\":{
+ \"ProjectionType\":\"INCLUDE\",
+ \"NonKeyAttributes\":[\"itemId\",\"userId\",\"licenseId\",\"licenseType\"]
+ },
+ \"ProvisionedThroughput\": {
+ \"ReadCapacityUnits\": 5,
+ \"WriteCapacityUnits\": 3
+ }
+ }
+ ]"
+ fi
+}
+
+TARGET_TABLE=`g3kubectl get configmaps/manifest-hatchery -o json | jq -r '.data."license-user-maps-dynamodb-table"'`
+if [[ -z "$TARGET_TABLE" || "$TARGET_TABLE" == "null" ]]; then
+ echo "No gen3-license table in configuration"
+ # cron job to distribute licenses if using Stata workspaces but not using dynamoDB
+ if [ "$(g3kubectl get configmaps/manifest-hatchery -o yaml | grep "\"image\": .*stata.*")" ];
+ then
+ gen3 job cron distribute-licenses '* * * * *'
+ fi
+else
+ echo "Found gen3-license table in configuration: $TARGET_TABLE"
+ exists_or_create_gen3_license_table "$TARGET_TABLE"
+fi
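
The global secondary index created above is keyed on `environment` (hash) and `isActive` (range), which is what lets Hatchery list the active license mappings for a single environment. A hedged example of querying it through the AWS CLI — the table name, index name, and attribute values are placeholders; the real ones come from `manifest-hatchery`:

```
# Placeholder names/values -- substitute the table and GSI configured in manifest-hatchery.
aws dynamodb query \
  --table-name "gen3-license-user-maps" \
  --index-name "activeUsersIndex" \
  --key-condition-expression "environment = :env AND isActive = :active" \
  --expression-attribute-values '{":env": {"S": "my-commons"}, ":active": {"S": "True"}}'
```
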
+
+# if `nextflow-global.imagebuilder-reader-role-arn` is set in hatchery config, allow hatchery
+# to assume the configured role
+imagebuilderRoleArn=$(g3kubectl get configmap manifest-hatchery -o jsonpath={.data.nextflow-global} | jq -r '."imagebuilder-reader-role-arn"')
+assumeImageBuilderRolePolicyBlock=""
+if [ -z "$imagebuilderRoleArn" ]; then
+  gen3_log_info "No 'nextflow-global.imagebuilder-reader-role-arn' in Hatchery configuration, not granting AssumeRole"
+else
+  gen3_log_info "Found 'nextflow-global.imagebuilder-reader-role-arn' in Hatchery configuration, granting AssumeRole"
+ assumeImageBuilderRolePolicyBlock=$( cat < /dev/null 2>&1; then
roleName="$(gen3 api safe-name hatchery-sa)"
gen3 awsrole create $roleName $saName
policyName="$(gen3 api safe-name hatchery-policy)"
- policyInfo=$(gen3_aws_run aws iam create-policy --policy-name "$policyName" --policy-document "$policy" --description "Allow hathcery to assume csoc_adminvm role in other accounts, for multi-account workspaces")
+ policyInfo=$(gen3_aws_run aws iam create-policy --policy-name "$policyName" --policy-document "$policy" --description "Allow hatchery to assume csoc_adminvm role in other accounts and manage dynamodb for multi-account workspaces, and to create resources for nextflow workspaces")
if [ -n "$policyInfo" ]; then
- policyArn="$(jq -e -r '.["Policy"].Arn' <<< "$policyInfo")" || { echo "Cannot get 'Policy.Arn' from output: $policyInfo"; return 1; }
+ policyArn="$(jq -e -r '.["Policy"].Arn' <<< "$policyInfo")" || { echo "Cannot get 'Policy.Arn' from output: $policyInfo"; return 1; }
else
- echo "Unable to create policy $policyName. Assuming it already exists and continuing"
+ echo "Unable to create policy '$policyName'. Assume it already exists and create a new version to update the permissions..."
policyArn=$(gen3_aws_run aws iam list-policies --query "Policies[?PolicyName=='$policyName'].Arn" --output text)
- fi
+ # there can only be up to 5 versions, so delete old versions (except the current default one)
+ versions="$(gen3_aws_run aws iam list-policy-versions --policy-arn $policyArn | jq -r '.Versions[] | select(.IsDefaultVersion != true) | .VersionId')"
+ versions=(${versions}) # string to array
+ for v in "${versions[@]}"; do
+ echo "Deleting old version '$v'"
+ gen3_aws_run aws iam delete-policy-version --policy-arn $policyArn --version-id $v
+ done
+
+ # create the new version
+ gen3_aws_run aws iam create-policy-version --policy-arn "$policyArn" --policy-document "$policy" --set-as-default
+ fi
gen3_log_info "Attaching policy '${policyName}' to role '${roleName}'"
gen3 awsrole attach-policy ${policyArn} --role-name ${roleName} --force-aws-cli || exit 1
gen3 awsrole attach-policy "arn:aws:iam::aws:policy/AWSResourceAccessManagerFullAccess" --role-name ${roleName} --force-aws-cli || exit 1
diff --git a/gen3/bin/kube-setup-ingress.sh b/gen3/bin/kube-setup-ingress.sh
index d0bcff9a4..b75470f73 100644
--- a/gen3/bin/kube-setup-ingress.sh
+++ b/gen3/bin/kube-setup-ingress.sh
@@ -232,6 +232,28 @@ gen3_ingress_setup_role() {
}
}
},
+ {
+ "Effect": "Allow",
+ "Action": [
+ "elasticloadbalancing:AddTags"
+ ],
+ "Resource": [
+ "arn:aws:elasticloadbalancing:*:*:targetgroup/*/*",
+ "arn:aws:elasticloadbalancing:*:*:loadbalancer/net/*/*",
+ "arn:aws:elasticloadbalancing:*:*:loadbalancer/app/*/*"
+ ],
+ "Condition": {
+ "StringEquals": {
+ "elasticloadbalancing:CreateAction": [
+ "CreateTargetGroup",
+ "CreateLoadBalancer"
+ ]
+ },
+ "Null": {
+ "aws:RequestTag/elbv2.k8s.aws/cluster": "false"
+ }
+ }
+ },
{
"Effect": "Allow",
"Action": [
@@ -329,4 +351,4 @@ g3kubectl apply -f "${GEN3_HOME}/kube/services/revproxy/revproxy-service.yaml"
envsubst <$scriptDir/ingress.yaml | g3kubectl apply -f -
if [ "$deployWaf" = true ]; then
gen3_ingress_setup_waf
-fi
\ No newline at end of file
+fi
diff --git a/gen3/bin/kube-setup-jenkins2.sh b/gen3/bin/kube-setup-jenkins2.sh
new file mode 100644
index 000000000..f5233f978
--- /dev/null
+++ b/gen3/bin/kube-setup-jenkins2.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+#
+# Just a little helper for deploying jenkins onto k8s the first time
+#
+
+set -e
+
+export WORKSPACE="${WORKSPACE:-$HOME}"
+source "${GEN3_HOME}/gen3/lib/utils.sh"
+gen3_load "gen3/gen3setup"
+
+gen3 kube-setup-secrets
+
+#
+# Assume Jenkins should use 'jenkins' profile credentials in "${WORKSPACE}"/.aws/credentials
+#
+aws_access_key_id="$(aws configure get jenkins.aws_access_key_id)"
+aws_secret_access_key="$(aws configure get jenkins.aws_secret_access_key)"
+google_acct1_email="$(jq -r '.jenkins.google_acct1.email' < $(gen3_secrets_folder)/creds.json)"
+google_acct1_password="$(jq -r '.jenkins.google_acct1.password' < $(gen3_secrets_folder)/creds.json)"
+google_acct2_email="$(jq -r '.jenkins.google_acct2.email' < $(gen3_secrets_folder)/creds.json)"
+google_acct2_password="$(jq -r '.jenkins.google_acct2.password' < $(gen3_secrets_folder)/creds.json)"
+
+if [ -z "$aws_access_key_id" -o -z "$aws_secret_access_key" ]; then
+ gen3_log_err 'not configuring jenkins - could not extract secrets from aws configure'
+ exit 1
+fi
+if [[ -z "$google_acct1_email" || -z "$google_acct1_password" || -z "$google_acct2_email" || -z "$google_acct2_password" ]]; then
+ gen3_log_err "missing google credentials in '.jenkins' of creds.json"
+ exit 1
+fi
+
+if ! g3kubectl get secrets jenkins-secret > /dev/null 2>&1; then
+ # make it easy to rerun kube-setup-jenkins.sh
+ g3kubectl create secret generic jenkins-secret "--from-literal=aws_access_key_id=$aws_access_key_id" "--from-literal=aws_secret_access_key=$aws_secret_access_key"
+fi
+if ! g3kubectl get secrets google-acct1 > /dev/null 2>&1; then
+ g3kubectl create secret generic google-acct1 "--from-literal=email=${google_acct1_email}" "--from-literal=password=${google_acct1_password}"
+fi
+if ! g3kubectl get secrets google-acct2 > /dev/null 2>&1; then
+ g3kubectl create secret generic google-acct2 "--from-literal=email=${google_acct2_email}" "--from-literal=password=${google_acct2_password}"
+fi
+
+if ! g3kubectl get storageclass gp2 > /dev/null 2>&1; then
+ g3kubectl apply -f "${GEN3_HOME}/kube/services/jenkins/10storageclass.yaml"
+fi
+if ! g3kubectl get persistentvolumeclaim datadir-jenkins > /dev/null 2>&1; then
+ g3kubectl apply -f "${GEN3_HOME}/kube/services/jenkins/00pvc.yaml"
+fi
+
+# Note: jenkins service account is configured by `kube-setup-roles`
+gen3 kube-setup-roles
+# Note: only the 'default' namespace jenkins-service account gets a cluster rolebinding
+g3kubectl apply -f "${GEN3_HOME}/kube/services/jenkins/clusterrolebinding-devops.yaml"
+
+# Note: requires Jenkins entry in cdis-manifest
+gen3 roll jenkins2
+gen3 roll jenkins2-worker
+gen3 roll jenkins2-ci-worker
+
+#
+# Get the ARN of the SSL certificate for the commons -
+# We'll optimistically assume it's a wildcard cert that
+# is appropriate to also attach to the jenkins ELB
+#
+export ARN=$(g3kubectl get configmap global --output=jsonpath='{.data.revproxy_arn}')
+if [[ ! -z $ARN ]]; then
+ envsubst <"${GEN3_HOME}/kube/services/jenkins/jenkins-service.yaml" | g3kubectl apply -f -
+else
+  gen3_log_info "Global configmap not configured - not launching service (requires SSL cert ARN)"
+fi
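
kube-setup-jenkins2.sh reads the access keys with `aws configure get jenkins.*`, so a `jenkins` profile must already exist in `${WORKSPACE}/.aws/credentials`. A minimal sketch of seeding that profile (the key values are placeholders):

```
# Placeholder credentials -- use the real Jenkins IAM user keys.
aws configure set aws_access_key_id     "AKIAXXXXXXXXEXAMPLE"              --profile jenkins
aws configure set aws_secret_access_key "wJalrXUtnFEMI/K7MDENG/EXAMPLEKEY" --profile jenkins
# Equivalent stanza in "${WORKSPACE}"/.aws/credentials:
#   [jenkins]
#   aws_access_key_id = AKIAXXXXXXXXEXAMPLE
#   aws_secret_access_key = wJalrXUtnFEMI/K7MDENG/EXAMPLEKEY
```
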
diff --git a/gen3/bin/kube-setup-karpenter.sh b/gen3/bin/kube-setup-karpenter.sh
index bada6e69e..949c1ccd1 100644
--- a/gen3/bin/kube-setup-karpenter.sh
+++ b/gen3/bin/kube-setup-karpenter.sh
@@ -23,8 +23,10 @@ gen3_deploy_karpenter() {
if g3k_config_lookup .global.karpenter_version; then
karpenter=$(g3k_config_lookup .global.karpenter_version)
fi
- export clusterversion=`kubectl version --short -o json | jq -r .serverVersion.minor`
- if [ "${clusterversion}" = "24+" ]; then
+ export clusterversion=`kubectl version -o json | jq -r .serverVersion.minor`
+ if [ "${clusterversion}" = "25+" ]; then
+ karpenter=${karpenter:-v0.27.0}
+ elif [ "${clusterversion}" = "24+" ]; then
karpenter=${karpenter:-v0.24.0}
else
karpenter=${karpenter:-v0.22.0}
@@ -77,6 +79,14 @@ gen3_deploy_karpenter() {
"Effect": "Allow",
"Resource": "*",
"Sid": "ConditionalEC2Termination"
+ },
+ {
+ "Sid": "VisualEditor0",
+ "Effect": "Allow",
+ "Action": [
+ "kms:*"
+ ],
+ "Resource": "*"
}
],
"Version": "2012-10-17"
@@ -140,7 +150,11 @@ gen3_deploy_karpenter() {
--set serviceAccount.name=karpenter \
--set serviceAccount.create=false \
--set controller.env[0].name=AWS_REGION \
- --set controller.env[0].value=us-east-1
+ --set controller.env[0].value=us-east-1 \
+ --set controller.resources.requests.memory="2Gi" \
+ --set controller.resources.requests.cpu="2" \
+ --set controller.resources.limits.memory="2Gi" \
+ --set controller.resources.limits.cpu="2"
fi
gen3 awsrole sa-annotate karpenter "karpenter-controller-role-$vpc_name" karpenter
gen3_log_info "Remove cluster-autoscaler"
diff --git a/gen3/bin/kube-setup-ohdsi.sh b/gen3/bin/kube-setup-ohdsi.sh
index d586570db..3d8165547 100644
--- a/gen3/bin/kube-setup-ohdsi.sh
+++ b/gen3/bin/kube-setup-ohdsi.sh
@@ -14,13 +14,8 @@ new_client() {
local secrets=$(g3kubectl exec -c fence $(gen3 pod fence) -- fence-create client-create --client atlas --urls https://${atlas_hostname}/WebAPI/user/oauth/callback?client_name=OidcClient --username atlas --allowed-scopes openid profile email user | tail -1)
# secrets looks like ('CLIENT_ID', 'CLIENT_SECRET')
if [[ ! $secrets =~ (\'(.*)\', \'(.*)\') ]]; then
- # try delete client
- g3kubectl exec -c fence $(gen3 pod fence) -- fence-create client-delete --client atlas > /dev/null 2>&1
- secrets=$(g3kubectl exec -c fence $(gen3 pod fence) -- fence-create client-create --client atlas --urls https://${atlas_hostname}/WebAPI/user/oauth/callback?client_name=OidcClient --username atlas --allowed-scopes openid profile email user | tail -1)
- if [[ ! $secrets =~ (\'(.*)\', \'(.*)\') ]]; then
- gen3_log_err "kube-setup-ohdsi" "Failed generating oidc client for atlas: $secrets"
- return 1
- fi
+ gen3_log_err "kube-setup-ohdsi" "Failed generating oidc client for atlas: $secrets"
+ return 1
fi
local FENCE_CLIENT_ID="${BASH_REMATCH[2]}"
local FENCE_CLIENT_SECRET="${BASH_REMATCH[3]}"
@@ -87,6 +82,8 @@ setup_secrets() {
export DB_HOST=$(jq -r ".db_host" <<< "$dbcreds")
export FENCE_URL="https://${hostname}/user/user"
+ # get arborist_url from manifest.json:
+ export ARBORIST_URL=$(g3k_manifest_lookup .global.arborist_url)
export FENCE_METADATA_URL="https://${hostname}/.well-known/openid-configuration"
export FENCE_CLIENT_ID=$(jq -r ".FENCE_CLIENT_ID" <<< "$appcreds")
export FENCE_CLIENT_SECRET=$(jq -r ".FENCE_CLIENT_SECRET" <<< "$appcreds")
diff --git a/gen3/bin/kube-setup-revproxy.sh b/gen3/bin/kube-setup-revproxy.sh
index fcc2ef3b7..fd30b478b 100644
--- a/gen3/bin/kube-setup-revproxy.sh
+++ b/gen3/bin/kube-setup-revproxy.sh
@@ -111,15 +111,14 @@ for name in $(g3kubectl get services -o json | jq -r '.items[] | .metadata.name'
fi
done
-if g3kubectl get namespace argo > /dev/null 2>&1;
-then
- for argo in $(g3kubectl get services -n argo -o jsonpath='{.items[*].metadata.name}');
- do
- filePath="$scriptDir/gen3.nginx.conf/${argo}.conf"
- if [[ -f "$filePath" ]]; then
- confFileList+=("--from-file" "$filePath")
- fi
- done
+
+if g3k_manifest_lookup .argo.argo_server_service_url 2> /dev/null; then
+ argo_server_service_url=$(g3k_manifest_lookup .argo.argo_server_service_url)
+ g3k_kv_filter "${scriptDir}/gen3.nginx.conf/argo-server.conf" SERVICE_URL "${argo_server_service_url}" > /tmp/argo-server-with-url$(gen3 db namespace).conf
+ filePath="/tmp/argo-server-with-url$(gen3 db namespace).conf"
+ if [[ -f "$filePath" ]]; then
+ confFileList+=("--from-file" "$filePath")
+ fi
fi
if g3kubectl get namespace argocd > /dev/null 2>&1;
diff --git a/gen3/bin/kube-setup-system-services.sh b/gen3/bin/kube-setup-system-services.sh
index 609ee01c7..c26a04cb5 100644
--- a/gen3/bin/kube-setup-system-services.sh
+++ b/gen3/bin/kube-setup-system-services.sh
@@ -19,7 +19,7 @@ gen3_load "gen3/gen3setup"
kubeproxy=${kubeproxy:-1.24.7}
coredns=${coredns:-1.8.7}
kubednsautoscaler=${kubednsautoscaler:-1.8.6}
-cni=${cni:-1.12.2}
+cni=${cni:-1.14.1}
calico=${calico:-1.7.8}
@@ -39,7 +39,7 @@ calico_yaml="https://raw.githubusercontent.com/aws/amazon-vpc-cni-k8s/v${calico}
g3kubectl set image daemonset.apps/kube-proxy -n kube-system kube-proxy=${kube_proxy_image}
g3kubectl set image --namespace kube-system deployment.apps/coredns coredns=${coredns_image}
-g3k_kv_filter "${GEN3_HOME}/kube/services/kube-dns-autoscaler/dns-horizontal-autoscaler.yaml" SERVICE "coredns" IMAGE "$kubednsautoscaler_image" | g3kubectl apply -f -
+#g3k_kv_filter "${GEN3_HOME}/kube/services/kube-dns-autoscaler/dns-horizontal-autoscaler.yaml" SERVICE "coredns" IMAGE "$kubednsautoscaler_image" | g3kubectl apply -f -
g3kubectl apply -f ${cni_image}
g3kubectl apply -f ${calico_yaml}
diff --git a/gen3/bin/kube-setup-wts.sh b/gen3/bin/kube-setup-wts.sh
index b807da2d5..ad8211d03 100644
--- a/gen3/bin/kube-setup-wts.sh
+++ b/gen3/bin/kube-setup-wts.sh
@@ -42,6 +42,8 @@ new_client() {
"oidc_client_id": "$client_id",
"oidc_client_secret": "$client_secret",
+ "aggregate_endpoint_allowlist": ["/authz/mapping"],
+
"external_oidc": []
}
EOM
diff --git a/gen3/bin/migrate-to-vpc-cni.sh b/gen3/bin/migrate-to-vpc-cni.sh
new file mode 100644
index 000000000..510d9ebef
--- /dev/null
+++ b/gen3/bin/migrate-to-vpc-cni.sh
@@ -0,0 +1,138 @@
+#!/bin/bash
+
+source "${GEN3_HOME}/gen3/lib/utils.sh"
+gen3_load "gen3/gen3setup"
+
+#Get the K8s NS
+ctx="$(g3kubectl config current-context)"
+ctxNamespace="$(g3kubectl config view -ojson | jq -r ".contexts | map(select(.name==\"$ctx\")) | .[0] | .context.namespace")"
+
+# Set the cluster name variable
+CLUSTER_NAME=`gen3 api environment`
+
+# Check if in default ns
+if [[ ("$ctxNamespace" != "default" && "$ctxNamespace" != "null") ]]; then
+ gen3_log_err "Namespace must be default"
+ exit 1
+fi
+
+# Cd into Cloud-automation repo and pull the latest from master
+gen3_log_info "Pulling the latest from Cloud-Auto"
+cd /home/$CLUSTER_NAME/cloud-automation || { gen3_log_err "Cloud-automation repo not found"; exit 1; }
+#### Change to master
+git checkout master || { gen3_log_err "Failed to checkout master branch"; exit 1; }
+git pull || { gen3_log_err "Failed to pull from the repository"; exit 1; }
+
+# Update the Karpenter Node Template
+gen3_log_info "Apply new Karpenter Node Template"
+if [[ -d $(g3k_manifest_init)/$(g3k_hostname)/manifests/karpenter ]]; then
+ gen3_log_info "Karpenter setup in manifest. Open a cdismanifest PR and add this line to aws node templates: https://github.com/uc-cdis/cloud-automation/blob/master/kube/services/karpenter/nodeTemplateDefault.yaml#L40"
+ while true; do
+ read -p "Have you updated your manifest? (yes/no): " yn
+ case $yn in
+ [Yy]* )
+ gen3_log_info "Proceeding with Karpenter deployment..."
+ gen3 kube-setup-karpenter deploy --force || { gen3_log_err "kube-setup-karpenter failed"; exit 1; }
+ break
+ ;;
+ [Nn]* )
+ gen3_log_info "Please update the cdismanifest before proceeding."
+ exit 1
+ ;;
+ * )
+ gen3_log_info "Please answer yes or no."
+ ;;
+ esac
+ done
+else
+ gen3 kube-setup-karpenter deploy --force || { gen3_log_err "kube-setup-karpenter failed"; exit 1; }
+fi
+
+# Cordon all the nodes before running gen3 roll all"
+gen3_log_info "Cordoning all nodes"
+kubectl get nodes --no-headers -o custom-columns=":metadata.name" | grep -v '^fargate' | xargs -I{} kubectl cordon {}
+
+# Run a "gen3 roll all" so all nodes use the new mounted BPF File System
+gen3_log_info "Cycling all the nodes by running gen3 roll all"
+gen3 roll all --fast || exit 1
+
+# Confirm that all nodes have been rotated
+while true; do
+ read -p "Roll all complete. Have all cordoned nodes been rotated? (yes/no): " yn
+ case $yn in
+ [Yy]* )
+ gen3_log_info "Continuing with script..."
+ break
+ ;;
+ [Nn]* )
+ gen3_log_info "Please drain any remaining nodes with 'kubectl drain --ignore-daemonsets --delete-emptydir-data'"
+ ;;
+ * )
+ gen3_log_info "Please answer yes or no."
+ ;;
+ esac
+done
+
+
+# Delete all existing network policies
+gen3_log_info "Deleting networkpolicies"
+kubectl delete networkpolicies --all
+
+# Delete all Calico-related resources from the "kube-system" namespace
+gen3_log_info "Deleting all Calico related resources"
+kubectl get deployments -n kube-system | grep calico | awk '{print $1}' | xargs kubectl delete deployment -n kube-system
+kubectl get daemonsets -n kube-system | grep calico | awk '{print $1}' | xargs kubectl delete daemonset -n kube-system
+kubectl get services -n kube-system | grep calico | awk '{print $1}' | xargs kubectl delete service -n kube-system
+kubectl get replicasets -n kube-system | grep calico | awk '{print $1}' | xargs kubectl delete replicaset -n kube-system
+
+# Backup the current VPC CNI configuration in case of rollback
+gen3_log_info "Backing up current VPC CNI Configuration..."
+kubectl get daemonset aws-node -n kube-system -o yaml > aws-k8s-cni-old.yaml || { gen3_log_err "Error backing up VPC CNI configuration"; exit 1; }
+
+# Check to ensure we are not using an AWS plugin to manage the VPC CNI Plugin
+if aws eks describe-addon --cluster-name "$CLUSTER_NAME" --addon-name vpc-cni --query addon.addonVersion --output text 2>/dev/null; then
+ gen3_log_err "Error: VPC CNI Plugin is managed by AWS. Please log into the AWS UI and delete the VPC CNI Plugin in Amazon EKS, then re-run this script."
+ exit 1
+else
+ gen3_log_info "No managed VPC CNI Plugin found, proceeding with the script."
+fi
+
+# Apply the new VPC CNI Version
+gen3_log_info "Applying new version of VPC CNI"
+g3kubectl apply -f https://raw.githubusercontent.com/aws/amazon-vpc-cni-k8s/v1.14.1/config/master/aws-k8s-cni.yaml || { gen3_log_err "Failed to apply new VPC CNI version"; exit 1; }
+
+# Check the version to make sure it updated
+NEW_VERSION=$(kubectl describe daemonset aws-node --namespace kube-system | grep amazon-k8s-cni: | cut -d : -f 3)
+gen3_log_info "Current version of aws-k8s-cni is: $NEW_VERSION"
+if [ "$NEW_VERSION" != "v1.14.1" ]; then
+ gen3_log_info "The version of aws-k8s-cni has not been updated correctly."
+ exit 1
+fi
+
+# Edit the amazon-vpc-cni configmap to enable network policy controller
+gen3_log_info "Enabling NetworkPolicies in VPC CNI Configmap"
+kubectl patch configmap -n kube-system amazon-vpc-cni --type merge -p '{"data":{"enable-network-policy-controller":"true"}}' || { gen3_log_err "Configmap patch failed"; exit 1; }
+
+# Edit the aws-node daemonset
+gen3_log_info "Enabling NetworkPolicies in aws-node Daemonset"
+kubectl patch daemonset aws-node -n kube-system --type=json -p='[{"op": "add", "path": "/spec/template/spec/containers/1/args", "value": ["--enable-network-policy=true", "--enable-ipv6=false", "--enable-cloudwatch-logs=false", "--metrics-bind-addr=:8162", "--health-probe-bind-addr=:8163"]}]' || { gen3_log_err "Daemonset edit failed"; exit 1; }
+
+# Ensure all the aws-nodes are running
+kubectl get pods -n kube-system | grep aws
+while true; do
+ read -p "Do all the aws-node pods in the kube-system ns have 2/2 containers running? (yes/no): " yn
+ case $yn in
+ [Yy]* )
+ gen3_log_info "Running kube-setup-networkpolicy..."
+ gen3 kube-setup-networkpolicy || exit 1
+ break
+ ;;
+ [Nn]* )
+ gen3_log_err "Look at aws-node logs to figure out what went wrong. View this document for more details: https://docs.google.com/document/d/1fcBTciQSSwjvHktEnO_7EObY-xR_EvJ2NtgUa70wvL8"
+ gen3_log_info "Rollback instructions are also available in the above document"
+ ;;
+ * )
+ gen3_log_info "Please answer yes or no."
+ ;;
+ esac
+done
\ No newline at end of file
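
The backup of the `aws-node` daemonset taken above exists so the migration can be unwound; the linked Google doc holds the authoritative rollback procedure. As a rough sketch, a rollback would start from that backup:

```
# Rough rollback sketch -- assumes aws-k8s-cni-old.yaml was produced by the backup step above.
kubectl apply -f aws-k8s-cni-old.yaml
kubectl rollout status daemonset/aws-node -n kube-system
```
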
diff --git a/gen3/bin/mutate-guppy-config-for-guppy-test.sh b/gen3/bin/mutate-guppy-config-for-guppy-test.sh
index de7da10d5..151bb7169 100644
--- a/gen3/bin/mutate-guppy-config-for-guppy-test.sh
+++ b/gen3/bin/mutate-guppy-config-for-guppy-test.sh
@@ -16,7 +16,7 @@ sed -i 's/\(.*\)"index": "\(.*\)_etl",$/\1"index": "jenkins_subject_alias",/' or
# for bloodpac-like envs
sed -i 's/\(.*\)"index": "\(.*\)_case",$/\1"index": "jenkins_subject_alias",/' original_guppy_config.yaml
# the pre-defined Canine index works with subject ONLY (never case)
-sed -i 's/\(.*\)"type": "case"$/\1"type": "subject"/' original_guppy_config.yaml
+# sed -i 's/\(.*\)"type": "case"$/\1"type": "subject"/' original_guppy_config.yaml
sed -i 's/\(.*\)"index": "\(.*\)_file",$/\1"index": "jenkins_file_alias",/' original_guppy_config.yaml
sed -i 's/\(.*\)"config_index": "\(.*\)_array-config",$/\1"config_index": "jenkins_configs_alias",/' original_guppy_config.yaml
diff --git a/gen3/lib/logs/snapshot.sh b/gen3/lib/logs/snapshot.sh
index 31cb80283..ae769a285 100644
--- a/gen3/lib/logs/snapshot.sh
+++ b/gen3/lib/logs/snapshot.sh
@@ -36,10 +36,11 @@ gen3_logs_snapshot_container() {
# Snapshot all the pods
#
gen3_logs_snapshot_all() {
+  # For each pod that is not Pending or Unknown, list its containers (regular containers
+  # plus initContainers, skipping "pause" and "jupyterhub") and emit one "<pod name> <container name>" line per container.
g3kubectl get pods -o json | \
- jq -r '.items | map(select(.status.phase != "Pending" and .status.phase != "Unknown")) | map( {pod: .metadata.name, containers: .spec.containers | map(.name) } ) | map( .pod as $pod | .containers | map( { pod: $pod, cont: .})[]) | map(select(.cont != "pause" and .cont != "jupyterhub"))[] | .pod + " " + .cont' | \
+ jq -r '.items | map(select(.status.phase != "Pending" and .status.phase != "Unknown")) | .[] | .metadata.name as $pod | (.spec.containers + .spec.initContainers) | map(select(.name != "pause" and .name != "jupyterhub")) | .[] | {pod: $pod, cont: .name} | "\(.pod) \(.cont)"' | \
while read -r line; do
gen3_logs_snapshot_container $line
done
}
-
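
To make the new jq pipeline concrete: it emits one `<pod> <container>` line per container (initContainers included), skipping `pause` and `jupyterhub`. A tiny self-contained check against a fabricated single-pod listing:

```
# Fabricated pod listing, just to show the shape of the output used by gen3_logs_snapshot_all.
echo '{"items":[{"metadata":{"name":"fence-deploy-abc"},"status":{"phase":"Running"},
  "spec":{"containers":[{"name":"fence"}],"initContainers":[{"name":"fence-init"}]}}]}' | \
jq -r '.items
  | map(select(.status.phase != "Pending" and .status.phase != "Unknown"))
  | .[] | .metadata.name as $pod
  | (.spec.containers + .spec.initContainers)
  | map(select(.name != "pause" and .name != "jupyterhub"))
  | .[] | {pod: $pod, cont: .name} | "\(.pod) \(.cont)"'
# fence-deploy-abc fence
# fence-deploy-abc fence-init
```
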
diff --git a/gen3/lib/testData/default/expectedFenceResult.yaml b/gen3/lib/testData/default/expectedFenceResult.yaml
index f6d76d790..98c360531 100644
--- a/gen3/lib/testData/default/expectedFenceResult.yaml
+++ b/gen3/lib/testData/default/expectedFenceResult.yaml
@@ -44,6 +44,13 @@ spec:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
+ preference:
+ matchExpressions:
+ - key: karpenter.sh/capacity-type
+ operator: In
+ values:
+ - on-demand
+ - weight: 99
preference:
matchExpressions:
- key: eks.amazonaws.com/capacityType
@@ -136,6 +143,7 @@ spec:
ports:
- containerPort: 80
- containerPort: 443
+ - containerPort: 6567
volumeMounts:
# -----------------------------------------------------------------------------
# DEPRECATED! Remove when all commons are no longer using local_settings.py
diff --git a/gen3/lib/testData/default/expectedSheepdogResult.yaml b/gen3/lib/testData/default/expectedSheepdogResult.yaml
index b9db85a36..a2bd3efcc 100644
--- a/gen3/lib/testData/default/expectedSheepdogResult.yaml
+++ b/gen3/lib/testData/default/expectedSheepdogResult.yaml
@@ -17,6 +17,7 @@ spec:
template:
metadata:
labels:
+ netnolimit: "yes"
app: sheepdog
release: production
public: "yes"
@@ -39,12 +40,19 @@ spec:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
+ preference:
+ matchExpressions:
+ - key: karpenter.sh/capacity-type
+ operator: In
+ values:
+ - spot
+ - weight: 99
preference:
matchExpressions:
- key: eks.amazonaws.com/capacityType
operator: In
values:
- - ONDEMAND
+ - SPOT
automountServiceAccountToken: false
volumes:
- name: config-volume
diff --git a/gen3/lib/testData/test1.manifest.g3k/expectedFenceResult.yaml b/gen3/lib/testData/test1.manifest.g3k/expectedFenceResult.yaml
index d4196c070..adc35ad2f 100644
--- a/gen3/lib/testData/test1.manifest.g3k/expectedFenceResult.yaml
+++ b/gen3/lib/testData/test1.manifest.g3k/expectedFenceResult.yaml
@@ -47,6 +47,13 @@ spec:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
+ preference:
+ matchExpressions:
+ - key: karpenter.sh/capacity-type
+ operator: In
+ values:
+ - on-demand
+ - weight: 99
preference:
matchExpressions:
- key: eks.amazonaws.com/capacityType
diff --git a/gen3/lib/testData/test1.manifest.g3k/expectedSheepdogResult.yaml b/gen3/lib/testData/test1.manifest.g3k/expectedSheepdogResult.yaml
index f54fd3e03..08407ae52 100644
--- a/gen3/lib/testData/test1.manifest.g3k/expectedSheepdogResult.yaml
+++ b/gen3/lib/testData/test1.manifest.g3k/expectedSheepdogResult.yaml
@@ -43,6 +43,13 @@ spec:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
+ preference:
+ matchExpressions:
+ - key: karpenter.sh/capacity-type
+ operator: In
+ values:
+ - spot
+ - weight: 99
preference:
matchExpressions:
- key: eks.amazonaws.com/capacityType
diff --git a/gen3/test/ec2Test.sh b/gen3/test/ec2Test.sh
index 21310a24c..4981c925c 100644
--- a/gen3/test/ec2Test.sh
+++ b/gen3/test/ec2Test.sh
@@ -1,6 +1,6 @@
-if ! EC2_TEST_IP="$(g3kubectl get nodes -o json | jq -r -e '.items[0].status.addresses[] | select(.type == "InternalIP") | .address')" || [[ -z "$EC2_TEST_IP" ]]; then
+if ! EC2_TEST_IP="$(g3kubectl get nodes -o json | jq -r -e '.items[3].status.addresses[] | select(.type == "InternalIP") | .address')" || [[ -z "$EC2_TEST_IP" ]]; then
gen3_log_err "ec2Test failed to acquire IP address of a k8s node to test against"
fi
diff --git a/gen3/test/ecrTest.sh b/gen3/test/ecrTest.sh
index 91edf798b..57847abe5 100644
--- a/gen3/test/ecrTest.sh
+++ b/gen3/test/ecrTest.sh
@@ -10,8 +10,8 @@ test_ecr_login() {
test_ecr_setup() {
if [[ -n "$JENKINS_HOME" ]]; then
- # give ourselves read/write permissions on /var/run/docker.sock
- sudo chmod a+rw /var/run/docker.sock; because $? "ecr_setup modified docker.sock"
+ # give ourselves permissions on /run/containerd/containerd.sock
+ sudo chown root:sudo /run/containerd/containerd.sock; because $? "ecr_setup modified containerd.sock"
fi
}
diff --git a/gen3/test/jobTest.sh b/gen3/test/jobTest.sh
index 84a4d046b..bb37b4f72 100644
--- a/gen3/test/jobTest.sh
+++ b/gen3/test/jobTest.sh
@@ -6,7 +6,7 @@
excludeJob() {
local jobKey="$1"
local excludeList=(
- /aws-bucket- /bucket- /covid19- /data-ingestion- /google- /nb-etl- /remove-objects-from- /replicate- /s3sync- /fence-cleanup
+ /aws-bucket- /bucket- /covid19- /data-ingestion- /google- /nb-etl- /remove-objects-from- /replicate- /s3sync- /fence-cleanup /etl- /indexd- /metadata-
)
for exclude in "${excludeList[@]}"; do
if [[ "$it" =~ $exclude ]]; then return 0; fi
diff --git a/gen3/test/jupyterTest.sh b/gen3/test/jupyterTest.sh
index f0e327d71..db6a62618 100644
--- a/gen3/test/jupyterTest.sh
+++ b/gen3/test/jupyterTest.sh
@@ -30,7 +30,7 @@ test_jupyter_metrics() {
}
shunit_runtest "test_jupyter_idle" "jupyter"
-shunit_runtest "test_jupyter_metrics" "jupyter"
+# shunit_runtest "test_jupyter_metrics" "jupyter"
shunit_runtest "test_jupyter_prepuller" "local,jupyter"
shunit_runtest "test_jupyter_namespace" "local,jupyter"
shunit_runtest "test_jupyter_setup" "jupyter"
diff --git a/gen3/test/terraformTest.sh b/gen3/test/terraformTest.sh
deleted file mode 100644
index 17bcc03c2..000000000
--- a/gen3/test/terraformTest.sh
+++ /dev/null
@@ -1,461 +0,0 @@
-GEN3_TEST_PROFILE="${GEN3_TEST_PROFILE:-cdistest}"
-GEN3_TEST_WORKSPACE="gen3test"
-GEN3_TEST_ACCOUNT=707767160287
-
-#
-# TODO - generalize these tests to setup their own test VPC,
-# rather than relying on qaplanetv1 or devplanetv1 being there
-#
-
-#
-# Little macos/linux stat wrapper
-#
-file_mode() {
- if [[ $(uname -s) == 'Linux' ]]; then
- stat -c %a "$1"
- else
- stat -f %p "$1"
- fi
-}
-
-test_workspace() {
- gen3 workon $GEN3_TEST_PROFILE $GEN3_TEST_WORKSPACE; because $? "Calling gen3 workon multiple times should be harmless"
- [[ $GEN3_PROFILE = $GEN3_TEST_PROFILE ]]; because $? "gen3 workon sets the GEN3_PROFILE env variable: $GEN3_PROFILE"
- [[ $GEN3_WORKSPACE = $GEN3_TEST_WORKSPACE ]]; because $? "gen3 workon sets the GEN3_WORKSPACE env variable: $GEN3_WORKSPACE"
- [[ $GEN3_FLAVOR = "AWS" || \
- ($GEN3_FLAVOR == "GCP" && $GEN3_PROFILE =~ ^gcp-) || \
- ($GEN3_FLAVOR == "ONPREM" && $GEN3_PROFILE =~ ^onprem-) ]]; because $? "GEN3_FLAVOR is gcp for gcp-* profiles, else AWS"
- [[ $GEN3_FLAVOR != "AWS" || $GEN3_S3_BUCKET = "cdis-state-ac${GEN3_TEST_ACCOUNT}-gen3" || $GEN3_S3_BUCKET = "cdis-terraform-state.account-${GEN3_TEST_ACCOUNT}.gen3" ]]; because $? "gen3 workon sets the GEN3_S3_BUCKET env variable: $GEN3_S3_BUCKET"
- [[ (! -z $GEN3_WORKDIR) && -d $GEN3_WORKDIR ]]; because $? "gen3 workon sets the GEN3_WORKDIR env variable, and initializes the folder: $GEN3_WORKDIR"
- [[ $(file_mode $GEN3_WORKDIR) =~ 700$ ]]; because $? "gen3 workon sets the GEN3_WORKDIR to mode 0700, because secrets are in there"
- gen3 cd && [[ $(pwd) = "$GEN3_WORKDIR" ]]; because $? "gen3 cd should take us to the workspace by default: $(pwd) =? $GEN3_WORKDIR"
- for fileName in README.md config.tfvars backend.tfvars; do
- [[ -f $fileName ]]; because $? "gen3 workon ensures we have a $fileName - local copy || s3 copy || generated from template"
- done
- [[ ! -z "$MD5" ]]; because $? "commons.sh sets MD5 to $MD5"
-
- if [[ $GEN3_TEST_WORKSPACE =~ __custom$ ]]; then
- [[ "$GEN3_TFSCRIPT_FOLDER" == "$GEN3_WORKDIR" ]]; because $? "a __custom workspace loads from the workspace folder"
- elif [[ "$GEN3_TEST_PROFILE" =~ ^gcp- ]]; then
- [[ "$GEN3_TFSCRIPT_FOLDER" == "$GEN3_HOME/tf_files/gcp/commons" ]]; because $? "a gcp- profile currently only support a commons workspace"
- elif [[ "$GEN3_TEST_PROFILE" =~ ^onprem- ]]; then
- for fileName in README.md creds.json 00configmap.yaml kube-setup.sh; do
- filePath="onprem_scripts/$fileName"
- [[ -f $filePath ]]; because $? "gen3 workon ensures we have a $filePath generated from template"
- done
- else # aws profile
- [[ "$GEN3_TFSCRIPT_FOLDER" =~ ^"$GEN3_HOME/tf_files/aws/" ]]; because $? "an aws workspace references the aws/ folder: $GEN3_TFSCRIPT_FOLDER"
- fi
-}
-
-workspace_cleanup() {
- # try to avoid accidentally erasing the user's data ...
- cd /tmp && [[ -n "$GEN3_WORKDIR" && "$GEN3_WORKDIR" =~ /gen3/ && -f "$GEN3_WORKDIR/config.tfvars" ]] && /bin/rm -rf "$GEN3_WORKDIR";
- because $? "was able to cleanup $GEN3_WORKDIR"
-}
-
-test_uservpc_workspace() {
- GEN3_TEST_WORKSPACE="${GEN3_TEST_WORKSPACE}_user"
- test_workspace
- [[ "$GEN3_TFSCRIPT_FOLDER" == "$GEN3_HOME/tf_files/aws/user_vpc" ]]; because $? "a _user workspace should use the ./aws/user_vpc resources: $GEN3_TFSCRIPT_FOLDER"
- workspace_cleanup
-}
-
-test_usergeneric_workspace() {
- GEN3_TEST_WORKSPACE="${GEN3_TEST_WORKSPACE}_usergeneric"
- test_workspace
- [[ "$GEN3_TFSCRIPT_FOLDER" == "$GEN3_HOME/tf_files/aws/user_generic" ]]; because $? "a _usergeneric workspace should use the ./aws/user_generic resources: $GEN3_TFSCRIPT_FOLDER"
- cat << EOF > config.tfvars
-username="frickjack"
-EOF
- gen3 tfplan; because $? "_usergeneric tfplan should work";
- workspace_cleanup
-}
-
-test_snapshot_workspace() {
- GEN3_TEST_WORKSPACE="${GEN3_TEST_WORKSPACE}_snapshot"
- test_workspace
- [[ "$GEN3_TFSCRIPT_FOLDER" == "$GEN3_HOME/tf_files/aws/rds_snapshot" ]]; because $? "a _snapshot workspace should use the ./aws/rds_snapshot resources: $GEN3_TFSCRIPT_FOLDER"
- workspace_cleanup
-}
-
-test_databucket_workspace() {
- GEN3_TEST_WORKSPACE="${GEN3_TEST_WORKSPACE}_databucket"
- test_workspace
- [[ "$GEN3_TFSCRIPT_FOLDER" == "$GEN3_HOME/tf_files/aws/data_bucket" ]]; because $? "a _databucket workspace should use the ./aws/data_bucket resources: $GEN3_TFSCRIPT_FOLDER"
- cat - > config.tfvars < config.tfvars < config.tfvars < @ in password
-db_password_fence="whatever"
-
-db_password_gdcapi="whatever"
-db_password_sheepdog="whatever"
-db_password_peregrine="whatever"
-
-db_password_indexd="g6pmYkcoR7qECjGoErzVb5gkX3kum0yo"
-
-# password for write access to indexd
-gdcapi_indexd_password="oYva39mIPV5uXskv7jWnKuVZBUFBQcxd"
-
-fence_snapshot=""
-gdcapi_snapshot=""
-indexd_snapshot=""
-# mailgun for sending alert e-mails
-mailgun_api_key=""
-mailgun_api_url=""
-mailgun_smtp_host=""
-
-kube_ssh_key=""
-EOM
- [[ "$(pwd)" =~ "/$GEN3_WORKSPACE"$ ]]; because $? "commons workspace should have base $GEN3_WORKSPACE - $(pwd)"
- gen3 tfplan; because $? "tfplan should run even with some invalid config variables"
- [[ -f "$GEN3_WORKDIR/plan.terraform" ]]; because $? "'gen3 tfplan' generates a plan.terraform file used by 'gen3 tfapply'"
- workspace_cleanup
-}
-
-test_custom_workspace() {
- GEN3_TEST_WORKSPACE="${GEN3_TEST_WORKSPACE}__custom"
- test_workspace
-
- local sourceFolder="../../../../../cloud-automation/tf_files/aws/modules/s3-bucket"
- if [[ ! -d "$sourceFolder" ]]; then
- # Jenkins has a different relative path setup
- sourceFolder="../../../../cloud-automation/tf_files/aws/modules/s3-bucket"
- fi
- cat - > bucket.tf < config.tfvars < config.tfvars < config.tfvars < config.tfvars < config.tfvars < config.tfvars < config.tfvars < config.tfvars <> /home/ec2-user/.ssh/authorized_keys
- aws ec2 create-tags --resources $instanceId --tags 'Key="instanceId",Value='$instanceId''
- curl https://raw.githubusercontent.com/uc-cdis/cloud-automation/master/files/authorized_keys/ops_team >> /home/ec2-user/.ssh/authorized_keys
+
+ echo "$(jq '.registryPullQPS=0' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json
sysctl -w fs.inotify.max_user_watches=12000
@@ -96,7 +116,7 @@ data:
blockDeviceMappings:
- deviceName: /dev/xvda
ebs:
- volumeSize: 50Gi
+ volumeSize: 100Gi
volumeType: gp2
encrypted: true
deleteOnTermination: true
diff --git a/kube/services/argo-events/workflows/eventsource-created.yaml b/kube/services/argo-events/workflows/eventsource-created.yaml
index 9abf78e19..11d7084ca 100644
--- a/kube/services/argo-events/workflows/eventsource-created.yaml
+++ b/kube/services/argo-events/workflows/eventsource-created.yaml
@@ -15,4 +15,4 @@ spec:
eventTypes:
- ADD
filter:
- afterStart: false
+ afterStart: true
diff --git a/kube/services/argo-events/workflows/sensor-completed.yaml b/kube/services/argo-events/workflows/sensor-completed.yaml
index e92ad6918..293c0e119 100644
--- a/kube/services/argo-events/workflows/sensor-completed.yaml
+++ b/kube/services/argo-events/workflows/sensor-completed.yaml
@@ -43,18 +43,22 @@ spec:
parallelism: 1
template:
spec:
- restartPolicy: Never
+ restartPolicy: OnFailure
containers:
- name: karpenter-resource-creator
image: quay.io/cdis/awshelper
command: ["/bin/sh"]
- args:
+ args:
- "-c"
- |
- kubectl delete awsnodetemplate workflow-$WORKFLOW_NAME
- kubectl delete provisioners workflow-$WORKFLOW_NAME
- env:
- - name: WORKFLOW_NAME
- value: ""
- backoffLimit: 0
+ if kubectl get awsnodetemplate workflow-$WORKFLOW_NAME >/dev/null 2>&1; then
+ kubectl delete awsnodetemplate workflow-$WORKFLOW_NAME
+ fi
+ if kubectl get provisioner workflow-$WORKFLOW_NAME >/dev/null 2>&1; then
+ kubectl delete provisioners workflow-$WORKFLOW_NAME
+ fi
+ env:
+ - name: WORKFLOW_NAME
+ value: ""
+ backoffLimit: 20
diff --git a/kube/services/argo-events/workflows/sensor-created.yaml b/kube/services/argo-events/workflows/sensor-created.yaml
index 27cbc5643..4221f5742 100644
--- a/kube/services/argo-events/workflows/sensor-created.yaml
+++ b/kube/services/argo-events/workflows/sensor-created.yaml
@@ -51,20 +51,31 @@ spec:
parallelism: 1
template:
spec:
- restartPolicy: Never
+ restartPolicy: OnFailure
containers:
- name: karpenter-resource-creator
image: quay.io/cdis/awshelper
command: ["/bin/sh"]
- args:
- - "-c"
- - |
- for file in /home/manifests/*.yaml; do envsubst < $file | kubectl apply -f -; done
+ args:
+ - "-c"
+ - |
+ if ! kubectl get awsnodetemplate workflow-$WORKFLOW_NAME >/dev/null 2>&1; then
+ sed -e "s/WORKFLOW_NAME/$workflow_name/" -e "s/GEN3_USERNAME/$workflow_user/" -e "s/ENVIRONMENT/$ENVIRONMENT/" | kubectl apply -f -
+ fi
+
+ if ! kubectl get provisioner workflow-$WORKFLOW_NAME >/dev/null 2>&1; then
+ sed -e "s/WORKFLOW_NAME/$workflow_name/" -e "s/GEN3_USERNAME/$workflow_user/" -e "s/ENVIRONMENT/$ENVIRONMENT/" | kubectl apply -f -
+ fi
env:
- - name: WORKFLOW_NAME
- value: ""
- - name: GEN3_USERNAME
- value: ""
+ - name: WORKFLOW_NAME
+ value: ""
+ - name: GEN3_USERNAME
+ value: ""
+ - name: ENVIRONMENT
+ valueFrom:
+ configMapKeyRef:
+ name: environment
+ key: environment
volumeMounts:
- name: karpenter-templates-volume
mountPath: /home/manifests
@@ -72,5 +83,4 @@ spec:
- name: karpenter-templates-volume
configMap:
name: karpenter-templates
- backoffLimit: 0
-
+ backoffLimit: 20
diff --git a/kube/services/argo-events/workflows/sensor-deleted.yaml b/kube/services/argo-events/workflows/sensor-deleted.yaml
index 61e2235d7..c235a820a 100644
--- a/kube/services/argo-events/workflows/sensor-deleted.yaml
+++ b/kube/services/argo-events/workflows/sensor-deleted.yaml
@@ -39,18 +39,22 @@ spec:
parallelism: 1
template:
spec:
- restartPolicy: Never
+ restartPolicy: OnFailure
containers:
- name: karpenter-resource-creator
image: quay.io/cdis/awshelper
command: ["/bin/sh"]
- args:
+ args:
- "-c"
- |
- kubectl delete awsnodetemplate workflow-$WORKFLOW_NAME
- kubectl delete provisioners workflow-$WORKFLOW_NAME
- env:
- - name: WORKFLOW_NAME
- value: ""
- backoffLimit: 0
+ if kubectl get awsnodetemplate workflow-$WORKFLOW_NAME >/dev/null 2>&1; then
+ kubectl delete awsnodetemplate workflow-$WORKFLOW_NAME
+ fi
+ if kubectl get provisioner workflow-$WORKFLOW_NAME >/dev/null 2>&1; then
+ kubectl delete provisioners workflow-$WORKFLOW_NAME
+ fi
+ env:
+ - name: WORKFLOW_NAME
+ value: ""
+ backoffLimit: 20
diff --git a/kube/services/argo-wrapper/argo-wrapper-deploy.yaml b/kube/services/argo-wrapper/argo-wrapper-deploy.yaml
index 65f68d98a..89ec29ecc 100644
--- a/kube/services/argo-wrapper/argo-wrapper-deploy.yaml
+++ b/kube/services/argo-wrapper/argo-wrapper-deploy.yaml
@@ -58,7 +58,10 @@ spec:
configMap:
name: manifest-argo
optional: true
-
+ - name: argo-wrapper-namespace-config
+ configMap:
+ name: argo-wrapper-namespace-config
+
containers:
- name: argo-wrapper
GEN3_ARGO-WRAPPER_IMAGE
@@ -70,3 +73,7 @@ spec:
readOnly: true
mountPath: /argo.json
subPath: argo.json
+ - name: argo-wrapper-namespace-config
+ readOnly: true
+ mountPath: /argowrapper/config.ini
+ subPath: config.ini
diff --git a/kube/services/argo-wrapper/config.ini b/kube/services/argo-wrapper/config.ini
new file mode 100644
index 000000000..0693ee2e2
--- /dev/null
+++ b/kube/services/argo-wrapper/config.ini
@@ -0,0 +1,6 @@
+[DEFAULT]
+ARGO_ACCESS_METHOD = access
+ARGO_HOST = $ARGO_HOST
+ARGO_NAMESPACE = $ARGO_NAMESPACE
+COHORT_DEFINITION_BY_SOURCE_AND_TEAM_PROJECT_URL = http://cohort-middleware-service/cohortdefinition-stats/by-source-id/{}/by-team-project?team-project={}
+COHORT_MIDDLEWARE_URL = http://cohort-middleware-service
diff --git a/kube/services/argo/values.yaml b/kube/services/argo/values.yaml
index e8db62711..23dda4a5a 100644
--- a/kube/services/argo/values.yaml
+++ b/kube/services/argo/values.yaml
@@ -1,10 +1,17 @@
controller:
- parallelism: 3
+ parallelism: 10
+ namespaceParallelism: 5
metricsConfig:
# -- Enables prometheus metrics server
enabled: true
servicePort: 9090
+ resources:
+ requests:
+ memory: 8Gi
+ limits:
+ memory: 8Gi
+
podAnnotations:
prometheus.io/scrape: "true"
prometheus.io/path: /metrics
@@ -22,11 +29,11 @@ controller:
}
]
}
- }
+ }
resourceRateLimit:
limit: 40
- burst: 4
+ burst: 4
# -- enable persistence using postgres
persistence:
@@ -43,7 +50,7 @@ controller:
port: 5432
database: GEN3_ARGO_DB_NAME
tableName: argo_workflows
- # # the database secrets must be in the same namespace of the controller
+ # # the database secrets must be in the same namespace of the controller
userNameSecret:
name: argo-db-creds
key: db_username
@@ -52,7 +59,7 @@ controller:
key: db_password
nodeStatusOffLoad: true
- workflowDefaults:
+ workflowDefaults:
spec:
archiveLogs: true
@@ -71,11 +78,16 @@ server:
baseHref: "/argo/"
# -- Extra arguments to provide to the Argo server binary, such as for disabling authentication.
extraArgs:
- - --auth-mode=server
- - --auth-mode=client
+ - --auth-mode=server
+ - --auth-mode=client
extraEnv:
- - name: ARGO_HTTP1
- value: "true"
+ - name: ARGO_HTTP1
+ value: "true"
+ resources:
+ requests:
+ memory: 8Gi
+ limits:
+ memory: 8Gi
# -- Influences the creation of the ConfigMap for the workflow-controller itself.
useDefaultArtifactRepo: true
diff --git a/kube/services/argo/workflows/fence-usersync-cron.yaml b/kube/services/argo/workflows/fence-usersync-cron.yaml
new file mode 100644
index 000000000..4723ce10f
--- /dev/null
+++ b/kube/services/argo/workflows/fence-usersync-cron.yaml
@@ -0,0 +1,10 @@
+apiVersion: argoproj.io/v1alpha1
+kind: CronWorkflow
+metadata:
+ name: fence-usersync-cron
+spec:
+ serviceAccountName: argo
+ schedule: "*/30 * * * *"
+ workflowSpec:
+ workflowTemplateRef:
+ name: fence-usersync-workflow
diff --git a/kube/services/argo/workflows/fence-usersync-wf.yaml b/kube/services/argo/workflows/fence-usersync-wf.yaml
new file mode 100644
index 000000000..d7f56a2ce
--- /dev/null
+++ b/kube/services/argo/workflows/fence-usersync-wf.yaml
@@ -0,0 +1,257 @@
+apiVersion: argoproj.io/v1alpha1
+kind: WorkflowTemplate
+metadata:
+ name: fence-usersync-workflow
+spec:
+ volumeClaimTemplates:
+ - metadata:
+ name: shared-data
+ spec:
+ accessModes: [ "ReadWriteOnce" ]
+ resources:
+ requests:
+ storage: 1Gi
+ serviceAccountName: argo
+ entrypoint: fence-usersync
+ arguments:
+ parameters:
+ - name: ADD_DBGAP
+ value: "false"
+ - name: ONLY_DBGAP
+ value: "false"
+ templates:
+ - name: fence-usersync
+ steps:
+ - - name: wait-for-fence
+ template: wait-for-fence
+ - - name: awshelper
+ template: awshelper
+ - - name: usersyncer
+ template: usersyncer
+
+ - name: wait-for-fence
+ container:
+ image: curlimages/curl:latest
+ command: ["/bin/sh","-c"]
+ args: ["while [ $(curl -sw '%{http_code}' http://fence-service -o /dev/null) -ne 200 ]; do sleep 5; echo 'Waiting for fence...'; done"]
+
+ - name: awshelper
+ container:
+ image: quay.io/cdis/awshelper:master
+ imagePullPolicy: Always
+ securityContext:
+ runAsUser: 0
+ env:
+ - name: gen3Env
+ valueFrom:
+ configMapKeyRef:
+ name: global
+ key: hostname
+ - name: userYamlS3Path
+ valueFrom:
+ configMapKeyRef:
+ name: manifest-global
+ key: useryaml_s3path
+ - name: slackWebHook
+ value: None
+ volumeMounts:
+ - name: shared-data
+ mountPath: /mnt/shared
+ command: ["/bin/bash"]
+ args:
+ - "-c"
+ - |
+ GEN3_HOME=/home/ubuntu/cloud-automation
+ source "${GEN3_HOME}/gen3/lib/utils.sh"
+ gen3_load "gen3/gen3setup"
+
+ if [ "${userYamlS3Path}" = 'none' ]; then
+ # echo "using local user.yaml"
+ # cp /var/www/fence/user.yaml /mnt/shared/user.yaml
+ echo "s3 yaml not provided - bailing out"
+ exit 1
+ else
+ # -----------------
+ echo "awshelper downloading ${userYamlS3Path} to /mnt/shared/user.yaml"
+ n=0
+ until [ $n -ge 5 ]; do
+ echo "Download attempt $n"
+ aws s3 cp "${userYamlS3Path}" /mnt/shared/user.yaml && break
+ n=$[$n+1]
+ sleep 2
+ done
+ fi
+ if [[ ! -f /mnt/shared/user.yaml ]]; then
+ echo "awshelper failed to retrieve /mnt/shared/user.yaml"
+ exit 1
+ fi
+ #-----------
+ echo "awshelper updating etl configmap"
+ if ! gen3 gitops etl-convert < /mnt/shared/user.yaml > /tmp/user.yaml; then
+ echo "ERROR: failed to generate ETL config"
+ exit 1
+ fi
+ # kubectl delete configmap fence > /dev/null 2>&1
+ # kubectl create configmap fence --from-file=/tmp/user.yaml
+ if [ "${slackWebHook}" != 'None' ]; then
+ curl -X POST --data-urlencode "payload={\"text\": \"AWSHelper: Syncing users on ${gen3Env}\"}" "${slackWebHook}"
+ fi
+ echo "Helper exit ok"
+
+ - name: usersyncer
+ volumes:
+ - name: yaml-merge
+ configMap:
+ name: "fence-yaml-merge"
+ - name: config-volume
+ secret:
+ secretName: "fence-config"
+ - name: creds-volume
+ secret:
+ secretName: "fence-creds"
+ - name: fence-google-app-creds-secret-volume
+ secret:
+ secretName: "fence-google-app-creds-secret"
+ - name: fence-google-storage-creds-secret-volume
+ secret:
+ secretName: "fence-google-storage-creds-secret"
+ - name: fence-ssh-keys
+ secret:
+ secretName: "fence-ssh-keys"
+ defaultMode: 0400
+ - name: fence-sshconfig
+ configMap:
+ name: "fence-sshconfig"
+ - name: projects
+ configMap:
+ name: "projects"
+ container:
+ image: quay.io/cdis/fence:master
+ imagePullPolicy: Always
+ env:
+ - name: PYTHONPATH
+ value: /var/www/fence
+ - name: SYNC_FROM_DBGAP
+ valueFrom:
+ configMapKeyRef:
+ name: manifest-global
+ key: sync_from_dbgap
+ - name: ADD_DBGAP
+ value: "{{workflow.parameters.ADD_DBGAP}}"
+ - name: ONLY_DBGAP
+ value: "{{workflow.parameters.ONLY_DBGAP}}"
+ - name: SLACK_SEND_DBGAP
+ valueFrom:
+ configMapKeyRef:
+ name: manifest-global
+ key: slack_send_dbgap
+ optional: true
+ - name: slackWebHook
+ valueFrom:
+ configMapKeyRef:
+ name: global
+ key: slack_webhook
+ optional: true
+ - name: gen3Env
+ valueFrom:
+ configMapKeyRef:
+ name: global
+ key: hostname
+ - name: FENCE_PUBLIC_CONFIG
+ valueFrom:
+ configMapKeyRef:
+ name: manifest-fence
+ key: fence-config-public.yaml
+ optional: true
+ volumeMounts:
+ - name: shared-data
+ mountPath: /mnt/shared
+ - name: "config-volume"
+ readOnly: true
+ mountPath: "/var/www/fence/fence-config.yaml"
+ subPath: fence-config.yaml
+ - name: "creds-volume"
+ readOnly: true
+ mountPath: "/var/www/fence/creds.json"
+ - name: "yaml-merge"
+ readOnly: true
+ mountPath: "/var/www/fence/yaml_merge.py"
+ - name: "fence-google-app-creds-secret-volume"
+ readOnly: true
+ mountPath: "/var/www/fence/fence_google_app_creds_secret.json"
+ subPath: fence_google_app_creds_secret.json
+ - name: "fence-google-storage-creds-secret-volume"
+ readOnly: true
+ mountPath: "/var/www/fence/fence_google_storage_creds_secret.json"
+ subPath: fence_google_storage_creds_secret.json
+ - name: "fence-ssh-keys"
+ mountPath: "/root/.ssh/id_rsa"
+ subPath: "id_rsa"
+ - name: "fence-ssh-keys"
+ mountPath: "/root/.ssh/id_rsa.pub"
+ subPath: "id_rsa.pub"
+ - name: "fence-sshconfig"
+ mountPath: "/root/.ssh/config"
+ subPath: "config"
+ - name: "projects"
+ mountPath: "/var/www/fence/projects.yaml"
+ subPath: "projects.yaml"
+ command: ["/bin/bash"]
+ args:
+ - "-c"
+ # Script always succeeds if it runs (echo exits with 0)
+ - |
+ echo "${ADD_DBGAP}"
+ echo "${ONLY_DBGAP}"
+ echo "${FENCE_PUBLIC_CONFIG:-""}" > "/var/www/fence/fence-config-public.yaml"
+ python /var/www/fence/yaml_merge.py /var/www/fence/fence-config-public.yaml /var/www/fence/fence-config-secret.yaml > /var/www/fence/fence-config.yaml
+ echo 'options use-vc' >> /etc/resolv.conf
+ let count=0
+ while [[ ! -f /mnt/shared/user.yaml && $count -lt 50 ]]; do
+ echo "fence container waiting for /mnt/shared/user.yaml";
+ sleep 2
+ let count=$count+1
+ done
+ if [[ "$SYNC_FROM_DBGAP" != True && "$ADD_DBGAP" != "true" ]]; then
+ if [[ -f /mnt/shared/user.yaml ]]; then
+ echo "running fence-create"
+ time fence-create sync --arborist http://arborist-service --yaml /mnt/shared/user.yaml
+ else
+ echo "/mnt/shared/user.yaml did not appear within timeout :-("
+ false # non-zero exit code
+ fi
+ exitcode=$?
+ else
+ output=$(mktemp "/tmp/fence-create-output_XXXXXX")
+ if [[ -f /mnt/shared/user.yaml && "$ONLY_DBGAP" != "true" ]]; then
+ echo "Running fence-create dbgap-sync with user.yaml - see $output"
+ time fence-create sync --arborist http://arborist-service --sync_from_dbgap "True" --projects /var/www/fence/projects.yaml --yaml /mnt/shared/user.yaml 2>&1 | tee "$output"
+ else
+ echo "Running fence-create dbgap-sync without user.yaml - see $output"
+ time fence-create sync --arborist http://arborist-service --sync_from_dbgap "True" --projects /var/www/fence/projects.yaml 2>&1 | tee "$output"
+ fi
+ exitcode="${PIPESTATUS[0]}"
+ echo "$output"
+ # Echo what files we are seeing on dbgap ftp to Slack
+ # We only do this step every 12 hours and not on weekends to reduce noise
+ if [[ -n "$SLACK_SEND_DBGAP" && "$SLACK_SEND_DBGAP" = True ]]; then
+ files=$(grep "Reading file" "$output")
+ let hour=$(date -u +10#%H)
+ let dow=$(date -u +10#%u)
+ if ! (( hour % 12 )) && (( dow < 6 )); then
+ if [ "${slackWebHook}" != 'None' ]; then
+ curl -X POST --data-urlencode "payload={\"text\": \"FenceHelper: \n\`\`\`\n${files}\n\`\`\`\"}" "${slackWebHook}"
+ fi
+ fi
+ fi
+ fi
+ if [[ $exitcode -ne 0 && "${slackWebHook}" != 'None' ]]; then
+ emptyfile=$(grep "EnvironmentError:" "$output")
+ if [ ! -z "$emptyfile" ]; then
+ curl -X POST --data-urlencode "payload={\"text\": \"JOBSKIPPED: User sync skipped on ${gen3Env} ${emptyfile}\"}" "${slackWebHook}";
+ else
+ curl -X POST --data-urlencode "payload={\"text\": \"JOBFAIL: User sync failed on ${gen3Env}\"}" "${slackWebHook}"
+ fi
+ fi
+ echo "Exit code: $exitcode"
+ exit "$exitcode"
\ No newline at end of file
diff --git a/kube/services/datadog/datadog-application.yaml b/kube/services/datadog/datadog-application.yaml
index f5a8925e1..19e0e1d86 100644
--- a/kube/services/datadog/datadog-application.yaml
+++ b/kube/services/datadog/datadog-application.yaml
@@ -5,14 +5,17 @@ metadata:
namespace: argocd
spec:
project: default
- source:
- chart: datadog
+ sources:
+ - chart: datadog
repoURL: 'https://helm.datadoghq.com'
targetRevision: 3.6.4
helm:
- valueFiles:
- - https://raw.githubusercontent.com/uc-cdis/cloud-automation/master/kube/services/datadog/values.yaml
+ valueFiles:
+ - $values/kube/services/datadog/values.yaml
releaseName: datadog
+ - repoURL: 'https://github.com/uc-cdis/cloud-automation.git'
+ targetRevision: master
+ ref: values
destination:
server: 'https://kubernetes.default.svc'
namespace: datadog
@@ -21,4 +24,4 @@ spec:
prune: true
selfHeal: true
syncOptions:
- - CreateNamespace=true
\ No newline at end of file
+ - CreateNamespace=true
diff --git a/kube/services/datadog/values.yaml b/kube/services/datadog/values.yaml
index c613bd079..fc0bbab8b 100644
--- a/kube/services/datadog/values.yaml
+++ b/kube/services/datadog/values.yaml
@@ -20,7 +20,18 @@ datadog:
# datadog.apiKeyExistingSecret -- Use existing Secret which stores API key instead of creating a new one. The value should be set with the `api-key` key inside the secret.
## If set, this parameter takes precedence over "apiKey".
- apiKeyExistingSecret: "datadog-agent"
+ apiKeyExistingSecret: "ddgov-apikey"
+
+ # datadog.site -- The site of the Datadog intake to send Agent data to.
+ # (documentation: https://docs.datadoghq.com/getting_started/site/)
+
+ ## Set to 'datadoghq.com' to send data to the US1 site (default).
+ ## Set to 'datadoghq.eu' to send data to the EU site.
+ ## Set to 'us3.datadoghq.com' to send data to the US3 site.
+ ## Set to 'us5.datadoghq.com' to send data to the US5 site.
+ ## Set to 'ddog-gov.com' to send data to the US1-FED site.
+ ## Set to 'ap1.datadoghq.com' to send data to the AP1 site.
+ site: ddog-gov.com
# datadog.kubeStateMetricsEnabled -- If true, deploys the kube-state-metrics deployment
## ref: https://github.com/kubernetes/kube-state-metrics/tree/kube-state-metrics-helm-chart-2.13.2/charts/kube-state-metrics
@@ -59,11 +70,13 @@ datadog:
apm:
# datadog.apm.socketEnabled -- Enable APM over Socket (Unix Socket or windows named pipe)
## ref: https://docs.datadoghq.com/agent/kubernetes/apm/
- socketEnabled: true
+ socketEnabled: false
# datadog.apm.portEnabled -- Enable APM over TCP communication (port 8126 by default)
## ref: https://docs.datadoghq.com/agent/kubernetes/apm/
- portEnabled: true
+ portEnabled: false
+
+ enabled: false
# datadog.apm.port -- Override the trace Agent port
## Note: Make sure your client is sending to the same UDP port.
@@ -80,15 +93,15 @@ datadog:
# datadog.processAgent.processCollection -- Set this to true to enable process collection in process monitoring agent
## Requires processAgent.enabled to be set to true to have any effect
- processCollection: true
+ processCollection: false
# datadog.processAgent.stripProcessArguments -- Set this to scrub all arguments from collected processes
## Requires processAgent.enabled and processAgent.processCollection to be set to true to have any effect
## ref: https://docs.datadoghq.com/infrastructure/process/?tab=linuxwindows#process-arguments-scrubbing
- stripProcessArguments: true
+ stripProcessArguments: false
# datadog.processAgent.processDiscovery -- Enables or disables autodiscovery of integrations
- processDiscovery: true
+ processDiscovery: false
## Enable systemProbe agent and provide custom configs
systemProbe:
@@ -222,7 +235,7 @@ datadog:
# timeout: 5
containerExcludeLogs: "kube_namespace:logging kube_namespace:argo name:pelican-export* name:job-task"
-
+ containerExclude: "kube_namespace:logging kube_namespace:kube-system kube_namespace:kubecost kube_namespace:argo kube_namespace:cortex-xdr kube_namespace:monitoring kube_namespace:datadog"
## This is the Datadog Cluster Agent implementation that handles cluster-wide
## metrics more cleanly, separates concerns for better rbac, and implements
## the external metrics API so you can autoscale HPAs based on datadog metrics
@@ -327,4 +340,3 @@ agents:
# agents.rbac.serviceAccountAnnotations -- Annotations to add to the ServiceAccount if agents.rbac.create is true
serviceAccountAnnotations: {}
-
diff --git a/kube/services/fenceshib/fenceshib-configmap.yaml b/kube/services/fenceshib/fenceshib-configmap.yaml
index 2412518c0..b8e55243d 100644
--- a/kube/services/fenceshib/fenceshib-configmap.yaml
+++ b/kube/services/fenceshib/fenceshib-configmap.yaml
@@ -231,48 +231,48 @@ data:
few exceptions for newer attributes where the name is the same for both versions. You will
usually want to uncomment or map the names for both SAML versions as a unit.
-->
-
+
-
+
-
+
-
+
-
+
-
+
-
+
-
+
@@ -286,7 +286,7 @@ data:
-
+
@@ -416,47 +416,51 @@ data:
- MIIGeDCCBWCgAwIBAgITKwAE3xjJ0BmsXYl8hwAAAATfGDANBgkqhkiG9w0BAQsF
- ADBOMRUwEwYKCZImiZPyLGQBGRYFTE9DQUwxHDAaBgoJkiaJk/IsZAEZFgxESEhT
- U0VDVVJJVFkxFzAVBgNVBAMTDk5JSC1EUEtJLUNBLTFBMB4XDTIxMDMyMzEwMjMz
- MloXDTIzMDMyMzEwMjMzMlowcDELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAk1EMREw
- DwYDVQQHEwhCZXRoZXNkYTEMMAoGA1UEChMDSEhTMQwwCgYDVQQLEwNOSUgxJTAj
- BgNVBAMTHHdhbXNpZ25pbmdmZWRlcmF0aW9uLm5paC5nb3YwggEiMA0GCSqGSIb3
- DQEBAQUAA4IBDwAwggEKAoIBAQDrng8ItLe/PdN7+GT50g0xd4Kc5zVLk5JhHV/M
- C0ICo3ulYpNnK8f0vGYvKXhG9B4gyYjjAVgY8dHL1Yi9Vw4OCMHiAhT80qidFhah
- xdcz8EaKWueqlMV+SZ8/6luahSmYYjKHAxICMg253gHsG6A64pWBsf58fzOYeEV/
- HIItkthIJ7Rh71gXeZwmcir3fAve1sQXrgXsRb265yFQaxLrRI+QA7k+Tiemlt4+
- 7wBOXdROm0kxGJT6u6+IG8g2Qdbc1JWaAmwROGCByREQzfMNUVpXCXJHhKSrHype
- z8Z0o4p2sLXyOysbBAmNoShMhvaaPlsrJt7PyDN5uj6KaXNNAgMBAAGjggMrMIID
- JzAdBgNVHQ4EFgQUb/4wTaSXJ6P1tAmI8mWJhMv1VHowHwYDVR0jBBgwFoAUeWw4
- jBnSyRkHcaYQ+YnwrdCDBZMwggESBgNVHR8EggEJMIIBBTCCAQGggf6ggfuGgcFs
- ZGFwOi8vL0NOPU5JSC1EUEtJLUNBLTFBLENOPU5JSERQS0lDQVNWQyxDTj1DRFAs
- Q049UHVibGljJTIwS2V5JTIwU2VydmljZXMsQ049U2VydmljZXMsQ049Q29uZmln
- dXJhdGlvbixEQz1ESEhTU0VDVVJJVFksREM9TE9DQUw/Y2VydGlmaWNhdGVSZXZv
- Y2F0aW9uTGlzdD9iYXNlP29iamVjdENsYXNzPWNSTERpc3RyaWJ1dGlvblBvaW50
- hjVodHRwOi8vTklIRFBLSUNSTC5OSUguR09WL0NlcnREYXRhL05JSC1EUEtJLUNB
- LTFBLmNybDCCATkGCCsGAQUFBwEBBIIBKzCCAScwgbQGCCsGAQUFBzAChoGnbGRh
- cDovLy9DTj1OSUgtRFBLSS1DQS0xQSxDTj1BSUEsQ049UHVibGljJTIwS2V5JTIw
- U2VydmljZXMsQ049U2VydmljZXMsQ049Q29uZmlndXJhdGlvbixEQz1ESEhTU0VD
- VVJJVFksREM9TE9DQUw/Y0FDZXJ0aWZpY2F0ZT9iYXNlP29iamVjdENsYXNzPWNl
- cnRpZmljYXRpb25BdXRob3JpdHkwQQYIKwYBBQUHMAKGNWh0dHA6Ly9OSUhEUEtJ
- Q1JMLk5JSC5HT1YvQ2VydERhdGEvTklILURQS0ktQ0EtMUEuY3J0MCsGCCsGAQUF
- BzABhh9odHRwOi8vTklIRFBLSU9DU1AuTklILkdPVi9vY3NwMAsGA1UdDwQEAwIF
- oDA9BgkrBgEEAYI3FQcEMDAuBiYrBgEEAYI3FQiHscIohpH8F4b5jwiG7rxzgbud
- JR2F39lChY/gIQIBZQIBJDAdBgNVHSUEFjAUBggrBgEFBQcDAgYIKwYBBQUHAwEw
- JwYJKwYBBAGCNxUKBBowGDAKBggrBgEFBQcDAjAKBggrBgEFBQcDATANBgkqhkiG
- 9w0BAQsFAAOCAQEAkgyJY5Pdyz7hF83hu9BsijKHOdMWe8fDyN7GsDR1O0URBuJW
- oK7FsemmITwMCiDhH+NDkrRWM27EQhuv4w4yIUIFVqPeJS+Ff3gKyqB/VNcrDbfc
- 1RU7Q0qyxwpItm/cEUTTTnfNppf/O6wn/FUbpvPbHMNukqhjtbiYJrmKcO1U0lEu
- i7FlnPW6rRmEbhp/bChVJMkxw8sBH4K3Vrx9c15nPuBgv4E1cFLe1rwrt3wEeRlU
- OaWMTbLwYBaBo2BC3iDHzWioSl4OtzItEkT5XxNOhViuoty09Tu5zd7byqiV7To3
- YVc+Yi/VBubgB+osvPXPAv0AQCLo88dO7MBWQg==
+ MIIGrDCCBZSgAwIBAgITKwAL5UokKuFiZ7VPlQAAAAvlSjANBgkqhkiG9w0B
+ AQsFADBOMRUwEwYKCZImiZPyLGQBGRYFTE9DQUwxHDAaBgoJkiaJk/IsZAEZ
+ FgxESEhTU0VDVVJJVFkxFzAVBgNVBAMTDk5JSC1EUEtJLUNBLTFBMB4XDTIy
+ MTIwNjE2NTUzNloXDTI0MTIwNTE2NTUzNlowgaMxCzAJBgNVBAYTAlVTMREw
+ DwYDVQQIEwhNYXJ5bGFuZDERMA8GA1UEBxMIQmV0aGVzZGExDDAKBgNVBAoT
+ A05JSDEMMAoGA1UECxMDQ0lUMSUwIwYDVQQDExx3YW1zaWduaW5nZmVkZXJh
+ dGlvbi5uaWguZ292MSswKQYJKoZIhvcNAQkBFhxuaWhsb2dpbnN1cHBvcnRA
+ bWFpbC5uaWguZ292MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA
+ o3aHcoq0SAof+GXCl6aZOw9w8CrWTSxz3hxEvG2RaJ4Bm0+UQEcQHArCiQ+Y
+ Wjmx8eORRwOblQKmcozpQAOxNRu7fbJn8msdryKdju+nBJg/gn0Ygn44EJEq
+ pZmBn+FBRgH/lADRdpLM8uO654i1x5Pr8TQtNMevGNot8oiacOZkB1A5N6+l
+ 4guxToA2ZuNhHRhwrpd1wIyq6sgY3J8XpWlx54HjDc8bZvia0bEhJns/qZpM
+ mAh5wvIP1I2JngqJ55mpl/btbIXX+uTn3tIomWre3KKjDKh9ZjUQom8VqTzp
+ oGYHSjTExuopsHnnVpC1HTW0QJoxFa5yR1f2fiUTZwIDAQABo4IDKzCCAycw
+ HQYDVR0OBBYEFMqGnTB0W0rFy8tD2y6JnApAzRCyMB8GA1UdIwQYMBaAFHls
+ OIwZ0skZB3GmEPmJ8K3QgwWTMIIBEgYDVR0fBIIBCTCCAQUwggEBoIH+oIH7
+ hoHBbGRhcDovLy9DTj1OSUgtRFBLSS1DQS0xQSxDTj1OSUhEUEtJQ0FTVkMs
+ Q049Q0RQLENOPVB1YmxpYyUyMEtleSUyMFNlcnZpY2VzLENOPVNlcnZpY2Vz
+ LENOPUNvbmZpZ3VyYXRpb24sREM9REhIU1NFQ1VSSVRZLERDPUxPQ0FMP2Nl
+ cnRpZmljYXRlUmV2b2NhdGlvbkxpc3Q/YmFzZT9vYmplY3RDbGFzcz1jUkxE
+ aXN0cmlidXRpb25Qb2ludIY1aHR0cDovL05JSERQS0lDUkwuTklILkdPVi9D
+ ZXJ0RGF0YS9OSUgtRFBLSS1DQS0xQS5jcmwwggE5BggrBgEFBQcBAQSCASsw
+ ggEnMIG0BggrBgEFBQcwAoaBp2xkYXA6Ly8vQ049TklILURQS0ktQ0EtMUEs
+ Q049QUlBLENOPVB1YmxpYyUyMEtleSUyMFNlcnZpY2VzLENOPVNlcnZpY2Vz
+ LENOPUNvbmZpZ3VyYXRpb24sREM9REhIU1NFQ1VSSVRZLERDPUxPQ0FMP2NB
+ Q2VydGlmaWNhdGU/YmFzZT9vYmplY3RDbGFzcz1jZXJ0aWZpY2F0aW9uQXV0
+ aG9yaXR5MEEGCCsGAQUFBzAChjVodHRwOi8vTklIRFBLSUNSTC5OSUguR09W
+ L0NlcnREYXRhL05JSC1EUEtJLUNBLTFBLmNydDArBggrBgEFBQcwAYYfaHR0
+ cDovL05JSERQS0lPQ1NQLk5JSC5HT1Yvb2NzcDALBgNVHQ8EBAMCBaAwPQYJ
+ KwYBBAGCNxUHBDAwLgYmKwYBBAGCNxUIh7HCKIaR/BeG+Y8Ihu68c4G7nSUd
+ gZOnCYKOiSECAWQCAUwwHQYDVR0lBBYwFAYIKwYBBQUHAwEGCCsGAQUFBwMC
+ MCcGCSsGAQQBgjcVCgQaMBgwCgYIKwYBBQUHAwEwCgYIKwYBBQUHAwIwDQYJ
+ KoZIhvcNAQELBQADggEBAGxvrAxX3RUmFXeUa1UewCWfzWCnI3wTMKkqvmI2
+ CySFEOniXNXC/hhu0i000QD9mS527u+lGqgN6eaUaEaSDXMszYR753whJ1Wf
+ xJ50zji2mvUWDyzdRbcvxbVfYe6h6+TzQl0gd8z1DjAxkUWydv9aAFYHNiIY
+ BbhPqvrlOT+oV8CYI8ghEg7qyxo1mso99aVGCbnBA+6IC+jt8lvwQYFISW8J
+ lxJbz5P9fyAbQFuMvcvSkx1WWCCK+d3WsLzU2JETjmYNoID5skFaIfrq+rV1
+ nBqQfCSKApojRaUMwn83IRcosSu0Y3dhpmxz2oDkOURbwOkuPJRgYnZRLBDn
+ e50=
-
+
urn:oasis:names:tc:SAML:2.0:nameid-format:persistent
-
+
diff --git a/kube/services/fluentd/gen3-1.15.3.conf b/kube/services/fluentd/gen3-1.15.3.conf
new file mode 100644
index 000000000..d9b6bed5d
--- /dev/null
+++ b/kube/services/fluentd/gen3-1.15.3.conf
@@ -0,0 +1,231 @@
+#
+# Gen3 customization of fluent config.
+# - tries to extract structure from gen3 service logs
+# - includes the default conf at the bottom - just adds prefix rules
+#
+# Deploy by:
+# - mount this file into the container at /fluentd/etc/gen3.conf
+# - set environment variable FLUENTD_CONF=gen3.conf
+#
+# https://www.fluentd.org/guides/recipes/docker-logging
+# https://docs.fluentd.org/v0.12/articles/config-file#introduction:-the-life-of-a-fluentd-event
+# https://docs.fluentd.org/v1.0/articles/out_rewrite_tag_filter
+
+
+
+
+
+ @type tail
+ @id in_tail_container_logs
+ path /var/log/containers/*.log
+ pos_file /var/log/fluentd-containers.log.pos
+ tag "#{ENV['FLUENT_CONTAINER_TAIL_TAG'] || 'kubernetes.*'}"
+ exclude_path "#{ENV['FLUENT_CONTAINER_TAIL_EXCLUDE_PATH'] || use_default}"
+ read_from_head true
+
+ @type "#{ENV['FLUENT_CONTAINER_TAIL_PARSER_TYPE'] || 'json'}"
+ time_format %Y-%m-%dT%H:%M:%S.%NZ
+
+
+
+
+ @type tail
+ path /var/log/messages
+ pos_file /var/log/host-messages.log.pos
+
+ @type syslog
+
+ tag host.messages
+
+
+
+
+ @type tail
+ path /var/log/secure
+ pos_file /var/log/host-secure.log.pos
+
+ @type syslog
+
+ tag host.secure
+
+
+
+ @type tail
+ @id in_tail_docker
+ path /var/log/docker.log
+ pos_file /var/log/fluentd-docker.log.pos
+ tag docker
+
+ @type regexp
+ expression /^time="(?
+
+
+
+
+ @type tail
+ @id in_tail_kubelet
+ multiline_flush_interval 5s
+ path /var/log/kubelet.log
+ pos_file /var/log/fluentd-kubelet.log.pos
+ tag kubelet
+
+ @type kubernetes
+
+
+
+
+
+
+ @type kubernetes_metadata
+ @id filter_kube_metadata
+ kubernetes_url "#{ENV['FLUENT_FILTER_KUBERNETES_URL'] || 'https://' + ENV.fetch('KUBERNETES_SERVICE_HOST') + ':' + ENV.fetch('KUBERNETES_SERVICE_PORT') + '/api'}"
+ verify_ssl "#{ENV['KUBERNETES_VERIFY_SSL'] || true}"
+ ca_file "#{ENV['KUBERNETES_CA_FILE']}"
+ skip_labels "#{ENV['FLUENT_KUBERNETES_METADATA_SKIP_LABELS'] || 'false'}"
+ skip_container_metadata "#{ENV['FLUENT_KUBERNETES_METADATA_SKIP_CONTAINER_METADATA'] || 'false'}"
+ skip_master_url "#{ENV['FLUENT_KUBERNETES_METADATA_SKIP_MASTER_URL'] || 'false'}"
+ skip_namespace_metadata "#{ENV['FLUENT_KUBERNETES_METADATA_SKIP_NAMESPACE_METADATA'] || 'false'}"
+
+
+
+ @type null
+
+
+
+ @type null
+
+
+
+ @type rewrite_tag_filter
+
+ key $._HOSTNAME
+ pattern ^(.+)$
+ tag $1.docker
+
+
+
+
+ @type rewrite_tag_filter
+
+ key $._HOSTNAME
+ pattern ^(.+)$
+ tag $1.kubelet
+
+
+
+
+ @type rewrite_tag_filter
+
+ key $.host
+ pattern ^(.+)$
+ tag $1.messages
+
+
+
+
+ @type rewrite_tag_filter
+
+ key $.host
+ pattern ^(.+)$
+ tag $1.secure
+
+
+
+
+ @type rewrite_tag_filter
+
+    # json structured log - consider adopting a standard json schema:
+ # https://github.com/timberio/log-event-json-schema
+ key message
+ pattern /^\{\s*"gen3log":/
+ tag kubernetes.gen3.json.${tag}
+
+
+ # combined log format - default Apache and nginx structure
+ # https://httpd.apache.org/docs/1.3/logs.html#combined
+ key message
+ pattern /^(((\d+\.\d+\.\d+\.\d+)|-)\s+){2}\S+\s+\[\d\d?\//
+ tag kubernetes.gen3.combined.${tag}
+
+
+ # unstructured log line
+ key message
+ pattern /\S/
+ tag kubernetes.gen3.raw.${tag}
+
+
+
+
+
+ @type record_transformer
+
+ log_type json
+ # This one doesn't work for whatever reason, if you do ${record["kubernetes"]} the whole blob would be added, but can't access subobjects
+ #container_name ${record["kubernetes"]["container_name"]}
+
+
+
+
+ @type record_transformer
+
+ log_type combined
+
+
+
+
+ @type record_transformer
+
+ log_type raw
+
+
+
+
+ @type rewrite_tag_filter
+
+ key $.kubernetes.pod_name
+ pattern ^(.+)$
+ tag "#{Time.now.strftime('%Y-%m-%d')}.$1"
+
+#
+# key $.kubernetes
+# pattern ^(.+)$
+# tag $1.container_name
+#
+
+
+#
+# @type rewrite_tag_filter
+#
+# key $.kubernetes.container_name
+# pattern ^(.+)$
+ #tag $1.${tag}
+# tag ${tag}.$1
+#
+#
+
+# TODO:
+# * python stack traces: "Traceback (most recent call last):""
+# https://docs.fluentd.org/v0.12/articles/parser_multiline#formatn
+#
+# Idea: add `visitor` cookie to revproxy ...
+
+
+
+ @type cloudwatch_logs
+ @id out_cloudwatch_logs
+ log_group_name "#{ENV['LOG_GROUP_NAME']}"
+ auto_create_stream true
+ use_tag_as_stream true
+ retention_in_days "#{ENV['RETENTION_IN_DAYS'] || 'nil'}"
+ json_handler yajl # To avoid UndefinedConversionError
+ log_rejected_request "#{ENV['LOG_REJECTED_REQUEST']}" # Log rejected request for missing parts
+
+
+
+#@include fluent.conf
+#@include conf.d/*.conf
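For reference, the json routing rule above keys off log lines that start with `{"gen3log":` and tags them `kubernetes.gen3.json.<tag>`. A hypothetical example of a service log line that would match (the inner field names are illustrative, not a fixed schema):

```python
# Hypothetical example only: emits a line matching the rewrite_tag_filter
# pattern /^\{\s*"gen3log":/ used above to route json-structured gen3 logs.
import json

record = {"gen3log": {"message": "login ok", "http_status_code": 200}}
print(json.dumps(record))  # stdout is tailed from /var/log/containers/*.log
```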
diff --git a/kube/services/gen3-discovery-ai/README.md b/kube/services/gen3-discovery-ai/README.md
new file mode 100644
index 000000000..4c20678e0
--- /dev/null
+++ b/kube/services/gen3-discovery-ai/README.md
@@ -0,0 +1,42 @@
+# Gen3 Discovery AI Configuration
+
+Expects data in a `gen3-discovery-ai` folder relative to
+where the `manifest.json` is.
+
+Basic setup:
+
+`{{dir where manifest.json is}}/gen3-discovery-ai/knowledge/`
+
+- `tsvs` folder
+ - tsvs with topic_name at beginning of file
+- `markdown` folder
+ - {{topic_name_1}}
+ - markdown file(s)
+ - {{topic_name_2}}
+ - markdown file(s)
+
+The `kube-setup-gen3-discovery-ai` script syncs the above `/knowledge` folder to
+an S3 bucket. The service configuration then pulls from the S3 bucket and runs load commands
+to get the data into chromadb.
+
+> Note: See the `gen3-discovery-ai` service repo docs and README for more details on data load capabilities.
+
+Check the `gen3-discovery-ai-deploy.yaml` for what commands are being run in the automation.
+
+Expects secrets set up in the `g3auto/gen3-discovery-ai` folder:
+ - `credentials.json`: Google service account key if using a topic with Google Vertex AI
+ - `env`: .env file contents for service configuration (see service repo for a default one)
+
+## Populating Disk for In-Memory Vectordb Chromadb
+
+To set up pre-configured topics, we need to load data into Chromadb
+(an in-memory vector database with an option to persist to disk).
+
+To load topics consistently, we set up an S3 bucket to house the persisted
+data for the vectordb.
+
+### Getting data from S3 into memory
+
+We specify a path for Chromadb to use for persisted data; when it sees
+data there, it loads it in. So the deployment automation 1. syncs the bucket
+locally with `aws s3 sync` and then 2. calls a script to load the files from
+there into the in-memory vector store, as sketched below.
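As a rough illustration of that load step, here is a minimal, hypothetical Python sketch. It assumes chromadb's `PersistentClient` API and an invented topic name; the paths (`/gen3discoveryai/knowledge`, `.../knowledge/tmp/tsvs`) come from the deploy YAML below, and the real logic lives in the service's `load_into_knowledge_store.py`.

```python
# Hypothetical sketch only; the real loader is the service's
# load_into_knowledge_store.py. The topic name "example-topic" is an assumption.
import csv

import chromadb

# Chromadb persists to (and reloads from) this directory; the init
# containers populate it from the S3 bucket before the service starts.
client = chromadb.PersistentClient(path="/gen3discoveryai/knowledge")
collection = client.get_or_create_collection(name="example-topic")

# Load one synced TSV from the temporary sync location into the vector store.
with open("/gen3discoveryai/knowledge/tmp/tsvs/example-topic.tsv", newline="") as f:
    rows = list(csv.DictReader(f, delimiter="\t"))

collection.add(
    ids=[str(i) for i in range(len(rows))],
    documents=[" ".join(str(v) for v in row.values()) for row in rows],
)
print(f"{collection.count()} documents loaded")
```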
diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml
new file mode 100644
index 000000000..dcfe03248
--- /dev/null
+++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-deploy.yaml
@@ -0,0 +1,181 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: gen3-discovery-ai-deployment
+spec:
+ selector:
+ # Only select pods based on the 'app' label
+ matchLabels:
+ app: gen3-discovery-ai
+ release: production
+ strategy:
+ type: RollingUpdate
+ rollingUpdate:
+ maxSurge: 1
+ maxUnavailable: 0
+ template:
+ metadata:
+ labels:
+ app: gen3-discovery-ai
+ release: production
+ GEN3_DATE_LABEL
+ spec:
+ serviceAccountName: gen3-discovery-ai-sa
+ volumes:
+ - name: gen3-discovery-ai-g3auto-volume
+ secret:
+ secretName: gen3-discovery-ai-g3auto
+ - name: gen3-discovery-ai-knowledge-library-volume
+ emptyDir: {}
+ initContainers:
+ # chromadb's persisted disk support requires the ability to write. We don't technically need this ability
+ # since we're populating the entirety of the database from configured files (no live updates).
+ #
+ # Solution: utilize emptyDir as a writable space.
+ #
+ # Procedure: in init containers, copy files from s3 to writable
+ # temporary space in emptyDir, use files from writable space
+      #            to load into knowledge library, move final knowledge library
+ # files into top-level emptyDir and make available in final container
+ - name: gen3-discovery-ai-aws-init
+ GEN3_AWSHELPER_IMAGE|-image: quay.io/cdis/awshelper:master-|
+ imagePullPolicy: Always
+ ports:
+ - containerPort: 8080
+ env:
+ - name: GEN3_DEBUG
+ GEN3_DEBUG_FLAG|-value: "False"-|
+ volumeMounts:
+ - name: gen3-discovery-ai-g3auto-volume
+ readOnly: true
+ mountPath: /gen3discoveryai/.env
+ subPath: env
+ - name: gen3-discovery-ai-g3auto-volume
+ readOnly: true
+ mountPath: /gen3discoveryai/credentials.json
+ subPath: credentials.json
+ - name: gen3-discovery-ai-g3auto-volume
+ readOnly: true
+ mountPath: /gen3discoveryai/storage_config.json
+ subPath: storage_config.json
+ - name: gen3-discovery-ai-knowledge-library-volume
+ mountPath: /gen3discoveryai/knowledge
+ imagePullPolicy: Always
+ resources:
+ requests:
+ cpu: 1
+ limits:
+ cpu: 2
+ memory: 512Mi
+ command: ["/bin/bash"]
+ args:
+ - "-c"
+ - |
+ bucketName=$(grep -o "\"bucket\": *\"[^\"]*\"" /gen3discoveryai/storage_config.json | awk -F'"' '{print $4}')
+ echo BUCKET: "$bucketName"
+ echo
+ echo BEFORE /gen3discoveryai/knowledge
+ ls -Ra /gen3discoveryai/knowledge
+ echo
+ echo syncing from s3
+ aws s3 sync "s3://${bucketName}" "/gen3discoveryai/knowledge/tmp"
+ echo
+ echo AFTER /gen3discoveryai/knowledge
+ ls -Ra /gen3discoveryai/knowledge
+ - name: gen3-discovery-ai-knowledge-init
+ GEN3_GEN3-DISCOVERY-AI_IMAGE
+ imagePullPolicy: Always
+ ports:
+ - containerPort: 8080
+ env:
+ - name: GEN3_DEBUG
+ GEN3_DEBUG_FLAG|-value: "False"-|
+ - name: ANONYMIZED_TELEMETRY
+ value: "False"
+ - name: GOOGLE_APPLICATION_CREDENTIALS
+ value: /gen3discoveryai/credentials.json
+ volumeMounts:
+ - name: gen3-discovery-ai-g3auto-volume
+ readOnly: true
+ mountPath: /gen3discoveryai/.env
+ subPath: env
+ - name: gen3-discovery-ai-g3auto-volume
+ readOnly: true
+ mountPath: /gen3discoveryai/credentials.json
+ subPath: credentials.json
+ - name: gen3-discovery-ai-g3auto-volume
+ readOnly: true
+ mountPath: /gen3discoveryai/storage_config.json
+ subPath: storage_config.json
+ - name: gen3-discovery-ai-knowledge-library-volume
+ mountPath: /gen3discoveryai/knowledge
+ imagePullPolicy: Always
+ resources:
+ requests:
+ cpu: 1
+ limits:
+ cpu: 2
+ memory: 512Mi
+ command: ["/bin/bash"]
+ args:
+ - "-c"
+ - |
+ echo
+ echo BEFORE /gen3discoveryai/knowledge
+ ls -Ra /gen3discoveryai/knowledge
+ echo running load_into_knowledge_store.py
+ poetry run python /gen3discoveryai/bin/load_into_knowledge_store.py tsvs /gen3discoveryai/knowledge/tmp/tsvs
+
+ if [ -d "/gen3discoveryai/knowledge/tmp/markdown" ]; then
+ for dir in "/gen3discoveryai/knowledge/tmp/markdown"/*; do
+ if [ -d "$dir" ]; then
+ dir_name=$(basename "$dir")
+
+ echo "Processing directory: $dir_name. Full path: $dir"
+ poetry run python /gen3discoveryai/bin/load_into_knowledge_store.py markdown --topic $dir_name $dir
+ fi
+ done
+ else
+ echo "Not syncing markdown, directory not found: /gen3discoveryai/knowledge/tmp/markdown"
+ fi
+
+ rm -r /gen3discoveryai/knowledge/tmp/
+ echo
+ echo AFTER /gen3discoveryai/knowledge
+ ls -Ra /gen3discoveryai/knowledge
+ containers:
+ - name: gen3-discovery-ai
+ GEN3_GEN3-DISCOVERY-AI_IMAGE
+ imagePullPolicy: Always
+ ports:
+ - containerPort: 8080
+ env:
+ - name: GEN3_DEBUG
+ GEN3_DEBUG_FLAG|-value: "False"-|
+ - name: ANONYMIZED_TELEMETRY
+ value: "False"
+ - name: GOOGLE_APPLICATION_CREDENTIALS
+ value: /gen3discoveryai/credentials.json
+ volumeMounts:
+ - name: gen3-discovery-ai-g3auto-volume
+ readOnly: true
+ mountPath: /gen3discoveryai/.env
+ subPath: env
+ - name: gen3-discovery-ai-g3auto-volume
+ readOnly: true
+ mountPath: /gen3discoveryai/credentials.json
+ subPath: credentials.json
+ - name: gen3-discovery-ai-g3auto-volume
+ readOnly: true
+ mountPath: /gen3discoveryai/storage_config.json
+ subPath: storage_config.json
+ - name: gen3-discovery-ai-knowledge-library-volume
+ mountPath: /gen3discoveryai/knowledge
+ imagePullPolicy: Always
+ resources:
+ requests:
+ cpu: 1
+ limits:
+ cpu: 2
+ # NOTE: If the configured data for the knowledge library (vector database) is large, you may need to bump this
+ memory: 512Mi
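The `grep`/`awk` pipeline in the `gen3-discovery-ai-aws-init` container above simply extracts the `bucket` value from the mounted `storage_config.json`. A hypothetical Python equivalent of that lookup:

```python
# Sketch of what the init container's grep/awk bucket lookup does:
# read the "bucket" key from the mounted storage_config.json.
import json

with open("/gen3discoveryai/storage_config.json") as f:
    bucket_name = json.load(f)["bucket"]

# The shell version then runs: aws s3 sync "s3://${bucketName}" ".../knowledge/tmp"
print(f"BUCKET: {bucket_name}")
```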
diff --git a/kube/services/gen3-discovery-ai/gen3-discovery-ai-service.yaml b/kube/services/gen3-discovery-ai/gen3-discovery-ai-service.yaml
new file mode 100644
index 000000000..b4734c3b8
--- /dev/null
+++ b/kube/services/gen3-discovery-ai/gen3-discovery-ai-service.yaml
@@ -0,0 +1,21 @@
+kind: Service
+apiVersion: v1
+metadata:
+ name: gen3-discovery-ai-service
+spec:
+ selector:
+ app: gen3-discovery-ai
+ release: production
+ ports:
+ - protocol: TCP
+ port: 80
+ targetPort: 8089
+ name: http
+ nodePort: null
+ - protocol: TCP
+ port: 443
+ targetPort: 443
+ name: https
+ nodePort: null
+ type: ClusterIP
+
diff --git a/kube/services/guppy/guppy-deploy.yaml b/kube/services/guppy/guppy-deploy.yaml
index 01a8905de..c3e8d121c 100644
--- a/kube/services/guppy/guppy-deploy.yaml
+++ b/kube/services/guppy/guppy-deploy.yaml
@@ -155,6 +155,6 @@ spec:
resources:
requests:
cpu: 100m
- memory: 128Mi
+ memory: 256Mi
limits:
- memory: 1200Mi
+ memory: 2000Mi
diff --git a/kube/services/ingress/ingress.yaml b/kube/services/ingress/ingress.yaml
index 3ceacf608..1db08e8ef 100644
--- a/kube/services/ingress/ingress.yaml
+++ b/kube/services/ingress/ingress.yaml
@@ -11,7 +11,7 @@ metadata:
alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS":443}]'
alb.ingress.kubernetes.io/load-balancer-attributes: idle_timeout.timeout_seconds=600
alb.ingress.kubernetes.io/actions.ssl-redirect: '{"Type": "redirect", "RedirectConfig": { "Protocol": "HTTPS", "Port": "443", "StatusCode": "HTTP_301"}}'
- alb.ingress.kubernetes.io/ssl-policy: "ELBSecurityPolicy-TLS-1-2-2017-01"
+ alb.ingress.kubernetes.io/ssl-policy: ELBSecurityPolicy-TLS13-1-2-FIPS-2023-04
spec:
ingressClassName: alb
rules:
diff --git a/kube/services/jenkins/jenkins-deploy.yaml b/kube/services/jenkins/jenkins-deploy.yaml
index c0eae2040..954e996f2 100644
--- a/kube/services/jenkins/jenkins-deploy.yaml
+++ b/kube/services/jenkins/jenkins-deploy.yaml
@@ -38,6 +38,10 @@ spec:
operator: In
values:
- on-demand
+ - key: topology.kubernetes.io/zone
+ operator: In
+ values:
+ - us-east-1a
serviceAccountName: jenkins-service
securityContext:
runAsUser: 1000
diff --git a/kube/services/jenkins2-ci-worker/jenkins2-agent-service.yaml b/kube/services/jenkins2-ci-worker/jenkins2-agent-service.yaml
new file mode 100644
index 000000000..7f4e58109
--- /dev/null
+++ b/kube/services/jenkins2-ci-worker/jenkins2-agent-service.yaml
@@ -0,0 +1,17 @@
+apiVersion: v1
+kind: Service
+metadata:
+ labels:
+ name: jenkins-agent-service
+ name: jenkins-agent
+ namespace: default
+spec:
+ ports:
+ - name: slavelistener
+ port: 50000
+ protocol: TCP
+ targetPort: 50000
+ selector:
+ app: jenkins
+ sessionAffinity: None
+ type: ClusterIP
diff --git a/kube/services/jenkins2-ci-worker/jenkins2-ci-worker-deploy.yaml b/kube/services/jenkins2-ci-worker/jenkins2-ci-worker-deploy.yaml
new file mode 100644
index 000000000..3dea38a5c
--- /dev/null
+++ b/kube/services/jenkins2-ci-worker/jenkins2-ci-worker-deploy.yaml
@@ -0,0 +1,149 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: jenkins-ci-worker-deployment
+spec:
+ selector:
+ # Only select pods based on the 'app' label
+ matchLabels:
+ app: jenkins-ci-worker
+ template:
+ metadata:
+ labels:
+ app: jenkins-ci-worker
+ # for network policy
+ netnolimit: "yes"
+ annotations:
+ "cluster-autoscaler.kubernetes.io/safe-to-evict": "false"
+ spec:
+ affinity:
+ nodeAffinity:
+ requiredDuringSchedulingIgnoredDuringExecution:
+ nodeSelectorTerms:
+ - matchExpressions:
+ - key: eks.amazonaws.com/capacityType
+ operator: In
+ values:
+ - ONDEMAND
+ - matchExpressions:
+ - key: karpenter.sh/capacity-type
+ operator: In
+ values:
+ - on-demand
+ serviceAccountName: jenkins-service
+ securityContext:
+ runAsUser: 1000
+ fsGroup: 1000
+ initContainers:
+ - args:
+ - -c
+ - |
+ # fix permissions for /var/run/docker.sock
+ chmod 666 /var/run/docker.sock
+ echo "done"
+ command:
+ - /bin/bash
+ image: quay.io/cdis/awshelper:master
+ imagePullPolicy: Always
+ name: awshelper
+ resources: {}
+ securityContext:
+ allowPrivilegeEscalation: false
+ runAsUser: 0
+ terminationMessagePath: /dev/termination-log
+ terminationMessagePolicy: File
+ volumeMounts:
+ - mountPath: /var/run/docker.sock
+ name: dockersock
+ containers:
+ #
+ # See for details on running docker in a pod:
+ # https://estl.tech/accessing-docker-from-a-kubernetes-pod-68996709c04b
+ #
+ - name: jenkins-worker
+ image: "quay.io/cdis/gen3-ci-worker:master"
+ ports:
+ - containerPort: 8080
+ env:
+ - name: JENKINS_URL
+ value: "https://jenkins2.planx-pla.net"
+ - name: JENKINS_SECRET
+ valueFrom:
+ secretKeyRef:
+ name: jenkins-ci-worker-g3auto
+ key: jenkins-jnlp-agent-secret
+ - name: JENKINS_AGENT_NAME
+ value: "gen3-ci-worker"
+ - name: JENKINS_TUNNEL
+ value: "jenkins-agent:50000"
+ - name: AWS_DEFAULT_REGION
+ value: us-east-1
+ - name: JAVA_OPTS
+ value: "-Xmx3072m"
+ - name: AWS_ACCESS_KEY_ID
+ valueFrom:
+ secretKeyRef:
+ name: jenkins-secret
+ key: aws_access_key_id
+ - name: AWS_SECRET_ACCESS_KEY
+ valueFrom:
+ secretKeyRef:
+ name: jenkins-secret
+ key: aws_secret_access_key
+ - name: GOOGLE_EMAIL_AUX1
+ valueFrom:
+ secretKeyRef:
+ name: google-acct1
+ key: email
+ - name: GOOGLE_PASSWORD_AUX1
+ valueFrom:
+ secretKeyRef:
+ name: google-acct1
+ key: password
+ - name: GOOGLE_EMAIL_AUX2
+ valueFrom:
+ secretKeyRef:
+ name: google-acct2
+ key: email
+ - name: GOOGLE_PASSWORD_AUX2
+ valueFrom:
+ secretKeyRef:
+ name: google-acct2
+ key: password
+ - name: GOOGLE_APP_CREDS_JSON
+ valueFrom:
+ secretKeyRef:
+ name: jenkins-g3auto
+ key: google_app_creds.json
+ resources:
+ limits:
+ cpu: 0.9
+ memory: 4096Mi
+ ephemeral-storage: 500Mi
+ imagePullPolicy: Always
+ volumeMounts:
+ - name: "cert-volume"
+ readOnly: true
+ mountPath: "/mnt/ssl/service.crt"
+ subPath: "service.crt"
+ - name: "cert-volume"
+ readOnly: true
+ mountPath: "/mnt/ssl/service.key"
+ subPath: "service.key"
+ - name: "ca-volume"
+ readOnly: true
+ mountPath: "/usr/local/share/ca-certificates/cdis/cdis-ca.crt"
+ subPath: "ca.pem"
+ - name: dockersock
+ mountPath: "/var/run/docker.sock"
+ imagePullPolicy: Always
+ volumes:
+ - name: cert-volume
+ secret:
+ secretName: "cert-jenkins-service"
+ - name: ca-volume
+ secret:
+ secretName: "service-ca"
+ - name: dockersock
+ hostPath:
+ path: /var/run/docker.sock
diff --git a/kube/services/jenkins2-ci-worker/jenkins2-ci-worker-pvc.yaml b/kube/services/jenkins2-ci-worker/jenkins2-ci-worker-pvc.yaml
new file mode 100644
index 000000000..047e4e966
--- /dev/null
+++ b/kube/services/jenkins2-ci-worker/jenkins2-ci-worker-pvc.yaml
@@ -0,0 +1,12 @@
+kind: PersistentVolumeClaim
+apiVersion: v1
+metadata:
+ name: datadir-jenkins-ci
+ annotations:
+ volume.beta.kubernetes.io/storage-class: gp2
+spec:
+ accessModes:
+ - ReadWriteOnce
+ resources:
+ requests:
+ storage: 200Gi
diff --git a/kube/services/jenkins2/jenkins2-deploy.yaml b/kube/services/jenkins2/jenkins2-deploy.yaml
index ee838bae6..08365f811 100644
--- a/kube/services/jenkins2/jenkins2-deploy.yaml
+++ b/kube/services/jenkins2/jenkins2-deploy.yaml
@@ -48,7 +48,7 @@ spec:
# https://estl.tech/accessing-docker-from-a-kubernetes-pod-68996709c04b
#
- name: jenkins
- GEN3_JENKINS_IMAGE
+ GEN3_JENKINS2_IMAGE
ports:
- containerPort: 8080
name: http
diff --git a/kube/services/jobs/cedar-ingestion-job.yaml b/kube/services/jobs/cedar-ingestion-job.yaml
index ecc83335c..f6be4dd23 100644
--- a/kube/services/jobs/cedar-ingestion-job.yaml
+++ b/kube/services/jobs/cedar-ingestion-job.yaml
@@ -1,19 +1,22 @@
#
# run with:
-# gen3 job run cedar-ingestion \
-# SUBMISSION_USER $submission_user \
-# CEDAR_DIRECTORY_ID $cedar_directory_id \
-#
-# SUBMISSION_USER(optional)
-# e-mail of user-account to submit the data to MDS, must have MDS admin and CEDAR polices granted. Default: "cdis.autotest@gmail.com"
+# gen3 job run cedar-ingestion [CEDAR_DIRECTORY_ID $cedar_directory_id]
#
# CEDAR_DIRECTORY_ID
-# ID of CEDAR directory where instances will be pulled from, only needs its UUID part. For example: "123e4567-e89b-12d3-a456-426614174000"
+# The directory id will be read from 'directory_id.txt' in the
+# 'cedar-g3auto' secret.
+# You can override the secret value with an optional command line argument.
+#
# The deployed CEDAR wrapper services must be able to read from this directory.
#
-# Example
-# gen3 job run cedar-ingestion CEDAR_DIRECTORY_ID 123e4567-e89b-12d3-a456-426614174000 SUBMISSION_USER cdis.autotest@gmail.com
+# ACCESS TOKENS
+# Access tokens will be generated for an existing fence-client, cedar_ingest_client.
+# The client_id and client_secret will be read from
+# 'cedar_client_credentials.json' in the 'cedar-g3auto' secret.
+#
+#   The fence-client must have MDS admin and CEDAR policies granted.
#
+
apiVersion: batch/v1
kind: Job
metadata:
@@ -44,36 +47,13 @@ spec:
- ONDEMAND
serviceAccountName: useryaml-job
volumes:
- - name: yaml-merge
- configMap:
- name: "fence-yaml-merge"
- name: shared-data
emptyDir: {}
-# -----------------------------------------------------------------------------
-# DEPRECATED! Remove when all commons are no longer using local_settings.py
-# for fence.
-# -----------------------------------------------------------------------------
- - name: old-config-volume
- secret:
- secretName: "fence-secret"
- - name: creds-volume
- secret:
- secretName: "fence-creds"
- - name: config-helper
- configMap:
- name: config-helper
- - name: json-secret-volume
+ - name: cedar-client-volume-g3auto
secret:
- secretName: "fence-json-secret"
-# -----------------------------------------------------------------------------
- - name: config-volume
- secret:
- secretName: "fence-config"
- - name: fence-jwt-keys
- secret:
- secretName: "fence-jwt-keys"
- containers:
- - name: awshelper
+ secretName: cedar-g3auto # the secret name in kube
+ initContainers:
+ - name: cedar
image: quay.io/cdis/awshelper:master
imagePullPolicy: Always
ports:
@@ -84,10 +64,18 @@ spec:
configMapKeyRef:
name: global
key: hostname
- - name: SUBMISSION_USER
- GEN3_SUBMISSION_USER|-value: "cdis.autotest@gmail.com"-|
- name: CEDAR_DIRECTORY_ID
GEN3_CEDAR_DIRECTORY_ID|-value: ""-|
+ - name: CEDAR_DIRECTORY_ID_SECRET
+ valueFrom:
+ secretKeyRef:
+ name: cedar-g3auto
+ key: "directory_id.txt"
+ - name: CEDAR_CLIENT_CREDENTIALS
+ valueFrom:
+ secretKeyRef:
+ name: cedar-g3auto
+ key: "cedar_client_credentials.json"
volumeMounts:
- name: shared-data
mountPath: /mnt/shared
@@ -95,117 +83,75 @@ spec:
limits:
cpu: 1
memory: 5Gi
+
command: ["/bin/bash" ]
args:
- "-c"
- |
if [[ -z "$CEDAR_DIRECTORY_ID" ]]; then
- echo -e "CEDAR_DIRECTORY_ID is required" 1>&2
- exit 1
+ if [[ ! -z "$CEDAR_DIRECTORY_ID_SECRET" ]]; then
+ echo "CEDAR_DIRECTORY_ID is from g3auto secret"
+ export CEDAR_DIRECTORY_ID=$CEDAR_DIRECTORY_ID_SECRET
+ else
+ echo -e "ERROR: CEDAR_DIRECTORY_ID must be in secret or on command line" 1>&2
+ exit 0
+ fi
+ else
+ echo "CEDAR_DIRECTORY_ID is from command line parameter"
+ fi
+
+ if [[ ! -z "$CEDAR_CLIENT_CREDENTIALS" ]]; then
+ export CEDAR_CLIENT_ID=$(echo $CEDAR_CLIENT_CREDENTIALS | jq -r .client_id)
+ export CEDAR_CLIENT_SECRET=$(echo $CEDAR_CLIENT_CREDENTIALS | jq -r .client_secret)
+ else
+ echo -e "Could not read cedar-client credentials" 1>&2
+ exit 0
fi
- let count=0
- while [[ ! -f /mnt/shared/access_token.txt && $count -lt 50 ]]; do
- echo "Waiting for /mnt/shared/access_token.txt";
- sleep 2
- let count=$count+1
- done
+
pip install pydash
export GEN3_HOME="$HOME/cloud-automation"
- export ACCESS_TOKEN="$(cat /mnt/shared/access_token.txt)"
- python ${GEN3_HOME}/files/scripts/healdata/heal-cedar-data-ingest.py --access_token $ACCESS_TOKEN --directory $CEDAR_DIRECTORY_ID --hostname $HOSTNAME
- echo "All done - exit status $?"
- - name: fence
- GEN3_FENCE_IMAGE
- imagePullPolicy: Always
- env:
- - name: PYTHONPATH
- value: /var/www/fence
- - name: SUBMISSION_USER
- GEN3_SUBMISSION_USER|-value: "cdis.autotest@gmail.com"-|
- - name: TOKEN_EXPIRATION
- value: "3600"
- - name: FENCE_PUBLIC_CONFIG
- valueFrom:
- configMapKeyRef:
- name: manifest-fence
- key: fence-config-public.yaml
- optional: true
- volumeMounts:
-# -----------------------------------------------------------------------------
-# DEPRECATED! Remove when all commons are no longer using local_settings.py
-# for fence.
-# -----------------------------------------------------------------------------
- - name: "old-config-volume"
- readOnly: true
- mountPath: "/var/www/fence/local_settings.py"
- subPath: local_settings.py
- - name: "creds-volume"
- readOnly: true
- mountPath: "/var/www/fence/creds.json"
- subPath: creds.json
- - name: "config-helper"
- readOnly: true
- mountPath: "/var/www/fence/config_helper.py"
- subPath: config_helper.py
- - name: "json-secret-volume"
- readOnly: true
- mountPath: "/var/www/fence/fence_credentials.json"
- subPath: fence_credentials.json
-# -----------------------------------------------------------------------------
- - name: "config-volume"
- readOnly: true
- mountPath: "/var/www/fence/fence-config-secret.yaml"
- subPath: fence-config.yaml
- - name: "yaml-merge"
- readOnly: true
- mountPath: "/var/www/fence/yaml_merge.py"
- subPath: yaml_merge.py
- - name: "fence-jwt-keys"
- readOnly: true
- mountPath: "/fence/jwt-keys.tar"
- subPath: "jwt-keys.tar"
- - name: shared-data
- mountPath: /mnt/shared
- command: ["/bin/bash" ]
- args:
+ python ${GEN3_HOME}/files/scripts/healdata/heal-cedar-data-ingest.py --directory $CEDAR_DIRECTORY_ID --cedar_client_id $CEDAR_CLIENT_ID --cedar_client_secret $CEDAR_CLIENT_SECRET --hostname $HOSTNAME
+ status=$?
+ if [[ $status -ne 0 ]]; then
+ echo "WARNING: non zero exit code: $status"
+ else
+ echo "All done - exit code: $status"
+ touch /mnt/shared/success
+ fi
+ containers:
+ - name: awshelper
+ env:
+ - name: slackWebHook
+ valueFrom:
+ configMapKeyRef:
+ name: global
+ key: slack_webhook
+ - name: gen3Env
+ valueFrom:
+ configMapKeyRef:
+ name: manifest-global
+ key: hostname
+ GEN3_AWSHELPER_IMAGE|-image: quay.io/cdis/awshelper:master-|
+ volumeMounts:
+ - name: shared-data
+ mountPath: /mnt/shared
+ command: ["/bin/bash"]
+ args:
- "-c"
- |
- echo "${FENCE_PUBLIC_CONFIG:-""}" > "/var/www/fence/fence-config-public.yaml"
- python /var/www/fence/yaml_merge.py /var/www/fence/fence-config-public.yaml /var/www/fence/fence-config-secret.yaml > /var/www/fence/fence-config.yaml
- if [ -f /fence/jwt-keys.tar ]; then
- cd /fence
- tar xvf jwt-keys.tar
- if [ -d jwt-keys ]; then
- mkdir -p keys
- mv jwt-keys/* keys/
- fi
+ if [[ ! "$slackWebHook" =~ ^http ]]; then
+ echo "Slack webhook not set"
+ exit 0
fi
- echo "generate access token"
- echo "fence-create --path fence token-create --type access_token --username $SUBMISSION_USER --scopes openid,user,test-client --exp $TOKEN_EXPIRATION"
- tempFile="$(mktemp -p /tmp token.txt_XXXXXX)"
- success=false
- count=0
- sleepTime=10
- # retry loop
- while [[ $count -lt 3 && $success == false ]]; do
- if fence-create --path fence token-create --type access_token --username $SUBMISSION_USER --scopes openid,user,test-client --exp $TOKEN_EXPIRATION > "$tempFile"; then
- echo "fence-create success!"
- tail -1 "$tempFile" > /mnt/shared/access_token.txt
- # base64 --decode complains about invalid characters - don't know why
- awk -F . '{ print $2 }' /mnt/shared/access_token.txt | base64 --decode 2> /dev/null
- success=true
- else
- echo "fence-create failed!"
- cat "$tempFile"
- echo "sleep for $sleepTime, then retry"
- sleep "$sleepTime"
- let sleepTime=$sleepTime+$sleepTime
- fi
- let count=$count+1
- done
- if [[ $success != true ]]; then
- echo "Giving up on fence-create after $count retries - failed to create valid access token"
+ if ! [ -f /mnt/shared/success ]; then
+ success="FAILED"
+ color="ff0000"
+ else
+ success="SUCCESS"
+ color="2EB67D"
fi
- echo ""
- echo "All Done - always succeed to avoid k8s retries"
+ echo "Sending ${success} message to slack..."
+ payload="{\"attachments\": [{\"fallback\": \"JOB ${success}: cedar-ingest cronjob on ${gen3Env}\",\"color\": \"#${color}\",\"title\": \"JOB ${success}: cedar-ingest cronjob on ${gen3Env}\",\"text\": \"Pod name: ${HOSTNAME}\",\"ts\": \"$(date +%s)\"}]}"
+ echo "Payload=${payload}"
+ curl -X POST --data-urlencode "payload=${payload}" "${slackWebHook}"
restartPolicy: Never
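The rewritten job passes `--cedar_client_id` and `--cedar_client_secret` to the ingest script, which presumably exchanges them for its own access token instead of relying on a pre-generated one. A minimal, hypothetical sketch of that exchange, assuming fence's standard OAuth2 `client_credentials` grant at `/user/oauth2/token` (the hostname is a placeholder):

```python
# Hypothetical sketch, not the actual heal-cedar-data-ingest.py logic.
# Assumes fence exposes an OAuth2 client_credentials grant at /user/oauth2/token.
import json

import requests

# Contents of the cedar_client_credentials.json key in the cedar-g3auto secret.
with open("cedar_client_credentials.json") as f:
    creds = json.load(f)

resp = requests.post(
    "https://HOSTNAME/user/oauth2/token",  # HOSTNAME is a placeholder
    data={"grant_type": "client_credentials"},
    auth=(creds["client_id"], creds["client_secret"]),
)
resp.raise_for_status()
access_token = resp.json()["access_token"]
print("access token acquired:", bool(access_token))
```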
diff --git a/kube/services/jobs/distribute-licenses-job.yaml b/kube/services/jobs/distribute-licenses-job.yaml
index aef52c75c..1c2ad4284 100644
--- a/kube/services/jobs/distribute-licenses-job.yaml
+++ b/kube/services/jobs/distribute-licenses-job.yaml
@@ -48,10 +48,11 @@ spec:
configMapKeyRef:
name: manifest-hatchery
key: "user-namespace"
- - name: GEN3_LICENSE_SECRET_NAME
- value: stata-workspace-gen3-license
- - name: GEN3_LICENSE_KEY
- value: licenseSecrets
+ - name: GEN3_STATA_LICENSE
+ valueFrom:
+ secretKeyRef:
+ name: stata-workspace-gen3-license-g3auto
+ key: "stata_license.txt"
command: ["python"]
args:
- "-c"
@@ -100,19 +101,10 @@ spec:
used_licenses.sort()
print(f"Licenses currently in use: {used_licenses}")
- # The license keys should be stored in a kubernetes secret.
+ # The Gen3 Stata license strings should be stored in a kubernetes secret using g3auto.
# The format of the secret is one license string per line.
# The license strings are generated with 'stinit' using the information in a license PDF.
- # The secret can be generated from a temporary file with a kubectl command, eg
- # kubectl create secret generic GEN3_LICENSE_SECRET_NAME --from-file=GEN3_LICENSE_KEY=/path/to/file.lic
-
- # Get license from kubernetes secret
- print("Ready to read secret")
- secret_name = os.environ['GEN3_LICENSE_SECRET_NAME']
- secret_key = os.environ['GEN3_LICENSE_KEY']
- license_secrets = os.popen(
- f"kubectl get secret {secret_name} --template={{{{.data.{secret_key}}}}} | base64 -d"
- ).read()
+ license_secrets = os.environ['GEN3_STATA_LICENSE']
license_secrets = license_secrets.strip()
licenses = license_secrets.split("\n")
diff --git a/kube/services/jobs/ecr-access-job.yaml b/kube/services/jobs/ecr-access-job.yaml
new file mode 100644
index 000000000..89bb49d6d
--- /dev/null
+++ b/kube/services/jobs/ecr-access-job.yaml
@@ -0,0 +1,83 @@
+apiVersion: batch/v1
+kind: Job
+metadata:
+ name: ecr-access
+spec:
+ template:
+ metadata:
+ labels:
+ app: gen3job
+ spec:
+ affinity:
+ nodeAffinity:
+ preferredDuringSchedulingIgnoredDuringExecution:
+ - weight: 100
+ preference:
+ matchExpressions:
+ - key: karpenter.sh/capacity-type
+ operator: In
+ values:
+ - on-demand
+ - weight: 99
+ preference:
+ matchExpressions:
+ - key: eks.amazonaws.com/capacityType
+ operator: In
+ values:
+ - ONDEMAND
+ restartPolicy: Never
+ serviceAccountName: ecr-access-job-sa
+ securityContext:
+ fsGroup: 1000
+ containers:
+ - name: awshelper
+ GEN3_AWSHELPER_IMAGE|-image: quay.io/cdis/awshelper:master-|
+ imagePullPolicy: Always
+ resources:
+ limits:
+ cpu: 0.5
+ memory: 1Gi
+ env:
+ - name: SLACK_WEBHOOK
+ valueFrom:
+ configMapKeyRef:
+ name: global
+ key: slack_webhook
+ optional: true
+ - name: HOSTNAME
+ valueFrom:
+ configMapKeyRef:
+ name: global
+ key: hostname
+ - name: PAY_MODELS_DYNAMODB_TABLE
+ valueFrom:
+ configMapKeyRef:
+ name: manifest-hatchery
+ key: pay-models-dynamodb-table
+ optional: true
+ - name: ECR_ACCESS_JOB_ARN
+ valueFrom:
+ configMapKeyRef:
+ name: manifest-global
+ key: ecr-access-job-role-arn
+ optional: true
+ command: ["/bin/bash"]
+ args:
+ - "-c"
+ - |
+ cd cloud-automation/files/scripts/
+ echo Installing requirements...
+ pip3 install -r ecr-access-job-requirements.txt
+ python3 ecr-access-job.py
+ exitcode=$?
+
+ if [[ "${SLACK_WEBHOOK}" != 'None' ]]; then
+ if [[ $exitcode == 1 ]]; then
+ curl -X POST --data-urlencode "payload={\"text\": \"JOBFAIL: ECR access job on ${HOSTNAME}\"}" "${SLACK_WEBHOOK}"
+ else
+ curl -X POST --data-urlencode "payload={\"text\": \"SUCCESS: ECR access job on ${HOSTNAME}\"}" "${SLACK_WEBHOOK}"
+ fi
+ fi
+
+ echo "Exit code: $exitcode"
+ exit "$exitcode"
diff --git a/kube/services/jobs/etl-job.yaml b/kube/services/jobs/etl-job.yaml
index fa201c99a..6b9b887ec 100644
--- a/kube/services/jobs/etl-job.yaml
+++ b/kube/services/jobs/etl-job.yaml
@@ -2,6 +2,8 @@
apiVersion: batch/v1
kind: Job
metadata:
+ annotations:
+ karpenter.sh/do-not-evict: "true"
name: etl
spec:
backoffLimit: 0
diff --git a/kube/services/jobs/fence-cleanup-expired-ga4gh-info-cronjob.yaml b/kube/services/jobs/fence-cleanup-expired-ga4gh-info-cronjob.yaml
index 74d7fc9a4..93eaf7652 100644
--- a/kube/services/jobs/fence-cleanup-expired-ga4gh-info-cronjob.yaml
+++ b/kube/services/jobs/fence-cleanup-expired-ga4gh-info-cronjob.yaml
@@ -1,5 +1,5 @@
---
-apiVersion: batch/v1beta1
+apiVersion: batch/v1
kind: CronJob
metadata:
name: fence-cleanup-expired-ga4gh-info
diff --git a/kube/services/jobs/psql-db-dump-va-testing-job.yaml b/kube/services/jobs/psql-db-dump-va-testing-job.yaml
new file mode 100644
index 000000000..8a8037e16
--- /dev/null
+++ b/kube/services/jobs/psql-db-dump-va-testing-job.yaml
@@ -0,0 +1,80 @@
+---
+# NOTE: This job was created specifically to dump all the databases in va-testing, in preparation for a move to a second cluster
+# If you aren't doing that, this probably is not the job you're looking for
+apiVersion: batch/v1
+kind: Job
+metadata:
+ name: psql-db-dump-va-testing
+spec:
+ template:
+ metadata:
+ labels:
+ app: gen3job
+ spec:
+ affinity:
+ nodeAffinity:
+ preferredDuringSchedulingIgnoredDuringExecution:
+ - weight: 100
+ preference:
+ matchExpressions:
+ - key: karpenter.sh/capacity-type
+ operator: In
+ values:
+ - on-demand
+ - weight: 99
+ preference:
+ matchExpressions:
+ - key: eks.amazonaws.com/capacityType
+ operator: In
+ values:
+ - ONDEMAND
+ serviceAccountName: dbbackup-sa
+ containers:
+ - name: pgdump
+ image: quay.io/cdis/awshelper:master
+ imagePullPolicy: Always
+ env:
+ - name: gen3Env
+ valueFrom:
+ configMapKeyRef:
+ name: global
+ key: environment
+ - name: JENKINS_HOME
+ value: "devterm"
+ - name: GEN3_HOME
+ value: /home/ubuntu/cloud-automation
+ command: ["/bin/bash"]
+ args:
+ - "-c"
+ - |
+ source "${GEN3_HOME}/gen3/lib/utils.sh"
+ gen3_load "gen3/gen3setup"
+ account_id=$(aws sts get-caller-identity --query "Account" --output text)
+ default_bucket_name="gen3-db-backups-${account_id}"
+ default_databases=("fence" "indexd" "sheepdog" "peregrine" "arborist" "argo" "atlas" "metadata" "ohdsi" "omop-data" "wts")
+ s3_dir="va-testing-$(date +"%Y-%m-%d-%H-%M-%S")"
+ databases=("${default_databases[@]}")
+ bucket_name=$default_bucket_name
+
+ for database in "${databases[@]}"; do
+ gen3_log_info "Starting database backup for ${database}"
+ gen3 db backup "${database}" > "${database}.sql"
+
+ if [ $? -eq 0 ] && [ -f "${database}.sql" ]; then
+ gen3_log_info "Uploading backup file ${database}.sql to s3://${bucket_name}/${s3_dir}/${database}.sql"
+ aws s3 cp "${database}.sql" "s3://${bucket_name}/${s3_dir}/${database}.sql"
+
+ if [ $? -eq 0 ]; then
+ gen3_log_info "Successfully uploaded ${database}.sql to S3"
+ else
+ gen3_log_err "Failed to upload ${database}.sql to S3"
+ fi
+ gen3_log_info "Deleting temporary backup file ${database}.sql"
+ rm -f "${database}.sql"
+ else
+ gen3_log_err "Backup operation failed for ${database}"
+ rm -f "${database}.sql"
+ fi
+ done
+ sleep 600
+ restartPolicy: Never
diff --git a/kube/services/jobs/psql-db-prep-dump-job.yaml b/kube/services/jobs/psql-db-prep-dump-job.yaml
new file mode 100644
index 000000000..86c513b78
--- /dev/null
+++ b/kube/services/jobs/psql-db-prep-dump-job.yaml
@@ -0,0 +1,79 @@
+---
+apiVersion: batch/v1
+kind: Job
+metadata:
+ name: psql-db-prep-dump
+spec:
+ template:
+ metadata:
+ labels:
+ app: gen3job
+ spec:
+ affinity:
+ nodeAffinity:
+ preferredDuringSchedulingIgnoredDuringExecution:
+ - weight: 100
+ preference:
+ matchExpressions:
+ - key: karpenter.sh/capacity-type
+ operator: In
+ values:
+ - on-demand
+ - weight: 99
+ preference:
+ matchExpressions:
+ - key: eks.amazonaws.com/capacityType
+ operator: In
+ values:
+ - ONDEMAND
+ serviceAccountName: dbbackup-sa
+ containers:
+ - name: pgdump
+ image: quay.io/cdis/awshelper:master
+ imagePullPolicy: Always
+ env:
+ - name: gen3Env
+ valueFrom:
+ configMapKeyRef:
+ name: global
+ key: environment
+ - name: JENKINS_HOME
+ value: "devterm"
+ - name: GEN3_HOME
+ value: /home/ubuntu/cloud-automation
+ command: [ "/bin/bash" ]
+ args:
+ - "-c"
+ - |
+ source "${GEN3_HOME}/gen3/lib/utils.sh"
+ gen3_load "gen3/gen3setup"
+ account_id=$(aws sts get-caller-identity --query "Account" --output text)
+ default_bucket_name="gen3-db-backups-${account_id}"
+ default_databases=("indexd" "sheepdog" "metadata")
+ s3_dir="$(date +"%Y-%m-%d-%H-%M-%S")"
+ databases=("${default_databases[@]}")
+ bucket_name=$default_bucket_name
+
+ for database in "${databases[@]}"; do
+ gen3_log_info "Starting database backup for ${database}"
+ gen3 db backup "${database}" > "${database}.sql"
+
+ if [ $? -eq 0 ] && [ -f "${database}.sql" ]; then
+ gen3_log_info "Uploading backup file ${database}.sql to s3://${bucket_name}/${s3_dir}/${database}.sql"
+ aws s3 cp "${database}.sql" "s3://${bucket_name}/${s3_dir}/${database}.sql"
+
+ if [ $? -eq 0 ]; then
+ gen3_log_info "Successfully uploaded ${database}.sql to S3"
+ else
+ gen3_log_err "Failed to upload ${database}.sql to S3"
+ fi
+ gen3_log_info "Deleting temporary backup file ${database}.sql"
+ rm -f "${database}.sql"
+ else
+ gen3_log_err "Backup operation failed for ${database}"
+ rm -f "${database}.sql"
+ fi
+ done
+ sleep 600
+ restartPolicy: Never
+
diff --git a/kube/services/jobs/psql-db-prep-restore-job.yaml b/kube/services/jobs/psql-db-prep-restore-job.yaml
new file mode 100644
index 000000000..710e6f4f1
--- /dev/null
+++ b/kube/services/jobs/psql-db-prep-restore-job.yaml
@@ -0,0 +1,90 @@
+---
+apiVersion: batch/v1
+kind: Job
+metadata:
+ name: psql-db-prep-restore
+spec:
+ template:
+ metadata:
+ labels:
+ app: gen3job
+ spec:
+ affinity:
+ nodeAffinity:
+ preferredDuringSchedulingIgnoredDuringExecution:
+ - weight: 100
+ preference:
+ matchExpressions:
+ - key: karpenter.sh/capacity-type
+ operator: In
+ values:
+ - on-demand
+ - weight: 99
+ preference:
+ matchExpressions:
+ - key: eks.amazonaws.com/capacityType
+ operator: In
+ values:
+ - ONDEMAND
+ serviceAccountName: dbbackup-sa
+ containers:
+ - name: pgrestore
+ image: quay.io/cdis/awshelper:master
+ imagePullPolicy: Always
+ env:
+ - name: gen3Env
+ valueFrom:
+ configMapKeyRef:
+ name: global
+ key: environment
+ - name: JENKINS_HOME
+ value: "devterm"
+ - name: GEN3_HOME
+ value: /home/ubuntu/cloud-automation
+ command: [ "/bin/bash" ]
+ args:
+ - "-c"
+ - |
+ source "${GEN3_HOME}/gen3/lib/utils.sh"
+ gen3_load "gen3/gen3setup"
+ account_id=$(aws sts get-caller-identity --query "Account" --output text)
+ default_bucket_name="gen3-db-backups-${account_id}"
+ default_databases=("indexd" "sheepdog" "metadata")
+ backup_directories=$(aws s3 ls "s3://${default_bucket_name}/")
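+              # pick the newest backup prefix: directory names are timestamps, so the lexicographic max is assumed to be the most recent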
+ newest_directory=$(echo "$backup_directories" | awk '/PRE/ {if ($2 > max) max = $2} END {print max}')
+ databases=("${default_databases[@]}")
+ bucket_name=$default_bucket_name
+ namespace=$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace)
+ date_str=$(date -u +%y%m%d_%H%M%S)
+ gen3_log_info "Database backup location in S3: ${bucket_name}/${newest_directory}"
+ gen3_log_info "namespace: $namespace \n\n"
+
+ for database in "${databases[@]}"; do
+ gen3_log_info "Downloading database backup file s3://${default_bucket_name}/${newest_directory}${database}.sql"
+ aws s3 cp "s3://${default_bucket_name}/${newest_directory}${database}.sql" "${database}.sql"
+ server=$(gen3 db creds "$database" | jq -r '.g3FarmServer')
+ username=$(gen3 db creds "$database" | jq -r '.db_username')
+ db_name="${namespace}_${database}_${date_str}"
+ if [[ -z "$server" || -z "$username" ]]; then
+ gen3_log_info "Error: Unable to extract server name or username."
+                  continue
+ fi
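+                # try to create the target database; if the role lacks CREATEDB, grant it and retry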
+ gen3 psql $database -c "create database $db_name;" 2>&1 | grep -q "permission denied"
+ if [ $? -eq 0 ]; then
+ gen3_log_info "User does not have permission to create database. Granting required permission..."
+ gen3 psql $server -c "alter user $username createdb;"
+ gen3 psql $database -c "create database $db_name;"
+ if [ $? -eq 0 ]; then
+ gen3_log_info "Database $db_name created successfully!"
+ else
+ gen3_log_info "Error creating database $db_name after granting permission."
+ fi
+ else
+ gen3_log_info "Database $db_name created successfully!"
+ fi
+ gen3_log_info "Starting database restore for ${database} to database $db_name"
+ gen3 psql "$database" -d "$db_name" -f "${database}.sql" 1>&2
+ gen3_log_info "cleanup temporary backup file ${database}.sql \n\n\n"
+ done
+ sleep 600
+ restartPolicy: Never
diff --git a/kube/services/jobs/usersync-job.yaml b/kube/services/jobs/usersync-job.yaml
index 8f148a3b0..8a5471a20 100644
--- a/kube/services/jobs/usersync-job.yaml
+++ b/kube/services/jobs/usersync-job.yaml
@@ -260,7 +260,7 @@ spec:
exit 1
fi
#-----------------
- echo "awshelper downloading ${userYamlS3Path} to /mnt/shared/useryaml";
+ echo "awshelper downloading ${userYamlS3Path} to /mnt/shared/user.yaml";
n=0
until [ $n -ge 5 ]; do
echo "Download attempt $n"
diff --git a/kube/services/karpenter-reconciler/application.yaml b/kube/services/karpenter-reconciler/application.yaml
new file mode 100644
index 000000000..fb0fab871
--- /dev/null
+++ b/kube/services/karpenter-reconciler/application.yaml
@@ -0,0 +1,22 @@
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+ name: karpenter-reconciler-application
+ namespace: argocd
+spec:
+ destination:
+ namespace: kube-system
+ server: https://kubernetes.default.svc
+ project: default
+ source:
+ repoURL: https://github.com/uc-cdis/cloud-automation.git
+ targetRevision: master
+ path: kube/services/karpenter-reconciler
+ directory:
+ exclude: "application.yaml"
+ syncPolicy:
+ automated:
+ prune: true
+ selfHeal: true
+ syncOptions:
+ - CreateNamespace=true
diff --git a/kube/services/karpenter-reconciler/auth.yaml b/kube/services/karpenter-reconciler/auth.yaml
new file mode 100644
index 000000000..c159028ab
--- /dev/null
+++ b/kube/services/karpenter-reconciler/auth.yaml
@@ -0,0 +1,44 @@
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+ name: karpenter-reconciler
+ namespace: argo-events
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+ name: karpenter-admin-binding-reconciler
+subjects:
+ - kind: ServiceAccount
+ name: karpenter-reconciler
+ namespace: argo-events
+roleRef:
+ kind: ClusterRole
+ name: karpenter-admin
+ apiGroup: rbac.authorization.k8s.io
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+ name: workflow-viewer-reconciler
+subjects:
+ - kind: ServiceAccount
+ name: karpenter-reconciler
+ namespace: argo-events
+roleRef:
+ kind: ClusterRole
+ name: argo-argo-workflows-view
+ apiGroup: rbac.authorization.k8s.io
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+ name: viewer-reconciler
+subjects:
+ - kind: ServiceAccount
+ name: karpenter-reconciler
+ namespace: argo-events
+roleRef:
+ kind: ClusterRole
+ name: system:aggregate-to-view
+ apiGroup: rbac.authorization.k8s.io
diff --git a/kube/services/karpenter-reconciler/karpenter-reconciler-cronjob.yaml b/kube/services/karpenter-reconciler/karpenter-reconciler-cronjob.yaml
new file mode 100644
index 000000000..4f82e9d43
--- /dev/null
+++ b/kube/services/karpenter-reconciler/karpenter-reconciler-cronjob.yaml
@@ -0,0 +1,72 @@
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+ name: karpenter-reconciler-cronjob
+ namespace: argo-events
+spec:
+ schedule: "*/5 * * * *"
+ jobTemplate:
+ spec:
+ template:
+ metadata:
+ labels:
+ app: gen3job
+ spec:
+ serviceAccount: karpenter-reconciler
+ volumes:
+ - name: karpenter-templates-volume
+ configMap:
+ name: karpenter-templates
+ containers:
+ - name: karpenter-reconciler
+ image: quay.io/cdis/awshelper
+ volumeMounts:
+ - name: karpenter-templates-volume
+ mountPath: /manifests
+ env:
+ - name: PROVISIONER_TEMPLATE
+ value: /manifests/provisioner.yaml
+ - name: AWSNODETEMPLATE_TEMPLATE
+ value: /manifests/nodetemplate.yaml
+ command: ["/bin/bash"]
+ args:
+ - "-c"
+ - |
+ #!/bin/bash
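+              # fall back to template files in the current directory if the env vars are not set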
+ if [ -z "$PROVISIONER_TEMPLATE" ]; then
+ PROVISIONER_TEMPLATE="provisioner.yaml"
+ fi
+
+ if [ -z "$AWSNODETEMPLATE_TEMPLATE" ]; then
+ AWSNODETEMPLATE_TEMPLATE="nodetemplate.yaml"
+ fi
+
+ ENVIRONMENT=$(kubectl -n default get configmap global -o jsonpath="{.data.environment}")
+
+ RAW_WORKFLOWS=$(kubectl get workflows -n argo -o yaml)
+
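+              # extract "<workflow name> <gen3 username>" pairs from the workflow metadata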
+ WORKFLOWS=$(echo "${RAW_WORKFLOWS}" | yq -r '.items[] | [.metadata.name, .metadata.labels.gen3username] | join(" ")')
+
+ WORKFLOW_ARRAY=()
+
+ while IFS= read -r line; do
+ WORKFLOW_ARRAY+=("$line")
+ done <<< "$WORKFLOWS"
+
+ for workflow in "${WORKFLOW_ARRAY[@]}"
+ do
+ workflow_name=$(echo "$workflow" | awk '{print $1}')
+ workflow_user=$(echo "$workflow" | awk '{print $2}')
+
+ if ! kubectl get awsnodetemplate workflow-$workflow_name >/dev/null 2>&1; then
+ echo "No awsnodetemplate found for ${workflow_name}, creating one"
+ sed -e "s/WORKFLOW_NAME/$workflow_name/" -e "s/GEN3_USERNAME/$workflow_user/" -e "s/ENVIRONMENT/$ENVIRONMENT/" "$AWSNODETEMPLATE_TEMPLATE" | kubectl apply -f -
+ fi
+
+ if ! kubectl get provisioner workflow-$workflow_name >/dev/null 2>&1; then
+ echo "No provisioner found for ${workflow_name}, creating one"
+ sed -e "s/WORKFLOW_NAME/$workflow_name/" -e "s/GEN3_USERNAME/$workflow_user/" -e "s/ENVIRONMENT/$ENVIRONMENT/" "$PROVISIONER_TEMPLATE" | kubectl apply -f -
+
+ fi
+ done
+ restartPolicy: OnFailure
diff --git a/kube/services/karpenter/nodeTemplateDefault.yaml b/kube/services/karpenter/nodeTemplateDefault.yaml
index 0f76a392f..6ba8b3a0f 100644
--- a/kube/services/karpenter/nodeTemplateDefault.yaml
+++ b/kube/services/karpenter/nodeTemplateDefault.yaml
@@ -11,6 +11,7 @@ spec:
karpenter.sh/discovery: VPC_NAME
Environment: VPC_NAME
Name: eks-VPC_NAME-karpenter
+ purpose: default
metadataOptions:
httpEndpoint: enabled
httpProtocolIPv6: disabled
@@ -23,11 +24,11 @@ spec:
--BOUNDARY
Content-Type: text/x-shellscript; charset="us-ascii"
- #!/bin/bash -xe
+ #!/bin/bash -x
instanceId=$(curl -s http://169.254.169.254/latest/dynamic/instance-identity/document | jq -r .instanceId)
curl https://raw.githubusercontent.com/uc-cdis/cloud-automation/master/files/authorized_keys/ops_team >> /home/ec2-user/.ssh/authorized_keys
- aws ec2 create-tags --resources $instanceId --tags 'Key="instanceId",Value='$instanceId''
- curl https://raw.githubusercontent.com/uc-cdis/cloud-automation/master/files/authorized_keys/ops_team >> /home/ec2-user/.ssh/authorized_keys
+
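+          # registryPullQPS=0 removes the kubelet's image pull rate limit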
+ echo "$(jq '.registryPullQPS=0' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json
sysctl -w fs.inotify.max_user_watches=12000
@@ -37,7 +38,14 @@ spec:
# configure grub
sudo /sbin/grubby --update-kernel=ALL --args="fips=1"
+ # --BOUNDARY
+ # Content-Type: text/cloud-config; charset="us-ascii"
+
+ # mounts:
+ # - ['fstype': 'bpf', 'mountpoint': '/sys/fs/bpf', 'opts': 'rw,relatime']
+
--BOUNDARY
+
Content-Type: text/cloud-config; charset="us-ascii"
power_state:
diff --git a/kube/services/karpenter/nodeTemplateGPU.yaml b/kube/services/karpenter/nodeTemplateGPU.yaml
index b41e6441c..925e7a9a0 100644
--- a/kube/services/karpenter/nodeTemplateGPU.yaml
+++ b/kube/services/karpenter/nodeTemplateGPU.yaml
@@ -11,6 +11,7 @@ spec:
Environment: VPC_NAME
Name: eks-VPC_NAME-gpu-karpenter
karpenter.sh/discovery: VPC_NAME
+ purpose: gpu
metadataOptions:
httpEndpoint: enabled
httpProtocolIPv6: disabled
@@ -23,11 +24,11 @@ spec:
--BOUNDARY
Content-Type: text/x-shellscript; charset="us-ascii"
- #!/bin/bash -xe
+ #!/bin/bash -x
instanceId=$(curl -s http://169.254.169.254/latest/dynamic/instance-identity/document | jq -r .instanceId)
curl https://raw.githubusercontent.com/uc-cdis/cloud-automation/master/files/authorized_keys/ops_team >> /home/ec2-user/.ssh/authorized_keys
- aws ec2 create-tags --resources $instanceId --tags 'Key="instanceId",Value='$instanceId''
- curl https://raw.githubusercontent.com/uc-cdis/cloud-automation/master/files/authorized_keys/ops_team >> /home/ec2-user/.ssh/authorized_keys
+
+ echo "$(jq '.registryPullQPS=0' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json
sysctl -w fs.inotify.max_user_watches=12000
@@ -37,6 +38,12 @@ spec:
# configure grub
sudo /sbin/grubby --update-kernel=ALL --args="fips=1"
+ # --BOUNDARY
+ # Content-Type: text/cloud-config; charset="us-ascii"
+
+ # mounts:
+ # - ['fstype': 'bpf', 'mountpoint': '/sys/fs/bpf', 'opts': 'rw,relatime']
+
--BOUNDARY
Content-Type: text/cloud-config; charset="us-ascii"
diff --git a/kube/services/karpenter/nodeTemplateJupyter.yaml b/kube/services/karpenter/nodeTemplateJupyter.yaml
index 579ac1aa3..1c8970ad6 100644
--- a/kube/services/karpenter/nodeTemplateJupyter.yaml
+++ b/kube/services/karpenter/nodeTemplateJupyter.yaml
@@ -11,6 +11,7 @@ spec:
Environment: VPC_NAME
Name: eks-VPC_NAME-jupyter-karpenter
karpenter.sh/discovery: VPC_NAME
+ purpose: jupyter
metadataOptions:
httpEndpoint: enabled
httpProtocolIPv6: disabled
@@ -23,11 +24,11 @@ spec:
--BOUNDARY
Content-Type: text/x-shellscript; charset="us-ascii"
- #!/bin/bash -xe
+ #!/bin/bash -x
instanceId=$(curl -s http://169.254.169.254/latest/dynamic/instance-identity/document | jq -r .instanceId)
curl https://raw.githubusercontent.com/uc-cdis/cloud-automation/master/files/authorized_keys/ops_team >> /home/ec2-user/.ssh/authorized_keys
- aws ec2 create-tags --resources $instanceId --tags 'Key="instanceId",Value='$instanceId''
- curl https://raw.githubusercontent.com/uc-cdis/cloud-automation/master/files/authorized_keys/ops_team >> /home/ec2-user/.ssh/authorized_keys
+
+ echo "$(jq '.registryPullQPS=0' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json
sysctl -w fs.inotify.max_user_watches=12000
@@ -37,6 +38,12 @@ spec:
# configure grub
sudo /sbin/grubby --update-kernel=ALL --args="fips=1"
+ # --BOUNDARY
+ # Content-Type: text/cloud-config; charset="us-ascii"
+
+ # mounts:
+ # - ['fstype': 'bpf', 'mountpoint': '/sys/fs/bpf', 'opts': 'rw,relatime']
+
--BOUNDARY
Content-Type: text/cloud-config; charset="us-ascii"
diff --git a/kube/services/karpenter/nodeTemplateWorkflow.yaml b/kube/services/karpenter/nodeTemplateWorkflow.yaml
index 60481b4fc..6e47b22f9 100644
--- a/kube/services/karpenter/nodeTemplateWorkflow.yaml
+++ b/kube/services/karpenter/nodeTemplateWorkflow.yaml
@@ -11,6 +11,7 @@ spec:
Environment: VPC_NAME
Name: eks-VPC_NAME-workflow-karpenter
karpenter.sh/discovery: VPC_NAME
+ purpose: workflow
metadataOptions:
httpEndpoint: enabled
httpProtocolIPv6: disabled
@@ -23,11 +24,11 @@ spec:
--BOUNDARY
Content-Type: text/x-shellscript; charset="us-ascii"
- #!/bin/bash -xe
+ #!/bin/bash -x
instanceId=$(curl -s http://169.254.169.254/latest/dynamic/instance-identity/document | jq -r .instanceId)
curl https://raw.githubusercontent.com/uc-cdis/cloud-automation/master/files/authorized_keys/ops_team >> /home/ec2-user/.ssh/authorized_keys
- aws ec2 create-tags --resources $instanceId --tags 'Key="instanceId",Value='$instanceId''
- curl https://raw.githubusercontent.com/uc-cdis/cloud-automation/master/files/authorized_keys/ops_team >> /home/ec2-user/.ssh/authorized_keys
+
+ echo "$(jq '.registryPullQPS=0' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json
sysctl -w fs.inotify.max_user_watches=12000
@@ -37,6 +38,12 @@ spec:
# configure grub
sudo /sbin/grubby --update-kernel=ALL --args="fips=1"
+ # --BOUNDARY
+ # Content-Type: text/cloud-config; charset="us-ascii"
+
+ # mounts:
+ # - ['fstype': 'bpf', 'mountpoint': '/sys/fs/bpf', 'opts': 'rw,relatime']
+
--BOUNDARY
Content-Type: text/cloud-config; charset="us-ascii"
diff --git a/kube/services/monitoring/values.yaml b/kube/services/monitoring/values.yaml
index ffdf92bd9..d93e5098a 100644
--- a/kube/services/monitoring/values.yaml
+++ b/kube/services/monitoring/values.yaml
@@ -1540,6 +1540,15 @@ prometheus-node-exporter:
- --collector.filesystem.fs-types-exclude=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$
service:
portName: http-metrics
+ affinity:
+ nodeAffinity:
+ requiredDuringSchedulingIgnoredDuringExecution:
+ nodeSelectorTerms:
+ - matchExpressions:
+ - key: "eks.amazonaws.com/compute-type"
+ operator: NotIn
+ values:
+ - fargate
prometheus:
monitor:
enabled: true
diff --git a/kube/services/node-monitors/application.yaml b/kube/services/node-monitors/application.yaml
new file mode 100644
index 000000000..0748f7c35
--- /dev/null
+++ b/kube/services/node-monitors/application.yaml
@@ -0,0 +1,22 @@
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+ name: node-monitor-application
+ namespace: argocd
+spec:
+ destination:
+ namespace: default
+ server: https://kubernetes.default.svc
+ project: default
+ source:
+ repoURL: https://github.com/uc-cdis/cloud-automation.git
+ targetRevision: master
+ path: kube/services/node-monitors/
+ directory:
+ exclude: "application.yaml"
+ syncPolicy:
+ automated:
+ prune: true
+ selfHeal: true
+ syncOptions:
+ - CreateNamespace=true
diff --git a/kube/services/node-monitors/argo-monitors/application.yaml b/kube/services/node-monitors/argo-monitors/application.yaml
new file mode 100644
index 000000000..fca4ace86
--- /dev/null
+++ b/kube/services/node-monitors/argo-monitors/application.yaml
@@ -0,0 +1,22 @@
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+ name: node-monitor-argo-application
+ namespace: argocd
+spec:
+ destination:
+ namespace: default
+ server: https://kubernetes.default.svc
+ project: default
+ source:
+ repoURL: https://github.com/uc-cdis/cloud-automation.git
+ targetRevision: master
+ path: kube/services/node-monitors/argo-monitors/
+ directory:
+ exclude: "application.yaml"
+ syncPolicy:
+ automated:
+ prune: true
+ selfHeal: true
+ syncOptions:
+ - CreateNamespace=true
diff --git a/kube/services/node-monitors/argo-monitors/argo-node-age.yaml b/kube/services/node-monitors/argo-monitors/argo-node-age.yaml
new file mode 100644
index 000000000..890495ee0
--- /dev/null
+++ b/kube/services/node-monitors/argo-monitors/argo-node-age.yaml
@@ -0,0 +1,58 @@
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+ name: argo-node-age
+ namespace: default
+spec:
+ schedule: "*/5 * * * *"
+ jobTemplate:
+ spec:
+ template:
+ metadata:
+ labels:
+ app: gen3job
+ spec:
+ serviceAccountName: node-monitor
+ containers:
+ - name: kubectl
+ image: quay.io/cdis/awshelper
+ env:
+              # This is the label we want to monitor; it probably will never need to change
+ - name: NODE_LABEL
+ value: purpose=workflow
+ # This is 3 * 3600, or 3 hours
+ - name: THRESHOLD_TIME
+ value: "10800"
+ - name: SLACK_WEBHOOK_URL
+ valueFrom:
+ configMapKeyRef:
+ name: global
+ key: slack_webhook
+
+ command: ["/bin/bash"]
+ args:
+ - "-c"
+ - |
+ #!/bin/bash
+ # Get all nodes with specific label and check their age
+ kubectl get nodes -l "$NODE_LABEL" -o json | jq -c '.items[] | {name: .metadata.name, creationTimestamp: .metadata.creationTimestamp}' | while read node_info; do
+ NODE_NAME=$(echo $node_info | jq -r '.name')
+ CREATION_TIMESTAMP=$(echo $node_info | jq -r '.creationTimestamp')
+
+ # Convert creation timestamp to Unix Epoch time
+ CREATION_EPOCH=$(date -d "$CREATION_TIMESTAMP" +%s)
+
+ # Get current Unix Epoch time
+ CURRENT_EPOCH=$(date +%s)
+
+ # Calculate node age in seconds
+ NODE_AGE=$(($CURRENT_EPOCH - $CREATION_EPOCH))
+
+ # Check if node age is greater than threshold
+ if [ "$NODE_AGE" -gt "$THRESHOLD_TIME" ]; then
+ echo "Node $NODE_NAME has been around too long, sending an alert"
+ # Send alert to Slack
+ curl -X POST -H 'Content-type: application/json' --data "{\"text\":\"WARNING: Node \`${NODE_NAME}\` is older than 3 hours!\"}" $SLACK_WEBHOOK_URL
+ fi
+ done
+ restartPolicy: OnFailure
\ No newline at end of file
diff --git a/kube/services/node-monitors/auth.yaml b/kube/services/node-monitors/auth.yaml
new file mode 100644
index 000000000..72560cddc
--- /dev/null
+++ b/kube/services/node-monitors/auth.yaml
@@ -0,0 +1,18 @@
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+ name: node-monitor
+ namespace: default
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+ name: node-monitor-binding
+subjects:
+ - kind: ServiceAccount
+ name: node-monitor
+ namespace: default
+roleRef:
+ kind: ClusterRole
+ name: system:node
+ apiGroup: rbac.authorization.k8s.io
diff --git a/kube/services/node-monitors/node-not-ready.yaml b/kube/services/node-monitors/node-not-ready.yaml
new file mode 100644
index 000000000..500832fc3
--- /dev/null
+++ b/kube/services/node-monitors/node-not-ready.yaml
@@ -0,0 +1,48 @@
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+ name: node-not-ready-cron
+ namespace: default
+spec:
+ schedule: "*/30 * * * *"
+ jobTemplate:
+ spec:
+ template:
+ metadata:
+ labels:
+ app: gen3job
+ spec:
+ serviceAccountName: node-monitor
+ containers:
+ - name: kubectl
+ image: quay.io/cdis/awshelper
+ env:
+ - name: SLACK_WEBHOOK_URL
+ valueFrom:
+ configMapKeyRef:
+ name: global
+ key: slack_webhook
+ - name: ENVIRONMENT
+ valueFrom:
+ configMapKeyRef:
+ name: global
+ key: environment
+
+ command: ["/bin/bash"]
+ args:
+ - "-c"
+ - |
+          #!/bin/bash
+
+ # Get nodes that show "NodeStatusNeverUpdated"
+ NODES=$(kubectl get nodes -o json | jq -r '.items[] | select(.status.conditions[] | select(.type == "Ready" and .status == "Unknown")) | .metadata.name')
+
+ if [ -n "$NODES" ]; then
+ echo "Nodes reporting 'NodeStatusNeverUpdated', sending an alert:"
+ echo "$NODES"
+ # Send alert to Slack
+            curl -X POST -H 'Content-type: application/json' --data "{\"text\":\"WARNING: Node(s) \`${NODES}\` stuck in 'NotReady' in \`${ENVIRONMENT}\`! \"}" $SLACK_WEBHOOK_URL
+ else
+ echo "No nodes reporting 'NodeStatusNeverUpdated'"
+ fi
+ restartPolicy: OnFailure
diff --git a/kube/services/ohdsi-atlas/ohdsi-atlas-deploy.yaml b/kube/services/ohdsi-atlas/ohdsi-atlas-deploy.yaml
index bf128920e..62265503e 100644
--- a/kube/services/ohdsi-atlas/ohdsi-atlas-deploy.yaml
+++ b/kube/services/ohdsi-atlas/ohdsi-atlas-deploy.yaml
@@ -72,7 +72,7 @@ spec:
volumeMounts:
- name: ohdsi-atlas-config-local
readOnly: true
- mountPath: /usr/share/nginx/html/atlas/js/config-local.js
+ mountPath: /etc/atlas/config-local.js
subPath: config-local.js
imagePullPolicy: Always
resources:
@@ -80,4 +80,4 @@ spec:
cpu: 100m
memory: 100Mi
limits:
- memory: 500Mi
+ memory: 500Mi
diff --git a/kube/services/ohdsi-webapi/ohdsi-webapi-config.yaml b/kube/services/ohdsi-webapi/ohdsi-webapi-config.yaml
index 5cd46edd9..8eb01ec08 100644
--- a/kube/services/ohdsi-webapi/ohdsi-webapi-config.yaml
+++ b/kube/services/ohdsi-webapi/ohdsi-webapi-config.yaml
@@ -55,6 +55,9 @@ stringData:
security_oauth_callback_api: https://atlas.$hostname/WebAPI/user/oauth/callback
security_oauth_callback_urlResolver: query
+ security_ohdsi_custom_authorization_mode: teamproject
+ security_ohdsi_custom_authorization_url: $ARBORIST_URL/auth/mapping
+
logging_level_root: info
logging_level_org_ohdsi: info
logging_level_org_apache_shiro: info
diff --git a/kube/services/ohdsi-webapi/ohdsi-webapi-deploy.yaml b/kube/services/ohdsi-webapi/ohdsi-webapi-deploy.yaml
index 65d6ed38c..a729ae7c4 100644
--- a/kube/services/ohdsi-webapi/ohdsi-webapi-deploy.yaml
+++ b/kube/services/ohdsi-webapi/ohdsi-webapi-deploy.yaml
@@ -83,7 +83,7 @@ spec:
limits:
memory: 4Gi
- name: ohdsi-webapi-reverse-proxy
- image: nginx:1.23
+ image: 707767160287.dkr.ecr.us-east-1.amazonaws.com/gen3/nginx:1.23
ports:
- containerPort: 80
volumeMounts:
@@ -97,4 +97,4 @@ spec:
cpu: 100m
memory: 100Mi
limits:
- memory: 500Mi
\ No newline at end of file
+ memory: 500Mi
diff --git a/kube/services/ohif-viewer/app-config.js b/kube/services/ohif-viewer/app-config.js
new file mode 100644
index 000000000..6768726f4
--- /dev/null
+++ b/kube/services/ohif-viewer/app-config.js
@@ -0,0 +1,209 @@
+window.config = {
+ routerBasename: '/ohif-viewer/',
+ // whiteLabeling: {},
+ extensions: [],
+ modes: [],
+ customizationService: {
+    // Shows a custom route - access via http://localhost:3000/custom
+ // helloPage: '@ohif/extension-default.customizationModule.helloPage',
+ },
+ showStudyList: true,
+ // some windows systems have issues with more than 3 web workers
+ maxNumberOfWebWorkers: 3,
+  // the flag below is for performance reasons, but it might not work for all servers
+ omitQuotationForMultipartRequest: true,
+ showWarningMessageForCrossOrigin: true,
+ showCPUFallbackMessage: true,
+ showLoadingIndicator: true,
+ strictZSpacingForVolumeViewport: true,
+ maxNumRequests: {
+ interaction: 100,
+ thumbnail: 75,
+    // Prefetch number is dependent on the HTTP protocol. For HTTP/2 or
+    // above, the number of requests can go a lot higher.
+ prefetch: 25,
+ },
+ // filterQueryParam: false,
+ defaultDataSourceName: 'dicomweb',
+  /* Dynamic config allows the user to pass a "configUrl" query string; this allows loading the config without recompiling the application. The regex will ensure a valid configuration source */
+ // dangerouslyUseDynamicConfig: {
+ // enabled: true,
+  //   // regex will ensure a valid configuration source; the default is /.*/, which matches any character. To use this, set up your own regex to choose a specific source of configuration only.
+ // // Example 1, to allow numbers and letters in an absolute or sub-path only.
+ // // regex: /(0-9A-Za-z.]+)(\/[0-9A-Za-z.]+)*/
+  //   // Example 2, to restrict to either hospital.com or othersite.com.
+ // // regex: /(https:\/\/hospital.com(\/[0-9A-Za-z.]+)*)|(https:\/\/othersite.com(\/[0-9A-Za-z.]+)*)/
+ // regex: /.*/,
+ // },
+ dataSources: [
+ {
+ friendlyName: 'dcmjs DICOMWeb Server',
+ namespace: '@ohif/extension-default.dataSourcesModule.dicomweb',
+ sourceName: 'dicomweb',
+ configuration: {
+ name: 'dicomweb',
+ wadoUriRoot: '$DICOM_SERVER_URL/wado',
+ qidoRoot: '$DICOM_SERVER_URL/dicom-web',
+ wadoRoot: '$DICOM_SERVER_URL/dicom-web',
+
+ qidoSupportsIncludeField: false,
+ supportsReject: false,
+ imageRendering: 'wadors',
+ thumbnailRendering: 'wadors',
+ enableStudyLazyLoad: true,
+ supportsFuzzyMatching: false,
+ supportsWildcard: true,
+ staticWado: true,
+ singlepart: 'bulkdata,video',
+        // whether the data source should use retrieveBulkData to grab metadata,
+        // and, in case of a relative path, what it should be relative to; options
+        // are the series level or the study level (some servers prefer series, some study)
+ bulkDataURI: {
+ enabled: true,
+ relativeResolution: 'studies',
+ },
+ },
+ },
+ {
+ friendlyName: 'dicomweb delegating proxy',
+ namespace: '@ohif/extension-default.dataSourcesModule.dicomwebproxy',
+ sourceName: 'dicomwebproxy',
+ configuration: {
+ name: 'dicomwebproxy',
+ },
+ },
+ {
+ friendlyName: 'dicom json',
+ namespace: '@ohif/extension-default.dataSourcesModule.dicomjson',
+ sourceName: 'dicomjson',
+ configuration: {
+ name: 'json',
+ },
+ },
+ {
+ friendlyName: 'dicom local',
+ namespace: '@ohif/extension-default.dataSourcesModule.dicomlocal',
+ sourceName: 'dicomlocal',
+ configuration: {},
+ },
+ ],
+ httpErrorHandler: error => {
+    // This is a 429 when rejected from the public IDC sandbox too often.
+ console.warn(error.status);
+
+ // Could use services manager here to bring up a dialog/modal if needed.
+ console.warn('test, navigate to https://ohif.org/');
+ },
+ // whiteLabeling: {
+ // /* Optional: Should return a React component to be rendered in the "Logo" section of the application's Top Navigation bar */
+ // createLogoComponentFn: function (React) {
+ // return React.createElement(
+ // 'a',
+ // {
+ // target: '_self',
+ // rel: 'noopener noreferrer',
+ // className: 'text-purple-600 line-through',
+ // href: '/',
+ // },
+ // React.createElement('img',
+ // {
+ // src: './assets/customLogo.svg',
+ // className: 'w-8 h-8',
+ // }
+ // ))
+ // },
+ // },
+ hotkeys: [
+ {
+ commandName: 'incrementActiveViewport',
+ label: 'Next Viewport',
+ keys: ['right'],
+ },
+ {
+ commandName: 'decrementActiveViewport',
+ label: 'Previous Viewport',
+ keys: ['left'],
+ },
+ { commandName: 'rotateViewportCW', label: 'Rotate Right', keys: ['r'] },
+ { commandName: 'rotateViewportCCW', label: 'Rotate Left', keys: ['l'] },
+ { commandName: 'invertViewport', label: 'Invert', keys: ['i'] },
+ {
+ commandName: 'flipViewportHorizontal',
+ label: 'Flip Horizontally',
+ keys: ['h'],
+ },
+ {
+ commandName: 'flipViewportVertical',
+ label: 'Flip Vertically',
+ keys: ['v'],
+ },
+ { commandName: 'scaleUpViewport', label: 'Zoom In', keys: ['+'] },
+ { commandName: 'scaleDownViewport', label: 'Zoom Out', keys: ['-'] },
+ { commandName: 'fitViewportToWindow', label: 'Zoom to Fit', keys: ['='] },
+ { commandName: 'resetViewport', label: 'Reset', keys: ['space'] },
+ { commandName: 'nextImage', label: 'Next Image', keys: ['down'] },
+ { commandName: 'previousImage', label: 'Previous Image', keys: ['up'] },
+ // {
+ // commandName: 'previousViewportDisplaySet',
+ // label: 'Previous Series',
+ // keys: ['pagedown'],
+ // },
+ // {
+ // commandName: 'nextViewportDisplaySet',
+ // label: 'Next Series',
+ // keys: ['pageup'],
+ // },
+ {
+ commandName: 'setToolActive',
+ commandOptions: { toolName: 'Zoom' },
+ label: 'Zoom',
+ keys: ['z'],
+ },
+ // ~ Window level presets
+ {
+ commandName: 'windowLevelPreset1',
+ label: 'W/L Preset 1',
+ keys: ['1'],
+ },
+ {
+ commandName: 'windowLevelPreset2',
+ label: 'W/L Preset 2',
+ keys: ['2'],
+ },
+ {
+ commandName: 'windowLevelPreset3',
+ label: 'W/L Preset 3',
+ keys: ['3'],
+ },
+ {
+ commandName: 'windowLevelPreset4',
+ label: 'W/L Preset 4',
+ keys: ['4'],
+ },
+ {
+ commandName: 'windowLevelPreset5',
+ label: 'W/L Preset 5',
+ keys: ['5'],
+ },
+ {
+ commandName: 'windowLevelPreset6',
+ label: 'W/L Preset 6',
+ keys: ['6'],
+ },
+ {
+ commandName: 'windowLevelPreset7',
+ label: 'W/L Preset 7',
+ keys: ['7'],
+ },
+ {
+ commandName: 'windowLevelPreset8',
+ label: 'W/L Preset 8',
+ keys: ['8'],
+ },
+ {
+ commandName: 'windowLevelPreset9',
+ label: 'W/L Preset 9',
+ keys: ['9'],
+ },
+ ],
+};
diff --git a/kube/services/revproxy/gen3.nginx.conf/argo-argo-workflows-server.conf b/kube/services/revproxy/gen3.nginx.conf/argo-server.conf
similarity index 86%
rename from kube/services/revproxy/gen3.nginx.conf/argo-argo-workflows-server.conf
rename to kube/services/revproxy/gen3.nginx.conf/argo-server.conf
index cb8def3aa..1cdd4608c 100644
--- a/kube/services/revproxy/gen3.nginx.conf/argo-argo-workflows-server.conf
+++ b/kube/services/revproxy/gen3.nginx.conf/argo-server.conf
@@ -7,7 +7,7 @@
auth_request /gen3-authz;
set $proxy_service "argo";
- set $upstream http://argo-argo-workflows-server.argo.svc.cluster.local:2746;
+ set $upstream SERVICE_URL;
rewrite ^/argo/(.*) /$1 break;
diff --git a/kube/services/revproxy/gen3.nginx.conf/gen3-discovery-ai-service.conf b/kube/services/revproxy/gen3.nginx.conf/gen3-discovery-ai-service.conf
new file mode 100644
index 000000000..42e9a3758
--- /dev/null
+++ b/kube/services/revproxy/gen3.nginx.conf/gen3-discovery-ai-service.conf
@@ -0,0 +1,12 @@
+ location /ai {
+ if ($csrf_check !~ ^ok-\S.+$) {
+ return 403 "failed csrf check";
+ }
+
+ set $proxy_service "gen3-discovery-ai-service";
+ set $upstream http://gen3-discovery-ai-service$des_domain;
+ rewrite ^/ai/(.*) /$1 break;
+ proxy_pass $upstream;
+ proxy_redirect http://$host/ https://$host/ai/;
+ client_max_body_size 0;
+ }
diff --git a/kube/services/revproxy/gen3.nginx.conf/gen3ff-as-root/frontend-framework-service.conf b/kube/services/revproxy/gen3.nginx.conf/gen3ff-as-root/frontend-framework-service.conf
index ac2cb75f6..37e7623de 100644
--- a/kube/services/revproxy/gen3.nginx.conf/gen3ff-as-root/frontend-framework-service.conf
+++ b/kube/services/revproxy/gen3.nginx.conf/gen3ff-as-root/frontend-framework-service.conf
@@ -2,6 +2,10 @@
if ($csrf_check !~ ^ok-\S.+$) {
return 403 "failed csrf check";
}
+
+ # added to avoid click-jacking attacks
+ add_header X-Frame-Options "SAMEORIGIN";
+
set $proxy_service "frontend-framework";
set $upstream http://frontend-framework-service.$namespace.svc.cluster.local;
proxy_pass $upstream;
diff --git a/kube/services/revproxy/gen3.nginx.conf/gen3ff-as-root/portal-service.conf b/kube/services/revproxy/gen3.nginx.conf/gen3ff-as-root/portal-service.conf
index 58f0851d6..75d69c185 100644
--- a/kube/services/revproxy/gen3.nginx.conf/gen3ff-as-root/portal-service.conf
+++ b/kube/services/revproxy/gen3.nginx.conf/gen3ff-as-root/portal-service.conf
@@ -21,5 +21,8 @@
rewrite ^/(.*)$ /dashboard/Public/maintenance-page/index.html redirect;
}
+ # added to avoid click-jacking attacks
+ add_header X-Frame-Options "SAMEORIGIN";
+
proxy_pass $upstream;
}
diff --git a/kube/services/revproxy/gen3.nginx.conf/guppy-service.conf b/kube/services/revproxy/gen3.nginx.conf/guppy-service.conf
index db2de5886..e6d66ec12 100644
--- a/kube/services/revproxy/gen3.nginx.conf/guppy-service.conf
+++ b/kube/services/revproxy/gen3.nginx.conf/guppy-service.conf
@@ -1,4 +1,8 @@
location /guppy/ {
+ if ($csrf_check !~ ^ok-\S.+$) {
+ return 403 "failed csrf check, make sure data-portal version >= 2023.12 or >= 5.19.0";
+ }
+
proxy_connect_timeout 600s;
proxy_send_timeout 600s;
proxy_read_timeout 600s;
diff --git a/kube/services/revproxy/gen3.nginx.conf/ohif-viewer-service.conf b/kube/services/revproxy/gen3.nginx.conf/ohif-viewer-service.conf
index 9a20bc832..22926bcf0 100644
--- a/kube/services/revproxy/gen3.nginx.conf/ohif-viewer-service.conf
+++ b/kube/services/revproxy/gen3.nginx.conf/ohif-viewer-service.conf
@@ -3,11 +3,12 @@ location /ohif-viewer/ {
# return 403 "failed csrf check";
# }
- set $authz_resource "/services/ohif-viewer";
- set $authz_method "read";
- set $authz_service "ohif-viewer";
+ # see if this can be fixed in the future for anonymous access
+ # set $authz_resource "/services/ohif-viewer";
+ # set $authz_method "read";
+ # set $authz_service "ohif-viewer";
- auth_request /gen3-authz;
+ # auth_request /gen3-authz;
set $proxy_service "ohif-viewer";
set $upstream http://ohif-viewer-service.$namespace.svc.cluster.local;
diff --git a/kube/services/revproxy/gen3.nginx.conf/orthanc-service.conf b/kube/services/revproxy/gen3.nginx.conf/orthanc-service.conf
index 2eb77b179..ed736189c 100644
--- a/kube/services/revproxy/gen3.nginx.conf/orthanc-service.conf
+++ b/kube/services/revproxy/gen3.nginx.conf/orthanc-service.conf
@@ -7,11 +7,6 @@ location /orthanc/ {
set $authz_method "create";
set $authz_service "orthanc";
- if ($request_uri ~ "^/orthanc/dicom-web/studies/") {
- set $authz_method "read";
- set $authz_resource "/services/orthanc/studies";
- }
-
auth_request /gen3-authz;
proxy_set_header Authorization "Basic cHVibGljOmhlbGxv";
@@ -24,3 +19,23 @@ location /orthanc/ {
# no limit to payload size so we can upload large DICOM files
client_max_body_size 0;
}
+
+location /orthanc/dicom-web/studies/ {
+ set $authz_method "read";
+ set $authz_resource "/services/orthanc/studies";
+ set $authz_service "orthanc";
+
+ auth_request /gen3-authz;
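+  # read-only access here: reject POST (DICOM upload) requests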
+ if ($request_method = POST) {
+ return 403;
+ }
+ proxy_set_header Authorization "Basic cHVibGljOmhlbGxv";
+
+ set $proxy_service "orthanc";
+ set $upstream http://orthanc-service.$namespace.svc.cluster.local;
+ rewrite ^/orthanc/(.*) /$1 break;
+ proxy_pass $upstream;
+
+ # no limit to payload size so we can upload large DICOM files
+ client_max_body_size 0;
+}
diff --git a/kube/services/revproxy/gen3.nginx.conf/portal-as-root/frontend-framework-service.conf b/kube/services/revproxy/gen3.nginx.conf/portal-as-root/frontend-framework-service.conf
index dbb24e4b2..f3686d1a6 100644
--- a/kube/services/revproxy/gen3.nginx.conf/portal-as-root/frontend-framework-service.conf
+++ b/kube/services/revproxy/gen3.nginx.conf/portal-as-root/frontend-framework-service.conf
@@ -6,6 +6,10 @@
if ($csrf_check !~ ^ok-\S.+$) {
return 403 "failed csrf check";
}
+
+ # added to avoid click-jacking attacks
+ add_header X-Frame-Options "SAMEORIGIN";
+
set $proxy_service "frontend-framework";
# frontend framework service expects the /ff/ prefix, so no path rewrite
set $upstream http://frontend-framework-service.$namespace.svc.cluster.local;
diff --git a/kube/services/revproxy/nginx.conf b/kube/services/revproxy/nginx.conf
index 2e3a3b151..d0e14f49b 100644
--- a/kube/services/revproxy/nginx.conf
+++ b/kube/services/revproxy/nginx.conf
@@ -236,7 +236,7 @@ server {
# This overrides the individual services
#
set $allow_origin "*";
- if ($http_origin) {
+ if ($http_origin = "https://$host") {
set $allow_origin "$http_origin";
}
diff --git a/kube/services/revproxy/revproxy-deploy.yaml b/kube/services/revproxy/revproxy-deploy.yaml
index 9d5caab1b..9f10ce90b 100644
--- a/kube/services/revproxy/revproxy-deploy.yaml
+++ b/kube/services/revproxy/revproxy-deploy.yaml
@@ -21,6 +21,7 @@ spec:
app: revproxy
# allow access from workspaces
userhelper: "yes"
+ internet: "yes"
GEN3_DATE_LABEL
spec:
affinity:
diff --git a/packer/configs/web_wildcard_whitelist b/packer/configs/web_wildcard_whitelist
index c58eeefe8..621dec3d5 100644
--- a/packer/configs/web_wildcard_whitelist
+++ b/packer/configs/web_wildcard_whitelist
@@ -44,4 +44,5 @@
.yahooapis.com
.cloudfront.net
.docker.io
+.blob.core.windows.net
.googleapis.com