Skip to content

Commit

Permalink
HP-1520 Fix/ct gov (#2556)
Browse files Browse the repository at this point in the history
* don't sync ct.gov metadata back from cedar

* fix

* make some field not changable

* fix study name

* back to master

* direct service communication

* save RequiredIDP tag
  • Loading branch information
mfshao authored Jun 4, 2024
1 parent e319b3f commit c1e0d38
Showing 1 changed file with 10 additions and 16 deletions.
26 changes: 10 additions & 16 deletions files/scripts/healdata/heal-cedar-data-ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def update_filter_metadata(metadata_to_update):
filter_metadata = pydash.uniq(filter_metadata)
metadata_to_update["advSearchFilters"] = filter_metadata
# Retain these from existing tags
save_tags = ["Data Repository", "Common Data Elements", "Additional Acknowledgement"]
save_tags = ["Data Repository", "Common Data Elements", "RequiredIDP", "Additional Acknowledgement"]
tags = [tag for tag in metadata_to_update["tags"] if tag["category"] in save_tags]
# Add any new tags from advSearchFilters
for f in metadata_to_update["advSearchFilters"]:
Expand All @@ -114,7 +114,7 @@ def update_filter_metadata(metadata_to_update):

def get_client_token(client_id: str, client_secret: str):
try:
token_url = f"http://revproxy-service/user/oauth2/token"
token_url = "http://fence-service/oauth2/token"
headers = {"Content-Type": "application/x-www-form-urlencoded"}
params = {"grant_type": "client_credentials"}
data = "scope=openid user data"
Expand All @@ -137,7 +137,7 @@ def get_related_studies(serial_num, guid, hostname):

if serial_num:
mds = requests.get(
f"http://revproxy-service/mds/metadata?nih_reporter.project_num_split.serial_num={serial_num}&data=true&limit=2000"
f"http://metadata-service/metadata?nih_reporter.project_num_split.serial_num={serial_num}&data=true&limit=2000"
)
if mds.status_code == 200:
related_study_metadata = mds.json()
Expand Down Expand Up @@ -212,7 +212,7 @@ def get_related_studies(serial_num, guid, hostname):
# Get the metadata from cedar to register
print("Querying CEDAR...")
cedar = requests.get(
f"http://revproxy-service/cedar/get-instance-by-directory/{dir_id}?limit={limit}&offset={offset}",
f"http://cedar-wrapper-service/get-instance-by-directory/{dir_id}?limit={limit}&offset={offset}",
headers=token_header,
)

Expand All @@ -239,7 +239,7 @@ def get_related_studies(serial_num, guid, hostname):

# Get the metadata record for the CEDAR instance id
mds = requests.get(
f"http://revproxy-service/mds/metadata?gen3_discovery.study_metadata.metadata_location.cedar_study_level_metadata_template_instance_ID={cedar_instance_id}&data=true"
f"http://metadata-service/metadata?gen3_discovery.study_metadata.metadata_location.cedar_study_level_metadata_template_instance_ID={cedar_instance_id}&data=true"
)
if mds.status_code == 200:
mds_res = mds.json()
Expand All @@ -255,7 +255,6 @@ def get_related_studies(serial_num, guid, hostname):
mds_res = mds_res[mds_record_guid]
mds_cedar_register_data_body = {**mds_res}
mds_discovery_data_body = {}
mds_clinical_trials = {}
if mds_res["_guid_type"] == "discovery_metadata":
print("Metadata is already registered. Updating MDS record")
elif mds_res["_guid_type"] == "unregistered_discovery_metadata":
Expand All @@ -268,15 +267,15 @@ def get_related_studies(serial_num, guid, hostname):
)
continue

if "clinicaltrials_gov" in cedar_record:
mds_clinical_trials = cedar_record["clinicaltrials_gov"]
del cedar_record["clinicaltrials_gov"]

# some special handing for this field, because its parent will be deleted before we merging the CEDAR and MDS SLMD to avoid duplicated values
cedar_record_other_study_websites = cedar_record.get(
"metadata_location", {}
).get("other_study_websites", [])
# this ensures the nih_application_id, cedar_study_level_metadata_template_instance_ID and study_name are not alterable from CEDAR side
del cedar_record["metadata_location"]
cedar_record["minimal_info"]["study_name"] = mds_res["gen3_discovery"]["study_metadata"].get("minimal_info", {}).get(
"study_name", ""
)

mds_res["gen3_discovery"]["study_metadata"].update(cedar_record)
mds_res["gen3_discovery"]["study_metadata"]["metadata_location"][
Expand Down Expand Up @@ -361,17 +360,12 @@ def get_related_studies(serial_num, guid, hostname):
)

mds_cedar_register_data_body["gen3_discovery"] = mds_discovery_data_body
if mds_clinical_trials:
mds_cedar_register_data_body["clinicaltrials_gov"] = {
**mds_cedar_register_data_body.get("clinicaltrials_gov", {}),
**mds_clinical_trials,
}

mds_cedar_register_data_body["_guid_type"] = "discovery_metadata"

print(f"Metadata {mds_record_guid} is now being registered.")
mds_put = requests.put(
f"http://revproxy-service/mds/metadata/{mds_record_guid}",
f"http://metadata-service/metadata/{mds_record_guid}",
headers=token_header,
json=mds_cedar_register_data_body,
)
Expand Down

0 comments on commit c1e0d38

Please sign in to comment.