From 634994cc68b8d82c9a99a28f1ab85bd6897d519d Mon Sep 17 00:00:00 2001 From: Aaron Zavora Date: Mon, 16 Dec 2024 16:35:41 -0500 Subject: [PATCH] Update README.md --- dbignite/writer/README.md | 82 ++++++++++++++++++++++++++++++--------- 1 file changed, 63 insertions(+), 19 deletions(-) diff --git a/dbignite/writer/README.md b/dbignite/writer/README.md index e3c98e2..73f3da1 100644 --- a/dbignite/writer/README.md +++ b/dbignite/writer/README.md @@ -5,28 +5,19 @@ Dataframes are the source for converting to FHIR. This can be a single table or a transformed table from a SQL Statement ```python -#Read in a DataFrame to convert to FHIR. Data is publicly downloaded from CMS SynPUF - -from dbignite.writer.fhir_encoder import * +#Read in a DataFrame and convert to FHIR. Using dummy data with claim id, procedure codes, and diagnosis codes. from dbignite.writer.bundler import * +from dbignite.writer.fhir_encoder import * import json -data = spark.sql(""" -select ---Patient info -b.DESYNPUF_ID, --Patient.id -b.BENE_BIRTH_DT, --Patient.birthDate -b.BENE_COUNTY_CD, --Patient.address.postalCode -c.CLM_ID, --Claim.id -c.HCPCS_CD_1, --Claim.procedure.procedureCodeableConcept.coding.code -c.HCPCS_CD_2, --Claim.procedure.procedureCodeableConcept.coding.code -c.ICD9_DGNS_CD_1, --Claim.diagnosis.diagnosisCodeableConcept.coding.code -c.ICD9_DGNS_CD_2, --Claim.diagnosis.diagnosisCodeableConcept.coding.code -"http://www.cms.gov/Medicare/Coding/HCPCSReleaseCodeSets" as hcpcs_cdset -from hls_healthcare.hls_cms_synpuf.ben_sum b - inner join hls_healthcare.hls_cms_synpuf.car_claims c - on c.DESYNPUF_ID = b.DESYNPUF_ID - """) +data = spark.createDataFrame( +[('CLM1', 'PRCDR11', 'PRCDR12', 'PRCDR13', 'DX11', 'DX12', 'DX13'), + ('CLM1', 'PRCDR21', 'PRCDR22', 'PRCDR23', 'DX21', 'DX22', 'DX23')], +['CLAIM_ID', 'PRCDR_CD1', 'PRCDR_CD2', 'PRCDR_CD3', 'DX_CD1', 'DX_CD2', 'DX_CD3']) +""" +This command could also be +data = spark.sql("SELECT CLM_ID, PRCDR_CD1, PRCDR_CD2, PRCDR_CD3, DX_CD1, DX_CD2, 
DX_CD3 FROM...") +""" ``` ## How do transformations happen without writing code? @@ -37,6 +28,59 @@ e.g. an array of strings is mapped to a string by the default behavior of [conca ```python FhirEncoder(False, False, lambda x: ','.join(x)) ``` + +e.g. to demonstrate an array of values mapping to a single string, you can do the following +```python +maps = [ + Mapping('PRCDR_CD1', 'Claim.procedure.procedureCodeableConcept.coding.code'), + Mapping('PRCDR_CD2', 'Claim.procedure.procedureCodeableConcept.coding.code'), + Mapping('PRCDR_CD3', 'Claim.procedure.procedureCodeableConcept.coding.code')] + +m = MappingManager(maps, data.schema) +b = Bundle(m) +b.df_to_fhir(data).map(lambda x: json.loads(x)).foreach(lambda x: print(json.dumps(x, indent=4))) + + +""" +{..."resourceType": "Bundle", ... + "coding":[{ + --> "code": "PRCDR21,PRCDR22,PRCDR23" + }] +...} +""" +``` + +However, each code should be its own value in the "coding" array rather than one single comma-joined value. You can extend the lambda framework by specifying the transformation at the target column, e.g. + +```python +#maps... +em = FhirEncoderManager( + override_encoders ={ + "Claim.procedure.procedureCodeableConcept.coding": + FhirEncoder(False, False, lambda x: [{"code": y} for y in x[0].get("code").split(",")]) +}) +""" + ^^ Run this function instead when building values under "coding". +x = [ {"code": "PRCDR21,PRCDR22,PRCDR23"} ] +x[0].get("code").split(",") -> ['PRCDR21', 'PRCDR22', 'PRCDR23'] +lambda returns -> [{'code': 'PRCDR21'}, {'code': 'PRCDR22'}, {'code': 'PRCDR23'}] +""" + + +m = MappingManager(maps, data.schema, em) +b = Bundle(m) +""" +{..."resourceType": "Bundle", ... + "coding": [ + { "code": "PRCDR21" }, + { "code": "PRCDR22" }, + { "code": "PRCDR23" } + ] +} +""" +``` + + ## Mapping from Source to FHIR Specification ```python