diff --git a/docs/site/configuration.md b/docs/site/configuration.md
index 261c23a3..b967860d 100644
--- a/docs/site/configuration.md
+++ b/docs/site/configuration.md
@@ -65,7 +65,7 @@ Each setting can be configured as follows:
 
 : URI for the Splunk HTTP Event Collector (HEC) endpoint. When set, logs will be
   sent to the configured Splunk instance for analysis. The format is
-  `splunkhec://<token>@<host>:<port>?index=<index>&proto=<proto>&ssl_verify=<ssl_verify>&source=<source>`
+  `splunkhec://<token>@<host>:<port>?index=<index>&proto=<proto>&source=<source>`
 
 **Docker Default**: `""`
 
diff --git a/src/recordlinker/hl7/fhir.py b/src/recordlinker/hl7/fhir.py
index 27163893..f3cf10f9 100644
--- a/src/recordlinker/hl7/fhir.py
+++ b/src/recordlinker/hl7/fhir.py
@@ -12,6 +12,17 @@
 from recordlinker import schemas
 
 
+def get_first_patient_resource(bundle: dict) -> dict:
+    """
+    Get the first patient resource from a FHIR bundle
+    """
+    for entry in bundle.get("entry", []):
+        resource = entry.get("resource", {})
+        if resource.get("resourceType") == "Patient":
+            return resource
+    return {}
+
+
 def fhir_record_to_pii_record(fhir_record: dict) -> schemas.PIIRecord:
     """
     Parse the FHIR record into a PIIRecord object
@@ -92,4 +103,4 @@ def add_person_resource(
         },
     }
     bundle.get("entry", []).append(person_resource)
-    return bundle
\ No newline at end of file
+    return bundle
diff --git a/src/recordlinker/linking/link.py b/src/recordlinker/linking/link.py
index c4c063b1..2f1ebf37 100644
--- a/src/recordlinker/linking/link.py
+++ b/src/recordlinker/linking/link.py
@@ -7,6 +7,7 @@
 
 import collections
 import dataclasses
+import logging
 import typing
 
 from sqlalchemy import orm
@@ -14,7 +15,9 @@
 from recordlinker import models
 from recordlinker import schemas
 from recordlinker.database import mpi_service
+from recordlinker.utils.mock import MockTracer
 
+LOGGER = logging.getLogger(__name__)
 TRACER: typing.Any = None
 try:
     from opentelemetry import trace
@@ -22,8 +25,6 @@
     TRACER = trace.get_tracer(__name__)
 except ImportError:
     # OpenTelemetry is an optional dependency, if its not installed use a mock tracer
-    from recordlinker.utils.mock import MockTracer
-
     TRACER = MockTracer()
 
 
@@ -47,6 +48,7 @@ def compare(
     kwargs: dict[typing.Any, typing.Any] = algorithm_pass.kwargs
 
     results: list[float] = []
+    details: dict[str, typing.Any] = {"patient.reference_id": patient.reference_id}
     for e in evals:
         # TODO: can we do this check earlier?
         feature = getattr(schemas.Feature, e.feature, None)
@@ -55,7 +57,12 @@
         # Evaluate the comparison function and append the result to the list
         result: float = e.func(record, patient, feature, **kwargs)  # type: ignore
         results.append(result)
-    return matching_rule(results, **kwargs)  # type: ignore
+        details[f"evaluator.{e.feature}.result"] = result
+    is_match = matching_rule(results, **kwargs)
+    details["rule.results"] = is_match
+    # TODO: this may add a lot of noise, consider moving to debug
+    LOGGER.info("patient comparison", extra=details)
+    return is_match
 
 
 def link_record_against_mpi(
@@ -63,7 +70,7 @@
     session: orm.Session,
    algorithm: models.Algorithm,
     external_person_id: typing.Optional[str] = None,
-) -> tuple[models.Patient, models.Person | None, list[LinkResult]]:
+) -> tuple[models.Patient, models.Person | None, list[LinkResult], schemas.Prediction]:
     """
     Runs record linkage on a single incoming record (extracted from a FHIR
     bundle) using an existing database as an MPI.
Uses a flexible algorithm @@ -88,6 +95,12 @@ def link_record_against_mpi( scores: dict[models.Person, float] = collections.defaultdict(float) # the minimum ratio of matches needed to be considered a cluster member belongingness_ratio_lower_bound, belongingness_ratio_upper_bound = algorithm.belongingness_ratio + # initialize counters to track evaluation results to log + result_counts: dict[str, int] = { + "patients_compared": 0, + "above_lower_bound": 0, + "above_upper_bound": 0, + } for algorithm_pass in algorithm.passes: with TRACER.start_as_current_span("link.pass"): # initialize a dictionary to hold the clusters of patients for each person @@ -111,38 +124,62 @@ def link_record_against_mpi( with TRACER.start_as_current_span("link.compare"): if compare(record, patient, algorithm_pass): matched_count += 1 + result_counts["patients_compared"] += len(patients) # calculate the match ratio for this person cluster belongingness_ratio = matched_count / len(patients) + LOGGER.info( + "cluster belongingness", + extra={ + "ratio": belongingness_ratio, + "person.reference_id": person.reference_id, + "matched": matched_count, + "total": len(patients), + "algorithm.ratio_lower": belongingness_ratio_lower_bound, + "algorithm.ratio_upper": belongingness_ratio_upper_bound, + }, + ) if belongingness_ratio >= belongingness_ratio_lower_bound: # The match ratio is larger than the minimum cluster threshold, # optionally update the max score for this person scores[person] = max(scores[person], belongingness_ratio) + prediction: schemas.Prediction = "possible_match" matched_person: typing.Optional[models.Person] = None - if scores: - # Find the person with the highest matching score - matched_person, _ = max(scores.items(), key=lambda i: i[1]) - - sorted_scores: list[LinkResult] = [LinkResult(k, v) for k, v in sorted(scores.items(), reverse=True, key=lambda item: item[1])] - if not scores: + results: list[LinkResult] = [ + LinkResult(k, v) for k, v in sorted(scores.items(), reverse=True, key=lambda i: i[1]) + ] + result_counts["above_lower_bound"] = len(results) + if not results: # No match - matched_person = models.Person() # Create new Person Cluster - results = [] - elif sorted_scores[0].belongingness_ratio >= belongingness_ratio_upper_bound: + prediction = "no_match" + matched_person = models.Person() # Create new Person Cluster + elif results[0].belongingness_ratio >= belongingness_ratio_upper_bound: # Match (1 or many) - matched_person = sorted_scores[0].person - results = [x for x in sorted_scores if x.belongingness_ratio >= belongingness_ratio_upper_bound] # Multiple matches + prediction = "match" + matched_person = results[0].person + # reduce results to only those that meet the upper bound threshold + results = [x for x in results if x.belongingness_ratio >= belongingness_ratio_upper_bound] + result_counts["above_upper_bound"] = len(results) if not algorithm.include_multiple_matches: - results = [results[0]] # 1 Match (highest Belongingness Ratio) - else: - # Possible match - matched_person = None - results = sorted_scores + # reduce results to only the highest match + results = [results[0]] with TRACER.start_as_current_span("insert"): patient = mpi_service.insert_patient( session, record, matched_person, record.external_id, external_person_id, commit=False ) + LOGGER.info( + "link results", + extra={ + "person.reference_id": matched_person and matched_person.reference_id, + "patient.reference_id": patient.reference_id, + "result.prediction": prediction, + "result.count_patients_compared": 
result_counts["patients_compared"], + "result.count_persons_above_lower": result_counts["above_lower_bound"], + "result.count_persons_above_upper": result_counts["above_upper_bound"], + }, + ) + # return a tuple indicating whether a match was found and the person ID - return (patient, patient.person, results) + return (patient, patient.person, results, prediction) diff --git a/src/recordlinker/routes/link_router.py b/src/recordlinker/routes/link_router.py index 95511816..d0edbd1d 100644 --- a/src/recordlinker/routes/link_router.py +++ b/src/recordlinker/routes/link_router.py @@ -11,6 +11,7 @@ import fastapi import sqlalchemy.orm as orm +from recordlinker import models from recordlinker import schemas from recordlinker.database import algorithm_service from recordlinker.database import get_session @@ -20,119 +21,49 @@ router = fastapi.APIRouter() -@router.post("", summary="Link Record") -async def link_piirecord( - request: fastapi.Request, - input: typing.Annotated[schemas.LinkInput, fastapi.Body()], - response: fastapi.Response, - db_session: orm.Session = fastapi.Depends(get_session), -) -> schemas.LinkResponse: +def algorithm_or_422(db_session: orm.Session, label: str | None) -> models.Algorithm: """ - Compare a PII Record with records in the Master Patient Index (MPI) to - check for matches with existing patient records If matches are found, - returns the patient and person reference id's + Get the Algorithm, or default if no label. Raise a 422 if no Algorithm can be found. """ - if input.algorithm: - algorithm = algorithm_service.get_algorithm(db_session, input.algorithm) - else: - algorithm = algorithm_service.default_algorithm(db_session) - + algorithm = ( + algorithm_service.get_algorithm(db_session, label) + if label + else algorithm_service.default_algorithm(db_session) + ) if not algorithm: - msg = "Error: No algorithm found" raise fastapi.HTTPException( - status_code=fastapi.status.HTTP_422_UNPROCESSABLE_ENTITY, detail=msg + status_code=fastapi.status.HTTP_422_UNPROCESSABLE_ENTITY, + detail="No algorithm found", ) + return algorithm - # link the record - try: - # Make a copy of record_to_link so we don't modify the original - (patient, person, results) = link.link_record_against_mpi( - record=input.record, - session=db_session, - algorithm=algorithm, - external_person_id=input.external_person_id, - ) - return schemas.LinkResponse( - patient_reference_id=patient.reference_id, - person_reference_id=(person and person.reference_id), - results=[schemas.LinkResult(**r.__dict__) for r in results] - ) - except ValueError: - msg = "Error: Bad request" - raise fastapi.HTTPException(status_code=fastapi.status.HTTP_400_BAD_REQUEST, detail=msg) - - -@router.post("/dibbs", summary="Link FHIR for DIBBs") -async def link_dibbs( +@router.post("", summary="Link Record") +async def link_piirecord( request: fastapi.Request, - input: typing.Annotated[schemas.LinkFhirInput, fastapi.Body()], + input: typing.Annotated[schemas.LinkInput, fastapi.Body()], response: fastapi.Response, db_session: orm.Session = fastapi.Depends(get_session), -) -> schemas.LinkFhirResponse: +) -> schemas.LinkResponse: """ - Compare a FHIR bundle with records in the Master Patient Index (MPI) to + Compare a PII Record with records in the Master Patient Index (MPI) to check for matches with existing patient records If matches are found, - returns the FHIR bundle with updated references to existing patients. - This is a special endpoint that allows integration into a DIBBs pipeline, - as it accepts and returns FHIR bundles. 
+ returns the patient and person reference id's """ - input_bundle = input.bundle - external_id = input.external_person_id - - if input.algorithm: - algorithm = algorithm_service.get_algorithm(db_session, input.algorithm) - else: - algorithm = algorithm_service.default_algorithm(db_session) - - if not algorithm: - raise fastapi.HTTPException( - status_code=fastapi.status.HTTP_422_UNPROCESSABLE_ENTITY, - detail="Error: Invalid algorithm specified" - ) - - # Now extract the patient record we want to link - try: - record_to_link = [ - entry.get("resource") - for entry in input_bundle.get("entry", []) - if entry.get("resource", {}).get("resourceType", "") == "Patient" - ][0] - except IndexError: - raise fastapi.HTTPException( - status_code=fastapi.status.HTTP_400_BAD_REQUEST, - detail="Supplied bundle contains no Patient resource to link on." - ) - - - # convert record to PII - pii_record: schemas.PIIRecord = fhir.fhir_record_to_pii_record(record_to_link) - - # Now link the record - try: - (patient, person, results) = link.link_record_against_mpi( - record=pii_record, - session=db_session, - algorithm=algorithm, - external_person_id=external_id, - ) - updated_bundle: dict | None = None - if person: - updated_bundle = fhir.add_person_resource( - str(person.reference_id), pii_record.external_id, input_bundle - ) - return schemas.LinkFhirResponse( - patient_reference_id=patient.reference_id, - person_reference_id=(person and person.reference_id), - results=[schemas.LinkResult(**r.__dict__) for r in results], - updated_bundle=updated_bundle - ) - - except ValueError as err: - raise fastapi.HTTPException( - status_code=fastapi.status.HTTP_400_BAD_REQUEST, - detail=f"Could not connect to database: {err}" - ) + algorithm = algorithm_or_422(db_session, input.algorithm) + + (patient, person, results, prediction) = link.link_record_against_mpi( + record=input.record, + session=db_session, + algorithm=algorithm, + external_person_id=input.external_person_id, + ) + return schemas.LinkResponse( + prediction=prediction, + patient_reference_id=patient.reference_id, + person_reference_id=(person and person.reference_id), + results=[schemas.LinkResult(**r.__dict__) for r in results], + ) @router.post("/fhir", summary="Link FHIR") @@ -141,59 +72,42 @@ async def link_fhir( input: typing.Annotated[schemas.LinkFhirInput, fastapi.Body()], response: fastapi.Response, db_session: orm.Session = fastapi.Depends(get_session), -) -> schemas.LinkResponse: +) -> schemas.LinkFhirResponse: """ Compare a FHIR bundle with records in the Master Patient Index (MPI) to check for matches with existing patient records If matches are found, - returns the patient and person reference id's + returns the FHIR bundle with updated references to existing patients. 
""" - input_bundle = input.bundle - external_id = input.external_person_id - - if input.algorithm: - algorithm = algorithm_service.get_algorithm(db_session, input.algorithm) - else: - algorithm = algorithm_service.default_algorithm(db_session) - - if not algorithm: - response.status_code = fastapi.status.HTTP_422_UNPROCESSABLE_ENTITY - raise fastapi.HTTPException(status_code=422, detail="Error: No algorithm found") + algorithm = algorithm_or_422(db_session, input.algorithm) # Now extract the patient record we want to link - try: - record_to_link = [ - entry.get("resource") - for entry in input_bundle.get("entry", []) - if entry.get("resource", {}).get("resourceType", "") == "Patient" - ][0] - except IndexError: - response.status_code = fastapi.status.HTTP_400_BAD_REQUEST + patient: dict = fhir.get_first_patient_resource(input.bundle) + if not patient: raise fastapi.HTTPException( - status_code=400, - detail="Error: Supplied bundle contains no Patient resource to link on.", + status_code=fastapi.status.HTTP_422_UNPROCESSABLE_ENTITY, + detail="Supplied bundle contains no Patient resource", ) - - # convert record to PII - pii_record: schemas.PIIRecord = fhir.fhir_record_to_pii_record(record_to_link) - - # link the record try: - # Make a copy of pii_record so we don't modify the original - (patient, person, results) = link.link_record_against_mpi( - record=pii_record, - session=db_session, - algorithm=algorithm, - external_person_id=external_id, - ) - return schemas.LinkResponse( - patient_reference_id=patient.reference_id, - person_reference_id=(person and person.reference_id), - results=[schemas.LinkResult(**r.__dict__) for r in results] - ) - + record: schemas.PIIRecord = fhir.fhir_record_to_pii_record(patient) except ValueError: - response.status_code = fastapi.status.HTTP_400_BAD_REQUEST raise fastapi.HTTPException( - status_code=400, - detail="Error: Bad request" - ) + status_code=fastapi.status.HTTP_422_UNPROCESSABLE_ENTITY, + detail="Invalid Patient resource", + ) + + (patient, person, results, prediction) = link.link_record_against_mpi( + record=record, + session=db_session, + algorithm=algorithm, + external_person_id=input.external_person_id, + ) + return schemas.LinkFhirResponse( + prediction=prediction, + patient_reference_id=patient.reference_id, + person_reference_id=(person and person.reference_id), + results=[schemas.LinkResult(**r.__dict__) for r in results], + updated_bundle=( + person + and fhir.add_person_resource(str(person.reference_id), record.external_id, input.bundle) + ), + ) diff --git a/src/recordlinker/routes/patient_router.py b/src/recordlinker/routes/patient_router.py index 8a9eb036..bddab830 100644 --- a/src/recordlinker/routes/patient_router.py +++ b/src/recordlinker/routes/patient_router.py @@ -58,7 +58,7 @@ def update_person( person = service.get_person_by_reference_id(session, data.person_reference_id) if person is None: - raise fastapi.HTTPException(status_code=fastapi.status.HTTP_400_BAD_REQUEST) + raise fastapi.HTTPException(status_code=fastapi.status.HTTP_422_UNPROCESSABLE_ENTITY) person = service.update_person_cluster(session, patient, person, commit=False) return schemas.PatientPersonRef( diff --git a/src/recordlinker/schemas/__init__.py b/src/recordlinker/schemas/__init__.py index dbebdafd..98270822 100644 --- a/src/recordlinker/schemas/__init__.py +++ b/src/recordlinker/schemas/__init__.py @@ -6,6 +6,7 @@ from .link import LinkInput from .link import LinkResponse from .link import LinkResult +from .link import Prediction from .mpi import 
PatientPersonRef
 from .mpi import PatientRef
 from .mpi import PersonRef
@@ -22,6 +23,7 @@
     "AlgorithmSummary",
     "Feature",
     "PIIRecord",
+    "Prediction",
     "LinkInput",
     "LinkResponse",
     "LinkResult",
diff --git a/src/recordlinker/schemas/link.py b/src/recordlinker/schemas/link.py
index e1da5142..329a74cd 100644
--- a/src/recordlinker/schemas/link.py
+++ b/src/recordlinker/schemas/link.py
@@ -12,6 +12,8 @@
 
 from recordlinker.schemas.pii import PIIRecord
 
+Prediction = typing.Literal["match", "possible_match", "no_match"]
+
 
 class LinkInput(pydantic.BaseModel):
     """
@@ -45,7 +47,6 @@ class LinkResult(pydantic.BaseModel):
         "between 0 and 1.0)."
     )
 
-
     @pydantic.model_validator(mode="before")
     @classmethod
     def extract_person_reference_id(cls, data: typing.Any) -> typing.Any:
@@ -63,35 +64,21 @@ class LinkResponse(pydantic.BaseModel):
     Schema for responses from the link endpoint.
     """
 
-
+    prediction: Prediction
     patient_reference_id: uuid.UUID = pydantic.Field(
         description="The unique identifier for the patient that has been linked."
     )
     person_reference_id: uuid.UUID | None = pydantic.Field(
         description="The identifier for the person that the patient record has been linked to."
-        " If prediction=\"possible_match\", this value will be null."
+        ' If prediction="possible_match", this value will be null.'
     )
     results: list[LinkResult] = pydantic.Field(
         description="A list of (possibly) matched Persons. If prediction='match', either the single"
-        "(include_multiple_matches=False) or multiple (include_multiple_matches=True) "
-        "Persons with which the Patient record matches. If prediction='possible_match',"
-        "all Persons with which the Patient record possibly matches."
+            " (include_multiple_matches=False) or multiple (include_multiple_matches=True) "
+            "Persons with which the Patient record matches. If prediction='possible_match', "
+            "all Persons with which the Patient record possibly matches."
     )
 
-    # mypy doesn't support decorators on properties; https://github.com/python/mypy/issues/1362
-    @pydantic.computed_field  # type: ignore[misc]
-    @property
-    def prediction(self) -> typing.Literal["match", "possible_match", "no_match"]:
-        """
-        Record Linkage algorithm prediction.
-        """
-        if self.person_reference_id and self.results:
-            return "match"
-        elif not self.results:
-            return "no_match"
-        else:
-            return "possible_match"
-
 
 class LinkFhirInput(pydantic.BaseModel):
     """
diff --git a/src/recordlinker/splunk.py b/src/recordlinker/splunk.py
index 7e0d2980..397dcf04 100644
--- a/src/recordlinker/splunk.py
+++ b/src/recordlinker/splunk.py
@@ -3,6 +3,7 @@
 import typing
 import urllib.parse
 import urllib.request
+import uuid
 
 TIMEOUT = 5
 
@@ -19,7 +20,7 @@ def __init__(self, splunk_uri: str) -> None:
         Create a new Splunk HEC client and test its connection. The URI uses a custom
         scheme to specify the Splunk HEC server and parameters. The URI format is:
-        splunkhec://<token>@<host>:<port>?index=<index>&proto=<proto>&ssl_verify=<ssl_verify>&source=<source>
+        splunkhec://<token>@<host>:<port>?index=<index>&proto=<proto>&source=<source>
         """
         try:
             uri: urllib.parse.ParseResult = urllib.parse.urlparse(splunk_uri)
@@ -34,7 +35,12 @@ def __init__(self, splunk_uri: str) -> None:
         self.url = f"{scheme}://{host}{self.PATH}"
         self.headers = {
             "Authorization": f"Splunk {uri.username}",
-            "Content-Type": "application/json",
+            "Content-type": "application/json",
+            # There is no intention of using HEC index acknowledgments to follow up on
+            # events; however, if the Splunk administrator has enabled this feature on the
+            # HEC token, just pass a random UUID back as the channel so the request is accepted.
+ # When index acknowledgments are disabled, this header is ignored. + "X-splunk-request-channel": str(uuid.uuid4()), } # initialize the default payload parameters self.params: dict[str, str] = {"host": uri.hostname or "", "sourcetype": "_json"} @@ -51,7 +57,7 @@ def _send_request(self, body: bytes | None = None): try: with urllib.request.urlopen(request, timeout=TIMEOUT) as response: # return the response status code - return response.getcode() + return response.status except urllib.error.HTTPError as exc: return exc.code diff --git a/tests/performance/scripts/send_linkage_requests.sh b/tests/performance/scripts/send_linkage_requests.sh index 5e1d7753..16961470 100755 --- a/tests/performance/scripts/send_linkage_requests.sh +++ b/tests/performance/scripts/send_linkage_requests.sh @@ -41,7 +41,7 @@ for ((i=1; i<=ITERATIONS; i++)); do # record the response to response.txt and capture the status code from STDOUT response=$(curl -s -o response.txt -w "%{http_code}" \ --header "Content-Type: application/json" --header "$simple_header" \ - -X POST -d "$payload" "${API_URL}/link/dibbs") + -X POST -d "$payload" "${API_URL}/link/fhir") status_code="${response: -3}" # parse the response to see if a MPI match was found match=$(jq '.found_match' response.txt) diff --git a/tests/unit/linking/test_link.py b/tests/unit/linking/test_link.py index aaf0cca8..aa3a9cd8 100644 --- a/tests/unit/linking/test_link.py +++ b/tests/unit/linking/test_link.py @@ -146,7 +146,7 @@ def test_basic_match_one(self, session, basic_algorithm, patients): matches: list[bool] = [] mapped_patients: dict[str, int] = collections.defaultdict(int) for data in patients[:2]: - (patient, person, results) = link.link_record_against_mpi(data, session, basic_algorithm) + (_, person, results, _) = link.link_record_against_mpi(data, session, basic_algorithm) matches.append(bool(person and results)) mapped_patients[person.reference_id] += 1 @@ -159,7 +159,7 @@ def test_basic_match_two(self, session, basic_algorithm, patients): matches: list[bool] = [] mapped_patients: dict[str, int] = collections.defaultdict(int) for data in patients: - (patient, person, results) = link.link_record_against_mpi(data, session, basic_algorithm) + (_, person, results, _) = link.link_record_against_mpi(data, session, basic_algorithm) matches.append(bool(person and results)) mapped_patients[person.reference_id] += 1 @@ -187,11 +187,12 @@ def test_basic_possible_match( for lower_bound in [0.5, 0.45]: # test >= lower bound basic_algorithm.belongingness_ratio_lower_bound = lower_bound for i, data in enumerate(possible_match_basic_patients): - (patient, person, results) = link.link_record_against_mpi(data, session, basic_algorithm) + (patient, person, results, prediction) = link.link_record_against_mpi(data, session, basic_algorithm) predictions[i] = { "patient": patient, "person": person, - "results": results + "results": results, + "prediction": prediction } # 1 Possible Match assert not predictions[2]["person"] @@ -199,6 +200,7 @@ def test_basic_possible_match( assert predictions[2]["results"][0].person == predictions[0]["person"] assert predictions[2]["results"][0].belongingness_ratio >= basic_algorithm.belongingness_ratio_lower_bound assert predictions[2]["results"][0].belongingness_ratio < basic_algorithm.belongingness_ratio_upper_bound + assert predictions[2]["prediction"] == "possible_match" def test_enhanced_match_three(self, session, enhanced_algorithm, patients: list[schemas.PIIRecord]): @@ -210,7 +212,7 @@ def test_enhanced_match_three(self, session, 
enhanced_algorithm, patients: list[ matches: list[bool] = [] mapped_patients: dict[str, int] = collections.defaultdict(int) for data in patients: - (patient, person, results) = link.link_record_against_mpi(data, session, enhanced_algorithm) + (_, person, results, _) = link.link_record_against_mpi(data, session, enhanced_algorithm) matches.append(bool(person and results)) mapped_patients[person.reference_id] += 1 @@ -238,11 +240,12 @@ def test_enhanced_possible_match( for lower_bound in [0.5, 0.45]: # test >= lower bound enhanced_algorithm.belongingness_ratio_lower_bound = lower_bound for i, data in enumerate(possible_match_enhanced_patients): - (patient, person, results) = link.link_record_against_mpi(data, session, enhanced_algorithm) + (patient, person, results, prediction) = link.link_record_against_mpi(data, session, enhanced_algorithm) predictions[i] = { "patient": patient, "person": person, - "results": results + "results": results, + "prediction": prediction } # 1 Possible Match assert not predictions[2]["person"] @@ -250,6 +253,7 @@ def test_enhanced_possible_match( assert predictions[2]["results"][0].person == predictions[0]["person"] assert predictions[2]["results"][0].belongingness_ratio >= enhanced_algorithm.belongingness_ratio_lower_bound assert predictions[2]["results"][0].belongingness_ratio < enhanced_algorithm.belongingness_ratio_upper_bound + assert predictions[2]["prediction"] == "possible_match" def test_include_multiple_matches_true( @@ -264,17 +268,19 @@ def test_include_multiple_matches_true( for upper_bound in [0.5, 0.45]: # test >= upper bound basic_algorithm.belongingness_ratio_upper_bound = upper_bound for i, data in enumerate(multiple_matches_patients): - (patient, person, results) = link.link_record_against_mpi(data, session, basic_algorithm) + (patient, person, results, prediction) = link.link_record_against_mpi(data, session, basic_algorithm) predictions[i] = { "patient": patient, "person": person, - "results": results + "results": results, + "prediction": prediction } # 2 Matches assert len(predictions[3]["results"]) == 2 assert predictions[3]["person"] == predictions[1]["person"] # Assign to Person with highest Belongingness Ratio (1.0) for match in predictions[2]["results"]: assert match.belongingness_ratio >= basic_algorithm.belongingness_ratio_upper_bound + assert predictions[3]["prediction"] == "match" def test_include_multiple_matches_false( @@ -290,13 +296,15 @@ def test_include_multiple_matches_false( for upper_bound in [0.5, 0.45]: # test >= upper bound basic_algorithm.belongingness_ratio_upper_bound = upper_bound for i, data in enumerate(multiple_matches_patients): - (patient, person, results) = link.link_record_against_mpi(data, session, basic_algorithm) + (patient, person, results, prediction) = link.link_record_against_mpi(data, session, basic_algorithm) predictions[i] = { "patient": patient, "person": person, - "results": results + "results": results, + "prediction": prediction } # 2 Matches, but only include 1 assert len(predictions[3]["results"]) == 1 assert predictions[3]["person"] == predictions[1]["person"] # Assign to Person with highest Belongingness Ratio (1.0) assert predictions[3]["results"][0].belongingness_ratio >= basic_algorithm.belongingness_ratio_upper_bound + assert predictions[3]["prediction"] == "match" diff --git a/tests/unit/routes/test_link_router.py b/tests/unit/routes/test_link_router.py index b5dbfc19..c656383d 100644 --- a/tests/unit/routes/test_link_router.py +++ b/tests/unit/routes/test_link_router.py @@ -18,222 
+18,6 @@ from recordlinker.hl7 import fhir -class TestLinkDIBBS: - @mock.patch("recordlinker.database.algorithm_service.default_algorithm") - def test_bundle_with_no_patient(self, patched_subprocess, basic_algorithm, client): - patched_subprocess.return_value = basic_algorithm - bad_bundle = {"entry": []} - expected_response = { - "detail": "Supplied bundle contains no Patient resource to link on.", - } - actual_response = client.post( - "/link/dibbs", - json={"bundle": bad_bundle}, - ) - assert actual_response.json() == expected_response - assert actual_response.status_code == status.HTTP_400_BAD_REQUEST - - @mock.patch("recordlinker.database.algorithm_service.default_algorithm") - def test_success(self, patched_subprocess, basic_algorithm, client): - patched_subprocess.return_value = basic_algorithm - test_bundle = load_test_json_asset("patient_bundle_to_link_with_mpi.json") - entry_list = copy.deepcopy(test_bundle["entry"]) - - bundle_1 = test_bundle - bundle_1["entry"] = [entry_list[0]] - resp_1 = client.post("/link/dibbs", json={"bundle": bundle_1}) - new_bundle = resp_1.json()["updated_bundle"] - person_1 = [ - r.get("resource") - for r in new_bundle["entry"] - if r.get("resource").get("resourceType") == "Person" - ][0] - assert resp_1.json()["patient_reference_id"] and uuid.UUID(resp_1.json()["patient_reference_id"]) - assert resp_1.json()["person_reference_id"] == person_1.get("id") - assert resp_1.json()["prediction"] == "no_match" - assert not resp_1.json()["results"] - - bundle_2 = test_bundle - bundle_2["entry"] = [entry_list[1]] - resp_2 = client.post("/link/dibbs", json={"bundle": bundle_2}) - new_bundle = resp_2.json()["updated_bundle"] - person_2 = [ - r.get("resource") - for r in new_bundle["entry"] - if r.get("resource").get("resourceType") == "Person" - ][0] - assert resp_2.json()["patient_reference_id"] and uuid.UUID(resp_2.json()["patient_reference_id"]) - assert resp_2.json()["person_reference_id"] == person_1.get("id") - assert person_2.get("id") == person_1.get("id") - assert resp_2.json()["prediction"] == "match" - assert len(resp_2.json()["results"]) == 1 - - bundle_3 = test_bundle - bundle_3["entry"] = [entry_list[2]] - resp_3 = client.post("/link/dibbs", json={"bundle": bundle_3}) - new_bundle = resp_3.json()["updated_bundle"] - person_3 = [ - r.get("resource") - for r in new_bundle["entry"] - if r.get("resource").get("resourceType") == "Person" - ][0] - assert resp_3.json()["patient_reference_id"] and uuid.UUID(resp_3.json()["patient_reference_id"]) - assert resp_3.json()["person_reference_id"] == person_3.get("id") - assert resp_3.json()["prediction"] == "no_match" - assert not resp_3.json()["results"] - - # Cluster membership success--justified match - bundle_4 = test_bundle - bundle_4["entry"] = [entry_list[3]] - resp_4 = client.post("/link/dibbs", json={"bundle": bundle_4}) - new_bundle = resp_4.json()["updated_bundle"] - person_4 = [ - r.get("resource") - for r in new_bundle["entry"] - if r.get("resource").get("resourceType") == "Person" - ][0] - assert resp_4.json()["patient_reference_id"] and uuid.UUID(resp_4.json()["patient_reference_id"]) - assert resp_4.json()["person_reference_id"] == person_4.get("id") - assert person_4.get("id") == person_1.get("id") - assert resp_4.json()["prediction"] == "match" - assert len(resp_4.json()["results"]) == 1 - - bundle_5 = test_bundle - bundle_5["entry"] = [entry_list[4]] - resp_5 = client.post("/link/dibbs", json={"bundle": bundle_5}) - new_bundle = resp_5.json()["updated_bundle"] - person_5 = [ - r.get("resource") 
- for r in new_bundle["entry"] - if r.get("resource").get("resourceType") == "Person" - ][0] - assert resp_5.json()["patient_reference_id"] and uuid.UUID(resp_5.json()["patient_reference_id"]) - assert resp_5.json()["person_reference_id"] == person_5.get("id") - assert resp_5.json()["prediction"] == "no_match" - assert not resp_5.json()["results"] - - bundle_6 = test_bundle - bundle_6["entry"] = [entry_list[5]] - resp_6 = client.post("/link/dibbs", json={"bundle": bundle_6}) - new_bundle = resp_6.json()["updated_bundle"] - person_6 = [ - r.get("resource") - for r in new_bundle["entry"] - if r.get("resource").get("resourceType") == "Person" - ][0] - assert resp_6.json()["patient_reference_id"] and uuid.UUID(resp_6.json()["patient_reference_id"]) - assert resp_6.json()["person_reference_id"] == person_6.get("id") - assert resp_6.json()["prediction"] == "no_match" - assert not resp_6.json()["results"] - - @mock.patch("recordlinker.database.algorithm_service.get_algorithm") - def test_enhanced_algo(self, patched_subprocess, enhanced_algorithm, client): - patched_subprocess.return_value = enhanced_algorithm - test_bundle = load_test_json_asset("patient_bundle_to_link_with_mpi.json") - entry_list = copy.deepcopy(test_bundle["entry"]) - - bundle_1 = test_bundle - bundle_1["entry"] = [entry_list[0]] - resp_1 = client.post("/link/dibbs", json={"bundle": bundle_1, "algorithm": "dibbs-enhanced"}) - new_bundle = resp_1.json()["updated_bundle"] - person_1 = [ - r.get("resource") - for r in new_bundle["entry"] - if r.get("resource").get("resourceType") == "Person" - ][0] - assert resp_1.json()["patient_reference_id"] and uuid.UUID(resp_1.json()["patient_reference_id"]) - assert resp_1.json()["person_reference_id"] == person_1.get("id") - assert resp_1.json()["prediction"] == "no_match" - assert not resp_1.json()["results"] - - bundle_2 = test_bundle - bundle_2["entry"] = [entry_list[1]] - resp_2 = client.post("/link/dibbs", json={"bundle": bundle_2, "algorithm": "dibbs-enhanced"}) - new_bundle = resp_2.json()["updated_bundle"] - person_2 = [ - r.get("resource") - for r in new_bundle["entry"] - if r.get("resource").get("resourceType") == "Person" - ][0] - assert resp_2.json()["patient_reference_id"] and uuid.UUID(resp_2.json()["patient_reference_id"]) - assert resp_2.json()["person_reference_id"] == person_2.get("id") - assert person_2.get("id") == person_1.get("id") - assert resp_2.json()["prediction"] == "match" - assert len(resp_2.json()["results"]) == 1 - - bundle_3 = test_bundle - bundle_3["entry"] = [entry_list[2]] - resp_3 = client.post("/link/dibbs", json={"bundle": bundle_3, "algorithm": "dibbs-enhanced"}) - new_bundle = resp_3.json()["updated_bundle"] - person_3 = [ - r.get("resource") - for r in new_bundle["entry"] - if r.get("resource").get("resourceType") == "Person" - ][0] - assert resp_3.json()["patient_reference_id"] and uuid.UUID(resp_3.json()["patient_reference_id"]) - assert resp_3.json()["person_reference_id"] == person_3.get("id") - assert resp_3.json()["prediction"] == "no_match" - assert not resp_3.json()["results"] - - bundle_4 = test_bundle - bundle_4["entry"] = [entry_list[3]] - resp_4 = client.post("/link/dibbs", json={"bundle": bundle_4, "algorithm": "dibbs-enhanced"}) - new_bundle = resp_4.json()["updated_bundle"] - person_4 = [ - r.get("resource") - for r in new_bundle["entry"] - if r.get("resource").get("resourceType") == "Person" - ][0] - assert resp_4.json()["patient_reference_id"] and uuid.UUID(resp_4.json()["patient_reference_id"]) - assert 
resp_4.json()["person_reference_id"] == person_1.get("id") - assert person_4.get("id") == person_1.get("id") - assert resp_4.json()["prediction"] == "match" - assert len(resp_4.json()["results"]) == 1 - - bundle_5 = test_bundle - bundle_5["entry"] = [entry_list[4]] - resp_5 = client.post("/link/dibbs", json={"bundle": bundle_5, "algorithm": "dibbs-enhanced"}) - new_bundle = resp_5.json()["updated_bundle"] - person_5 = [ - r.get("resource") - for r in new_bundle["entry"] - if r.get("resource").get("resourceType") == "Person" - ][0] - assert resp_5.json()["patient_reference_id"] and uuid.UUID(resp_5.json()["patient_reference_id"]) - assert resp_5.json()["person_reference_id"] == person_5.get("id") - assert resp_5.json()["prediction"] == "no_match" - assert not resp_5.json()["results"] - - bundle_6 = test_bundle - bundle_6["entry"] = [entry_list[5]] - resp_6 = client.post("/link/dibbs", json={"bundle": bundle_6, "algorithm": "dibbs-enhanced"}) - new_bundle = resp_6.json()["updated_bundle"] - person_6 = [ - r.get("resource") - for r in new_bundle["entry"] - if r.get("resource").get("resourceType") == "Person" - ][0] - assert resp_6.json()["patient_reference_id"] and uuid.UUID(resp_6.json()["patient_reference_id"]) - assert resp_6.json()["person_reference_id"] == person_6.get("id") - assert resp_6.json()["prediction"] == "no_match" - assert not resp_6.json()["results"] - - @mock.patch("recordlinker.database.algorithm_service.get_algorithm") - def test_invalid_algorithm_param(self, patched_subprocess, client): - patched_subprocess.return_value = None - test_bundle = load_test_json_asset("patient_bundle_to_link_with_mpi.json") - expected_response = { - "detail": "Error: Invalid algorithm specified", - } - - actual_response = client.post( - "/link/dibbs", json={"bundle": test_bundle, "algorithm": "INVALID"} - ) - - assert actual_response.json() == expected_response - assert actual_response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY - - class TestLink: @pytest.fixture def patients(self): @@ -399,7 +183,7 @@ def test_link_invalid_algorithm_param(self, patched_subprocess, patients, client ) assert actual_response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY - assert actual_response.json()["detail"] == "Error: No algorithm found" + assert actual_response.json()["detail"] == "No algorithm found" @mock.patch("recordlinker.database.algorithm_service.default_algorithm") def test_link_no_default_algorithm(self, patched_subprocess, patients, client): @@ -413,177 +197,234 @@ def test_link_no_default_algorithm(self, patched_subprocess, patients, client): ) assert actual_response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY - assert actual_response.json()["detail"] == "Error: No algorithm found" + assert actual_response.json()["detail"] == "No algorithm found" + class TestLinkFHIR: @mock.patch("recordlinker.database.algorithm_service.default_algorithm") - def test_linkrecord_bundle_with_no_patient(self, patched_subprocess, basic_algorithm, client): + def test_bundle_with_no_patient(self, patched_subprocess, basic_algorithm, client): patched_subprocess.return_value = basic_algorithm bad_bundle = {"entry": []} + expected_response = { + "detail": "Supplied bundle contains no Patient resource", + } actual_response = client.post( "/link/fhir", json={"bundle": bad_bundle}, ) + assert actual_response.json() == expected_response + assert actual_response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY - assert actual_response.status_code == status.HTTP_400_BAD_REQUEST - assert 
actual_response.json()["detail"] == "Error: Supplied bundle contains no Patient resource to link on." + @mock.patch("recordlinker.database.algorithm_service.default_algorithm") + def test_invalid_bundle(self, patched_subprocess, basic_algorithm, client): + patched_subprocess.return_value = basic_algorithm + bad_bundle = {"entry": [{"resource": {"resourceType": "Patient", "name": "John Doe"}}]} + expected_response = { + "detail": "Invalid Patient resource", + } + actual_response = client.post( + "/link/fhir", + json={"bundle": bad_bundle}, + ) + assert actual_response.json() == expected_response + assert actual_response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY @mock.patch("recordlinker.database.algorithm_service.default_algorithm") - def test_link_success(self, patched_subprocess, basic_algorithm, client): + def test_success(self, patched_subprocess, basic_algorithm, client): patched_subprocess.return_value = basic_algorithm test_bundle = load_test_json_asset("patient_bundle_to_link_with_mpi.json") entry_list = copy.deepcopy(test_bundle["entry"]) bundle_1 = test_bundle bundle_1["entry"] = [entry_list[0]] - response_1 = client.post("/link/fhir", json={"bundle": bundle_1}) - person_1 = response_1.json()["person_reference_id"] - assert response_1.json()["patient_reference_id"] and uuid.UUID(response_1.json()["patient_reference_id"]) - assert person_1 - assert response_1.json()["prediction"] == "no_match" - assert not response_1.json()["results"] + resp_1 = client.post("/link/fhir", json={"bundle": bundle_1}) + new_bundle = resp_1.json()["updated_bundle"] + person_1 = [ + r.get("resource") + for r in new_bundle["entry"] + if r.get("resource").get("resourceType") == "Person" + ][0] + assert resp_1.json()["patient_reference_id"] and uuid.UUID(resp_1.json()["patient_reference_id"]) + assert resp_1.json()["person_reference_id"] == person_1.get("id") + assert resp_1.json()["prediction"] == "no_match" + assert not resp_1.json()["results"] bundle_2 = test_bundle bundle_2["entry"] = [entry_list[1]] - response_2 = client.post("/link/fhir", json={"bundle": bundle_2}) - person_2 = response_2.json()["person_reference_id"] - assert response_2.json()["patient_reference_id"] and uuid.UUID(response_2.json()["patient_reference_id"]) - assert person_2 == person_1 - assert response_2.json()["prediction"] == "match" - assert len(response_2.json()["results"]) == 1 + resp_2 = client.post("/link/fhir", json={"bundle": bundle_2}) + new_bundle = resp_2.json()["updated_bundle"] + person_2 = [ + r.get("resource") + for r in new_bundle["entry"] + if r.get("resource").get("resourceType") == "Person" + ][0] + assert resp_2.json()["patient_reference_id"] and uuid.UUID(resp_2.json()["patient_reference_id"]) + assert resp_2.json()["person_reference_id"] == person_1.get("id") + assert person_2.get("id") == person_1.get("id") + assert resp_2.json()["prediction"] == "match" + assert len(resp_2.json()["results"]) == 1 bundle_3 = test_bundle bundle_3["entry"] = [entry_list[2]] - response_3 = client.post("/link/fhir", json={"bundle": bundle_3}) - person_3 = response_3.json()["person_reference_id"] - assert response_3.json()["patient_reference_id"] and uuid.UUID(response_3.json()["patient_reference_id"]) - assert person_3 - assert response_3.json()["prediction"] == "no_match" - assert not response_3.json()["results"] + resp_3 = client.post("/link/fhir", json={"bundle": bundle_3}) + new_bundle = resp_3.json()["updated_bundle"] + person_3 = [ + r.get("resource") + for r in new_bundle["entry"] + if 
r.get("resource").get("resourceType") == "Person" + ][0] + assert resp_3.json()["patient_reference_id"] and uuid.UUID(resp_3.json()["patient_reference_id"]) + assert resp_3.json()["person_reference_id"] == person_3.get("id") + assert resp_3.json()["prediction"] == "no_match" + assert not resp_3.json()["results"] # Cluster membership success--justified match bundle_4 = test_bundle bundle_4["entry"] = [entry_list[3]] - response_4 = client.post("/link/fhir", json={"bundle": bundle_4}) - person_4 = response_4.json()["person_reference_id"] - assert response_4.json()["patient_reference_id"] and uuid.UUID(response_4.json()["patient_reference_id"]) - assert person_4 == person_1 - assert response_4.json()["prediction"] == "match" - assert len(response_4.json()["results"]) == 1 + resp_4 = client.post("/link/fhir", json={"bundle": bundle_4}) + new_bundle = resp_4.json()["updated_bundle"] + person_4 = [ + r.get("resource") + for r in new_bundle["entry"] + if r.get("resource").get("resourceType") == "Person" + ][0] + assert resp_4.json()["patient_reference_id"] and uuid.UUID(resp_4.json()["patient_reference_id"]) + assert resp_4.json()["person_reference_id"] == person_4.get("id") + assert person_4.get("id") == person_1.get("id") + assert resp_4.json()["prediction"] == "match" + assert len(resp_4.json()["results"]) == 1 bundle_5 = test_bundle bundle_5["entry"] = [entry_list[4]] - response_5 = client.post("/link/fhir", json={"bundle": bundle_5}) - person_5 = response_5.json()["person_reference_id"] - assert response_5.json()["patient_reference_id"] and uuid.UUID(response_5.json()["patient_reference_id"]) - assert person_5 - assert response_5.json()["prediction"] == "no_match" - assert not response_5.json()["results"] + resp_5 = client.post("/link/fhir", json={"bundle": bundle_5}) + new_bundle = resp_5.json()["updated_bundle"] + person_5 = [ + r.get("resource") + for r in new_bundle["entry"] + if r.get("resource").get("resourceType") == "Person" + ][0] + assert resp_5.json()["patient_reference_id"] and uuid.UUID(resp_5.json()["patient_reference_id"]) + assert resp_5.json()["person_reference_id"] == person_5.get("id") + assert resp_5.json()["prediction"] == "no_match" + assert not resp_5.json()["results"] bundle_6 = test_bundle bundle_6["entry"] = [entry_list[5]] - response_6 = client.post("/link/fhir", json={"bundle": bundle_6}) - person_6 = response_6.json()["person_reference_id"] - assert response_6.json()["patient_reference_id"] and uuid.UUID(response_6.json()["patient_reference_id"]) - assert person_6 - assert response_6.json()["prediction"] == "no_match" - assert not response_6.json()["results"] - + resp_6 = client.post("/link/fhir", json={"bundle": bundle_6}) + new_bundle = resp_6.json()["updated_bundle"] + person_6 = [ + r.get("resource") + for r in new_bundle["entry"] + if r.get("resource").get("resourceType") == "Person" + ][0] + assert resp_6.json()["patient_reference_id"] and uuid.UUID(resp_6.json()["patient_reference_id"]) + assert resp_6.json()["person_reference_id"] == person_6.get("id") + assert resp_6.json()["prediction"] == "no_match" + assert not resp_6.json()["results"] + @mock.patch("recordlinker.database.algorithm_service.get_algorithm") - def test_link_enhanced_algorithm( - self, patched_subprocess, enhanced_algorithm, client - ): + def test_enhanced_algo(self, patched_subprocess, enhanced_algorithm, client): patched_subprocess.return_value = enhanced_algorithm test_bundle = load_test_json_asset("patient_bundle_to_link_with_mpi.json") entry_list = copy.deepcopy(test_bundle["entry"]) 
bundle_1 = test_bundle bundle_1["entry"] = [entry_list[0]] - response_1 = client.post( - "/link/fhir", json={"bundle": bundle_1, "algorithm": "dibbs-enhanced"} - ) - person_1 = response_1.json()["person_reference_id"] - assert response_1.json()["patient_reference_id"] and uuid.UUID(response_1.json()["patient_reference_id"]) - assert person_1 - assert response_1.json()["prediction"] == "no_match" - assert not response_1.json()["results"] + resp_1 = client.post("/link/fhir", json={"bundle": bundle_1, "algorithm": "dibbs-enhanced"}) + new_bundle = resp_1.json()["updated_bundle"] + person_1 = [ + r.get("resource") + for r in new_bundle["entry"] + if r.get("resource").get("resourceType") == "Person" + ][0] + assert resp_1.json()["patient_reference_id"] and uuid.UUID(resp_1.json()["patient_reference_id"]) + assert resp_1.json()["person_reference_id"] == person_1.get("id") + assert resp_1.json()["prediction"] == "no_match" + assert not resp_1.json()["results"] bundle_2 = test_bundle bundle_2["entry"] = [entry_list[1]] - response_2 = client.post( - "/link/fhir", json={"bundle": bundle_2, "algorithm": "dibbs-enhanced"} - ) - person_2 = response_2.json()["person_reference_id"] - assert response_2.json()["patient_reference_id"] and uuid.UUID(response_2.json()["patient_reference_id"]) - assert person_2 == person_1 - assert response_2.json()["prediction"] == "match" - assert len(response_2.json()["results"]) == 1 + resp_2 = client.post("/link/fhir", json={"bundle": bundle_2, "algorithm": "dibbs-enhanced"}) + new_bundle = resp_2.json()["updated_bundle"] + person_2 = [ + r.get("resource") + for r in new_bundle["entry"] + if r.get("resource").get("resourceType") == "Person" + ][0] + assert resp_2.json()["patient_reference_id"] and uuid.UUID(resp_2.json()["patient_reference_id"]) + assert resp_2.json()["person_reference_id"] == person_2.get("id") + assert person_2.get("id") == person_1.get("id") + assert resp_2.json()["prediction"] == "match" + assert len(resp_2.json()["results"]) == 1 bundle_3 = test_bundle bundle_3["entry"] = [entry_list[2]] - response_3 = client.post( - "/link/fhir", json={"bundle": bundle_3, "algorithm": "dibbs-enhanced"} - ) - person_3 = response_3.json()["person_reference_id"] - assert response_3.json()["patient_reference_id"] and uuid.UUID(response_3.json()["patient_reference_id"]) - assert person_3 - assert response_3.json()["prediction"] == "no_match" - assert not response_3.json()["results"] + resp_3 = client.post("/link/fhir", json={"bundle": bundle_3, "algorithm": "dibbs-enhanced"}) + new_bundle = resp_3.json()["updated_bundle"] + person_3 = [ + r.get("resource") + for r in new_bundle["entry"] + if r.get("resource").get("resourceType") == "Person" + ][0] + assert resp_3.json()["patient_reference_id"] and uuid.UUID(resp_3.json()["patient_reference_id"]) + assert resp_3.json()["person_reference_id"] == person_3.get("id") + assert resp_3.json()["prediction"] == "no_match" + assert not resp_3.json()["results"] - # Cluster membership success--justified match bundle_4 = test_bundle bundle_4["entry"] = [entry_list[3]] - response_4 = client.post( - "/link/fhir", json={"bundle": bundle_4, "algorithm": "dibbs-enhanced"} - ) - person_4 = response_4.json()["person_reference_id"] - assert response_4.json()["patient_reference_id"] and uuid.UUID(response_4.json()["patient_reference_id"]) - assert person_4 == person_1 - assert response_4.json()["prediction"] == "match" - assert len(response_4.json()["results"]) == 1 + resp_4 = client.post("/link/fhir", json={"bundle": bundle_4, "algorithm": 
"dibbs-enhanced"}) + new_bundle = resp_4.json()["updated_bundle"] + person_4 = [ + r.get("resource") + for r in new_bundle["entry"] + if r.get("resource").get("resourceType") == "Person" + ][0] + assert resp_4.json()["patient_reference_id"] and uuid.UUID(resp_4.json()["patient_reference_id"]) + assert resp_4.json()["person_reference_id"] == person_1.get("id") + assert person_4.get("id") == person_1.get("id") + assert resp_4.json()["prediction"] == "match" + assert len(resp_4.json()["results"]) == 1 bundle_5 = test_bundle bundle_5["entry"] = [entry_list[4]] - response_5 = client.post( - "/link/fhir", json={"bundle": bundle_5, "algorithm": "dibbs-enhanced"} - ) - person_5 = response_5.json()["person_reference_id"] - assert response_5.json()["patient_reference_id"] and uuid.UUID(response_5.json()["patient_reference_id"]) - assert person_5 - assert response_5.json()["prediction"] == "no_match" - assert not response_5.json()["results"] + resp_5 = client.post("/link/fhir", json={"bundle": bundle_5, "algorithm": "dibbs-enhanced"}) + new_bundle = resp_5.json()["updated_bundle"] + person_5 = [ + r.get("resource") + for r in new_bundle["entry"] + if r.get("resource").get("resourceType") == "Person" + ][0] + assert resp_5.json()["patient_reference_id"] and uuid.UUID(resp_5.json()["patient_reference_id"]) + assert resp_5.json()["person_reference_id"] == person_5.get("id") + assert resp_5.json()["prediction"] == "no_match" + assert not resp_5.json()["results"] bundle_6 = test_bundle bundle_6["entry"] = [entry_list[5]] - response_6 = client.post( - "/link/fhir", json={"bundle": bundle_6, "algorithm": "dibbs-enhanced"} - ) - person_6 = response_6.json()["person_reference_id"] - assert response_6.json()["patient_reference_id"] and uuid.UUID(response_6.json()["patient_reference_id"]) - assert person_6 - assert response_6.json()["prediction"] == "no_match" - assert not response_6.json()["results"] - - @mock.patch("recordlinker.database.algorithm_service.get_algorithm") - def test_linkrecord_invalid_algorithm_param(self, patched_subprocess, client): - patched_subprocess.return_value = None - test_bundle = load_test_json_asset("patient_bundle_to_link_with_mpi.json") - - actual_response = client.post( - "/link/fhir", json={"bundle": test_bundle, "algorithm": "INVALID"} - ) - - assert actual_response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY - assert actual_response.json()["detail"] == "Error: No algorithm found" + resp_6 = client.post("/link/fhir", json={"bundle": bundle_6, "algorithm": "dibbs-enhanced"}) + new_bundle = resp_6.json()["updated_bundle"] + person_6 = [ + r.get("resource") + for r in new_bundle["entry"] + if r.get("resource").get("resourceType") == "Person" + ][0] + assert resp_6.json()["patient_reference_id"] and uuid.UUID(resp_6.json()["patient_reference_id"]) + assert resp_6.json()["person_reference_id"] == person_6.get("id") + assert resp_6.json()["prediction"] == "no_match" + assert not resp_6.json()["results"] - @mock.patch("recordlinker.database.algorithm_service.default_algorithm") - def test_linkrecord_no_default_algorithm(self, patched_subprocess, client): + @mock.patch("recordlinker.database.algorithm_service.get_algorithm") + def test_invalid_algorithm_param(self, patched_subprocess, client): patched_subprocess.return_value = None test_bundle = load_test_json_asset("patient_bundle_to_link_with_mpi.json") + expected_response = { + "detail": "No algorithm found", + } actual_response = client.post( "/link/fhir", json={"bundle": test_bundle, "algorithm": "INVALID"} ) + assert 
actual_response.json() == expected_response assert actual_response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY - assert actual_response.json()["detail"] == "Error: No algorithm found" diff --git a/tests/unit/routes/test_patient_router.py b/tests/unit/routes/test_patient_router.py index 4e0830af..572caf0f 100644 --- a/tests/unit/routes/test_patient_router.py +++ b/tests/unit/routes/test_patient_router.py @@ -48,7 +48,7 @@ def test_invalid_person(self, client): data = {"person_reference_id": str(uuid.uuid4())} response = client.patch(f"/patient/{patient.reference_id}/person", json=data) - assert response.status_code == 400 + assert response.status_code == 422 def test_update_person(self, client): original_person = models.Person() diff --git a/tests/unit/test_splunk.py b/tests/unit/test_splunk.py index 66d6ba9f..61e36943 100644 --- a/tests/unit/test_splunk.py +++ b/tests/unit/test_splunk.py @@ -12,45 +12,38 @@ def test_invalid_uri(self): def test_valid_uri(self): with unittest.mock.patch("urllib.request.urlopen") as mock_urlopen: - mock_response = unittest.mock.MagicMock() - mock_response.read.return_value = b"{}" - mock_response.getcode.return_value = 400 # Set getcode() to return 400 - mock_urlopen.return_value.__enter__.return_value = mock_response + mresp = unittest.mock.MagicMock(status=400) + mock_urlopen.return_value.__enter__.return_value = mresp client = splunk.SplunkHECClient("splunkhec://token@localhost:8088?index=idx&source=src") assert client.url == "https://localhost:8088/services/collector/event" - assert client.headers == { - "Authorization": "Splunk token", - "Content-Type": "application/json", - } + assert client.headers["Authorization"] == "Splunk token" + assert client.headers["Content-type"] == "application/json" + assert len(client.headers["X-splunk-request-channel"]) == 36 assert client.params == {"host": "localhost", "sourcetype": "_json", "index": "idx", "source": "src"} def test_valid_uri_no_port(self): with unittest.mock.patch("urllib.request.urlopen") as mock_urlopen: - mock_response = unittest.mock.MagicMock() - mock_response.read.return_value = b"{}" - mock_response.getcode.return_value = 400 # Set getcode() to return 400 - mock_urlopen.return_value.__enter__.return_value = mock_response + mresp1 = unittest.mock.MagicMock(status=400) + mresp2 = unittest.mock.MagicMock(status=200) + mock_urlopen.return_value.__enter__.side_effect = (mresp1, mresp2) client = splunk.SplunkHECClient("splunkhec://token@localhost?index=idx&source=src") assert client.url == "https://localhost/services/collector/event" - assert client.headers == { - "Authorization": "Splunk token", - "Content-Type": "application/json", - } + assert client.headers["Authorization"] == "Splunk token" + assert client.headers["Content-type"] == "application/json" + assert len(client.headers["X-splunk-request-channel"]) == 36 assert client.params == {"host": "localhost", "sourcetype": "_json", "index": "idx", "source": "src"} def test_send(self): with unittest.mock.patch("urllib.request.urlopen") as mock_urlopen: - mock_response = unittest.mock.MagicMock() - mock_response.read.return_value = b"{}" - mock_response.getcode.side_effect = [400, 200] # Set getcode() to return 400 - mock_urlopen.return_value.__enter__.return_value = mock_response + mresp1 = unittest.mock.MagicMock(status=400) + mresp2 = unittest.mock.MagicMock(status=200) + mock_urlopen.return_value.__enter__.side_effect = (mresp1, mresp2) client = splunk.SplunkHECClient("splunkhec://token@localhost?index=idx&source=src") assert 
client.send({"key": "value"}, epoch=10.5) == 200 req = mock_urlopen.call_args[0][0] assert req.method == "POST" assert req.get_full_url() == "https://localhost/services/collector/event" - assert req.headers == { - "Authorization": "Splunk token", - "Content-type": "application/json", - } + assert client.headers["Authorization"] == "Splunk token" + assert client.headers["Content-type"] == "application/json" + assert len(client.headers["X-splunk-request-channel"]) == 36 assert req.data == b'{"time": 10.5, "event": {"key": "value"}, "host": "localhost", "sourcetype": "_json", "index": "idx", "source": "src"}'