From f89158dac9b151fb900800d3d5ec7e1830aee350 Mon Sep 17 00:00:00 2001
From: Michael Chouinard <46358556+chouinar@users.noreply.github.com>
Date: Fri, 2 Aug 2024 11:29:42 -0400
Subject: [PATCH] [Issue #104] Refactor/restructure the transformation code (#112)

## Summary
Fixes #104

### Time to review: __10 mins__

## Changes proposed
Restructured the transformation code:
- Split each chunk of the transformation logic into a separate "Subtask" class
- Made a constants file
- Pulled a few duplicated pieces of implementation into shared functions on the base class the subtasks derive from

Some additional logging.

Created a Subtask class for breaking a task up into multiple steps for organizational reasons.

Added configuration to the transformation step for enabling/disabling different parts of the process (not used yet, but it lets us build things out without worrying about breaking non-local environments).

## Context for reviewers
This looks far larger than it actually is; most of the individual changes are very small. I made all of the changes without adjusting the tests (outside of a few small bits of cleanup) and then refactored the tests as well.

This does not aim to change the meaningful behavior of the transformation logic, but instead to make it much easier to follow. Adding a new transformation is now conceptually simpler: it's a matter of adding another subtask rather than extending the sprawling set of functions this used to be.

There are a few small logging/metric-related changes from the Subtask class, just so we can get very granular metrics of how long each part of the task takes.

## Additional information
I ran locally with a full snapshot of the production data and didn't see anything of note different from prior runs. Still takes ~10 minutes.

---------

Co-authored-by: nava-platform-bot
---
 .../data_migration/transformation/__init__.py |   39 -
 .../transformation/subtask/__init__.py        |    0
 .../subtask/abstract_transform_subtask.py     |  180 ++
 .../subtask/transform_applicant_type.py       |  189 ++
 .../subtask/transform_assistance_listing.py   |  113 +
 .../subtask/transform_funding_category.py     |  196 ++
 .../subtask/transform_funding_instrument.py   |  199 ++
 .../subtask/transform_opportunity.py          |   81 +
 .../subtask/transform_opportunity_summary.py  |  145 ++
 .../transformation/transform_constants.py     |   75 +
 .../transform_oracle_data_task.py             |  949 +------
 .../transformation/transform_util.py          |   85 +-
 api/src/db/models/staging/forecast.py         |   16 +
 api/src/db/models/staging/synopsis.py         |   24 +
 api/src/task/subtask.py                       |   69 +
 .../data_migration/transformation/conftest.py |  659 +++++
 .../transformation/subtask/__init__.py        |    0
 .../subtask/test_transform_applicant_type.py  |  395 +++
 .../test_transform_assistance_listing.py      |  157 ++
 .../test_transform_funding_category.py        |  374 +++
 .../test_transform_funding_instrument.py      |  298 +++
 .../subtask/test_transform_opportunity.py     |  110 +
 .../test_transform_opportunity_summary.py     |  280 +++
 .../test_transform_oracle_data_task.py        | 2199 +----------------
 .../transformation/test_transform_util.py     |    2 +-
 25 files changed, 3665 insertions(+), 3169 deletions(-)
 create mode 100644 api/src/data_migration/transformation/subtask/__init__.py
 create mode 100644 api/src/data_migration/transformation/subtask/abstract_transform_subtask.py
 create mode 100644 api/src/data_migration/transformation/subtask/transform_applicant_type.py
 create mode 100644 api/src/data_migration/transformation/subtask/transform_assistance_listing.py
 create mode 100644
api/src/data_migration/transformation/subtask/transform_funding_category.py create mode 100644 api/src/data_migration/transformation/subtask/transform_funding_instrument.py create mode 100644 api/src/data_migration/transformation/subtask/transform_opportunity.py create mode 100644 api/src/data_migration/transformation/subtask/transform_opportunity_summary.py create mode 100644 api/src/data_migration/transformation/transform_constants.py create mode 100644 api/src/task/subtask.py create mode 100644 api/tests/src/data_migration/transformation/conftest.py create mode 100644 api/tests/src/data_migration/transformation/subtask/__init__.py create mode 100644 api/tests/src/data_migration/transformation/subtask/test_transform_applicant_type.py create mode 100644 api/tests/src/data_migration/transformation/subtask/test_transform_assistance_listing.py create mode 100644 api/tests/src/data_migration/transformation/subtask/test_transform_funding_category.py create mode 100644 api/tests/src/data_migration/transformation/subtask/test_transform_funding_instrument.py create mode 100644 api/tests/src/data_migration/transformation/subtask/test_transform_opportunity.py create mode 100644 api/tests/src/data_migration/transformation/subtask/test_transform_opportunity_summary.py diff --git a/api/src/data_migration/transformation/__init__.py b/api/src/data_migration/transformation/__init__.py index c3c7751b1..e69de29bb 100644 --- a/api/src/data_migration/transformation/__init__.py +++ b/api/src/data_migration/transformation/__init__.py @@ -1,39 +0,0 @@ -from typing import TypeAlias - -from src.db.models.staging.forecast import ( - TapplicanttypesForecast, - TapplicanttypesForecastHist, - Tforecast, - TforecastHist, - TfundactcatForecast, - TfundactcatForecastHist, - TfundinstrForecast, - TfundinstrForecastHist, -) -from src.db.models.staging.synopsis import ( - TapplicanttypesSynopsis, - TapplicanttypesSynopsisHist, - TfundactcatSynopsis, - TfundactcatSynopsisHist, - TfundinstrSynopsis, - TfundinstrSynopsisHist, - Tsynopsis, - TsynopsisHist, -) - -SourceSummary: TypeAlias = Tforecast | Tsynopsis | TforecastHist | TsynopsisHist - -SourceApplicantType: TypeAlias = ( - TapplicanttypesForecast - | TapplicanttypesForecastHist - | TapplicanttypesSynopsis - | TapplicanttypesSynopsisHist -) - -SourceFundingCategory: TypeAlias = ( - TfundactcatForecast | TfundactcatForecastHist | TfundactcatSynopsis | TfundactcatSynopsisHist -) - -SourceFundingInstrument: TypeAlias = ( - TfundinstrForecastHist | TfundinstrForecast | TfundinstrSynopsisHist | TfundinstrSynopsis -) diff --git a/api/src/data_migration/transformation/subtask/__init__.py b/api/src/data_migration/transformation/subtask/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/api/src/data_migration/transformation/subtask/abstract_transform_subtask.py b/api/src/data_migration/transformation/subtask/abstract_transform_subtask.py new file mode 100644 index 000000000..91c12bdee --- /dev/null +++ b/api/src/data_migration/transformation/subtask/abstract_transform_subtask.py @@ -0,0 +1,180 @@ +import abc +import logging +from datetime import datetime +from typing import Any, Sequence, Tuple, Type, cast + +from sqlalchemy import and_, select +from sqlalchemy.orm import selectinload + +import src.data_migration.transformation.transform_constants as transform_constants +from src.db.models.opportunity_models import Opportunity, OpportunitySummary +from src.task.subtask import SubTask +from src.task.task import Task + +logger = logging.getLogger(__name__) + 
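The `SubTask` base class imported above comes from `api/src/task/subtask.py`, a new file in this patch (69 lines per the diffstat) that is not shown in this excerpt. Based on how `AbstractTransformSubTask` uses it below (`cls_name()`, `increment()`, `db_session`, and an abstract `run_subtask()`), and on the PR description's mention of per-step metrics, its shape is plausibly something like the following sketch; the `run()` wrapper and the timing details are assumptions, not code from the patch:

```python
# Hypothetical sketch of src/task/subtask.py, reconstructed from usage.
# Names other than run_subtask/increment/cls_name are guesses.
import abc
import logging
import time

from src.task.task import Task

logger = logging.getLogger(__name__)


class SubTask(abc.ABC):
    """One step of a larger Task, sharing its DB session and metrics."""

    def __init__(self, task: Task) -> None:
        self.task = task
        self.db_session = task.db_session

    def cls_name(self) -> str:
        return self.__class__.__name__

    def increment(self, name: str, value: int = 1, prefix: str | None = None) -> None:
        # Delegate to the parent task so every subtask's counts roll up
        # into a single set of metrics for the overall task run.
        self.task.increment(name, value, prefix=prefix)

    def run(self) -> None:
        # Wrap the subtask body with logging and timing so each step
        # reports how long it took - the "very granular metrics"
        # mentioned in the PR description.
        logger.info("Starting subtask %s", self.cls_name())
        start = time.monotonic()
        self.run_subtask()
        logger.info(
            "Finished subtask %s",
            self.cls_name(),
            extra={"subtask_duration_sec": round(time.monotonic() - start, 3)},
        )

    @abc.abstractmethod
    def run_subtask(self) -> None:
        pass
```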
+
+class AbstractTransformSubTask(SubTask):
+    def __init__(self, task: Task):
+        super().__init__(task)
+
+        # This is a bit of a hacky way of making sure the task passed in
+        # is the TransformOracleDataTask class. We could make this init function take in that
+        # type specifically, but we'd run into circular type dependencies which are complex to resolve
+        transform_time = getattr(task, "transform_time", None)
+        if transform_time is None:
+            raise Exception("Task passed into AbstractTransformSubTask must have a transform_time")
+
+        self.transform_time: datetime = transform_time
+
+    def run_subtask(self) -> None:
+        with self.db_session.begin():
+            self.transform_records()
+            logger.info(
+                "Finished running transformations for %s - committing results", self.cls_name()
+            )
+
+        # As a safety net, expire all references in the session
+        # after running. This avoids any potential complexities in
+        # cached data between separate subtasks running.
+        # By default sessions actually do this when committing, but
+        # our db session creation logic disables it, so expiring here
+        # manually restores the ordinary behavior.
+        self.db_session.expire_all()
+
+    @abc.abstractmethod
+    def transform_records(self) -> None:
+        pass
+
+    def _handle_delete(
+        self,
+        source: transform_constants.S,
+        target: transform_constants.D | None,
+        record_type: str,
+        extra: dict,
+        error_on_missing_target: bool = False,
+    ) -> None:
+        # If the target we want to delete is None, we have nothing to delete
+        if target is None:
+            # In some scenarios we want to error when this happens
+            if error_on_missing_target:
+                raise ValueError("Cannot delete %s record as it does not exist" % record_type)
+
+            # In a lot of scenarios we actually just want to log a message, as this is expected to happen.
+            # For example, if we are deleting an opportunity_summary record and already deleted the opportunity,
+            # then SQLAlchemy would have deleted the opportunity_summary for us already. When we later go to delete
+            # it, we'd hit this case, which isn't a problem.
+ logger.info("Cannot delete %s record as it does not exist", record_type, extra=extra) + source.transformation_notes = transform_constants.ORPHANED_DELETE_RECORD + self.increment( + transform_constants.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED, prefix=record_type + ) + return + + logger.info("Deleting %s record", record_type, extra=extra) + self.increment(transform_constants.Metrics.TOTAL_RECORDS_DELETED, prefix=record_type) + self.db_session.delete(target) + + def _is_orphaned_historical( + self, + parent_record: Opportunity | OpportunitySummary | None, + source_record: transform_constants.SourceAny, + ) -> bool: + return parent_record is None and source_record.is_historical_table + + def _handle_orphaned_historical( + self, source_record: transform_constants.SourceAny, record_type: str, extra: dict + ) -> None: + logger.warning( + "Historical %s does not have a corresponding parent record - cannot import, but will mark as processed", + record_type, + extra=extra, + ) + self.increment( + transform_constants.Metrics.TOTAL_HISTORICAL_ORPHANS_SKIPPED, prefix=record_type + ) + source_record.transformation_notes = transform_constants.ORPHANED_HISTORICAL_RECORD + + def fetch( + self, + source_model: Type[transform_constants.S], + destination_model: Type[transform_constants.D], + join_clause: Sequence, + ) -> list[Tuple[transform_constants.S, transform_constants.D | None]]: + # The real type is: Sequence[Row[Tuple[S, D | None]]] + # but MyPy is weird about this and the Row+Tuple causes some + # confusion in the parsing so it ends up assuming everything is Any + # So just cast it to a simpler type that doesn't confuse anything + return cast( + list[Tuple[transform_constants.S, transform_constants.D | None]], + self.db_session.execute( + select(source_model, destination_model) + .join(destination_model, and_(*join_clause), isouter=True) + .where(source_model.transformed_at.is_(None)) + .execution_options(yield_per=5000) + ), + ) + + def fetch_with_opportunity( + self, + source_model: Type[transform_constants.S], + destination_model: Type[transform_constants.D], + join_clause: Sequence, + ) -> list[Tuple[transform_constants.S, transform_constants.D | None, Opportunity | None]]: + # Similar to the above fetch function, but also grabs an opportunity record + # Note that this requires your source_model to have an opportunity_id field defined. 
+ + return cast( + list[Tuple[transform_constants.S, transform_constants.D | None, Opportunity | None]], + self.db_session.execute( + select(source_model, destination_model, Opportunity) + .join(destination_model, and_(*join_clause), isouter=True) + .join( + Opportunity, + source_model.opportunity_id == Opportunity.opportunity_id, # type: ignore[attr-defined] + isouter=True, + ) + .where(source_model.transformed_at.is_(None)) + .execution_options(yield_per=5000) + ), + ) + + def fetch_with_opportunity_summary( + self, + source_model: Type[transform_constants.S], + destination_model: Type[transform_constants.D], + join_clause: Sequence, + is_forecast: bool, + is_historical_table: bool, + relationship_load_value: Any, + ) -> list[ + Tuple[transform_constants.S, transform_constants.D | None, OpportunitySummary | None] + ]: + # setup the join clause for getting the opportunity summary + + opportunity_summary_join_clause = [ + source_model.opportunity_id == OpportunitySummary.opportunity_id, # type: ignore[attr-defined] + OpportunitySummary.is_forecast.is_(is_forecast), + ] + + if is_historical_table: + opportunity_summary_join_clause.append( + source_model.revision_number == OpportunitySummary.revision_number # type: ignore[attr-defined] + ) + else: + opportunity_summary_join_clause.append(OpportunitySummary.revision_number.is_(None)) + + return cast( + list[ + Tuple[ + transform_constants.S, transform_constants.D | None, OpportunitySummary | None + ] + ], + self.db_session.execute( + select(source_model, destination_model, OpportunitySummary) + .join(OpportunitySummary, and_(*opportunity_summary_join_clause), isouter=True) + .join(destination_model, and_(*join_clause), isouter=True) + .where(source_model.transformed_at.is_(None)) + .options(selectinload(relationship_load_value)) + .execution_options(yield_per=5000, populate_existing=True) + ), + ) diff --git a/api/src/data_migration/transformation/subtask/transform_applicant_type.py b/api/src/data_migration/transformation/subtask/transform_applicant_type.py new file mode 100644 index 000000000..fb9bb5802 --- /dev/null +++ b/api/src/data_migration/transformation/subtask/transform_applicant_type.py @@ -0,0 +1,189 @@ +import logging +from typing import Sequence, Tuple + +import src.data_migration.transformation.transform_constants as transform_constants +import src.data_migration.transformation.transform_util as transform_util +from src.data_migration.transformation.subtask.abstract_transform_subtask import ( + AbstractTransformSubTask, +) +from src.db.models.opportunity_models import LinkOpportunitySummaryApplicantType, OpportunitySummary +from src.db.models.staging.forecast import TapplicanttypesForecast, TapplicanttypesForecastHist +from src.db.models.staging.synopsis import TapplicanttypesSynopsis, TapplicanttypesSynopsisHist + +logger = logging.getLogger(__name__) + + +class TransformApplicantType(AbstractTransformSubTask): + def transform_records(self) -> None: + link_table = LinkOpportunitySummaryApplicantType + relationship_load_value = OpportunitySummary.link_applicant_types + + logger.info("Processing forecast applicant types") + forecast_applicant_type_records = self.fetch_with_opportunity_summary( + TapplicanttypesForecast, + link_table, + [ + TapplicanttypesForecast.at_frcst_id + == LinkOpportunitySummaryApplicantType.legacy_applicant_type_id, + OpportunitySummary.opportunity_summary_id + == LinkOpportunitySummaryApplicantType.opportunity_summary_id, + ], + is_forecast=True, + is_historical_table=False, + 
relationship_load_value=relationship_load_value, + ) + self.process_link_applicant_types_group(forecast_applicant_type_records) + + logger.info("Processing historical forecast applicant types") + forecast_applicant_type_hist_records = self.fetch_with_opportunity_summary( + TapplicanttypesForecastHist, + link_table, + [ + TapplicanttypesForecastHist.at_frcst_id + == LinkOpportunitySummaryApplicantType.legacy_applicant_type_id, + OpportunitySummary.opportunity_summary_id + == LinkOpportunitySummaryApplicantType.opportunity_summary_id, + ], + is_forecast=True, + is_historical_table=True, + relationship_load_value=relationship_load_value, + ) + self.process_link_applicant_types_group(forecast_applicant_type_hist_records) + + logger.info("Processing synopsis applicant types") + synopsis_applicant_type_records = self.fetch_with_opportunity_summary( + TapplicanttypesSynopsis, + link_table, + [ + TapplicanttypesSynopsis.at_syn_id + == LinkOpportunitySummaryApplicantType.legacy_applicant_type_id, + OpportunitySummary.opportunity_summary_id + == LinkOpportunitySummaryApplicantType.opportunity_summary_id, + ], + is_forecast=False, + is_historical_table=False, + relationship_load_value=relationship_load_value, + ) + self.process_link_applicant_types_group(synopsis_applicant_type_records) + + logger.info("Processing historical synopsis applicant types") + synopsis_applicant_type_hist_records = self.fetch_with_opportunity_summary( + TapplicanttypesSynopsisHist, + link_table, + [ + TapplicanttypesSynopsisHist.at_syn_id + == LinkOpportunitySummaryApplicantType.legacy_applicant_type_id, + OpportunitySummary.opportunity_summary_id + == LinkOpportunitySummaryApplicantType.opportunity_summary_id, + ], + is_forecast=False, + is_historical_table=True, + relationship_load_value=relationship_load_value, + ) + self.process_link_applicant_types_group(synopsis_applicant_type_hist_records) + + def process_link_applicant_types_group( + self, + records: Sequence[ + Tuple[ + transform_constants.SourceApplicantType, + LinkOpportunitySummaryApplicantType | None, + OpportunitySummary | None, + ] + ], + ) -> None: + for source_applicant_type, target_applicant_type, opportunity_summary in records: + try: + self.process_link_applicant_type( + source_applicant_type, target_applicant_type, opportunity_summary + ) + except ValueError: + self.increment( + transform_constants.Metrics.TOTAL_ERROR_COUNT, + prefix=transform_constants.APPLICANT_TYPE, + ) + logger.exception( + "Failed to process opportunity summary applicant type", + extra=transform_util.get_log_extra_applicant_type(source_applicant_type), + ) + + def process_link_applicant_type( + self, + source_applicant_type: transform_constants.SourceApplicantType, + target_applicant_type: LinkOpportunitySummaryApplicantType | None, + opportunity_summary: OpportunitySummary | None, + ) -> None: + self.increment( + transform_constants.Metrics.TOTAL_RECORDS_PROCESSED, + prefix=transform_constants.APPLICANT_TYPE, + ) + extra = transform_util.get_log_extra_applicant_type(source_applicant_type) + logger.info("Processing applicant type", extra=extra) + + if source_applicant_type.is_deleted: + self._handle_delete( + source_applicant_type, + target_applicant_type, + transform_constants.APPLICANT_TYPE, + extra, + ) + + # Historical records are linked to other historical records, however + # we don't import historical opportunity records, so if the opportunity + # was deleted, we won't have created the opportunity summary. 
Whenever we do
+        # support historical opportunities, we'll have these all marked with a
+        # flag that we can use to reprocess these.
+        elif self._is_orphaned_historical(opportunity_summary, source_applicant_type):
+            self._handle_orphaned_historical(
+                source_applicant_type, transform_constants.APPLICANT_TYPE, extra
+            )
+
+        elif opportunity_summary is None:
+            # This shouldn't be possible as the incoming data has foreign keys, but as a safety net
+            # we'll make sure the opportunity actually exists
+            raise ValueError(
+                "Applicant type record cannot be processed as the opportunity summary for it does not exist"
+            )
+        else:
+            # To avoid incrementing metrics for records we fail to transform, record
+            # here whether it's an insert/update and we'll increment after transforming
+            is_insert = target_applicant_type is None
+
+            logger.info("Transforming and upserting applicant type", extra=extra)
+            transformed_applicant_type = transform_util.convert_opportunity_summary_applicant_type(
+                source_applicant_type, target_applicant_type, opportunity_summary
+            )
+
+            # Before we insert, we still have to be certain we're not adding a duplicate record,
+            # because the primary key of the legacy tables is the legacy ID + lookup value + opportunity ID.
+            # It's possible for the same lookup value to appear multiple times because the legacy ID is different.
+            # This would hit a conflict in our DB's primary key, so we need to verify that won't happen.
+            if (
+                is_insert
+                and transformed_applicant_type.applicant_type in opportunity_summary.applicant_types
+            ):
+                self.increment(
+                    transform_constants.Metrics.TOTAL_DUPLICATE_RECORDS_SKIPPED,
+                    prefix=transform_constants.APPLICANT_TYPE,
+                )
+                logger.warning(
+                    "Skipping applicant type record",
+                    extra=extra | {"applicant_type": transformed_applicant_type.applicant_type},
+                )
+            elif is_insert:
+                self.increment(
+                    transform_constants.Metrics.TOTAL_RECORDS_INSERTED,
+                    prefix=transform_constants.APPLICANT_TYPE,
+                )
+                # We append to the relationship so SQLAlchemy immediately attaches it to its cached
+                # opportunity summary object so that the above check works when we receive dupes in the same batch
+                opportunity_summary.link_applicant_types.append(transformed_applicant_type)
+            else:
+                self.increment(
+                    transform_constants.Metrics.TOTAL_RECORDS_UPDATED,
+                    prefix=transform_constants.APPLICANT_TYPE,
+                )
+                self.db_session.merge(transformed_applicant_type)
+
+        logger.info("Processed applicant type", extra=extra)
+        source_applicant_type.transformed_at = self.transform_time
diff --git a/api/src/data_migration/transformation/subtask/transform_assistance_listing.py b/api/src/data_migration/transformation/subtask/transform_assistance_listing.py
new file mode 100644
index 000000000..c429c5146
--- /dev/null
+++ b/api/src/data_migration/transformation/subtask/transform_assistance_listing.py
@@ -0,0 +1,113 @@
+import logging
+from typing import Tuple
+
+import src.data_migration.transformation.transform_constants as transform_constants
+import src.data_migration.transformation.transform_util as transform_util
+from src.data_migration.transformation.subtask.abstract_transform_subtask import (
+    AbstractTransformSubTask,
+)
+from src.db.models.opportunity_models import Opportunity, OpportunityAssistanceListing
+from src.db.models.staging.opportunity import TopportunityCfda
+
+logger = logging.getLogger(__name__)
+
+
+class TransformAssistanceListing(AbstractTransformSubTask):
+    def transform_records(self) -> None:
+        assistance_listings: list[
+            Tuple[TopportunityCfda,
OpportunityAssistanceListing | None, Opportunity | None] + ] = self.fetch_with_opportunity( + TopportunityCfda, + OpportunityAssistanceListing, + [ + TopportunityCfda.opp_cfda_id + == OpportunityAssistanceListing.opportunity_assistance_listing_id + ], + ) + + for ( + source_assistance_listing, + target_assistance_listing, + opportunity, + ) in assistance_listings: + try: + self.process_assistance_listing( + source_assistance_listing, target_assistance_listing, opportunity + ) + except ValueError: + self.increment( + transform_constants.Metrics.TOTAL_ERROR_COUNT, + prefix=transform_constants.ASSISTANCE_LISTING, + ) + logger.exception( + "Failed to process assistance listing", + extra={ + "opportunity_assistance_listing_id": source_assistance_listing.opp_cfda_id + }, + ) + + def process_assistance_listing( + self, + source_assistance_listing: TopportunityCfda, + target_assistance_listing: OpportunityAssistanceListing | None, + opportunity: Opportunity | None, + ) -> None: + self.increment( + transform_constants.Metrics.TOTAL_RECORDS_PROCESSED, + prefix=transform_constants.ASSISTANCE_LISTING, + ) + extra = { + "opportunity_assistance_listing_id": source_assistance_listing.opp_cfda_id, + "opportunity_id": source_assistance_listing.opportunity_id, + } + logger.info("Processing assistance listing", extra=extra) + + if source_assistance_listing.is_deleted: + self._handle_delete( + source_assistance_listing, + target_assistance_listing, + transform_constants.ASSISTANCE_LISTING, + extra, + ) + + elif opportunity is None: + # The Oracle system we're importing these from does not have a foreign key between + # the opportunity ID in the TOPPORTUNITY_CFDA table and the TOPPORTUNITY table. + # There are many (2306 as of writing) orphaned CFDA records, created between 2007 and 2011 + # We don't want to continuously process these, so won't error for these, and will just + # mark them as transformed below. 
+ self.increment( + transform_constants.Metrics.TOTAL_RECORDS_ORPHANED, + prefix=transform_constants.ASSISTANCE_LISTING, + ) + logger.info( + "Assistance listing is orphaned and does not connect to any opportunity", + extra=extra, + ) + source_assistance_listing.transformation_notes = transform_constants.ORPHANED_CFDA + + else: + # To avoid incrementing metrics for records we fail to transform, record + # here whether it's an insert/update and we'll increment after transforming + is_insert = target_assistance_listing is None + + logger.info("Transforming and upserting assistance listing", extra=extra) + transformed_assistance_listing = transform_util.transform_assistance_listing( + source_assistance_listing, target_assistance_listing + ) + + if is_insert: + self.increment( + transform_constants.Metrics.TOTAL_RECORDS_INSERTED, + prefix=transform_constants.ASSISTANCE_LISTING, + ) + self.db_session.add(transformed_assistance_listing) + else: + self.increment( + transform_constants.Metrics.TOTAL_RECORDS_UPDATED, + prefix=transform_constants.ASSISTANCE_LISTING, + ) + self.db_session.merge(transformed_assistance_listing) + + logger.info("Processed assistance listing", extra=extra) + source_assistance_listing.transformed_at = self.transform_time diff --git a/api/src/data_migration/transformation/subtask/transform_funding_category.py b/api/src/data_migration/transformation/subtask/transform_funding_category.py new file mode 100644 index 000000000..70461cea2 --- /dev/null +++ b/api/src/data_migration/transformation/subtask/transform_funding_category.py @@ -0,0 +1,196 @@ +import logging +from typing import Sequence, Tuple + +import src.data_migration.transformation.transform_constants as transform_constants +import src.data_migration.transformation.transform_util as transform_util +from src.data_migration.transformation.subtask.abstract_transform_subtask import ( + AbstractTransformSubTask, +) +from src.db.models.opportunity_models import ( + LinkOpportunitySummaryFundingCategory, + OpportunitySummary, +) +from src.db.models.staging.forecast import TfundactcatForecast, TfundactcatForecastHist +from src.db.models.staging.synopsis import TfundactcatSynopsis, TfundactcatSynopsisHist + +logger = logging.getLogger(__name__) + + +class TransformFundingCategory(AbstractTransformSubTask): + def transform_records(self) -> None: + link_table = LinkOpportunitySummaryFundingCategory + relationship_load_value = OpportunitySummary.link_funding_categories + + logger.info("Processing forecast funding categories") + forecast_funding_category_records = self.fetch_with_opportunity_summary( + TfundactcatForecast, + link_table, + [ + TfundactcatForecast.fac_frcst_id + == LinkOpportunitySummaryFundingCategory.legacy_funding_category_id, + OpportunitySummary.opportunity_summary_id + == LinkOpportunitySummaryFundingCategory.opportunity_summary_id, + ], + is_forecast=True, + is_historical_table=False, + relationship_load_value=relationship_load_value, + ) + self.process_link_funding_categories_group(forecast_funding_category_records) + + logger.info("Processing historical forecast funding categories") + forecast_funding_category_hist_records = self.fetch_with_opportunity_summary( + TfundactcatForecastHist, + link_table, + [ + TfundactcatForecastHist.fac_frcst_id + == LinkOpportunitySummaryFundingCategory.legacy_funding_category_id, + OpportunitySummary.opportunity_summary_id + == LinkOpportunitySummaryFundingCategory.opportunity_summary_id, + ], + is_forecast=True, + is_historical_table=True, + 
relationship_load_value=relationship_load_value, + ) + self.process_link_funding_categories_group(forecast_funding_category_hist_records) + + logger.info("Processing synopsis funding categories") + synopsis_funding_category_records = self.fetch_with_opportunity_summary( + TfundactcatSynopsis, + link_table, + [ + TfundactcatSynopsis.fac_syn_id + == LinkOpportunitySummaryFundingCategory.legacy_funding_category_id, + OpportunitySummary.opportunity_summary_id + == LinkOpportunitySummaryFundingCategory.opportunity_summary_id, + ], + is_forecast=False, + is_historical_table=False, + relationship_load_value=relationship_load_value, + ) + self.process_link_funding_categories_group(synopsis_funding_category_records) + + logger.info("Processing historical synopsis funding categories") + synopsis_funding_category_hist_records = self.fetch_with_opportunity_summary( + TfundactcatSynopsisHist, + link_table, + [ + TfundactcatSynopsisHist.fac_syn_id + == LinkOpportunitySummaryFundingCategory.legacy_funding_category_id, + OpportunitySummary.opportunity_summary_id + == LinkOpportunitySummaryFundingCategory.opportunity_summary_id, + ], + is_forecast=False, + is_historical_table=True, + relationship_load_value=relationship_load_value, + ) + self.process_link_funding_categories_group(synopsis_funding_category_hist_records) + + def process_link_funding_categories_group( + self, + records: Sequence[ + Tuple[ + transform_constants.SourceFundingCategory, + LinkOpportunitySummaryFundingCategory | None, + OpportunitySummary | None, + ] + ], + ) -> None: + for source_funding_category, target_funding_category, opportunity_summary in records: + try: + self.process_link_funding_category( + source_funding_category, target_funding_category, opportunity_summary + ) + except ValueError: + self.increment( + transform_constants.Metrics.TOTAL_ERROR_COUNT, + prefix=transform_constants.FUNDING_CATEGORY, + ) + logger.exception( + "Failed to process opportunity summary funding category", + extra=transform_util.get_log_extra_funding_category(source_funding_category), + ) + + def process_link_funding_category( + self, + source_funding_category: transform_constants.SourceFundingCategory, + target_funding_category: LinkOpportunitySummaryFundingCategory | None, + opportunity_summary: OpportunitySummary | None, + ) -> None: + self.increment( + transform_constants.Metrics.TOTAL_RECORDS_PROCESSED, + prefix=transform_constants.FUNDING_CATEGORY, + ) + extra = transform_util.get_log_extra_funding_category(source_funding_category) + logger.info("Processing funding category", extra=extra) + + if source_funding_category.is_deleted: + self._handle_delete( + source_funding_category, + target_funding_category, + transform_constants.FUNDING_CATEGORY, + extra, + ) + + # Historical records are linked to other historical records, however + # we don't import historical opportunity records, so if the opportunity + # was deleted, we won't have created the opportunity summary. Whenever we do + # support historical opportunities, we'll have these all marked with a + # flag that we can use to reprocess these. 
+
+        elif self._is_orphaned_historical(opportunity_summary, source_funding_category):
+            self._handle_orphaned_historical(
+                source_funding_category, transform_constants.FUNDING_CATEGORY, extra
+            )
+
+        elif opportunity_summary is None:
+            # This shouldn't be possible as the incoming data has foreign keys, but as a safety net
+            # we'll make sure the opportunity actually exists
+            raise ValueError(
+                "Funding category record cannot be processed as the opportunity summary for it does not exist"
+            )
+        else:
+            # To avoid incrementing metrics for records we fail to transform, record
+            # here whether it's an insert/update and we'll increment after transforming
+            is_insert = target_funding_category is None
+
+            logger.info("Transforming and upserting funding category", extra=extra)
+            transformed_funding_category = (
+                transform_util.convert_opportunity_summary_funding_category(
+                    source_funding_category, target_funding_category, opportunity_summary
+                )
+            )
+
+            # Before we insert, we still have to be certain we're not adding a duplicate record,
+            # because the primary key of the legacy tables is the legacy ID + lookup value + opportunity ID.
+            # It's possible for the same lookup value to appear multiple times because the legacy ID is different.
+            # This would hit a conflict in our DB's primary key, so we need to verify that won't happen.
+            if (
+                is_insert
+                and transformed_funding_category.funding_category
+                in opportunity_summary.funding_categories
+            ):
+                self.increment(
+                    transform_constants.Metrics.TOTAL_DUPLICATE_RECORDS_SKIPPED,
+                    prefix=transform_constants.FUNDING_CATEGORY,
+                )
+                logger.warning(
+                    "Skipping funding category record",
+                    extra=extra
+                    | {"funding_category": transformed_funding_category.funding_category},
+                )
+            elif is_insert:
+                self.increment(
+                    transform_constants.Metrics.TOTAL_RECORDS_INSERTED,
+                    prefix=transform_constants.FUNDING_CATEGORY,
+                )
+                # We append to the relationship so SQLAlchemy immediately attaches it to its cached
+                # opportunity summary object so that the above check works when we receive dupes in the same batch
+                opportunity_summary.link_funding_categories.append(transformed_funding_category)
+            else:
+                self.increment(
+                    transform_constants.Metrics.TOTAL_RECORDS_UPDATED,
+                    prefix=transform_constants.FUNDING_CATEGORY,
+                )
+                self.db_session.merge(transformed_funding_category)
+
+        logger.info("Processed funding category", extra=extra)
+        source_funding_category.transformed_at = self.transform_time
diff --git a/api/src/data_migration/transformation/subtask/transform_funding_instrument.py b/api/src/data_migration/transformation/subtask/transform_funding_instrument.py
new file mode 100644
index 000000000..9c15161e1
--- /dev/null
+++ b/api/src/data_migration/transformation/subtask/transform_funding_instrument.py
@@ -0,0 +1,199 @@
+import logging
+from typing import Sequence, Tuple
+
+import src.data_migration.transformation.transform_constants as transform_constants
+import src.data_migration.transformation.transform_util as transform_util
+from src.data_migration.transformation.subtask.abstract_transform_subtask import (
+    AbstractTransformSubTask,
+)
+from src.db.models.opportunity_models import (
+    LinkOpportunitySummaryFundingInstrument,
+    OpportunitySummary,
+)
+from src.db.models.staging.forecast import TfundinstrForecast, TfundinstrForecastHist
+from src.db.models.staging.synopsis import TfundinstrSynopsis, TfundinstrSynopsisHist
+
+logger = logging.getLogger(__name__)
+
+
+class TransformFundingInstrument(AbstractTransformSubTask):
+    def transform_records(self) -> None:
+
link_table = LinkOpportunitySummaryFundingInstrument + relationship_load_value = OpportunitySummary.link_funding_instruments + + logger.info("Processing forecast funding instruments") + forecast_funding_instrument_records = self.fetch_with_opportunity_summary( + TfundinstrForecast, + link_table, + [ + TfundinstrForecast.fi_frcst_id + == LinkOpportunitySummaryFundingInstrument.legacy_funding_instrument_id, + OpportunitySummary.opportunity_summary_id + == LinkOpportunitySummaryFundingInstrument.opportunity_summary_id, + ], + is_forecast=True, + is_historical_table=False, + relationship_load_value=relationship_load_value, + ) + self.process_link_funding_instruments_group(forecast_funding_instrument_records) + + logger.info("Processing historical forecast funding instruments") + forecast_funding_instrument_hist_records = self.fetch_with_opportunity_summary( + TfundinstrForecastHist, + link_table, + [ + TfundinstrForecastHist.fi_frcst_id + == LinkOpportunitySummaryFundingInstrument.legacy_funding_instrument_id, + OpportunitySummary.opportunity_summary_id + == LinkOpportunitySummaryFundingInstrument.opportunity_summary_id, + ], + is_forecast=True, + is_historical_table=True, + relationship_load_value=relationship_load_value, + ) + self.process_link_funding_instruments_group(forecast_funding_instrument_hist_records) + + logger.info("Processing synopsis funding instruments") + synopsis_funding_instrument_records = self.fetch_with_opportunity_summary( + TfundinstrSynopsis, + link_table, + [ + TfundinstrSynopsis.fi_syn_id + == LinkOpportunitySummaryFundingInstrument.legacy_funding_instrument_id, + OpportunitySummary.opportunity_summary_id + == LinkOpportunitySummaryFundingInstrument.opportunity_summary_id, + ], + is_forecast=False, + is_historical_table=False, + relationship_load_value=relationship_load_value, + ) + self.process_link_funding_instruments_group(synopsis_funding_instrument_records) + + logger.info("Processing historical synopsis funding instruments") + synopsis_funding_instrument_hist_records = self.fetch_with_opportunity_summary( + TfundinstrSynopsisHist, + link_table, + [ + TfundinstrSynopsisHist.fi_syn_id + == LinkOpportunitySummaryFundingInstrument.legacy_funding_instrument_id, + OpportunitySummary.opportunity_summary_id + == LinkOpportunitySummaryFundingInstrument.opportunity_summary_id, + ], + is_forecast=False, + is_historical_table=True, + relationship_load_value=relationship_load_value, + ) + self.process_link_funding_instruments_group(synopsis_funding_instrument_hist_records) + + def process_link_funding_instruments_group( + self, + records: Sequence[ + Tuple[ + transform_constants.SourceFundingInstrument, + LinkOpportunitySummaryFundingInstrument | None, + OpportunitySummary | None, + ] + ], + ) -> None: + for source_funding_instrument, target_funding_instrument, opportunity_summary in records: + try: + self.process_link_funding_instrument( + source_funding_instrument, target_funding_instrument, opportunity_summary + ) + except ValueError: + self.increment( + transform_constants.Metrics.TOTAL_ERROR_COUNT, + prefix=transform_constants.FUNDING_INSTRUMENT, + ) + logger.exception( + "Failed to process opportunity summary funding instrument", + extra=transform_util.get_log_extra_funding_instrument( + source_funding_instrument + ), + ) + + def process_link_funding_instrument( + self, + source_funding_instrument: transform_constants.SourceFundingInstrument, + target_funding_instrument: LinkOpportunitySummaryFundingInstrument | None, + opportunity_summary: OpportunitySummary | None, 
+    ) -> None:
+        self.increment(
+            transform_constants.Metrics.TOTAL_RECORDS_PROCESSED,
+            prefix=transform_constants.FUNDING_INSTRUMENT,
+        )
+        extra = transform_util.get_log_extra_funding_instrument(source_funding_instrument)
+        logger.info("Processing funding instrument", extra=extra)
+
+        if source_funding_instrument.is_deleted:
+            self._handle_delete(
+                source_funding_instrument,
+                target_funding_instrument,
+                transform_constants.FUNDING_INSTRUMENT,
+                extra,
+            )
+
+        # Historical records are linked to other historical records, however
+        # we don't import historical opportunity records, so if the opportunity
+        # was deleted, we won't have created the opportunity summary. Whenever we do
+        # support historical opportunities, we'll have these all marked with a
+        # flag that we can use to reprocess these.
+        elif self._is_orphaned_historical(opportunity_summary, source_funding_instrument):
+            self._handle_orphaned_historical(
+                source_funding_instrument, transform_constants.FUNDING_INSTRUMENT, extra
+            )
+
+        elif opportunity_summary is None:
+            # This shouldn't be possible as the incoming data has foreign keys, but as a safety net
+            # we'll make sure the opportunity actually exists
+            raise ValueError(
+                "Funding instrument record cannot be processed as the opportunity summary for it does not exist"
+            )
+
+        else:
+            # To avoid incrementing metrics for records we fail to transform, record
+            # here whether it's an insert/update and we'll increment after transforming
+            is_insert = target_funding_instrument is None
+
+            logger.info("Transforming and upserting funding instrument", extra=extra)
+            transformed_funding_instrument = (
+                transform_util.convert_opportunity_summary_funding_instrument(
+                    source_funding_instrument, target_funding_instrument, opportunity_summary
+                )
+            )
+
+            # Before we insert, we still have to be certain we're not adding a duplicate record,
+            # because the primary key of the legacy tables is the legacy ID + lookup value + opportunity ID.
+            # It's possible for the same lookup value to appear multiple times because the legacy ID is different.
+            # This would hit a conflict in our DB's primary key, so we need to verify that won't happen.
+            if (
+                is_insert
+                and transformed_funding_instrument.funding_instrument
+                in opportunity_summary.funding_instruments
+            ):
+                self.increment(
+                    transform_constants.Metrics.TOTAL_DUPLICATE_RECORDS_SKIPPED,
+                    prefix=transform_constants.FUNDING_INSTRUMENT,
+                )
+                logger.warning(
+                    "Skipping funding instrument record",
+                    extra=extra
+                    | {"funding_instrument": transformed_funding_instrument.funding_instrument},
+                )
+            elif is_insert:
+                self.increment(
+                    transform_constants.Metrics.TOTAL_RECORDS_INSERTED,
+                    prefix=transform_constants.FUNDING_INSTRUMENT,
+                )
+                # We append to the relationship so SQLAlchemy immediately attaches it to its cached
+                # opportunity summary object so that the above check works when we receive dupes in the same batch
+                opportunity_summary.link_funding_instruments.append(transformed_funding_instrument)
+            else:
+                self.increment(
+                    transform_constants.Metrics.TOTAL_RECORDS_UPDATED,
+                    prefix=transform_constants.FUNDING_INSTRUMENT,
+                )
+                self.db_session.merge(transformed_funding_instrument)
+
+        logger.info("Processed funding instrument", extra=extra)
+        source_funding_instrument.transformed_at = self.transform_time
diff --git a/api/src/data_migration/transformation/subtask/transform_opportunity.py b/api/src/data_migration/transformation/subtask/transform_opportunity.py
new file mode 100644
index 000000000..4a354b542
--- /dev/null
+++
b/api/src/data_migration/transformation/subtask/transform_opportunity.py @@ -0,0 +1,81 @@ +import logging +from typing import Tuple + +import src.data_migration.transformation.transform_constants as transform_constants +import src.data_migration.transformation.transform_util as transform_util +from src.data_migration.transformation.subtask.abstract_transform_subtask import ( + AbstractTransformSubTask, +) +from src.db.models.opportunity_models import Opportunity +from src.db.models.staging.opportunity import Topportunity + +logger = logging.getLogger(__name__) + + +class TransformOpportunity(AbstractTransformSubTask): + def transform_records(self) -> None: + # Fetch all opportunities that were modified + # Alongside that, grab the existing opportunity record + opportunities: list[Tuple[Topportunity, Opportunity | None]] = self.fetch( + Topportunity, + Opportunity, + [Topportunity.opportunity_id == Opportunity.opportunity_id], + ) + + for source_opportunity, target_opportunity in opportunities: + try: + self.process_opportunity(source_opportunity, target_opportunity) + except ValueError: + self.increment( + transform_constants.Metrics.TOTAL_ERROR_COUNT, + prefix=transform_constants.OPPORTUNITY, + ) + logger.exception( + "Failed to process opportunity", + extra={"opportunity_id": source_opportunity.opportunity_id}, + ) + + def process_opportunity( + self, source_opportunity: Topportunity, target_opportunity: Opportunity | None + ) -> None: + self.increment( + transform_constants.Metrics.TOTAL_RECORDS_PROCESSED, + prefix=transform_constants.OPPORTUNITY, + ) + extra = {"opportunity_id": source_opportunity.opportunity_id} + logger.info("Processing opportunity", extra=extra) + + if source_opportunity.is_deleted: + self._handle_delete( + source_opportunity, + target_opportunity, + transform_constants.OPPORTUNITY, + extra, + error_on_missing_target=True, + ) + + else: + # To avoid incrementing metrics for records we fail to transform, record + # here whether it's an insert/update and we'll increment after transforming + is_insert = target_opportunity is None + + logger.info("Transforming and upserting opportunity", extra=extra) + transformed_opportunity = transform_util.transform_opportunity( + source_opportunity, target_opportunity + ) + + if is_insert: + self.increment( + transform_constants.Metrics.TOTAL_RECORDS_INSERTED, + prefix=transform_constants.OPPORTUNITY, + ) + self.db_session.add(transformed_opportunity) + else: + self.increment( + transform_constants.Metrics.TOTAL_RECORDS_UPDATED, + prefix=transform_constants.OPPORTUNITY, + ) + self.db_session.merge(transformed_opportunity) + + logger.info("Processed opportunity", extra=extra) + source_opportunity.transformed_at = self.transform_time diff --git a/api/src/data_migration/transformation/subtask/transform_opportunity_summary.py b/api/src/data_migration/transformation/subtask/transform_opportunity_summary.py new file mode 100644 index 000000000..d6385c15b --- /dev/null +++ b/api/src/data_migration/transformation/subtask/transform_opportunity_summary.py @@ -0,0 +1,145 @@ +import logging +from typing import Sequence, Tuple + +import src.data_migration.transformation.transform_constants as transform_constants +import src.data_migration.transformation.transform_util as transform_util +from src.data_migration.transformation.subtask.abstract_transform_subtask import ( + AbstractTransformSubTask, +) +from src.db.models.opportunity_models import Opportunity, OpportunitySummary +from src.db.models.staging.forecast import Tforecast, TforecastHist 
+from src.db.models.staging.synopsis import Tsynopsis, TsynopsisHist + +logger = logging.getLogger(__name__) + + +class TransformOpportunitySummary(AbstractTransformSubTask): + def transform_records(self) -> None: + logger.info("Processing opportunity summaries") + logger.info("Processing synopsis records") + synopsis_records = self.fetch_with_opportunity( + Tsynopsis, + OpportunitySummary, + [ + Tsynopsis.opportunity_id == OpportunitySummary.opportunity_id, + OpportunitySummary.is_forecast.is_(False), + OpportunitySummary.revision_number.is_(None), + ], + ) + self.process_opportunity_summary_group(synopsis_records) + + logger.info("Processing synopsis hist records") + synopsis_hist_records = self.fetch_with_opportunity( + TsynopsisHist, + OpportunitySummary, + [ + TsynopsisHist.opportunity_id == OpportunitySummary.opportunity_id, + TsynopsisHist.revision_number == OpportunitySummary.revision_number, + OpportunitySummary.is_forecast.is_(False), + ], + ) + self.process_opportunity_summary_group(synopsis_hist_records) + + logger.info("Processing forecast records") + forecast_records = self.fetch_with_opportunity( + Tforecast, + OpportunitySummary, + [ + Tforecast.opportunity_id == OpportunitySummary.opportunity_id, + OpportunitySummary.is_forecast.is_(True), + OpportunitySummary.revision_number.is_(None), + ], + ) + self.process_opportunity_summary_group(forecast_records) + + logger.info("Processing forecast hist records") + forecast_hist_records = self.fetch_with_opportunity( + TforecastHist, + OpportunitySummary, + [ + TforecastHist.opportunity_id == OpportunitySummary.opportunity_id, + TforecastHist.revision_number == OpportunitySummary.revision_number, + OpportunitySummary.is_forecast.is_(True), + ], + ) + self.process_opportunity_summary_group(forecast_hist_records) + + def process_opportunity_summary_group( + self, + records: Sequence[ + Tuple[transform_constants.SourceSummary, OpportunitySummary | None, Opportunity | None] + ], + ) -> None: + for source_summary, target_summary, opportunity in records: + try: + self.process_opportunity_summary(source_summary, target_summary, opportunity) + except ValueError: + self.increment( + transform_constants.Metrics.TOTAL_ERROR_COUNT, + prefix=transform_constants.OPPORTUNITY_SUMMARY, + ) + logger.exception( + "Failed to process opportunity summary", + extra=transform_util.get_log_extra_summary(source_summary), + ) + + def process_opportunity_summary( + self, + source_summary: transform_constants.SourceSummary, + target_summary: OpportunitySummary | None, + opportunity: Opportunity | None, + ) -> None: + self.increment( + transform_constants.Metrics.TOTAL_RECORDS_PROCESSED, + prefix=transform_constants.OPPORTUNITY_SUMMARY, + ) + extra = transform_util.get_log_extra_summary(source_summary) + logger.info("Processing opportunity summary", extra=extra) + + if source_summary.is_deleted: + self._handle_delete( + source_summary, target_summary, transform_constants.OPPORTUNITY_SUMMARY, extra + ) + + # Historical records are linked to other historical records, however + # we don't import historical opportunity records, so if the opportunity + # was deleted, we don't have anything to link these to. Whenever we do + # support historical opportunities, we'll have these all marked with a + # flag that we can use to reprocess these. 
+ elif self._is_orphaned_historical(opportunity, source_summary): + self._handle_orphaned_historical( + source_summary, transform_constants.OPPORTUNITY_SUMMARY, extra + ) + + elif opportunity is None: + # This shouldn't be possible as the incoming data has foreign keys, but as a safety net + # we'll make sure the opportunity actually exists + raise ValueError( + "Opportunity summary cannot be processed as the opportunity for it does not exist" + ) + + else: + # To avoid incrementing metrics for records we fail to transform, record + # here whether it's an insert/update and we'll increment after transforming + is_insert = target_summary is None + + logger.info("Transforming and upserting opportunity summary", extra=extra) + transformed_opportunity_summary = transform_util.transform_opportunity_summary( + source_summary, target_summary + ) + + if is_insert: + self.increment( + transform_constants.Metrics.TOTAL_RECORDS_INSERTED, + prefix=transform_constants.OPPORTUNITY_SUMMARY, + ) + self.db_session.add(transformed_opportunity_summary) + else: + self.increment( + transform_constants.Metrics.TOTAL_RECORDS_UPDATED, + prefix=transform_constants.OPPORTUNITY_SUMMARY, + ) + self.db_session.merge(transformed_opportunity_summary) + + logger.info("Processed opportunity summary", extra=extra) + source_summary.transformed_at = self.transform_time diff --git a/api/src/data_migration/transformation/transform_constants.py b/api/src/data_migration/transformation/transform_constants.py new file mode 100644 index 000000000..9d50e2069 --- /dev/null +++ b/api/src/data_migration/transformation/transform_constants.py @@ -0,0 +1,75 @@ +from enum import StrEnum +from typing import TypeAlias, TypeVar + +from src.db.models.base import ApiSchemaTable +from src.db.models.staging.forecast import ( + TapplicanttypesForecast, + TapplicanttypesForecastHist, + Tforecast, + TforecastHist, + TfundactcatForecast, + TfundactcatForecastHist, + TfundinstrForecast, + TfundinstrForecastHist, +) +from src.db.models.staging.staging_base import StagingParamMixin +from src.db.models.staging.synopsis import ( + TapplicanttypesSynopsis, + TapplicanttypesSynopsisHist, + TfundactcatSynopsis, + TfundactcatSynopsisHist, + TfundinstrSynopsis, + TfundinstrSynopsisHist, + Tsynopsis, + TsynopsisHist, +) + +ORPHANED_CFDA = "orphaned_cfda" +ORPHANED_HISTORICAL_RECORD = "orphaned_historical_record" +ORPHANED_DELETE_RECORD = "orphaned_delete_record" + +OPPORTUNITY = "opportunity" +ASSISTANCE_LISTING = "assistance_listing" +OPPORTUNITY_SUMMARY = "opportunity_summary" +APPLICANT_TYPE = "applicant_type" +FUNDING_CATEGORY = "funding_category" +FUNDING_INSTRUMENT = "funding_instrument" + + +class Metrics(StrEnum): + TOTAL_RECORDS_PROCESSED = "total_records_processed" + TOTAL_RECORDS_DELETED = "total_records_deleted" + TOTAL_RECORDS_INSERTED = "total_records_inserted" + TOTAL_RECORDS_UPDATED = "total_records_updated" + TOTAL_RECORDS_ORPHANED = "total_records_orphaned" + TOTAL_DUPLICATE_RECORDS_SKIPPED = "total_duplicate_records_skipped" + TOTAL_HISTORICAL_ORPHANS_SKIPPED = "total_historical_orphans_skipped" + TOTAL_DELETE_ORPHANS_SKIPPED = "total_delete_orphans_skipped" + + TOTAL_ERROR_COUNT = "total_error_count" + + +S = TypeVar("S", bound=StagingParamMixin) +D = TypeVar("D", bound=ApiSchemaTable) + + +SourceSummary: TypeAlias = Tforecast | Tsynopsis | TforecastHist | TsynopsisHist + +SourceApplicantType: TypeAlias = ( + TapplicanttypesForecast + | TapplicanttypesForecastHist + | TapplicanttypesSynopsis + | TapplicanttypesSynopsisHist +) + 
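The `S` and `D` TypeVars defined above are what keep the shared fetch helpers in `AbstractTransformSubTask` precisely typed for every staging/destination pairing. A minimal, self-contained illustration of the pattern (the function here is hypothetical, not part of the patch):

```python
# Illustration only: bounding S to the staging mixin and D to the API schema
# base lets a generic helper preserve concrete types at each call site,
# e.g. pairing Topportunity with Opportunity yields
# tuple[Topportunity, Opportunity | None] under MyPy.
from typing import TypeVar

from src.db.models.base import ApiSchemaTable
from src.db.models.staging.staging_base import StagingParamMixin

S = TypeVar("S", bound=StagingParamMixin)  # source row from the staging tables
D = TypeVar("D", bound=ApiSchemaTable)  # destination row in our API schema


def pair_for_processing(source: S, destination: D | None) -> tuple[S, D | None]:
    # The S bound guarantees the staging bookkeeping columns the subtasks
    # rely on (transformed_at, transformation_notes) are present.
    return (source, destination)
```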
+SourceFundingCategory: TypeAlias = ( + TfundactcatForecast | TfundactcatForecastHist | TfundactcatSynopsis | TfundactcatSynopsisHist +) + +SourceFundingInstrument: TypeAlias = ( + TfundinstrForecastHist | TfundinstrForecast | TfundinstrSynopsisHist | TfundinstrSynopsis +) + +SourceAny: TypeAlias = ( + SourceSummary | SourceApplicantType | SourceFundingCategory | SourceFundingInstrument +) diff --git a/api/src/data_migration/transformation/transform_oracle_data_task.py b/api/src/data_migration/transformation/transform_oracle_data_task.py index 4018f0767..ed5f33a3c 100644 --- a/api/src/data_migration/transformation/transform_oracle_data_task.py +++ b/api/src/data_migration/transformation/transform_oracle_data_task.py @@ -1,931 +1,78 @@ import logging from datetime import datetime -from enum import StrEnum -from typing import Any, Sequence, Tuple, Type, TypeVar, cast -from sqlalchemy import and_, select -from sqlalchemy.orm import selectinload +from pydantic_settings import SettingsConfigDict +import src.data_migration.transformation.transform_constants as transform_constants from src.adapters import db -from src.data_migration.transformation import transform_util -from src.db.models.base import ApiSchemaTable -from src.db.models.opportunity_models import ( - LinkOpportunitySummaryApplicantType, - LinkOpportunitySummaryFundingCategory, - LinkOpportunitySummaryFundingInstrument, - Opportunity, - OpportunityAssistanceListing, - OpportunitySummary, +from src.data_migration.transformation.subtask.transform_applicant_type import ( + TransformApplicantType, ) -from src.db.models.staging.forecast import ( - TapplicanttypesForecast, - TapplicanttypesForecastHist, - Tforecast, - TforecastHist, - TfundactcatForecast, - TfundactcatForecastHist, - TfundinstrForecast, - TfundinstrForecastHist, +from src.data_migration.transformation.subtask.transform_assistance_listing import ( + TransformAssistanceListing, ) -from src.db.models.staging.opportunity import Topportunity, TopportunityCfda -from src.db.models.staging.staging_base import StagingParamMixin -from src.db.models.staging.synopsis import ( - TapplicanttypesSynopsis, - TapplicanttypesSynopsisHist, - TfundactcatSynopsis, - TfundactcatSynopsisHist, - TfundinstrSynopsis, - TfundinstrSynopsisHist, - Tsynopsis, - TsynopsisHist, +from src.data_migration.transformation.subtask.transform_funding_category import ( + TransformFundingCategory, +) +from src.data_migration.transformation.subtask.transform_funding_instrument import ( + TransformFundingInstrument, +) +from src.data_migration.transformation.subtask.transform_opportunity import TransformOpportunity +from src.data_migration.transformation.subtask.transform_opportunity_summary import ( + TransformOpportunitySummary, ) from src.task.task import Task from src.util import datetime_util - -from . 
import SourceApplicantType, SourceFundingCategory, SourceFundingInstrument, SourceSummary - -S = TypeVar("S", bound=StagingParamMixin) -D = TypeVar("D", bound=ApiSchemaTable) +from src.util.env_config import PydanticBaseEnvConfig logger = logging.getLogger(__name__) -### Constants -ORPHANED_CFDA = "orphaned_cfda" -ORPHANED_HISTORICAL_RECORD = "orphaned_historical_record" -ORPHANED_DELETE_RECORD = "orphaned_delete_record" -OPPORTUNITY = "opportunity" -ASSISTANCE_LISTING = "assistance_listing" -OPPORTUNITY_SUMMARY = "opportunity_summary" -APPLICANT_TYPE = "applicant_type" -FUNDING_CATEGORY = "funding_category" -FUNDING_INSTRUMENT = "funding_instrument" +class TransformOracleDataTaskConfig(PydanticBaseEnvConfig): + model_config = SettingsConfigDict(env_prefix="TRANSFORM_ORACLE_DATA_") + enable_opportunity: bool = True # TRANSFORM_ORACLE_DATA_ENABLE_OPPORTUNITY + enable_assistance_listing: bool = True # TRANSFORM_ORACLE_DATA_ENABLE_ASSISTANCE_LISTING + enable_opportunity_summary: bool = True # TRANSFORM_ORACLE_DATA_ENABLE_OPPORTUNITY_SUMMARY + enable_applicant_type: bool = True # TRANSFORM_ORACLE_DATA_ENABLE_APPLICANT_TYPE + enable_funding_category: bool = True # TRANSFORM_ORACLE_DATA_ENABLE_FUNDING_CATEGORY + enable_funding_instrument: bool = True # TRANSFORM_ORACLE_DATA_ENABLE_FUNDING_INSTRUMENT -class TransformOracleDataTask(Task): - class Metrics(StrEnum): - TOTAL_RECORDS_PROCESSED = "total_records_processed" - TOTAL_RECORDS_DELETED = "total_records_deleted" - TOTAL_RECORDS_INSERTED = "total_records_inserted" - TOTAL_RECORDS_UPDATED = "total_records_updated" - TOTAL_RECORDS_ORPHANED = "total_records_orphaned" - TOTAL_DUPLICATE_RECORDS_SKIPPED = "total_duplicate_records_skipped" - TOTAL_HISTORICAL_ORPHANS_SKIPPED = "total_historical_orphans_skipped" - TOTAL_DELETE_ORPHANS_SKIPPED = "total_delete_orphans_skipped" - TOTAL_ERROR_COUNT = "total_error_count" +class TransformOracleDataTask(Task): + Metrics = transform_constants.Metrics - def __init__(self, db_session: db.Session, transform_time: datetime | None = None) -> None: + def __init__( + self, + db_session: db.Session, + transform_time: datetime | None = None, + transform_config: TransformOracleDataTaskConfig | None = None, + ) -> None: super().__init__(db_session) if transform_time is None: transform_time = datetime_util.utcnow() self.transform_time = transform_time - def run_task(self) -> None: - with self.db_session.begin(): - # Opportunities - self.process_opportunities() - - # Assistance Listings - self.process_assistance_listings() - - # Opportunity Summary - self.process_opportunity_summaries() - - # One-to-many lookups - self.process_link_applicant_types() - self.process_link_funding_categories() - self.process_link_funding_instruments() - - def _handle_delete( - self, - source: S, - target: D | None, - record_type: str, - extra: dict, - error_on_missing_target: bool = False, - ) -> None: - # If the target we want to delete is None, we have nothing to delete - if target is None: - # In some scenarios we want to error when this happens - if error_on_missing_target: - raise ValueError("Cannot delete %s record as it does not exist" % record_type) - - # In a lot of scenarios, we actually just want to log a message as it is expected to happen - # For example, if we are deleting an opportunity_summary record, and already deleted the opportunity, - # then SQLAlchemy would have deleted the opportunity_summary for us already. When we later go to delete - # it, we'd hit this case, which isn't a problem. 
- logger.info("Cannot delete %s record as it does not exist", record_type, extra=extra) - source.transformation_notes = ORPHANED_DELETE_RECORD - self.increment(self.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED, prefix=record_type) - return - - logger.info("Deleting %s record", record_type, extra=extra) - self.increment(self.Metrics.TOTAL_RECORDS_DELETED, prefix=record_type) - self.db_session.delete(target) - - def fetch( - self, source_model: Type[S], destination_model: Type[D], join_clause: Sequence - ) -> list[Tuple[S, D | None]]: - # The real type is: Sequence[Row[Tuple[S, D | None]]] - # but MyPy is weird about this and the Row+Tuple causes some - # confusion in the parsing so it ends up assuming everything is Any - # So just cast it to a simpler type that doesn't confuse anything - return cast( - list[Tuple[S, D | None]], - self.db_session.execute( - select(source_model, destination_model) - .join(destination_model, and_(*join_clause), isouter=True) - .where(source_model.transformed_at.is_(None)) - .execution_options(yield_per=5000) - ), - ) - - def fetch_with_opportunity( - self, source_model: Type[S], destination_model: Type[D], join_clause: Sequence - ) -> list[Tuple[S, D | None, Opportunity | None]]: - # Similar to the above fetch function, but also grabs an opportunity record - # Note that this requires your source_model to have an opportunity_id field defined. - - return cast( - list[Tuple[S, D | None, Opportunity | None]], - self.db_session.execute( - select(source_model, destination_model, Opportunity) - .join(destination_model, and_(*join_clause), isouter=True) - .join( - Opportunity, - source_model.opportunity_id == Opportunity.opportunity_id, # type: ignore[attr-defined] - isouter=True, - ) - .where(source_model.transformed_at.is_(None)) - .execution_options(yield_per=5000) - ), - ) - - def fetch_with_opportunity_summary( - self, - source_model: Type[S], - destination_model: Type[D], - join_clause: Sequence, - is_forecast: bool, - is_historical_table: bool, - relationship_load_value: Any, - ) -> list[Tuple[S, D | None, OpportunitySummary | None]]: - # setup the join clause for getting the opportunity summary - - opportunity_summary_join_clause = [ - source_model.opportunity_id == OpportunitySummary.opportunity_id, # type: ignore[attr-defined] - OpportunitySummary.is_forecast.is_(is_forecast), - ] - - if is_historical_table: - opportunity_summary_join_clause.append( - source_model.revision_number == OpportunitySummary.revision_number # type: ignore[attr-defined] - ) - else: - opportunity_summary_join_clause.append(OpportunitySummary.revision_number.is_(None)) - - return cast( - list[Tuple[S, D | None, OpportunitySummary | None]], - self.db_session.execute( - select(source_model, destination_model, OpportunitySummary) - .join(OpportunitySummary, and_(*opportunity_summary_join_clause), isouter=True) - .join(destination_model, and_(*join_clause), isouter=True) - .where(source_model.transformed_at.is_(None)) - .options(selectinload(relationship_load_value)) - .execution_options(yield_per=5000, populate_existing=True) - ), - ) - - def process_opportunities(self) -> None: - # Fetch all opportunities that were modified - # Alongside that, grab the existing opportunity record - opportunities: list[Tuple[Topportunity, Opportunity | None]] = self.fetch( - Topportunity, - Opportunity, - [Topportunity.opportunity_id == Opportunity.opportunity_id], - ) - - for source_opportunity, target_opportunity in opportunities: - try: - self.process_opportunity(source_opportunity, target_opportunity) - 
except ValueError: - self.increment(self.Metrics.TOTAL_ERROR_COUNT, prefix=OPPORTUNITY) - logger.exception( - "Failed to process opportunity", - extra={"opportunity_id": source_opportunity.opportunity_id}, - ) - - def process_opportunity( - self, source_opportunity: Topportunity, target_opportunity: Opportunity | None - ) -> None: - self.increment(self.Metrics.TOTAL_RECORDS_PROCESSED, prefix=OPPORTUNITY) - extra = {"opportunity_id": source_opportunity.opportunity_id} - logger.info("Processing opportunity", extra=extra) - - if source_opportunity.is_deleted: - self._handle_delete( - source_opportunity, - target_opportunity, - OPPORTUNITY, - extra, - error_on_missing_target=True, - ) - - else: - # To avoid incrementing metrics for records we fail to transform, record - # here whether it's an insert/update and we'll increment after transforming - is_insert = target_opportunity is None - - logger.info("Transforming and upserting opportunity", extra=extra) - transformed_opportunity = transform_util.transform_opportunity( - source_opportunity, target_opportunity - ) - - if is_insert: - self.increment(self.Metrics.TOTAL_RECORDS_INSERTED, prefix=OPPORTUNITY) - self.db_session.add(transformed_opportunity) - else: - self.increment(self.Metrics.TOTAL_RECORDS_UPDATED, prefix=OPPORTUNITY) - self.db_session.merge(transformed_opportunity) - - logger.info("Processed opportunity", extra=extra) - source_opportunity.transformed_at = self.transform_time - - def process_assistance_listings(self) -> None: - assistance_listings: list[ - Tuple[TopportunityCfda, OpportunityAssistanceListing | None, Opportunity | None] - ] = self.fetch_with_opportunity( - TopportunityCfda, - OpportunityAssistanceListing, - [ - TopportunityCfda.opp_cfda_id - == OpportunityAssistanceListing.opportunity_assistance_listing_id - ], - ) - - for ( - source_assistance_listing, - target_assistance_listing, - opportunity, - ) in assistance_listings: - try: - self.process_assistance_listing( - source_assistance_listing, target_assistance_listing, opportunity - ) - except ValueError: - self.increment(self.Metrics.TOTAL_ERROR_COUNT, prefix=ASSISTANCE_LISTING) - logger.exception( - "Failed to process assistance listing", - extra={ - "opportunity_assistance_listing_id": source_assistance_listing.opp_cfda_id - }, - ) - - def process_assistance_listing( - self, - source_assistance_listing: TopportunityCfda, - target_assistance_listing: OpportunityAssistanceListing | None, - opportunity: Opportunity | None, - ) -> None: - self.increment(self.Metrics.TOTAL_RECORDS_PROCESSED, prefix=ASSISTANCE_LISTING) - extra = { - "opportunity_assistance_listing_id": source_assistance_listing.opp_cfda_id, - "opportunity_id": source_assistance_listing.opportunity_id, - } - logger.info("Processing assistance listing", extra=extra) - - if source_assistance_listing.is_deleted: - self._handle_delete( - source_assistance_listing, target_assistance_listing, ASSISTANCE_LISTING, extra - ) - - elif opportunity is None: - # The Oracle system we're importing these from does not have a foreign key between - # the opportunity ID in the TOPPORTUNITY_CFDA table and the TOPPORTUNITY table. - # There are many (2306 as of writing) orphaned CFDA records, created between 2007 and 2011 - # We don't want to continuously process these, so won't error for these, and will just - # mark them as transformed below. 
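A quick way to eyeball these orphans locally is an ad-hoc outer-join count; a sketch, not part of this patch, using the staging models that appear elsewhere in this diff:

    from sqlalchemy import func, select
    from src.db.models.staging.opportunity import Topportunity, TopportunityCfda

    orphan_count = db_session.scalar(
        select(func.count())
        .select_from(TopportunityCfda)
        .outerjoin(Topportunity, TopportunityCfda.opportunity_id == Topportunity.opportunity_id)
        .where(Topportunity.opportunity_id.is_(None))  # CFDA rows with no matching opportunity
    )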
- self.increment(self.Metrics.TOTAL_RECORDS_ORPHANED, prefix=ASSISTANCE_LISTING) - logger.info( - "Assistance listing is orphaned and does not connect to any opportunity", - extra=extra, - ) - source_assistance_listing.transformation_notes = ORPHANED_CFDA - - else: - # To avoid incrementing metrics for records we fail to transform, record - # here whether it's an insert/update and we'll increment after transforming - is_insert = target_assistance_listing is None - - logger.info("Transforming and upserting assistance listing", extra=extra) - transformed_assistance_listing = transform_util.transform_assistance_listing( - source_assistance_listing, target_assistance_listing - ) - - if is_insert: - self.increment(self.Metrics.TOTAL_RECORDS_INSERTED, prefix=ASSISTANCE_LISTING) - self.db_session.add(transformed_assistance_listing) - else: - self.increment(self.Metrics.TOTAL_RECORDS_UPDATED, prefix=ASSISTANCE_LISTING) - self.db_session.merge(transformed_assistance_listing) - - logger.info("Processed assistance listing", extra=extra) - source_assistance_listing.transformed_at = self.transform_time - - def process_opportunity_summaries(self) -> None: - logger.info("Processing opportunity summaries") - logger.info("Processing synopsis records") - synopsis_records = self.fetch_with_opportunity( - Tsynopsis, - OpportunitySummary, - [ - Tsynopsis.opportunity_id == OpportunitySummary.opportunity_id, - OpportunitySummary.is_forecast.is_(False), - OpportunitySummary.revision_number.is_(None), - ], - ) - self.process_opportunity_summary_group(synopsis_records) - - logger.info("Processing synopsis hist records") - synopsis_hist_records = self.fetch_with_opportunity( - TsynopsisHist, - OpportunitySummary, - [ - TsynopsisHist.opportunity_id == OpportunitySummary.opportunity_id, - TsynopsisHist.revision_number == OpportunitySummary.revision_number, - OpportunitySummary.is_forecast.is_(False), - ], - ) - self.process_opportunity_summary_group(synopsis_hist_records) - - logger.info("Processing forecast records") - forecast_records = self.fetch_with_opportunity( - Tforecast, - OpportunitySummary, - [ - Tforecast.opportunity_id == OpportunitySummary.opportunity_id, - OpportunitySummary.is_forecast.is_(True), - OpportunitySummary.revision_number.is_(None), - ], - ) - self.process_opportunity_summary_group(forecast_records) - - logger.info("Processing forecast hist records") - forecast_hist_records = self.fetch_with_opportunity( - TforecastHist, - OpportunitySummary, - [ - TforecastHist.opportunity_id == OpportunitySummary.opportunity_id, - TforecastHist.revision_number == OpportunitySummary.revision_number, - OpportunitySummary.is_forecast.is_(True), - ], - ) - self.process_opportunity_summary_group(forecast_hist_records) - - def process_opportunity_summary_group( - self, records: Sequence[Tuple[SourceSummary, OpportunitySummary | None, Opportunity | None]] - ) -> None: - for source_summary, target_summary, opportunity in records: - try: - self.process_opportunity_summary(source_summary, target_summary, opportunity) - except ValueError: - self.increment(self.Metrics.TOTAL_ERROR_COUNT, prefix=OPPORTUNITY_SUMMARY) - logger.exception( - "Failed to process opportunity summary", - extra=transform_util.get_log_extra_summary(source_summary), - ) - - def process_opportunity_summary( - self, - source_summary: SourceSummary, - target_summary: OpportunitySummary | None, - opportunity: Opportunity | None, - ) -> None: - self.increment(self.Metrics.TOTAL_RECORDS_PROCESSED, prefix=OPPORTUNITY_SUMMARY) - extra = 
transform_util.get_log_extra_summary(source_summary) - logger.info("Processing opportunity summary", extra=extra) - - if source_summary.is_deleted: - self._handle_delete(source_summary, target_summary, OPPORTUNITY_SUMMARY, extra) - - # Historical records are linked to other historical records, however - # we don't import historical opportunity records, so if the opportunity - # was deleted, we don't have anything to link these to. Whenever we do - # support historical opportunities, we'll have these all marked with a - # flag that we can use to reprocess these. - elif opportunity is None and source_summary.is_historical_table: - logger.warning( - "Historical opportunity summary does not have a corresponding opportunity - cannot import, but will mark as processed", - extra=extra, - ) - self.increment( - self.Metrics.TOTAL_HISTORICAL_ORPHANS_SKIPPED, prefix=OPPORTUNITY_SUMMARY - ) - source_summary.transformation_notes = ORPHANED_HISTORICAL_RECORD - - elif opportunity is None: - # This shouldn't be possible as the incoming data has foreign keys, but as a safety net - # we'll make sure the opportunity actually exists - raise ValueError( - "Opportunity summary cannot be processed as the opportunity for it does not exist" - ) - - else: - # To avoid incrementing metrics for records we fail to transform, record - # here whether it's an insert/update and we'll increment after transforming - is_insert = target_summary is None - - logger.info("Transforming and upserting opportunity summary", extra=extra) - transformed_opportunity_summary = transform_util.transform_opportunity_summary( - source_summary, target_summary - ) - - if is_insert: - self.increment(self.Metrics.TOTAL_RECORDS_INSERTED, prefix=OPPORTUNITY_SUMMARY) - self.db_session.add(transformed_opportunity_summary) - else: - self.increment(self.Metrics.TOTAL_RECORDS_UPDATED, prefix=OPPORTUNITY_SUMMARY) - self.db_session.merge(transformed_opportunity_summary) - - logger.info("Processed opportunity summary", extra=extra) - source_summary.transformed_at = self.transform_time - - def process_link_applicant_types(self) -> None: - link_table = LinkOpportunitySummaryApplicantType - relationship_load_value = OpportunitySummary.link_applicant_types - - forecast_applicant_type_records = self.fetch_with_opportunity_summary( - TapplicanttypesForecast, - link_table, - [ - TapplicanttypesForecast.at_frcst_id - == LinkOpportunitySummaryApplicantType.legacy_applicant_type_id, - OpportunitySummary.opportunity_summary_id - == LinkOpportunitySummaryApplicantType.opportunity_summary_id, - ], - is_forecast=True, - is_historical_table=False, - relationship_load_value=relationship_load_value, - ) - self.process_link_applicant_types_group(forecast_applicant_type_records) - - forecast_applicant_type_hist_records = self.fetch_with_opportunity_summary( - TapplicanttypesForecastHist, - link_table, - [ - TapplicanttypesForecastHist.at_frcst_id - == LinkOpportunitySummaryApplicantType.legacy_applicant_type_id, - OpportunitySummary.opportunity_summary_id - == LinkOpportunitySummaryApplicantType.opportunity_summary_id, - ], - is_forecast=True, - is_historical_table=True, - relationship_load_value=relationship_load_value, - ) - self.process_link_applicant_types_group(forecast_applicant_type_hist_records) - - synopsis_applicant_type_records = self.fetch_with_opportunity_summary( - TapplicanttypesSynopsis, - link_table, - [ - TapplicanttypesSynopsis.at_syn_id - == LinkOpportunitySummaryApplicantType.legacy_applicant_type_id, - OpportunitySummary.opportunity_summary_id - == 
LinkOpportunitySummaryApplicantType.opportunity_summary_id, - ], - is_forecast=False, - is_historical_table=False, - relationship_load_value=relationship_load_value, - ) - self.process_link_applicant_types_group(synopsis_applicant_type_records) - - synopsis_applicant_type_hist_records = self.fetch_with_opportunity_summary( - TapplicanttypesSynopsisHist, - link_table, - [ - TapplicanttypesSynopsisHist.at_syn_id - == LinkOpportunitySummaryApplicantType.legacy_applicant_type_id, - OpportunitySummary.opportunity_summary_id - == LinkOpportunitySummaryApplicantType.opportunity_summary_id, - ], - is_forecast=False, - is_historical_table=True, - relationship_load_value=relationship_load_value, - ) - self.process_link_applicant_types_group(synopsis_applicant_type_hist_records) - - def process_link_applicant_types_group( - self, - records: Sequence[ - Tuple[ - SourceApplicantType, - LinkOpportunitySummaryApplicantType | None, - OpportunitySummary | None, - ] - ], - ) -> None: - for source_applicant_type, target_applicant_type, opportunity_summary in records: - try: - self.process_link_applicant_type( - source_applicant_type, target_applicant_type, opportunity_summary - ) - except ValueError: - self.increment(self.Metrics.TOTAL_ERROR_COUNT, prefix=APPLICANT_TYPE) - logger.exception( - "Failed to process opportunity summary applicant type", - extra=transform_util.get_log_extra_applicant_type(source_applicant_type), - ) - - def process_link_applicant_type( - self, - source_applicant_type: SourceApplicantType, - target_applicant_type: LinkOpportunitySummaryApplicantType | None, - opportunity_summary: OpportunitySummary | None, - ) -> None: - self.increment(self.Metrics.TOTAL_RECORDS_PROCESSED, prefix=APPLICANT_TYPE) - extra = transform_util.get_log_extra_applicant_type(source_applicant_type) - logger.info("Processing applicant type", extra=extra) - - if source_applicant_type.is_deleted: - self._handle_delete(source_applicant_type, target_applicant_type, APPLICANT_TYPE, extra) - - # Historical records are linked to other historical records, however - # we don't import historical opportunity records, so if the opportunity - # was deleted, we won't have created the opportunity summary. Whenever we do - # support historical opportunities, we'll have these all marked with a - # flag that we can use to reprocess these. 
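The same branch order repeats for funding categories and funding instruments below; condensed to a sketch, with hypothetical helper names standing in for the real methods:

    def process_link_record(source, target, opportunity_summary):
        if source.is_deleted:
            handle_delete(source, target)  # tolerates an already-missing target
        elif opportunity_summary is None and source.is_historical_table:
            # orphaned historical rows are marked and skipped rather than errored
            source.transformation_notes = ORPHANED_HISTORICAL_RECORD
        elif opportunity_summary is None:
            raise ValueError("opportunity summary does not exist")  # safety net only
        else:
            transform_and_upsert(source, target, opportunity_summary)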
- elif opportunity_summary is None and source_applicant_type.is_historical_table: - logger.warning( - "Historical applicant type does not have a corresponding opportunity summary - cannot import, but will mark as processed", - extra=extra, - ) - self.increment(self.Metrics.TOTAL_HISTORICAL_ORPHANS_SKIPPED, prefix=APPLICANT_TYPE) - source_applicant_type.transformation_notes = ORPHANED_HISTORICAL_RECORD - - elif opportunity_summary is None: - # This shouldn't be possible as the incoming data has foreign keys, but as a safety net - # we'll make sure the opportunity actually exists - raise ValueError( - "Applicant type record cannot be processed as the opportunity summary for it does not exist" - ) - else: - # To avoid incrementing metrics for records we fail to transform, record - # here whether it's an insert/update and we'll increment after transforming - is_insert = target_applicant_type is None - - logger.info("Transforming and upserting applicant type", extra=extra) - transformed_applicant_type = transform_util.convert_opportunity_summary_applicant_type( - source_applicant_type, target_applicant_type, opportunity_summary - ) - - # Before we insert, we have to still be certain we're not adding a duplicate record - # because the primary key of the legacy tables is the legacy ID + lookup value + opportunity ID - # its possible for the same lookup value to appear multiple times because the legacy ID is different - # This would hit a conflict in our DBs primary key, so we need to verify that won't happen - if ( - is_insert - and transformed_applicant_type.applicant_type in opportunity_summary.applicant_types - ): - self.increment(self.Metrics.TOTAL_DUPLICATE_RECORDS_SKIPPED, prefix=APPLICANT_TYPE) - logger.warning( - "Skipping applicant type record", - extra=extra | {"applicant_type": transformed_applicant_type.applicant_type}, - ) - elif is_insert: - self.increment(self.Metrics.TOTAL_RECORDS_INSERTED, prefix=APPLICANT_TYPE) - # We append to the relationship so SQLAlchemy immediately attaches it to its cached - # opportunity summary object so that the above check works when we receive dupes in the same batch - opportunity_summary.link_applicant_types.append(transformed_applicant_type) - else: - self.increment(self.Metrics.TOTAL_RECORDS_UPDATED, prefix=APPLICANT_TYPE) - self.db_session.merge(transformed_applicant_type) - - logger.info("Processed applicant type", extra=extra) - source_applicant_type.transformed_at = self.transform_time - - def process_link_funding_categories(self) -> None: - link_table = LinkOpportunitySummaryFundingCategory - relationship_load_value = OpportunitySummary.link_funding_categories - - forecast_funding_category_records = self.fetch_with_opportunity_summary( - TfundactcatForecast, - link_table, - [ - TfundactcatForecast.fac_frcst_id - == LinkOpportunitySummaryFundingCategory.legacy_funding_category_id, - OpportunitySummary.opportunity_summary_id - == LinkOpportunitySummaryFundingCategory.opportunity_summary_id, - ], - is_forecast=True, - is_historical_table=False, - relationship_load_value=relationship_load_value, - ) - self.process_link_funding_categories_group(forecast_funding_category_records) - - forecast_funding_category_hist_records = self.fetch_with_opportunity_summary( - TfundactcatForecastHist, - link_table, - [ - TfundactcatForecastHist.fac_frcst_id - == LinkOpportunitySummaryFundingCategory.legacy_funding_category_id, - OpportunitySummary.opportunity_summary_id - == LinkOpportunitySummaryFundingCategory.opportunity_summary_id, - ], - is_forecast=True, - 
is_historical_table=True, - relationship_load_value=relationship_load_value, - ) - self.process_link_funding_categories_group(forecast_funding_category_hist_records) + if transform_config is None: + transform_config = TransformOracleDataTaskConfig() + self.transform_config = transform_config - synopsis_funding_category_records = self.fetch_with_opportunity_summary( - TfundactcatSynopsis, - link_table, - [ - TfundactcatSynopsis.fac_syn_id - == LinkOpportunitySummaryFundingCategory.legacy_funding_category_id, - OpportunitySummary.opportunity_summary_id - == LinkOpportunitySummaryFundingCategory.opportunity_summary_id, - ], - is_forecast=False, - is_historical_table=False, - relationship_load_value=relationship_load_value, - ) - self.process_link_funding_categories_group(synopsis_funding_category_records) - - synopsis_funding_category_hist_records = self.fetch_with_opportunity_summary( - TfundactcatSynopsisHist, - link_table, - [ - TfundactcatSynopsisHist.fac_syn_id - == LinkOpportunitySummaryFundingCategory.legacy_funding_category_id, - OpportunitySummary.opportunity_summary_id - == LinkOpportunitySummaryFundingCategory.opportunity_summary_id, - ], - is_forecast=False, - is_historical_table=True, - relationship_load_value=relationship_load_value, - ) - self.process_link_funding_categories_group(synopsis_funding_category_hist_records) - - def process_link_funding_categories_group( - self, - records: Sequence[ - Tuple[ - SourceFundingCategory, - LinkOpportunitySummaryFundingCategory | None, - OpportunitySummary | None, - ] - ], - ) -> None: - for source_funding_category, target_funding_category, opportunity_summary in records: - try: - self.process_link_funding_category( - source_funding_category, target_funding_category, opportunity_summary - ) - except ValueError: - self.increment(self.Metrics.TOTAL_ERROR_COUNT, prefix=FUNDING_CATEGORY) - logger.exception( - "Failed to process opportunity summary funding category", - extra=transform_util.get_log_extra_funding_category(source_funding_category), - ) - - def process_link_funding_category( - self, - source_funding_category: SourceFundingCategory, - target_funding_category: LinkOpportunitySummaryFundingCategory | None, - opportunity_summary: OpportunitySummary | None, - ) -> None: - self.increment(self.Metrics.TOTAL_RECORDS_PROCESSED, prefix=FUNDING_CATEGORY) - extra = transform_util.get_log_extra_funding_category(source_funding_category) - logger.info("Processing funding category", extra=extra) - - if source_funding_category.is_deleted: - self._handle_delete( - source_funding_category, target_funding_category, FUNDING_CATEGORY, extra - ) - - # Historical records are linked to other historical records, however - # we don't import historical opportunity records, so if the opportunity - # was deleted, we won't have created the opportunity summary. Whenever we do - # support historical opportunities, we'll have these all marked with a - # flag that we can use to reprocess these. 
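For context on the duplicate check a bit further down: the legacy tables' primary key is legacy ID + lookup value + opportunity ID, so one opportunity summary can legitimately receive the same lookup value under two different legacy IDs. A worked example with made-up values:

    # fac_frcst_id=101, fac_id="RA" -> maps to some FundingCategory for summary 9
    # fac_frcst_id=202, fac_id="RA" -> maps to the same FundingCategory for summary 9
    # Our link table keys on (opportunity_summary_id, funding_category), so inserting
    # the second row would conflict; it is counted and skipped as a duplicate instead.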
- elif opportunity_summary is None and source_funding_category.is_historical_table: - logger.warning( - "Historical funding category does not have a corresponding opportunity summary - cannot import, but will mark as processed", - extra=extra, - ) - self.increment(self.Metrics.TOTAL_HISTORICAL_ORPHANS_SKIPPED, prefix=FUNDING_CATEGORY) - source_funding_category.transformation_notes = ORPHANED_HISTORICAL_RECORD - - elif opportunity_summary is None: - # This shouldn't be possible as the incoming data has foreign keys, but as a safety net - # we'll make sure the opportunity actually exists - raise ValueError( - "Funding category record cannot be processed as the opportunity summary for it does not exist" - ) - else: - # To avoid incrementing metrics for records we fail to transform, record - # here whether it's an insert/update and we'll increment after transforming - is_insert = target_funding_category is None - - logger.info("Transforming and upserting funding category", extra=extra) - transformed_funding_category = ( - transform_util.convert_opportunity_summary_funding_category( - source_funding_category, target_funding_category, opportunity_summary - ) - ) - - # Before we insert, we have to still be certain we're not adding a duplicate record - # because the primary key of the legacy tables is the legacy ID + lookup value + opportunity ID - # its possible for the same lookup value to appear multiple times because the legacy ID is different - # This would hit a conflict in our DBs primary key, so we need to verify that won't happen - if ( - is_insert - and transformed_funding_category.funding_category - in opportunity_summary.funding_categories - ): - self.increment( - self.Metrics.TOTAL_DUPLICATE_RECORDS_SKIPPED, prefix=FUNDING_CATEGORY - ) - logger.warning( - "Skipping funding category record", - extra=extra - | {"funding_category": transformed_funding_category.funding_category}, - ) - elif is_insert: - self.increment(self.Metrics.TOTAL_RECORDS_INSERTED, prefix=FUNDING_CATEGORY) - # We append to the relationship so SQLAlchemy immediately attaches it to its cached - # opportunity summary object so that the above check works when we receive dupes in the same batch - opportunity_summary.link_funding_categories.append(transformed_funding_category) - else: - self.increment(self.Metrics.TOTAL_RECORDS_UPDATED, prefix=FUNDING_CATEGORY) - self.db_session.merge(transformed_funding_category) - - logger.info("Processed funding category", extra=extra) - source_funding_category.transformed_at = self.transform_time - - def process_link_funding_instruments(self) -> None: - link_table = LinkOpportunitySummaryFundingInstrument - relationship_load_value = OpportunitySummary.link_funding_instruments - - forecast_funding_instrument_records = self.fetch_with_opportunity_summary( - TfundinstrForecast, - link_table, - [ - TfundinstrForecast.fi_frcst_id - == LinkOpportunitySummaryFundingInstrument.legacy_funding_instrument_id, - OpportunitySummary.opportunity_summary_id - == LinkOpportunitySummaryFundingInstrument.opportunity_summary_id, - ], - is_forecast=True, - is_historical_table=False, - relationship_load_value=relationship_load_value, - ) - self.process_link_funding_instruments_group(forecast_funding_instrument_records) - - forecast_funding_instrument_hist_records = self.fetch_with_opportunity_summary( - TfundinstrForecastHist, - link_table, - [ - TfundinstrForecastHist.fi_frcst_id - == LinkOpportunitySummaryFundingInstrument.legacy_funding_instrument_id, - OpportunitySummary.opportunity_summary_id - == 
LinkOpportunitySummaryFundingInstrument.opportunity_summary_id, - ], - is_forecast=True, - is_historical_table=True, - relationship_load_value=relationship_load_value, - ) - self.process_link_funding_instruments_group(forecast_funding_instrument_hist_records) - - synopsis_funding_instrument_records = self.fetch_with_opportunity_summary( - TfundinstrSynopsis, - link_table, - [ - TfundinstrSynopsis.fi_syn_id - == LinkOpportunitySummaryFundingInstrument.legacy_funding_instrument_id, - OpportunitySummary.opportunity_summary_id - == LinkOpportunitySummaryFundingInstrument.opportunity_summary_id, - ], - is_forecast=False, - is_historical_table=False, - relationship_load_value=relationship_load_value, - ) - self.process_link_funding_instruments_group(synopsis_funding_instrument_records) - - synopsis_funding_instrument_hist_records = self.fetch_with_opportunity_summary( - TfundinstrSynopsisHist, - link_table, - [ - TfundinstrSynopsisHist.fi_syn_id - == LinkOpportunitySummaryFundingInstrument.legacy_funding_instrument_id, - OpportunitySummary.opportunity_summary_id - == LinkOpportunitySummaryFundingInstrument.opportunity_summary_id, - ], - is_forecast=False, - is_historical_table=True, - relationship_load_value=relationship_load_value, - ) - self.process_link_funding_instruments_group(synopsis_funding_instrument_hist_records) - - def process_link_funding_instruments_group( - self, - records: Sequence[ - Tuple[ - SourceFundingInstrument, - LinkOpportunitySummaryFundingInstrument | None, - OpportunitySummary | None, - ] - ], - ) -> None: - for source_funding_instrument, target_funding_instrument, opportunity_summary in records: - try: - self.process_link_funding_instrument( - source_funding_instrument, target_funding_instrument, opportunity_summary - ) - except ValueError: - self.increment(self.Metrics.TOTAL_ERROR_COUNT, prefix=FUNDING_INSTRUMENT) - logger.exception( - "Failed to process opportunity summary funding instrument", - extra=transform_util.get_log_extra_funding_instrument( - source_funding_instrument - ), - ) - - def process_link_funding_instrument( - self, - source_funding_instrument: SourceFundingInstrument, - target_funding_instrument: LinkOpportunitySummaryFundingInstrument | None, - opportunity_summary: OpportunitySummary | None, - ) -> None: - self.increment(self.Metrics.TOTAL_RECORDS_PROCESSED, prefix=FUNDING_INSTRUMENT) - extra = transform_util.get_log_extra_funding_instrument(source_funding_instrument) - logger.info("Processing funding instrument", extra=extra) - - if source_funding_instrument.is_deleted: - self._handle_delete( - source_funding_instrument, target_funding_instrument, FUNDING_INSTRUMENT, extra - ) - - # Historical records are linked to other historical records, however - # we don't import historical opportunity records, so if the opportunity - # was deleted, we won't have created the opportunity summary. Whenever we do - # support historical opportunities, we'll have these all marked with a - # flag that we can use to reprocess these. 
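Ahead of the rewritten run_task at the end of this hunk: each transformation step is now gated by a flag on TransformOracleDataTaskConfig. A minimal sketch of disabling a single step through the environment, assuming standard pydantic-settings boolean parsing:

    import os

    os.environ["TRANSFORM_ORACLE_DATA_ENABLE_OPPORTUNITY"] = "false"
    config = TransformOracleDataTaskConfig()
    assert config.enable_opportunity is False
    assert config.enable_funding_instrument is True  # other steps keep their defaults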
- elif opportunity_summary is None and source_funding_instrument.is_historical_table: - logger.warning( - "Historical funding instrument does not have a corresponding opportunity summary - cannot import, but will mark as processed", - extra=extra, - ) - self.increment(self.Metrics.TOTAL_HISTORICAL_ORPHANS_SKIPPED, prefix=FUNDING_INSTRUMENT) - source_funding_instrument.transformation_notes = ORPHANED_HISTORICAL_RECORD + def run_task(self) -> None: + if self.transform_config.enable_opportunity: + TransformOpportunity(self).run() - elif opportunity_summary is None: - # This shouldn't be possible as the incoming data has foreign keys, but as a safety net - # we'll make sure the opportunity actually exists - raise ValueError( - "Funding instrument record cannot be processed as the opportunity summary for it does not exist" - ) + if self.transform_config.enable_assistance_listing: + TransformAssistanceListing(self).run() - else: - # To avoid incrementing metrics for records we fail to transform, record - # here whether it's an insert/update and we'll increment after transforming - is_insert = target_funding_instrument is None + if self.transform_config.enable_opportunity_summary: + TransformOpportunitySummary(self).run() - logger.info("Transforming and upserting funding instrument", extra=extra) - transformed_funding_instrument = ( - transform_util.convert_opportunity_summary_funding_instrument( - source_funding_instrument, target_funding_instrument, opportunity_summary - ) - ) + if self.transform_config.enable_applicant_type: + TransformApplicantType(self).run() - # Before we insert, we have to still be certain we're not adding a duplicate record - # because the primary key of the legacy tables is the legacy ID + lookup value + opportunity ID - # its possible for the same lookup value to appear multiple times because the legacy ID is different - # This would hit a conflict in our DBs primary key, so we need to verify that won't happen - if ( - is_insert - and transformed_funding_instrument.funding_instrument - in opportunity_summary.funding_instruments - ): - self.increment( - self.Metrics.TOTAL_DUPLICATE_RECORDS_SKIPPED, prefix=FUNDING_INSTRUMENT - ) - logger.warning( - "Skipping funding instrument record", - extra=extra - | {"funding_instrument": transformed_funding_instrument.funding_instrument}, - ) - elif is_insert: - self.increment(self.Metrics.TOTAL_RECORDS_INSERTED, prefix=FUNDING_INSTRUMENT) - # We append to the relationship so SQLAlchemy immediately attaches it to its cached - # opportunity summary object so that the above check works when we receive dupes in the same batch - opportunity_summary.link_funding_instruments.append(transformed_funding_instrument) - else: - self.increment(self.Metrics.TOTAL_RECORDS_UPDATED, prefix=FUNDING_INSTRUMENT) - self.db_session.merge(transformed_funding_instrument) + if self.transform_config.enable_funding_category: + TransformFundingCategory(self).run() - logger.info("Processed funding instrument", extra=extra) - source_funding_instrument.transformed_at = self.transform_time + if self.transform_config.enable_funding_instrument: + TransformFundingInstrument(self).run() diff --git a/api/src/data_migration/transformation/transform_util.py b/api/src/data_migration/transformation/transform_util.py index 3dc183f9e..d8bf58a1b 100644 --- a/api/src/data_migration/transformation/transform_util.py +++ b/api/src/data_migration/transformation/transform_util.py @@ -7,6 +7,12 @@ FundingInstrument, OpportunityCategory, ) +from 
src.data_migration.transformation.transform_constants import (
+    SourceApplicantType,
+    SourceFundingCategory,
+    SourceFundingInstrument,
+    SourceSummary,
+)
 from src.db.models.base import TimestampMixin
 from src.db.models.opportunity_models import (
     LinkOpportunitySummaryApplicantType,
@@ -16,14 +22,10 @@
     OpportunityAssistanceListing,
     OpportunitySummary,
 )
-from src.db.models.staging.forecast import TforecastHist
 from src.db.models.staging.opportunity import Topportunity, TopportunityCfda
 from src.db.models.staging.staging_base import StagingBase
-from src.db.models.staging.synopsis import Tsynopsis, TsynopsisHist
 from src.util import datetime_util
 
-from . import SourceApplicantType, SourceFundingCategory, SourceFundingInstrument, SourceSummary
-
 logger = logging.getLogger(__name__)
 
 OPPORTUNITY_CATEGORY_MAP = {
@@ -194,15 +196,15 @@ def transform_opportunity_summary(
     if incoming_summary is None:
         logger.info("Creating new opportunity summary record", extra=log_extra)
+        # These values are a part of a unique key for identifying across tables, we don't
+        # ever want to modify them once created
         target_summary = OpportunitySummary(
             opportunity_id=source_summary.opportunity_id,
             is_forecast=source_summary.is_forecast,
-            revision_number=None,
+            # Revision number is only found in the historical table, use getattr
+            # to avoid type checking
+            revision_number=getattr(source_summary, "revision_number", None),
         )
-
-        # Revision number is only found in the historical table
-        if isinstance(source_summary, (TsynopsisHist, TforecastHist)):
-            target_summary.revision_number = source_summary.revision_number
     else:
         # We create a new summary object and merge it outside this function
         # that way if any modifications occur on the object and then it errors
@@ -238,39 +240,32 @@
     target_summary.updated_by = source_summary.last_upd_id
     target_summary.created_by = source_summary.creator_id
 
-    # Some fields either are named different in synopsis/forecast
-    # or only come from one of those tables, so handle those here
-    if isinstance(source_summary, (Tsynopsis, TsynopsisHist)):
-        target_summary.summary_description = source_summary.syn_desc
-        target_summary.agency_code = source_summary.a_sa_code
-        target_summary.agency_phone_number = source_summary.ac_phone_number
-
-        # Synopsis only fields
-        target_summary.agency_contact_description = source_summary.agency_contact_desc
-        target_summary.close_date = source_summary.response_date
-        target_summary.close_date_description = source_summary.response_date_desc
-        target_summary.unarchive_date = source_summary.unarchive_date
-
-    else:  # TForecast & TForecastHist
-        target_summary.summary_description = source_summary.forecast_desc
-        target_summary.agency_code = source_summary.agency_code
-        target_summary.agency_phone_number = source_summary.ac_phone
-
-        # Forecast only fields
-        target_summary.forecasted_post_date = source_summary.est_synopsis_posting_date
-        target_summary.forecasted_close_date = source_summary.est_appl_response_date
-        target_summary.forecasted_close_date_description = (
-            source_summary.est_appl_response_date_desc
-        )
-        target_summary.forecasted_award_date = source_summary.est_award_date
-        target_summary.forecasted_project_start_date = source_summary.est_project_start_date
-        target_summary.fiscal_year = source_summary.fiscal_year
+    target_summary.summary_description = source_summary.description
+    target_summary.agency_code = source_summary.agency_code
+    target_summary.agency_phone_number = source_summary.agency_phone_number
+
+    # These fields are only on synopsis records, use getattr to avoid isinstance
+    target_summary.agency_contact_description = getattr(source_summary, "agency_contact_desc", None)
+    target_summary.close_date = getattr(source_summary, "response_date", None)
+    target_summary.close_date_description = getattr(source_summary, "response_date_desc", None)
+    target_summary.unarchive_date = getattr(source_summary, "unarchive_date", None)
+
+    # These fields are only on forecast records, use getattr to avoid isinstance
+    target_summary.forecasted_post_date = getattr(source_summary, "est_synopsis_posting_date", None)
+    target_summary.forecasted_close_date = getattr(source_summary, "est_appl_response_date", None)
+    target_summary.forecasted_close_date_description = getattr(
+        source_summary, "est_appl_response_date_desc", None
+    )
+    target_summary.forecasted_award_date = getattr(source_summary, "est_award_date", None)
+    target_summary.forecasted_project_start_date = getattr(
+        source_summary, "est_project_start_date", None
+    )
+    target_summary.fiscal_year = getattr(source_summary, "fiscal_year", None)
 
-    # Historical only
-    if isinstance(source_summary, (TsynopsisHist, TforecastHist)):
-        target_summary.is_deleted = convert_action_type_to_is_deleted(source_summary.action_type)
-    else:
-        target_summary.is_deleted = False
+    # Set whether it is deleted based on action_type, which only appears on the historical records
+    target_summary.is_deleted = convert_action_type_to_is_deleted(
+        getattr(source_summary, "action_type", None)
+    )
 
     transform_update_create_timestamp(source_summary, target_summary, log_extra=log_extra)
 
@@ -436,9 +431,13 @@ def convert_yn_bool(value: str | None) -> bool | None:
         raise ValueError("Unexpected Y/N bool value: %s" % value)
 
 
-def convert_action_type_to_is_deleted(value: str | None) -> bool | None:
+def convert_action_type_to_is_deleted(value: str | None) -> bool:
+    # Action type can be U (update) or D (delete)
+    # however many older records seem to not have this set at all
+    # The legacy system looks like it treats anything that isn't D
+    # the same, so we'll go with that assumption as well.
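+    # e.g. (illustrative): None -> False, "" -> False, "U" -> False, and only "D" -> True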
if value is None or value == "": - return None + return False if value == "D": # D = Delete return True diff --git a/api/src/db/models/staging/forecast.py b/api/src/db/models/staging/forecast.py index 2030d2329..4c9ddc61d 100644 --- a/api/src/db/models/staging/forecast.py +++ b/api/src/db/models/staging/forecast.py @@ -24,6 +24,14 @@ def is_forecast(self) -> bool: def is_historical_table(self) -> bool: return False + @property + def description(self) -> str | None: + return self.forecast_desc + + @property + def agency_phone_number(self) -> str | None: + return self.ac_phone + class TforecastHist(StagingBase, forecast_mixin.TforecastHistMixin, StagingParamMixin): __tablename__ = "tforecast_hist" @@ -43,6 +51,14 @@ def is_forecast(self) -> bool: def is_historical_table(self) -> bool: return True + @property + def description(self) -> str | None: + return self.forecast_desc + + @property + def agency_phone_number(self) -> str | None: + return self.ac_phone + class TapplicanttypesForecast( StagingBase, forecast_mixin.TapplicanttypesForecastMixin, StagingParamMixin diff --git a/api/src/db/models/staging/synopsis.py b/api/src/db/models/staging/synopsis.py index 2ad20d7c9..3fc59ab18 100644 --- a/api/src/db/models/staging/synopsis.py +++ b/api/src/db/models/staging/synopsis.py @@ -24,6 +24,18 @@ def is_forecast(self) -> bool: def is_historical_table(self) -> bool: return False + @property + def description(self) -> str | None: + return self.syn_desc + + @property + def agency_code(self) -> str | None: + return self.a_sa_code + + @property + def agency_phone_number(self) -> str | None: + return self.ac_phone_number + class TsynopsisHist(StagingBase, synopsis_mixin.TsynopsisHistMixin, StagingParamMixin): __tablename__ = "tsynopsis_hist" @@ -43,6 +55,18 @@ def is_forecast(self) -> bool: def is_historical_table(self) -> bool: return True + @property + def description(self) -> str | None: + return self.syn_desc + + @property + def agency_code(self) -> str | None: + return self.a_sa_code + + @property + def agency_phone_number(self) -> str | None: + return self.ac_phone_number + class TapplicanttypesSynopsis( StagingBase, synopsis_mixin.TapplicanttypesSynopsisMixin, StagingParamMixin diff --git a/api/src/task/subtask.py b/api/src/task/subtask.py new file mode 100644 index 000000000..178b067b0 --- /dev/null +++ b/api/src/task/subtask.py @@ -0,0 +1,69 @@ +import abc +import logging +import time +from typing import Any + +import src.adapters.db as db +from src.task.task import Task + +logger = logging.getLogger(__name__) + + +class SubTask(abc.ABC, metaclass=abc.ABCMeta): + """ + A SubTask is a class that defines a set of behavior + that can be seen as a subset of a Task. + + This object has access to the same internal metrics + and reporting attributes as its Task, but can be defined + as a separate class which can help with organizing large + complex tasks that can't be easily broken down. 
+ """ + + def __init__(self, task: Task): + self.task = task + + def run(self) -> None: + try: + logger.info("Starting subtask %s", self.cls_name()) + start = time.perf_counter() + + # Run the actual subtask + self.run_subtask() + + # Calculate and set a duration + end = time.perf_counter() + duration = round((end - start), 3) + self.set_metrics({f"{self.cls_name()}_subtask_duration_sec": duration}) + + logger.info("Completed subtask %s in %s seconds", self.cls_name(), duration) + + except Exception: + logger.exception("Failed to run subtask %s", self.cls_name()) + raise + + def set_metrics(self, metrics: dict[str, Any]) -> None: + # Passthrough method to the task set_metrics function + self.task.set_metrics(metrics) + + def increment(self, name: str, value: int = 1, prefix: str | None = None) -> None: + # Passthrough method to the task increment function + self.task.increment(name, value, prefix) + + def cls_name(self) -> str: + return self.__class__.__name__ + + @abc.abstractmethod + def run_subtask(self) -> None: + """Override to define the subtask logic""" + pass + + @property + def db_session(self) -> db.Session: + # Property to make it so the subtask can reference the db_session + # as if it were the task itself + return self.task.db_session + + @property + def metrics(self) -> dict[str, Any]: + return self.task.metrics diff --git a/api/tests/src/data_migration/transformation/conftest.py b/api/tests/src/data_migration/transformation/conftest.py new file mode 100644 index 000000000..443c113b6 --- /dev/null +++ b/api/tests/src/data_migration/transformation/conftest.py @@ -0,0 +1,659 @@ +from datetime import date, datetime +from typing import Tuple + +import pytest + +import tests.src.db.models.factories as f +from src.constants.lookup_constants import ApplicantType, FundingCategory, FundingInstrument +from src.data_migration.transformation.transform_oracle_data_task import TransformOracleDataTask +from src.db.models import staging +from src.db.models.opportunity_models import ( + LinkOpportunitySummaryApplicantType, + LinkOpportunitySummaryFundingCategory, + LinkOpportunitySummaryFundingInstrument, + Opportunity, + OpportunityAssistanceListing, + OpportunitySummary, +) +from tests.conftest import BaseTestClass + + +class BaseTransformTestClass(BaseTestClass): + @pytest.fixture() + def transform_oracle_data_task( + self, db_session, enable_factory_create, truncate_opportunities + ) -> TransformOracleDataTask: + return TransformOracleDataTask(db_session) + + +def setup_opportunity( + create_existing: bool, + is_delete: bool = False, + is_already_processed: bool = False, + source_values: dict | None = None, + all_fields_null: bool = False, +) -> staging.opportunity.Topportunity: + if source_values is None: + source_values = {} + + source_opportunity = f.StagingTopportunityFactory.create( + **source_values, + is_deleted=is_delete, + already_transformed=is_already_processed, + all_fields_null=all_fields_null, + cfdas=[], + ) + + if create_existing: + f.OpportunityFactory.create( + opportunity_id=source_opportunity.opportunity_id, + # set created_at/updated_at to an earlier time so its clear + # when they were last updated + timestamps_in_past=True, + ) + + return source_opportunity + + +def setup_cfda( + create_existing: bool, + is_delete: bool = False, + is_already_processed: bool = False, + source_values: dict | None = None, + all_fields_null: bool = False, + opportunity: Opportunity | None = None, +) -> staging.opportunity.TopportunityCfda: + if source_values is None: + source_values = 
{} + + # If you don't provide an opportunity, you need to provide an ID + if opportunity is not None: + source_values["opportunity_id"] = opportunity.opportunity_id + + source_cfda = f.StagingTopportunityCfdaFactory.create( + **source_values, + opportunity=None, # To override the factory trying to create something + is_deleted=is_delete, + already_transformed=is_already_processed, + all_fields_null=all_fields_null, + ) + + if create_existing: + f.OpportunityAssistanceListingFactory.create( + opportunity=opportunity, + opportunity_assistance_listing_id=source_cfda.opp_cfda_id, + # set created_at/updated_at to an earlier time so its clear + # when they were last updated + timestamps_in_past=True, + ) + + return source_cfda + + +def setup_synopsis_forecast( + is_forecast: bool, + revision_number: int | None, + create_existing: bool, + opportunity: Opportunity | None, + is_delete: bool = False, + is_already_processed: bool = False, + source_values: dict | None = None, +): + if source_values is None: + source_values = {} + + if is_forecast: + if revision_number is None: + factory_cls = f.StagingTforecastFactory + else: + factory_cls = f.StagingTforecastHistFactory + else: + if revision_number is None: + factory_cls = f.StagingTsynopsisFactory + else: + factory_cls = f.StagingTsynopsisHistFactory + + if revision_number is not None: + source_values["revision_number"] = revision_number + + if opportunity is not None: + source_values["opportunity_id"] = opportunity.opportunity_id + + source_summary = factory_cls.create( + **source_values, + opportunity=None, # To override the factory trying to create something + is_deleted=is_delete, + already_transformed=is_already_processed, + ) + + if create_existing: + f.OpportunitySummaryFactory.create( + opportunity=opportunity, is_forecast=is_forecast, revision_number=revision_number + ) + + return source_summary + + +def setup_applicant_type( + create_existing: bool, + opportunity_summary: OpportunitySummary, + legacy_lookup_value: str, + applicant_type: ApplicantType | None = None, + is_delete: bool = False, + is_already_processed: bool = False, + source_values: dict | None = None, +): + if create_existing and is_delete is False and applicant_type is None: + raise Exception( + "If create_existing is True, is_delete is False - must provide the properly converted / mapped value for applicant_type" + ) + + if source_values is None: + source_values = {} + + if opportunity_summary.is_forecast: + source_values["forecast"] = None + if opportunity_summary.revision_number is None: + factory_cls = f.StagingTapplicanttypesForecastFactory + else: + factory_cls = f.StagingTapplicanttypesForecastHistFactory + source_values["revision_number"] = opportunity_summary.revision_number + else: + source_values["synopsis"] = None + if opportunity_summary.revision_number is None: + factory_cls = f.StagingTapplicanttypesSynopsisFactory + else: + factory_cls = f.StagingTapplicanttypesSynopsisHistFactory + source_values["revision_number"] = opportunity_summary.revision_number + + source_applicant_type = factory_cls.create( + **source_values, + opportunity_id=opportunity_summary.opportunity_id, + is_deleted=is_delete, + already_transformed=is_already_processed, + at_id=legacy_lookup_value, + ) + + if create_existing: + if opportunity_summary.is_forecast: + legacy_id = source_applicant_type.at_frcst_id + else: + legacy_id = source_applicant_type.at_syn_id + + f.LinkOpportunitySummaryApplicantTypeFactory.create( + opportunity_summary=opportunity_summary, + 
legacy_applicant_type_id=legacy_id, + applicant_type=applicant_type, + ) + + return source_applicant_type + + +def setup_funding_instrument( + create_existing: bool, + opportunity_summary: OpportunitySummary, + legacy_lookup_value: str, + funding_instrument: FundingInstrument | None = None, + is_delete: bool = False, + is_already_processed: bool = False, + source_values: dict | None = None, +): + if create_existing and is_delete is False and funding_instrument is None: + raise Exception( + "If create_existing is True, is_delete is False - must provide the properly converted / mapped value for funding_instrument" + ) + + if source_values is None: + source_values = {} + + if opportunity_summary.is_forecast: + source_values["forecast"] = None + if opportunity_summary.revision_number is None: + factory_cls = f.StagingTfundinstrForecastFactory + else: + factory_cls = f.StagingTfundinstrForecastHistFactory + source_values["revision_number"] = opportunity_summary.revision_number + else: + source_values["synopsis"] = None + if opportunity_summary.revision_number is None: + factory_cls = f.StagingTfundinstrSynopsisFactory + else: + factory_cls = f.StagingTfundinstrSynopsisHistFactory + source_values["revision_number"] = opportunity_summary.revision_number + + source_funding_instrument = factory_cls.create( + **source_values, + opportunity_id=opportunity_summary.opportunity_id, + is_deleted=is_delete, + already_transformed=is_already_processed, + fi_id=legacy_lookup_value, + ) + + if create_existing: + if opportunity_summary.is_forecast: + legacy_id = source_funding_instrument.fi_frcst_id + else: + legacy_id = source_funding_instrument.fi_syn_id + + f.LinkOpportunitySummaryFundingInstrumentFactory.create( + opportunity_summary=opportunity_summary, + legacy_funding_instrument_id=legacy_id, + funding_instrument=funding_instrument, + ) + + return source_funding_instrument + + +def setup_funding_category( + create_existing: bool, + opportunity_summary: OpportunitySummary, + legacy_lookup_value: str, + funding_category: FundingCategory | None = None, + is_delete: bool = False, + is_already_processed: bool = False, + source_values: dict | None = None, +): + if create_existing and is_delete is False and funding_category is None: + raise Exception( + "If create_existing is True, is_delete is False - must provide the properly converted / mapped value for funding_category" + ) + + if source_values is None: + source_values = {} + + if opportunity_summary.is_forecast: + source_values["forecast"] = None + if opportunity_summary.revision_number is None: + factory_cls = f.StagingTfundactcatForecastFactory + else: + factory_cls = f.StagingTfundactcatForecastHistFactory + source_values["revision_number"] = opportunity_summary.revision_number + else: + source_values["synopsis"] = None + if opportunity_summary.revision_number is None: + factory_cls = f.StagingTfundactcatSynopsisFactory + else: + factory_cls = f.StagingTfundactcatSynopsisHistFactory + source_values["revision_number"] = opportunity_summary.revision_number + + source_funding_category = factory_cls.create( + **source_values, + opportunity_id=opportunity_summary.opportunity_id, + is_deleted=is_delete, + already_transformed=is_already_processed, + fac_id=legacy_lookup_value, + ) + + if create_existing: + if opportunity_summary.is_forecast: + legacy_id = source_funding_category.fac_frcst_id + else: + legacy_id = source_funding_category.fac_syn_id + + f.LinkOpportunitySummaryFundingCategoryFactory.create( + opportunity_summary=opportunity_summary, + 
legacy_funding_category_id=legacy_id,
+            funding_category=funding_category,
+        )
+
+    return source_funding_category
+
+
+def validate_matching_fields(
+    source, destination, fields: list[Tuple[str, str]], expect_all_to_match: bool
+):
+    mismatched_fields = []
+
+    for source_field, destination_field in fields:
+        source_value = getattr(source, source_field)
+        destination_value = getattr(destination, destination_field)
+
+        # Some fields that we copy in are datetime typed (although they behave as dates and we convert them as such)
+        # If so, we need to make sure they're both dates for the purposes of comparison
+        if isinstance(source_value, datetime) and isinstance(destination_value, date):
+            source_value = source_value.date()
+
+        if source_value != destination_value:
+            mismatched_fields.append(
+                f"{source_field}/{destination_field}: '{source_value}' != '{destination_value}'"
+            )
+
+    # If values weren't copied in an update
+    # then we should expect most things to not match,
+    # but randomness in the factories might cause some overlap
+    if expect_all_to_match:
+        assert (
+            len(mismatched_fields) == 0
+        ), f"Expected all fields to match between {source.__class__} and {destination.__class__}, but found mismatched fields: {','.join(mismatched_fields)}"
+    else:
+        assert (
+            len(mismatched_fields) != 0
+        ), f"Did not expect all fields to match between {source.__class__} and {destination.__class__}, but they did which means an unexpected update occurred"
+
+
+def validate_opportunity(
+    db_session,
+    source_opportunity: staging.opportunity.Topportunity,
+    expect_in_db: bool = True,
+    expect_values_to_match: bool = True,
+):
+    opportunity = (
+        db_session.query(Opportunity)
+        .filter(Opportunity.opportunity_id == source_opportunity.opportunity_id)
+        .one_or_none()
+    )
+
+    if not expect_in_db:
+        assert opportunity is None
+        return
+
+    assert opportunity is not None
+    # For fields that we expect to match 1:1, verify that they match as expected
+    validate_matching_fields(
+        source_opportunity,
+        opportunity,
+        [
+            ("oppnumber", "opportunity_number"),
+            ("opptitle", "opportunity_title"),
+            ("owningagency", "agency"),
+            ("category_explanation", "category_explanation"),
+            ("revision_number", "revision_number"),
+            ("modified_comments", "modified_comments"),
+            ("publisheruid", "publisher_user_id"),
+            ("publisher_profile_id", "publisher_profile_id"),
+        ],
+        expect_values_to_match,
+    )
+
+    # Validation of fields that aren't copied exactly
+    if expect_values_to_match:
+        # Deliberately validating is_draft with a different calculation
+        if source_opportunity.is_draft == "N":
+            assert opportunity.is_draft is False
+        else:
+            assert opportunity.is_draft is True
+
+
+def validate_assistance_listing(
+    db_session,
+    source_cfda: staging.opportunity.TopportunityCfda,
+    expect_in_db: bool = True,
+    expect_values_to_match: bool = True,
+):
+    assistance_listing = (
+        db_session.query(OpportunityAssistanceListing)
+        .filter(
+            OpportunityAssistanceListing.opportunity_assistance_listing_id
+            == source_cfda.opp_cfda_id
+        )
+        .one_or_none()
+    )
+
+    if not expect_in_db:
+        assert assistance_listing is None
+        return
+
+    assert assistance_listing is not None
+    # For fields that we expect to match 1:1, verify that they match as expected
+    validate_matching_fields(
+        source_cfda,
+        assistance_listing,
+        [
+            ("cfdanumber", "assistance_listing_number"),
+            ("programtitle", "program_title"),
+        ],
+        expect_values_to_match,
+    )
+
+
+def get_summary_from_source(db_session, source_summary):
+    revision_number = None
+    is_forecast = 
source_summary.is_forecast + if isinstance(source_summary, (staging.synopsis.TsynopsisHist, staging.forecast.TforecastHist)): + revision_number = source_summary.revision_number + + opportunity_summary = ( + db_session.query(OpportunitySummary) + .filter( + OpportunitySummary.opportunity_id == source_summary.opportunity_id, + OpportunitySummary.revision_number == revision_number, + OpportunitySummary.is_forecast == is_forecast, + # Populate existing to force it to fetch updates from the DB + ) + .execution_options(populate_existing=True) + .one_or_none() + ) + + return opportunity_summary + + +def validate_opportunity_summary( + db_session, source_summary, expect_in_db: bool = True, expect_values_to_match: bool = True +): + opportunity_summary = get_summary_from_source(db_session, source_summary) + + if not expect_in_db: + assert opportunity_summary is None + return + + matching_fields = [ + ("version_nbr", "version_number"), + ("posting_date", "post_date"), + ("archive_date", "archive_date"), + ("fd_link_url", "additional_info_url"), + ("fd_link_desc", "additional_info_url_description"), + ("modification_comments", "modification_comments"), + ("oth_cat_fa_desc", "funding_category_description"), + ("applicant_elig_desc", "applicant_eligibility_description"), + ("ac_name", "agency_name"), + ("ac_email_addr", "agency_email_address"), + ("ac_email_desc", "agency_email_address_description"), + ("publisher_profile_id", "publisher_profile_id"), + ("publisheruid", "publisher_user_id"), + ("last_upd_id", "updated_by"), + ("creator_id", "created_by"), + ] + + if isinstance(source_summary, (staging.synopsis.Tsynopsis, staging.synopsis.TsynopsisHist)): + matching_fields.extend( + [ + ("syn_desc", "summary_description"), + ("a_sa_code", "agency_code"), + ("ac_phone_number", "agency_phone_number"), + ("agency_contact_desc", "agency_contact_description"), + ("response_date", "close_date"), + ("response_date_desc", "close_date_description"), + ("unarchive_date", "unarchive_date"), + ] + ) + else: # Forecast+ForecastHist + matching_fields.extend( + [ + ("forecast_desc", "summary_description"), + ("agency_code", "agency_code"), + ("ac_phone", "agency_phone_number"), + ("est_synopsis_posting_date", "forecasted_post_date"), + ("est_appl_response_date", "forecasted_close_date"), + ("est_appl_response_date_desc", "forecasted_close_date_description"), + ("est_award_date", "forecasted_award_date"), + ("est_project_start_date", "forecasted_project_start_date"), + ("fiscal_year", "fiscal_year"), + ] + ) + + # History only fields + is_deleted = False + if isinstance(source_summary, (staging.synopsis.TsynopsisHist, staging.forecast.TforecastHist)): + matching_fields.extend([("revision_number", "revision_number")]) + + is_deleted = source_summary.action_type == "D" + + assert opportunity_summary is not None + validate_matching_fields( + source_summary, opportunity_summary, matching_fields, expect_values_to_match + ) + + assert opportunity_summary.is_deleted == is_deleted + + +def validate_summary_and_nested( + db_session, + source_summary, + expected_applicant_types: list[ApplicantType], + expected_funding_categories: list[FundingCategory], + expected_funding_instruments: list[FundingInstrument], + expect_in_db: bool = True, + expect_values_to_match: bool = True, +): + validate_opportunity_summary(db_session, source_summary, expect_in_db, expect_values_to_match) + + if not expect_in_db: + return + + created_record = get_summary_from_source(db_session, source_summary) + + assert set(created_record.applicant_types) == 
set(expected_applicant_types) + assert set(created_record.funding_categories) == set(expected_funding_categories) + assert set(created_record.funding_instruments) == set(expected_funding_instruments) + + +def validate_applicant_type( + db_session, + source_applicant_type, + expect_in_db: bool = True, + expected_applicant_type: ApplicantType | None = None, + was_processed: bool = True, + expect_values_to_match: bool = True, +): + assert (source_applicant_type.transformed_at is not None) == was_processed + + # In order to properly find the link table value, need to first determine + # the opportunity summary in a subquery + opportunity_summary_id = ( + db_session.query(OpportunitySummary.opportunity_summary_id) + .filter( + OpportunitySummary.revision_number == source_applicant_type.revision_number, + OpportunitySummary.is_forecast == source_applicant_type.is_forecast, + OpportunitySummary.opportunity_id == source_applicant_type.opportunity_id, + ) + .scalar() + ) + + link_applicant_type = ( + db_session.query(LinkOpportunitySummaryApplicantType) + .filter( + LinkOpportunitySummaryApplicantType.legacy_applicant_type_id + == source_applicant_type.legacy_applicant_type_id, + LinkOpportunitySummaryApplicantType.opportunity_summary_id == opportunity_summary_id, + ) + .one_or_none() + ) + + if not expect_in_db: + assert link_applicant_type is None + return + + assert link_applicant_type is not None + assert link_applicant_type.applicant_type == expected_applicant_type + + validate_matching_fields( + source_applicant_type, + link_applicant_type, + [("creator_id", "created_by"), ("last_upd_id", "updated_by")], + expect_values_to_match, + ) + + +def validate_funding_instrument( + db_session, + source_funding_instrument, + expect_in_db: bool = True, + expected_funding_instrument: FundingInstrument | None = None, + was_processed: bool = True, + expect_values_to_match: bool = True, +): + assert (source_funding_instrument.transformed_at is not None) == was_processed + + # In order to properly find the link table value, need to first determine + # the opportunity summary in a subquery + opportunity_summary_id = ( + db_session.query(OpportunitySummary.opportunity_summary_id) + .filter( + OpportunitySummary.revision_number == source_funding_instrument.revision_number, + OpportunitySummary.is_forecast == source_funding_instrument.is_forecast, + OpportunitySummary.opportunity_id == source_funding_instrument.opportunity_id, + ) + .scalar() + ) + + link_funding_instrument = ( + db_session.query(LinkOpportunitySummaryFundingInstrument) + .filter( + LinkOpportunitySummaryFundingInstrument.legacy_funding_instrument_id + == source_funding_instrument.legacy_funding_instrument_id, + LinkOpportunitySummaryFundingInstrument.opportunity_summary_id + == opportunity_summary_id, + ) + .one_or_none() + ) + + if not expect_in_db: + assert link_funding_instrument is None + return + + assert link_funding_instrument is not None + assert link_funding_instrument.funding_instrument == expected_funding_instrument + + validate_matching_fields( + source_funding_instrument, + link_funding_instrument, + [("creator_id", "created_by"), ("last_upd_id", "updated_by")], + expect_values_to_match, + ) + + +def validate_funding_category( + db_session, + source_funding_category, + expect_in_db: bool = True, + expected_funding_category: FundingCategory | None = None, + was_processed: bool = True, + expect_values_to_match: bool = True, +): + assert (source_funding_category.transformed_at is not None) == was_processed + + # In order to properly 
find the link table value, need to first determine + # the opportunity summary in a subquery + opportunity_summary_id = ( + db_session.query(OpportunitySummary.opportunity_summary_id) + .filter( + OpportunitySummary.revision_number == source_funding_category.revision_number, + OpportunitySummary.is_forecast == source_funding_category.is_forecast, + OpportunitySummary.opportunity_id == source_funding_category.opportunity_id, + ) + .scalar() + ) + + link_funding_category = ( + db_session.query(LinkOpportunitySummaryFundingCategory) + .filter( + LinkOpportunitySummaryFundingCategory.legacy_funding_category_id + == source_funding_category.legacy_funding_category_id, + LinkOpportunitySummaryFundingCategory.opportunity_summary_id == opportunity_summary_id, + ) + .one_or_none() + ) + + if not expect_in_db: + assert link_funding_category is None + return + + assert link_funding_category is not None + assert link_funding_category.funding_category == expected_funding_category + + validate_matching_fields( + source_funding_category, + link_funding_category, + [("creator_id", "created_by"), ("last_upd_id", "updated_by")], + expect_values_to_match, + ) diff --git a/api/tests/src/data_migration/transformation/subtask/__init__.py b/api/tests/src/data_migration/transformation/subtask/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/api/tests/src/data_migration/transformation/subtask/test_transform_applicant_type.py b/api/tests/src/data_migration/transformation/subtask/test_transform_applicant_type.py new file mode 100644 index 000000000..1f4949ce8 --- /dev/null +++ b/api/tests/src/data_migration/transformation/subtask/test_transform_applicant_type.py @@ -0,0 +1,395 @@ +import pytest + +import src.data_migration.transformation.transform_constants as transform_constants +import tests.src.db.models.factories as f +from src.constants.lookup_constants import ApplicantType +from src.data_migration.transformation.subtask.transform_applicant_type import ( + TransformApplicantType, +) +from tests.src.data_migration.transformation.conftest import ( + BaseTransformTestClass, + setup_applicant_type, + validate_applicant_type, +) + + +class TestTransformApplicantType(BaseTransformTestClass): + @pytest.fixture() + def transform_applicant_type(self, transform_oracle_data_task): + return TransformApplicantType(transform_oracle_data_task) + + def test_process_applicant_types(self, db_session, transform_applicant_type): + opportunity_summary_forecast = f.OpportunitySummaryFactory.create( + is_forecast=True, revision_number=None, no_link_values=True + ) + forecast_insert1 = setup_applicant_type( + create_existing=False, + opportunity_summary=opportunity_summary_forecast, + legacy_lookup_value="00", + ) + forecast_update1 = setup_applicant_type( + create_existing=True, + opportunity_summary=opportunity_summary_forecast, + legacy_lookup_value="01", + applicant_type=ApplicantType.COUNTY_GOVERNMENTS, + ) + forecast_update2 = setup_applicant_type( + create_existing=True, + opportunity_summary=opportunity_summary_forecast, + legacy_lookup_value="02", + applicant_type=ApplicantType.CITY_OR_TOWNSHIP_GOVERNMENTS, + ) + forecast_delete1 = setup_applicant_type( + create_existing=True, + is_delete=True, + opportunity_summary=opportunity_summary_forecast, + legacy_lookup_value="04", + applicant_type=ApplicantType.SPECIAL_DISTRICT_GOVERNMENTS, + ) + forecast_delete2 = setup_applicant_type( + create_existing=True, + is_delete=True, + opportunity_summary=opportunity_summary_forecast, + legacy_lookup_value="05", + 
applicant_type=ApplicantType.INDEPENDENT_SCHOOL_DISTRICTS, + ) + forecast_update_already_processed = setup_applicant_type( + create_existing=True, + is_already_processed=True, + opportunity_summary=opportunity_summary_forecast, + legacy_lookup_value="06", + applicant_type=ApplicantType.PUBLIC_AND_STATE_INSTITUTIONS_OF_HIGHER_EDUCATION, + ) + + opportunity_summary_forecast_hist = f.OpportunitySummaryFactory.create( + is_forecast=True, revision_number=3, no_link_values=True + ) + forecast_hist_insert1 = setup_applicant_type( + create_existing=False, + opportunity_summary=opportunity_summary_forecast_hist, + legacy_lookup_value="07", + ) + forecast_hist_update1 = setup_applicant_type( + create_existing=True, + opportunity_summary=opportunity_summary_forecast_hist, + legacy_lookup_value="08", + applicant_type=ApplicantType.PUBLIC_AND_INDIAN_HOUSING_AUTHORITIES, + ) + forecast_hist_update2 = setup_applicant_type( + create_existing=True, + opportunity_summary=opportunity_summary_forecast_hist, + legacy_lookup_value="11", + applicant_type=ApplicantType.OTHER_NATIVE_AMERICAN_TRIBAL_ORGANIZATIONS, + ) + forecast_hist_delete1 = setup_applicant_type( + create_existing=True, + is_delete=True, + opportunity_summary=opportunity_summary_forecast_hist, + legacy_lookup_value="12", + applicant_type=ApplicantType.NONPROFITS_NON_HIGHER_EDUCATION_WITH_501C3, + ) + forecast_hist_delete_already_processed = setup_applicant_type( + create_existing=False, + is_delete=True, + is_already_processed=True, + opportunity_summary=opportunity_summary_forecast_hist, + legacy_lookup_value="13", + ) + forecast_hist_duplicate_insert = setup_applicant_type( + create_existing=False, + opportunity_summary=opportunity_summary_forecast_hist, + legacy_lookup_value="08", + ) + + opportunity_summary_syn = f.OpportunitySummaryFactory.create( + is_forecast=False, revision_number=None, no_link_values=True + ) + syn_insert1 = setup_applicant_type( + create_existing=False, + opportunity_summary=opportunity_summary_syn, + legacy_lookup_value="20", + ) + syn_insert2 = setup_applicant_type( + create_existing=False, + opportunity_summary=opportunity_summary_syn, + legacy_lookup_value="21", + ) + syn_update1 = setup_applicant_type( + create_existing=True, + opportunity_summary=opportunity_summary_syn, + legacy_lookup_value="22", + applicant_type=ApplicantType.FOR_PROFIT_ORGANIZATIONS_OTHER_THAN_SMALL_BUSINESSES, + ) + syn_update2 = setup_applicant_type( + create_existing=True, + opportunity_summary=opportunity_summary_syn, + legacy_lookup_value="23", + applicant_type=ApplicantType.SMALL_BUSINESSES, + ) + syn_delete1 = setup_applicant_type( + create_existing=True, + is_delete=True, + opportunity_summary=opportunity_summary_syn, + legacy_lookup_value="25", + applicant_type=ApplicantType.OTHER, + ) + syn_delete2 = setup_applicant_type( + create_existing=True, + is_delete=True, + opportunity_summary=opportunity_summary_syn, + legacy_lookup_value="99", + applicant_type=ApplicantType.UNRESTRICTED, + ) + syn_delete_but_current_missing = setup_applicant_type( + create_existing=False, + is_delete=True, + opportunity_summary=opportunity_summary_syn, + legacy_lookup_value="07", + ) + syn_update_already_processed = setup_applicant_type( + create_existing=True, + is_already_processed=True, + opportunity_summary=opportunity_summary_syn, + legacy_lookup_value="08", + applicant_type=ApplicantType.PUBLIC_AND_INDIAN_HOUSING_AUTHORITIES, + ) + + opportunity_summary_syn_hist = f.OpportunitySummaryFactory.create( + is_forecast=False, revision_number=21, 
no_link_values=True + ) + syn_hist_insert1 = setup_applicant_type( + create_existing=False, + opportunity_summary=opportunity_summary_syn_hist, + legacy_lookup_value="11", + ) + syn_hist_update1 = setup_applicant_type( + create_existing=True, + opportunity_summary=opportunity_summary_syn_hist, + legacy_lookup_value="12", + applicant_type=ApplicantType.NONPROFITS_NON_HIGHER_EDUCATION_WITH_501C3, + ) + syn_hist_update2 = setup_applicant_type( + create_existing=True, + opportunity_summary=opportunity_summary_syn_hist, + legacy_lookup_value="13", + applicant_type=ApplicantType.NONPROFITS_NON_HIGHER_EDUCATION_WITHOUT_501C3, + ) + syn_hist_delete1 = setup_applicant_type( + create_existing=True, + is_delete=True, + opportunity_summary=opportunity_summary_syn_hist, + legacy_lookup_value="25", + applicant_type=ApplicantType.OTHER, + ) + syn_hist_delete2 = setup_applicant_type( + create_existing=True, + is_delete=True, + opportunity_summary=opportunity_summary_syn_hist, + legacy_lookup_value="99", + applicant_type=ApplicantType.UNRESTRICTED, + ) + syn_hist_insert_invalid_type = setup_applicant_type( + create_existing=False, + opportunity_summary=opportunity_summary_syn_hist, + legacy_lookup_value="X", + applicant_type=ApplicantType.STATE_GOVERNMENTS, + ) + + transform_applicant_type.run_subtask() + + validate_applicant_type( + db_session, forecast_insert1, expected_applicant_type=ApplicantType.STATE_GOVERNMENTS + ) + validate_applicant_type( + db_session, + forecast_hist_insert1, + expected_applicant_type=ApplicantType.FEDERALLY_RECOGNIZED_NATIVE_AMERICAN_TRIBAL_GOVERNMENTS, + ) + validate_applicant_type( + db_session, + syn_insert1, + expected_applicant_type=ApplicantType.PRIVATE_INSTITUTIONS_OF_HIGHER_EDUCATION, + ) + validate_applicant_type( + db_session, syn_insert2, expected_applicant_type=ApplicantType.INDIVIDUALS + ) + validate_applicant_type( + db_session, + syn_hist_insert1, + expected_applicant_type=ApplicantType.OTHER_NATIVE_AMERICAN_TRIBAL_ORGANIZATIONS, + ) + + validate_applicant_type( + db_session, forecast_update1, expected_applicant_type=ApplicantType.COUNTY_GOVERNMENTS + ) + validate_applicant_type( + db_session, + forecast_update2, + expected_applicant_type=ApplicantType.CITY_OR_TOWNSHIP_GOVERNMENTS, + ) + validate_applicant_type( + db_session, + forecast_hist_update1, + expected_applicant_type=ApplicantType.PUBLIC_AND_INDIAN_HOUSING_AUTHORITIES, + ) + validate_applicant_type( + db_session, + forecast_hist_update2, + expected_applicant_type=ApplicantType.OTHER_NATIVE_AMERICAN_TRIBAL_ORGANIZATIONS, + ) + validate_applicant_type( + db_session, + syn_update1, + expected_applicant_type=ApplicantType.FOR_PROFIT_ORGANIZATIONS_OTHER_THAN_SMALL_BUSINESSES, + ) + validate_applicant_type( + db_session, syn_update2, expected_applicant_type=ApplicantType.SMALL_BUSINESSES + ) + validate_applicant_type( + db_session, + syn_hist_update1, + expected_applicant_type=ApplicantType.NONPROFITS_NON_HIGHER_EDUCATION_WITH_501C3, + ) + validate_applicant_type( + db_session, + syn_hist_update2, + expected_applicant_type=ApplicantType.NONPROFITS_NON_HIGHER_EDUCATION_WITHOUT_501C3, + ) + + validate_applicant_type(db_session, forecast_delete1, expect_in_db=False) + validate_applicant_type(db_session, forecast_delete2, expect_in_db=False) + validate_applicant_type(db_session, forecast_hist_delete1, expect_in_db=False) + validate_applicant_type(db_session, syn_delete1, expect_in_db=False) + validate_applicant_type(db_session, syn_delete2, expect_in_db=False) + validate_applicant_type(db_session, 
syn_hist_delete1, expect_in_db=False) + validate_applicant_type(db_session, syn_hist_delete2, expect_in_db=False) + + validate_applicant_type( + db_session, + forecast_update_already_processed, + expected_applicant_type=ApplicantType.PUBLIC_AND_STATE_INSTITUTIONS_OF_HIGHER_EDUCATION, + expect_values_to_match=False, + ) + validate_applicant_type( + db_session, forecast_hist_delete_already_processed, expect_in_db=False + ) + validate_applicant_type( + db_session, + syn_update_already_processed, + expected_applicant_type=ApplicantType.PUBLIC_AND_INDIAN_HOUSING_AUTHORITIES, + expect_values_to_match=False, + ) + + validate_applicant_type( + db_session, syn_delete_but_current_missing, expect_in_db=False, was_processed=True + ) + validate_applicant_type( + db_session, syn_hist_insert_invalid_type, expect_in_db=False, was_processed=False + ) + + validate_applicant_type( + db_session, forecast_hist_duplicate_insert, expect_in_db=False, was_processed=True + ) + + metrics = transform_applicant_type.metrics + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_PROCESSED] == 23 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_DELETED] == 7 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_INSERTED] == 5 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_UPDATED] == 8 + assert metrics[transform_constants.Metrics.TOTAL_ERROR_COUNT] == 1 + assert metrics[transform_constants.Metrics.TOTAL_DUPLICATE_RECORDS_SKIPPED] == 1 + assert metrics[transform_constants.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1 + + # Rerunning will only attempt to re-process the errors, so total+errors goes up by 1 + db_session.commit() # commit to end any existing transactions as run_subtask starts a new one + transform_applicant_type.run_subtask() + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_PROCESSED] == 24 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_DELETED] == 7 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_INSERTED] == 5 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_UPDATED] == 8 + assert metrics[transform_constants.Metrics.TOTAL_ERROR_COUNT] == 2 + assert metrics[transform_constants.Metrics.TOTAL_DUPLICATE_RECORDS_SKIPPED] == 1 + assert metrics[transform_constants.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1 + + @pytest.mark.parametrize( + "is_forecast,revision_number", [(True, None), (False, None), (True, 5), (False, 10)] + ) + def test_process_applicant_types_but_current_missing( + self, db_session, transform_applicant_type, is_forecast, revision_number + ): + opportunity_summary = f.OpportunitySummaryFactory.create( + is_forecast=is_forecast, revision_number=revision_number, no_link_values=True + ) + delete_but_current_missing = setup_applicant_type( + create_existing=False, + opportunity_summary=opportunity_summary, + legacy_lookup_value="00", + is_delete=True, + ) + + transform_applicant_type.process_link_applicant_type( + delete_but_current_missing, None, opportunity_summary + ) + + validate_applicant_type(db_session, delete_but_current_missing, expect_in_db=False) + assert delete_but_current_missing.transformed_at is not None + assert delete_but_current_missing.transformation_notes == "orphaned_delete_record" + + @pytest.mark.parametrize( + "is_forecast,revision_number,legacy_lookup_value", + [(True, None, "90"), (False, None, "xx"), (True, 5, "50"), (False, 10, "1")], + ) + def test_process_applicant_types_but_invalid_lookup_value( + self, + db_session, + transform_applicant_type, + is_forecast, + revision_number, + legacy_lookup_value, 
+ ): + opportunity_summary = f.OpportunitySummaryFactory.create( + is_forecast=is_forecast, revision_number=revision_number, no_link_values=True + ) + insert_but_invalid_value = setup_applicant_type( + create_existing=False, + opportunity_summary=opportunity_summary, + legacy_lookup_value=legacy_lookup_value, + ) + + with pytest.raises(ValueError, match="Unrecognized applicant type"): + transform_applicant_type.process_link_applicant_type( + insert_but_invalid_value, None, opportunity_summary + ) + + @pytest.mark.parametrize( + "factory_cls", + [f.StagingTapplicanttypesForecastFactory, f.StagingTapplicanttypesSynopsisFactory], + ) + def test_process_applicant_type_but_no_opportunity_summary_non_hist( + self, + db_session, + transform_applicant_type, + factory_cls, + ): + source_record = factory_cls.create(orphaned_record=True) + + with pytest.raises( + ValueError, + match="Applicant type record cannot be processed as the opportunity summary for it does not exist", + ): + transform_applicant_type.process_link_applicant_type(source_record, None, None) + + @pytest.mark.parametrize( + "factory_cls", + [f.StagingTapplicanttypesForecastHistFactory, f.StagingTapplicanttypesSynopsisHistFactory], + ) + def test_process_applicant_type_but_no_opportunity_summary_hist( + self, + db_session, + transform_applicant_type, + factory_cls, + ): + source_record = factory_cls.create(orphaned_record=True, revision_number=12) + transform_applicant_type.process_link_applicant_type(source_record, None, None) + assert source_record.transformed_at is not None + assert source_record.transformation_notes == "orphaned_historical_record" diff --git a/api/tests/src/data_migration/transformation/subtask/test_transform_assistance_listing.py b/api/tests/src/data_migration/transformation/subtask/test_transform_assistance_listing.py new file mode 100644 index 000000000..91c6572e6 --- /dev/null +++ b/api/tests/src/data_migration/transformation/subtask/test_transform_assistance_listing.py @@ -0,0 +1,157 @@ +import pytest + +import src.data_migration.transformation.transform_constants as transform_constants +import tests.src.db.models.factories as f +from src.data_migration.transformation.subtask.transform_assistance_listing import ( + TransformAssistanceListing, +) +from src.db.models.opportunity_models import Opportunity, OpportunityAssistanceListing +from tests.src.data_migration.transformation.conftest import ( + BaseTransformTestClass, + setup_cfda, + validate_assistance_listing, +) + + +class TestTransformAssistanceListing(BaseTransformTestClass): + @pytest.fixture() + def transform_assistance_listing(self, transform_oracle_data_task): + return TransformAssistanceListing(transform_oracle_data_task) + + def test_process_opportunity_assistance_listings( + self, db_session, transform_assistance_listing + ): + opportunity1 = f.OpportunityFactory.create(opportunity_assistance_listings=[]) + cfda_insert1 = setup_cfda(create_existing=False, opportunity=opportunity1) + cfda_insert2 = setup_cfda(create_existing=False, opportunity=opportunity1) + cfda_update1 = setup_cfda(create_existing=True, opportunity=opportunity1) + cfda_delete1 = setup_cfda(create_existing=True, is_delete=True, opportunity=opportunity1) + cfda_update_already_processed1 = setup_cfda( + create_existing=True, is_already_processed=True, opportunity=opportunity1 + ) + + opportunity2 = f.OpportunityFactory.create(opportunity_assistance_listings=[]) + cfda_insert3 = setup_cfda(create_existing=False, opportunity=opportunity2) + cfda_update_already_processed2 = 
setup_cfda( + create_existing=True, is_already_processed=True, opportunity=opportunity2 + ) + cfda_delete_already_processed1 = setup_cfda( + create_existing=False, + is_already_processed=True, + is_delete=True, + opportunity=opportunity2, + ) + cfda_delete2 = setup_cfda(create_existing=True, is_delete=True, opportunity=opportunity2) + + opportunity3 = f.OpportunityFactory.create(opportunity_assistance_listings=[]) + cfda_update2 = setup_cfda(create_existing=True, opportunity=opportunity3) + cfda_delete_but_current_missing = setup_cfda( + create_existing=False, is_delete=True, opportunity=opportunity3 + ) + + cfda_insert_without_opportunity = setup_cfda( + create_existing=False, source_values={"opportunity_id": 12345678}, opportunity=None + ) + cfda_delete_without_opportunity = setup_cfda( + create_existing=False, source_values={"opportunity_id": 34567890}, opportunity=None + ) + + transform_assistance_listing.run_subtask() + + validate_assistance_listing(db_session, cfda_insert1) + validate_assistance_listing(db_session, cfda_insert2) + validate_assistance_listing(db_session, cfda_insert3) + validate_assistance_listing(db_session, cfda_update1) + validate_assistance_listing(db_session, cfda_update2) + validate_assistance_listing(db_session, cfda_delete1, expect_in_db=False) + validate_assistance_listing(db_session, cfda_delete2, expect_in_db=False) + + # Records that won't have been fetched + validate_assistance_listing( + db_session, + cfda_update_already_processed1, + expect_in_db=True, + expect_values_to_match=False, + ) + validate_assistance_listing( + db_session, + cfda_update_already_processed2, + expect_in_db=True, + expect_values_to_match=False, + ) + validate_assistance_listing(db_session, cfda_delete_already_processed1, expect_in_db=False) + + validate_assistance_listing(db_session, cfda_delete_but_current_missing, expect_in_db=False) + + validate_assistance_listing(db_session, cfda_insert_without_opportunity, expect_in_db=False) + validate_assistance_listing(db_session, cfda_delete_without_opportunity, expect_in_db=False) + + metrics = transform_assistance_listing.metrics + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_PROCESSED] == 10 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_DELETED] == 2 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_INSERTED] == 3 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_UPDATED] == 2 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_ORPHANED] == 2 + assert metrics[transform_constants.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1 + + # Rerunning finds nothing - no metrics update + db_session.commit() # commit to end any existing transactions as run_subtask starts a new one + transform_assistance_listing.run_subtask() + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_PROCESSED] == 10 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_DELETED] == 2 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_INSERTED] == 3 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_UPDATED] == 2 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_ORPHANED] == 2 + assert metrics[transform_constants.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1 + + def test_process_assistance_listing_orphaned_record( + self, db_session, transform_assistance_listing + ): + cfda_insert_without_opportunity = setup_cfda( + create_existing=False, source_values={"opportunity_id": 987654321}, opportunity=None + ) + + # Verify it gets marked as transformed + assert 
cfda_insert_without_opportunity.transformed_at is None + transform_assistance_listing.process_assistance_listing( + cfda_insert_without_opportunity, None, None + ) + assert cfda_insert_without_opportunity.transformed_at is not None + assert cfda_insert_without_opportunity.transformation_notes == "orphaned_cfda" + assert ( + transform_assistance_listing.metrics[transform_constants.Metrics.TOTAL_RECORDS_ORPHANED] + == 1 + ) + + # Verify nothing actually gets created + opportunity = ( + db_session.query(Opportunity) + .filter(Opportunity.opportunity_id == cfda_insert_without_opportunity.opportunity_id) + .one_or_none() + ) + assert opportunity is None + assistance_listing = ( + db_session.query(OpportunityAssistanceListing) + .filter( + OpportunityAssistanceListing.opportunity_assistance_listing_id + == cfda_insert_without_opportunity.opp_cfda_id + ) + .one_or_none() + ) + assert assistance_listing is None + + def test_process_assistance_listing_delete_but_current_missing( + self, db_session, transform_assistance_listing + ): + opportunity = f.OpportunityFactory.create(opportunity_assistance_listings=[]) + delete_but_current_missing = setup_cfda( + create_existing=False, is_delete=True, opportunity=opportunity + ) + + transform_assistance_listing.process_assistance_listing( + delete_but_current_missing, None, opportunity + ) + + validate_assistance_listing(db_session, delete_but_current_missing, expect_in_db=False) + assert delete_but_current_missing.transformed_at is not None + assert delete_but_current_missing.transformation_notes == "orphaned_delete_record" diff --git a/api/tests/src/data_migration/transformation/subtask/test_transform_funding_category.py b/api/tests/src/data_migration/transformation/subtask/test_transform_funding_category.py new file mode 100644 index 000000000..c0aa04e71 --- /dev/null +++ b/api/tests/src/data_migration/transformation/subtask/test_transform_funding_category.py @@ -0,0 +1,374 @@ +import pytest + +import src.data_migration.transformation.transform_constants as transform_constants +import tests.src.db.models.factories as f +from src.constants.lookup_constants import FundingCategory +from src.data_migration.transformation.subtask.transform_funding_category import ( + TransformFundingCategory, +) +from tests.src.data_migration.transformation.conftest import ( + BaseTransformTestClass, + setup_funding_category, + validate_funding_category, +) + + +class TestTransformFundingCategory(BaseTransformTestClass): + @pytest.fixture() + def transform_funding_category(self, transform_oracle_data_task): + return TransformFundingCategory(transform_oracle_data_task) + + def test_process_funding_categories(self, db_session, transform_funding_category): + opportunity_summary_forecast = f.OpportunitySummaryFactory.create( + is_forecast=True, revision_number=None, no_link_values=True + ) + forecast_insert1 = setup_funding_category( + create_existing=False, + opportunity_summary=opportunity_summary_forecast, + legacy_lookup_value="RA", + ) + forecast_insert2 = setup_funding_category( + create_existing=False, + opportunity_summary=opportunity_summary_forecast, + legacy_lookup_value="AG", + ) + forecast_update1 = setup_funding_category( + create_existing=True, + opportunity_summary=opportunity_summary_forecast, + legacy_lookup_value="AR", + funding_category=FundingCategory.ARTS, + ) + forecast_delete1 = setup_funding_category( + create_existing=True, + is_delete=True, + opportunity_summary=opportunity_summary_forecast, + legacy_lookup_value="BC", + 
funding_category=FundingCategory.BUSINESS_AND_COMMERCE, + ) + forecast_delete2 = setup_funding_category( + create_existing=True, + is_delete=True, + opportunity_summary=opportunity_summary_forecast, + legacy_lookup_value="CD", + funding_category=FundingCategory.COMMUNITY_DEVELOPMENT, + ) + forecast_update_already_processed = setup_funding_category( + create_existing=True, + is_already_processed=True, + opportunity_summary=opportunity_summary_forecast, + legacy_lookup_value="CP", + funding_category=FundingCategory.CONSUMER_PROTECTION, + ) + + opportunity_summary_forecast_hist = f.OpportunitySummaryFactory.create( + is_forecast=True, revision_number=3, no_link_values=True + ) + forecast_hist_insert1 = setup_funding_category( + create_existing=False, + opportunity_summary=opportunity_summary_forecast_hist, + legacy_lookup_value="DPR", + ) + forecast_hist_insert2 = setup_funding_category( + create_existing=False, + opportunity_summary=opportunity_summary_forecast_hist, + legacy_lookup_value="ED", + ) + forecast_hist_update1 = setup_funding_category( + create_existing=True, + opportunity_summary=opportunity_summary_forecast_hist, + legacy_lookup_value="ELT", + funding_category=FundingCategory.EMPLOYMENT_LABOR_AND_TRAINING, + ) + forecast_hist_delete1 = setup_funding_category( + create_existing=True, + is_delete=True, + opportunity_summary=opportunity_summary_forecast_hist, + legacy_lookup_value="EN", + funding_category=FundingCategory.ENERGY, + ) + forecast_hist_delete_already_processed = setup_funding_category( + create_existing=False, + is_delete=True, + is_already_processed=True, + opportunity_summary=opportunity_summary_forecast_hist, + legacy_lookup_value="ENV", + ) + + opportunity_summary_syn = f.OpportunitySummaryFactory.create( + is_forecast=False, revision_number=None, no_link_values=True + ) + syn_insert1 = setup_funding_category( + create_existing=False, + opportunity_summary=opportunity_summary_syn, + legacy_lookup_value="FN", + ) + syn_insert2 = setup_funding_category( + create_existing=False, + opportunity_summary=opportunity_summary_syn, + legacy_lookup_value="HL", + ) + syn_update1 = setup_funding_category( + create_existing=True, + opportunity_summary=opportunity_summary_syn, + legacy_lookup_value="HO", + funding_category=FundingCategory.HOUSING, + ) + syn_delete1 = setup_funding_category( + create_existing=True, + is_delete=True, + opportunity_summary=opportunity_summary_syn, + legacy_lookup_value="HU", + funding_category=FundingCategory.HUMANITIES, + ) + syn_delete2 = setup_funding_category( + create_existing=True, + is_delete=True, + opportunity_summary=opportunity_summary_syn, + legacy_lookup_value="IIJ", + funding_category=FundingCategory.INFRASTRUCTURE_INVESTMENT_AND_JOBS_ACT, + ) + syn_delete_but_current_missing = setup_funding_category( + create_existing=False, + is_delete=True, + opportunity_summary=opportunity_summary_syn, + legacy_lookup_value="IS", + ) + syn_update_already_processed = setup_funding_category( + create_existing=True, + is_already_processed=True, + opportunity_summary=opportunity_summary_syn, + legacy_lookup_value="ISS", + funding_category=FundingCategory.INCOME_SECURITY_AND_SOCIAL_SERVICES, + ) + + opportunity_summary_syn_hist = f.OpportunitySummaryFactory.create( + is_forecast=False, revision_number=21, no_link_values=True + ) + syn_hist_insert1 = setup_funding_category( + create_existing=False, + opportunity_summary=opportunity_summary_syn_hist, + legacy_lookup_value="LJL", + ) + syn_hist_insert2 = setup_funding_category( + create_existing=False, + 
opportunity_summary=opportunity_summary_syn_hist, + legacy_lookup_value="NR", + ) + syn_hist_insert3 = setup_funding_category( + create_existing=False, + opportunity_summary=opportunity_summary_syn_hist, + legacy_lookup_value="OZ", + ) + syn_hist_update1 = setup_funding_category( + create_existing=True, + opportunity_summary=opportunity_summary_syn_hist, + legacy_lookup_value="RD", + funding_category=FundingCategory.REGIONAL_DEVELOPMENT, + ) + + syn_hist_delete1 = setup_funding_category( + create_existing=True, + is_delete=True, + opportunity_summary=opportunity_summary_syn_hist, + legacy_lookup_value="ST", + funding_category=FundingCategory.SCIENCE_TECHNOLOGY_AND_OTHER_RESEARCH_AND_DEVELOPMENT, + ) + syn_hist_delete2 = setup_funding_category( + create_existing=True, + is_delete=True, + opportunity_summary=opportunity_summary_syn_hist, + legacy_lookup_value="T", + funding_category=FundingCategory.TRANSPORTATION, + ) + syn_hist_insert_invalid_type = setup_funding_category( + create_existing=False, + opportunity_summary=opportunity_summary_syn_hist, + legacy_lookup_value="XYZ", + funding_category=FundingCategory.HEALTH, + ) + + transform_funding_category.run_subtask() + + validate_funding_category( + db_session, forecast_insert1, expected_funding_category=FundingCategory.RECOVERY_ACT + ) + validate_funding_category( + db_session, forecast_insert2, expected_funding_category=FundingCategory.AGRICULTURE + ) + validate_funding_category( + db_session, + forecast_hist_insert1, + expected_funding_category=FundingCategory.DISASTER_PREVENTION_AND_RELIEF, + ) + validate_funding_category( + db_session, forecast_hist_insert2, expected_funding_category=FundingCategory.EDUCATION + ) + validate_funding_category( + db_session, syn_insert1, expected_funding_category=FundingCategory.FOOD_AND_NUTRITION + ) + validate_funding_category( + db_session, syn_insert2, expected_funding_category=FundingCategory.HEALTH + ) + validate_funding_category( + db_session, + syn_hist_insert1, + expected_funding_category=FundingCategory.LAW_JUSTICE_AND_LEGAL_SERVICES, + ) + validate_funding_category( + db_session, + syn_hist_insert2, + expected_funding_category=FundingCategory.NATURAL_RESOURCES, + ) + validate_funding_category( + db_session, + syn_hist_insert3, + expected_funding_category=FundingCategory.OPPORTUNITY_ZONE_BENEFITS, + ) + + validate_funding_category( + db_session, forecast_update1, expected_funding_category=FundingCategory.ARTS + ) + validate_funding_category( + db_session, + forecast_hist_update1, + expected_funding_category=FundingCategory.EMPLOYMENT_LABOR_AND_TRAINING, + ) + validate_funding_category( + db_session, syn_update1, expected_funding_category=FundingCategory.HOUSING + ) + validate_funding_category( + db_session, + syn_hist_update1, + expected_funding_category=FundingCategory.REGIONAL_DEVELOPMENT, + ) + + validate_funding_category(db_session, forecast_delete1, expect_in_db=False) + validate_funding_category(db_session, forecast_delete2, expect_in_db=False) + validate_funding_category(db_session, forecast_hist_delete1, expect_in_db=False) + validate_funding_category(db_session, syn_delete1, expect_in_db=False) + validate_funding_category(db_session, syn_delete2, expect_in_db=False) + validate_funding_category(db_session, syn_hist_delete1, expect_in_db=False) + validate_funding_category(db_session, syn_hist_delete2, expect_in_db=False) + + validate_funding_category( + db_session, + forecast_update_already_processed, + expected_funding_category=FundingCategory.CONSUMER_PROTECTION, + 
expect_values_to_match=False, + ) + validate_funding_category( + db_session, forecast_hist_delete_already_processed, expect_in_db=False + ) + validate_funding_category( + db_session, + syn_update_already_processed, + expected_funding_category=FundingCategory.INCOME_SECURITY_AND_SOCIAL_SERVICES, + expect_values_to_match=False, + ) + + validate_funding_category( + db_session, syn_delete_but_current_missing, expect_in_db=False, was_processed=True + ) + validate_funding_category( + db_session, syn_hist_insert_invalid_type, expect_in_db=False, was_processed=False + ) + + metrics = transform_funding_category.metrics + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_PROCESSED] == 22 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_DELETED] == 7 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_INSERTED] == 9 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_UPDATED] == 4 + assert metrics[transform_constants.Metrics.TOTAL_ERROR_COUNT] == 1 + assert metrics[transform_constants.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1 + + # Rerunning will only attempt to re-process the errors, so total+errors goes up by 1 + db_session.commit() # commit to end any existing transactions as run_subtask starts a new one + transform_funding_category.run_subtask() + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_PROCESSED] == 23 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_DELETED] == 7 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_INSERTED] == 9 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_UPDATED] == 4 + assert metrics[transform_constants.Metrics.TOTAL_ERROR_COUNT] == 2 + assert metrics[transform_constants.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1 + + @pytest.mark.parametrize( + "is_forecast,revision_number", [(True, None), (False, None), (True, 1), (False, 70)] + ) + def test_process_funding_category_but_current_missing( + self, db_session, transform_funding_category, is_forecast, revision_number + ): + opportunity_summary = f.OpportunitySummaryFactory.create( + is_forecast=is_forecast, revision_number=revision_number, no_link_values=True + ) + delete_but_current_missing = setup_funding_category( + create_existing=False, + opportunity_summary=opportunity_summary, + legacy_lookup_value="00", + is_delete=True, + ) + + transform_funding_category.process_link_funding_category( + delete_but_current_missing, None, opportunity_summary + ) + + validate_funding_category(db_session, delete_but_current_missing, expect_in_db=False) + assert delete_but_current_missing.transformed_at is not None + assert delete_but_current_missing.transformation_notes == "orphaned_delete_record" + + @pytest.mark.parametrize( + "is_forecast,revision_number,legacy_lookup_value", + [(True, None, "ab"), (False, None, "cd"), (True, 5, "ef"), (False, 10, "Ag")], + ) + def test_process_funding_category_but_invalid_lookup_value( + self, + db_session, + transform_funding_category, + is_forecast, + revision_number, + legacy_lookup_value, + ): + opportunity_summary = f.OpportunitySummaryFactory.create( + is_forecast=is_forecast, revision_number=revision_number, no_link_values=True + ) + insert_but_invalid_value = setup_funding_category( + create_existing=False, + opportunity_summary=opportunity_summary, + legacy_lookup_value=legacy_lookup_value, + ) + + with pytest.raises(ValueError, match="Unrecognized funding category"): + transform_funding_category.process_link_funding_category( + insert_but_invalid_value, None, opportunity_summary + ) + + @pytest.mark.parametrize( + 
"factory_cls", [f.StagingTfundactcatForecastFactory, f.StagingTfundactcatSynopsisFactory] + ) + def test_process_funding_category_but_no_opportunity_summary_non_hist( + self, + db_session, + transform_funding_category, + factory_cls, + ): + source_record = factory_cls.create(orphaned_record=True) + + with pytest.raises( + ValueError, + match="Funding category record cannot be processed as the opportunity summary for it does not exist", + ): + transform_funding_category.process_link_funding_category(source_record, None, None) + + @pytest.mark.parametrize( + "factory_cls", + [f.StagingTfundactcatForecastHistFactory, f.StagingTfundactcatSynopsisHistFactory], + ) + def test_process_funding_category_but_no_opportunity_summary_hist( + self, + db_session, + transform_funding_category, + factory_cls, + ): + source_record = factory_cls.create(orphaned_record=True, revision_number=12) + transform_funding_category.process_link_funding_category(source_record, None, None) + assert source_record.transformed_at is not None + assert source_record.transformation_notes == "orphaned_historical_record" diff --git a/api/tests/src/data_migration/transformation/subtask/test_transform_funding_instrument.py b/api/tests/src/data_migration/transformation/subtask/test_transform_funding_instrument.py new file mode 100644 index 000000000..6a85d9b2a --- /dev/null +++ b/api/tests/src/data_migration/transformation/subtask/test_transform_funding_instrument.py @@ -0,0 +1,298 @@ +import pytest + +import src.data_migration.transformation.transform_constants as transform_constants +import tests.src.db.models.factories as f +from src.constants.lookup_constants import FundingInstrument +from src.data_migration.transformation.subtask.transform_funding_instrument import ( + TransformFundingInstrument, +) +from tests.src.data_migration.transformation.conftest import ( + BaseTransformTestClass, + setup_funding_instrument, + validate_funding_instrument, +) + + +class TestTransformFundingInstrument(BaseTransformTestClass): + @pytest.fixture() + def transform_funding_instrument(self, transform_oracle_data_task): + return TransformFundingInstrument(transform_oracle_data_task) + + def test_process_funding_instruments(self, db_session, transform_funding_instrument): + opportunity_summary_forecast = f.OpportunitySummaryFactory.create( + is_forecast=True, revision_number=None, no_link_values=True + ) + forecast_insert1 = setup_funding_instrument( + create_existing=False, + opportunity_summary=opportunity_summary_forecast, + legacy_lookup_value="CA", + ) + forecast_update1 = setup_funding_instrument( + create_existing=True, + opportunity_summary=opportunity_summary_forecast, + legacy_lookup_value="G", + funding_instrument=FundingInstrument.GRANT, + ) + forecast_delete1 = setup_funding_instrument( + create_existing=True, + is_delete=True, + opportunity_summary=opportunity_summary_forecast, + legacy_lookup_value="PC", + funding_instrument=FundingInstrument.PROCUREMENT_CONTRACT, + ) + forecast_update_already_processed = setup_funding_instrument( + create_existing=True, + is_already_processed=True, + opportunity_summary=opportunity_summary_forecast, + legacy_lookup_value="O", + funding_instrument=FundingInstrument.OTHER, + ) + + opportunity_summary_forecast_hist = f.OpportunitySummaryFactory.create( + is_forecast=True, revision_number=3, no_link_values=True + ) + forecast_hist_insert1 = setup_funding_instrument( + create_existing=False, + opportunity_summary=opportunity_summary_forecast_hist, + legacy_lookup_value="G", + ) + 
forecast_hist_delete1 = setup_funding_instrument( + create_existing=True, + is_delete=True, + opportunity_summary=opportunity_summary_forecast_hist, + legacy_lookup_value="CA", + funding_instrument=FundingInstrument.COOPERATIVE_AGREEMENT, + ) + forecast_hist_delete_already_processed = setup_funding_instrument( + create_existing=False, + is_delete=True, + is_already_processed=True, + opportunity_summary=opportunity_summary_forecast_hist, + legacy_lookup_value="O", + ) + syn_delete_but_current_missing = setup_funding_instrument( + create_existing=False, + is_delete=True, + opportunity_summary=opportunity_summary_forecast_hist, + legacy_lookup_value="PC", + ) + + opportunity_summary_syn = f.OpportunitySummaryFactory.create( + is_forecast=False, revision_number=None, no_link_values=True + ) + syn_insert1 = setup_funding_instrument( + create_existing=False, + opportunity_summary=opportunity_summary_syn, + legacy_lookup_value="O", + ) + syn_insert2 = setup_funding_instrument( + create_existing=False, + opportunity_summary=opportunity_summary_syn, + legacy_lookup_value="G", + ) + syn_delete1 = setup_funding_instrument( + create_existing=True, + is_delete=True, + opportunity_summary=opportunity_summary_syn, + legacy_lookup_value="CA", + funding_instrument=FundingInstrument.COOPERATIVE_AGREEMENT, + ) + syn_update_already_processed = setup_funding_instrument( + create_existing=True, + is_already_processed=True, + opportunity_summary=opportunity_summary_syn, + legacy_lookup_value="PC", + funding_instrument=FundingInstrument.PROCUREMENT_CONTRACT, + ) + + opportunity_summary_syn_hist = f.OpportunitySummaryFactory.create( + is_forecast=False, revision_number=21, no_link_values=True + ) + syn_hist_insert1 = setup_funding_instrument( + create_existing=False, + opportunity_summary=opportunity_summary_syn_hist, + legacy_lookup_value="CA", + ) + syn_hist_update1 = setup_funding_instrument( + create_existing=True, + opportunity_summary=opportunity_summary_syn_hist, + legacy_lookup_value="O", + funding_instrument=FundingInstrument.OTHER, + ) + syn_hist_delete1 = setup_funding_instrument( + create_existing=True, + is_delete=True, + opportunity_summary=opportunity_summary_syn_hist, + legacy_lookup_value="PC", + funding_instrument=FundingInstrument.PROCUREMENT_CONTRACT, + ) + syn_hist_delete2 = setup_funding_instrument( + create_existing=True, + is_delete=True, + opportunity_summary=opportunity_summary_syn_hist, + legacy_lookup_value="G", + funding_instrument=FundingInstrument.GRANT, + ) + syn_hist_insert_invalid_type = setup_funding_instrument( + create_existing=False, + opportunity_summary=opportunity_summary_syn_hist, + legacy_lookup_value="X", + ) + + transform_funding_instrument.run_subtask() + + validate_funding_instrument( + db_session, + forecast_insert1, + expected_funding_instrument=FundingInstrument.COOPERATIVE_AGREEMENT, + ) + validate_funding_instrument( + db_session, forecast_hist_insert1, expected_funding_instrument=FundingInstrument.GRANT + ) + validate_funding_instrument( + db_session, syn_insert1, expected_funding_instrument=FundingInstrument.OTHER + ) + validate_funding_instrument( + db_session, syn_insert2, expected_funding_instrument=FundingInstrument.GRANT + ) + validate_funding_instrument( + db_session, + syn_hist_insert1, + expected_funding_instrument=FundingInstrument.COOPERATIVE_AGREEMENT, + ) + + validate_funding_instrument( + db_session, forecast_update1, expected_funding_instrument=FundingInstrument.GRANT + ) + validate_funding_instrument( + db_session, syn_hist_update1, 
expected_funding_instrument=FundingInstrument.OTHER
+        )
+
+        validate_funding_instrument(db_session, forecast_delete1, expect_in_db=False)
+        validate_funding_instrument(db_session, forecast_hist_delete1, expect_in_db=False)
+        validate_funding_instrument(db_session, syn_delete1, expect_in_db=False)
+        validate_funding_instrument(db_session, syn_hist_delete1, expect_in_db=False)
+        validate_funding_instrument(db_session, syn_hist_delete2, expect_in_db=False)
+
+        validate_funding_instrument(
+            db_session,
+            forecast_update_already_processed,
+            expected_funding_instrument=FundingInstrument.OTHER,
+            expect_values_to_match=False,
+        )
+        validate_funding_instrument(
+            db_session, forecast_hist_delete_already_processed, expect_in_db=False
+        )
+        validate_funding_instrument(
+            db_session,
+            syn_update_already_processed,
+            expected_funding_instrument=FundingInstrument.PROCUREMENT_CONTRACT,
+            expect_values_to_match=False,
+        )
+
+        validate_funding_instrument(
+            db_session, syn_delete_but_current_missing, expect_in_db=False, was_processed=True
+        )
+        validate_funding_instrument(
+            db_session, syn_hist_insert_invalid_type, expect_in_db=False, was_processed=False
+        )
+
+        metrics = transform_funding_instrument.metrics
+        assert metrics[transform_constants.Metrics.TOTAL_RECORDS_PROCESSED] == 14
+        assert metrics[transform_constants.Metrics.TOTAL_RECORDS_DELETED] == 5
+        assert metrics[transform_constants.Metrics.TOTAL_RECORDS_INSERTED] == 5
+        assert metrics[transform_constants.Metrics.TOTAL_RECORDS_UPDATED] == 2
+        assert metrics[transform_constants.Metrics.TOTAL_ERROR_COUNT] == 1
+        assert metrics[transform_constants.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1
+
+        # Rerunning will only attempt to re-process the errors, so total+errors goes up by 1
+        db_session.commit()  # commit to end any existing transactions as run_subtask starts a new one
+        transform_funding_instrument.run_subtask()
+        assert metrics[transform_constants.Metrics.TOTAL_RECORDS_PROCESSED] == 15
+        assert metrics[transform_constants.Metrics.TOTAL_RECORDS_DELETED] == 5
+        assert metrics[transform_constants.Metrics.TOTAL_RECORDS_INSERTED] == 5
+        assert metrics[transform_constants.Metrics.TOTAL_RECORDS_UPDATED] == 2
+        assert metrics[transform_constants.Metrics.TOTAL_ERROR_COUNT] == 2
+        assert metrics[transform_constants.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1
+
+    @pytest.mark.parametrize(
+        "is_forecast,revision_number", [(True, None), (False, None), (True, 1), (False, 4)]
+    )
+    def test_process_funding_instrument_but_current_missing(
+        self, db_session, transform_funding_instrument, is_forecast, revision_number
+    ):
+        opportunity_summary = f.OpportunitySummaryFactory.create(
+            is_forecast=is_forecast, revision_number=revision_number, no_link_values=True
+        )
+        delete_but_current_missing = setup_funding_instrument(
+            create_existing=False,
+            opportunity_summary=opportunity_summary,
+            legacy_lookup_value="G",
+            is_delete=True,
+        )
+
+        transform_funding_instrument.process_link_funding_instrument(
+            delete_but_current_missing, None, opportunity_summary
+        )
+
+        validate_funding_instrument(db_session, delete_but_current_missing, expect_in_db=False)
+        assert delete_but_current_missing.transformed_at is not None
+        assert delete_but_current_missing.transformation_notes == "orphaned_delete_record"
+
+    @pytest.mark.parametrize(
+        "is_forecast,revision_number,legacy_lookup_value",
+        [(True, None, "X"), (False, None, "4"), (True, 5, "Y"), (False, 10, "A")],
+    )
+    def test_process_funding_instrument_but_invalid_lookup_value(
+        self,
+        db_session,
transform_funding_instrument, + is_forecast, + revision_number, + legacy_lookup_value, + ): + opportunity_summary = f.OpportunitySummaryFactory.create( + is_forecast=is_forecast, revision_number=revision_number, no_link_values=True + ) + insert_but_invalid_value = setup_funding_instrument( + create_existing=False, + opportunity_summary=opportunity_summary, + legacy_lookup_value=legacy_lookup_value, + ) + + with pytest.raises(ValueError, match="Unrecognized funding instrument"): + transform_funding_instrument.process_link_funding_instrument( + insert_but_invalid_value, None, opportunity_summary + ) + + @pytest.mark.parametrize( + "factory_cls", [f.StagingTfundinstrForecastFactory, f.StagingTfundinstrSynopsisFactory] + ) + def test_process_funding_instrument_but_no_opportunity_summary_non_hist( + self, + db_session, + transform_funding_instrument, + factory_cls, + ): + source_record = factory_cls.create(orphaned_record=True) + + with pytest.raises( + ValueError, + match="Funding instrument record cannot be processed as the opportunity summary for it does not exist", + ): + transform_funding_instrument.process_link_funding_instrument(source_record, None, None) + + @pytest.mark.parametrize( + "factory_cls", + [f.StagingTfundinstrForecastHistFactory, f.StagingTfundinstrSynopsisHistFactory], + ) + def test_process_funding_instrument_but_no_opportunity_summary_hist( + self, + db_session, + transform_funding_instrument, + factory_cls, + ): + source_record = factory_cls.create(orphaned_record=True, revision_number=12) + transform_funding_instrument.process_link_funding_instrument(source_record, None, None) + assert source_record.transformed_at is not None + assert source_record.transformation_notes == "orphaned_historical_record" diff --git a/api/tests/src/data_migration/transformation/subtask/test_transform_opportunity.py b/api/tests/src/data_migration/transformation/subtask/test_transform_opportunity.py new file mode 100644 index 000000000..ae89652ba --- /dev/null +++ b/api/tests/src/data_migration/transformation/subtask/test_transform_opportunity.py @@ -0,0 +1,110 @@ +import pytest + +import src.data_migration.transformation.transform_constants as transform_constants +from src.data_migration.transformation.subtask.transform_opportunity import TransformOpportunity +from tests.src.data_migration.transformation.conftest import ( + BaseTransformTestClass, + setup_opportunity, + validate_opportunity, +) + + +class TestTransformOpportunity(BaseTransformTestClass): + @pytest.fixture() + def transform_opportunity(self, transform_oracle_data_task): + return TransformOpportunity(transform_oracle_data_task) + + def test_process_opportunities(self, db_session, transform_opportunity): + ordinary_delete = setup_opportunity( + create_existing=True, is_delete=True, all_fields_null=True + ) + ordinary_delete2 = setup_opportunity( + create_existing=True, is_delete=True, all_fields_null=False + ) + delete_but_current_missing = setup_opportunity(create_existing=False, is_delete=True) + + basic_insert = setup_opportunity(create_existing=False) + basic_insert2 = setup_opportunity(create_existing=False, all_fields_null=True) + basic_insert3 = setup_opportunity(create_existing=False) + + basic_update = setup_opportunity( + create_existing=True, + ) + basic_update2 = setup_opportunity(create_existing=True, all_fields_null=True) + basic_update3 = setup_opportunity(create_existing=True, all_fields_null=True) + basic_update4 = setup_opportunity(create_existing=True) + + # Something else deleted it + 
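# (the staging record is already marked as transformed, so the task will not
+        # fetch it again; nothing should be created even though no target row exists)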
already_processed_insert = setup_opportunity(
+            create_existing=False, is_already_processed=True
+        )
+        already_processed_update = setup_opportunity(
+            create_existing=True, is_already_processed=True
+        )
+
+        insert_that_will_fail = setup_opportunity(
+            create_existing=False, source_values={"oppcategory": "X"}
+        )
+
+        transform_opportunity.run_subtask()
+
+        validate_opportunity(db_session, ordinary_delete, expect_in_db=False)
+        validate_opportunity(db_session, ordinary_delete2, expect_in_db=False)
+        validate_opportunity(db_session, delete_but_current_missing, expect_in_db=False)
+
+        validate_opportunity(db_session, basic_insert)
+        validate_opportunity(db_session, basic_insert2)
+        validate_opportunity(db_session, basic_insert3)
+
+        validate_opportunity(db_session, basic_update)
+        validate_opportunity(db_session, basic_update2)
+        validate_opportunity(db_session, basic_update3)
+        validate_opportunity(db_session, basic_update4)
+
+        validate_opportunity(db_session, already_processed_insert, expect_in_db=False)
+        validate_opportunity(db_session, already_processed_update, expect_values_to_match=False)
+
+        validate_opportunity(db_session, insert_that_will_fail, expect_in_db=False)
+
+        metrics = transform_opportunity.metrics
+        assert metrics[transform_constants.Metrics.TOTAL_RECORDS_PROCESSED] == 11
+        assert metrics[transform_constants.Metrics.TOTAL_RECORDS_DELETED] == 2
+        # Note: the insert that fails on its category is counted as an error, not as an insert
+        assert metrics[transform_constants.Metrics.TOTAL_RECORDS_INSERTED] == 3
+        assert metrics[transform_constants.Metrics.TOTAL_RECORDS_UPDATED] == 4
+        assert metrics[transform_constants.Metrics.TOTAL_ERROR_COUNT] == 2
+
+        # Rerunning does mostly nothing; it will attempt to re-process the two that errored
+        # but otherwise won't find anything else
+        db_session.commit()  # commit to end any existing transactions as run_subtask starts a new one
+        transform_opportunity.run_subtask()
+        assert metrics[transform_constants.Metrics.TOTAL_RECORDS_PROCESSED] == 13
+        assert metrics[transform_constants.Metrics.TOTAL_RECORDS_DELETED] == 2
+        # Note: the insert that fails on its category is counted as an error, not as an insert
+        assert metrics[transform_constants.Metrics.TOTAL_RECORDS_INSERTED] == 3
+        assert metrics[transform_constants.Metrics.TOTAL_RECORDS_UPDATED] == 4
+        assert metrics[transform_constants.Metrics.TOTAL_ERROR_COUNT] == 4
+
+    def test_process_opportunity_delete_but_current_missing(
+        self, db_session, transform_opportunity
+    ):
+        # Verify an error is raised when we try to delete something that doesn't exist
+        delete_but_current_missing = setup_opportunity(create_existing=False, is_delete=True)
+
+        with pytest.raises(
+            ValueError, match="Cannot delete opportunity record as it does not exist"
+        ):
+            transform_opportunity.process_opportunity(delete_but_current_missing, None)
+
+        validate_opportunity(db_session, delete_but_current_missing, expect_in_db=False)
+
+    def test_process_opportunity_invalid_category(self, db_session, transform_opportunity):
+        # This will error in the transform as that isn't a category we have configured
+        insert_that_will_fail = setup_opportunity(
+            create_existing=False, source_values={"oppcategory": "X"}
+        )
+
+        with pytest.raises(ValueError, match="Unrecognized opportunity category"):
+            transform_opportunity.process_opportunity(insert_that_will_fail, None)
+
+        validate_opportunity(db_session, insert_that_will_fail, expect_in_db=False)
diff --git a/api/tests/src/data_migration/transformation/subtask/test_transform_opportunity_summary.py
b/api/tests/src/data_migration/transformation/subtask/test_transform_opportunity_summary.py new file mode 100644 index 000000000..5dcec4f56 --- /dev/null +++ b/api/tests/src/data_migration/transformation/subtask/test_transform_opportunity_summary.py @@ -0,0 +1,280 @@ +import pytest + +import src.data_migration.transformation.transform_constants as transform_constants +import tests.src.db.models.factories as f +from src.data_migration.transformation.subtask.transform_opportunity_summary import ( + TransformOpportunitySummary, +) +from tests.src.data_migration.transformation.conftest import ( + BaseTransformTestClass, + setup_synopsis_forecast, + validate_opportunity_summary, +) + + +class TestTransformOpportunitySummary(BaseTransformTestClass): + @pytest.fixture() + def transform_opportunity_summary(self, transform_oracle_data_task): + return TransformOpportunitySummary(transform_oracle_data_task) + + def test_process_opportunity_summaries(self, db_session, transform_opportunity_summary): + # Basic inserts + opportunity1 = f.OpportunityFactory.create( + no_current_summary=True, opportunity_assistance_listings=[] + ) + forecast_insert1 = setup_synopsis_forecast( + is_forecast=True, revision_number=None, create_existing=False, opportunity=opportunity1 + ) + synopsis_insert1 = setup_synopsis_forecast( + is_forecast=False, revision_number=None, create_existing=False, opportunity=opportunity1 + ) + forecast_hist_insert1 = setup_synopsis_forecast( + is_forecast=True, revision_number=1, create_existing=False, opportunity=opportunity1 + ) + synopsis_hist_insert1 = setup_synopsis_forecast( + is_forecast=False, revision_number=1, create_existing=False, opportunity=opportunity1 + ) + + # Mix of updates and inserts, somewhat resembling what happens when summary objects + # get moved to the historical table (we'd update the synopsis/forecast records, and create new historical) + opportunity2 = f.OpportunityFactory.create( + no_current_summary=True, opportunity_assistance_listings=[] + ) + forecast_update1 = setup_synopsis_forecast( + is_forecast=True, revision_number=None, create_existing=True, opportunity=opportunity2 + ) + synopsis_update1 = setup_synopsis_forecast( + is_forecast=False, revision_number=None, create_existing=True, opportunity=opportunity2 + ) + forecast_hist_update1 = setup_synopsis_forecast( + is_forecast=True, revision_number=1, create_existing=True, opportunity=opportunity2 + ) + synopsis_hist_update1 = setup_synopsis_forecast( + is_forecast=False, revision_number=1, create_existing=True, opportunity=opportunity2 + ) + forecast_hist_insert2 = setup_synopsis_forecast( + is_forecast=True, revision_number=2, create_existing=False, opportunity=opportunity2 + ) + synopsis_hist_insert2 = setup_synopsis_forecast( + is_forecast=False, revision_number=2, create_existing=False, opportunity=opportunity2 + ) + + # Mix of inserts, updates, and deletes + opportunity3 = f.OpportunityFactory.create( + no_current_summary=True, opportunity_assistance_listings=[] + ) + forecast_delete1 = setup_synopsis_forecast( + is_forecast=True, + revision_number=None, + create_existing=True, + is_delete=True, + opportunity=opportunity3, + ) + synopsis_delete1 = setup_synopsis_forecast( + is_forecast=False, + revision_number=None, + create_existing=True, + is_delete=True, + opportunity=opportunity3, + ) + forecast_hist_insert3 = setup_synopsis_forecast( + is_forecast=True, revision_number=2, create_existing=False, opportunity=opportunity3 + ) + synopsis_hist_update2 = setup_synopsis_forecast( + is_forecast=False, + 
revision_number=1, + create_existing=True, + source_values={"action_type": "D"}, + opportunity=opportunity3, + ) + + # A few error scenarios + opportunity4 = f.OpportunityFactory.create( + no_current_summary=True, opportunity_assistance_listings=[] + ) + forecast_delete_but_current_missing = setup_synopsis_forecast( + is_forecast=True, + revision_number=None, + create_existing=False, + is_delete=True, + opportunity=opportunity4, + ) + synopsis_update_invalid_yn_field = setup_synopsis_forecast( + is_forecast=False, + revision_number=None, + create_existing=True, + source_values={"sendmail": "E"}, + opportunity=opportunity4, + ) + synopsis_hist_insert_invalid_yn_field = setup_synopsis_forecast( + is_forecast=False, + revision_number=1, + create_existing=False, + source_values={"cost_sharing": "1"}, + opportunity=opportunity4, + ) + forecast_hist_update_invalid_action_type = setup_synopsis_forecast( + is_forecast=True, + revision_number=2, + create_existing=True, + source_values={"action_type": "X"}, + opportunity=opportunity4, + ) + + transform_opportunity_summary.run_subtask() + + validate_opportunity_summary(db_session, forecast_insert1) + validate_opportunity_summary(db_session, synopsis_insert1) + validate_opportunity_summary(db_session, forecast_hist_insert1) + validate_opportunity_summary(db_session, synopsis_hist_insert1) + validate_opportunity_summary(db_session, forecast_hist_insert2) + validate_opportunity_summary(db_session, synopsis_hist_insert2) + validate_opportunity_summary(db_session, forecast_hist_insert3) + + validate_opportunity_summary(db_session, forecast_update1) + validate_opportunity_summary(db_session, synopsis_update1) + validate_opportunity_summary(db_session, forecast_hist_update1) + validate_opportunity_summary(db_session, synopsis_hist_update1) + validate_opportunity_summary(db_session, synopsis_hist_update2) + + validate_opportunity_summary(db_session, forecast_delete1, expect_in_db=False) + validate_opportunity_summary(db_session, synopsis_delete1, expect_in_db=False) + + validate_opportunity_summary( + db_session, forecast_delete_but_current_missing, expect_in_db=False + ) + validate_opportunity_summary( + db_session, + synopsis_update_invalid_yn_field, + expect_in_db=True, + expect_values_to_match=False, + ) + validate_opportunity_summary( + db_session, synopsis_hist_insert_invalid_yn_field, expect_in_db=False + ) + validate_opportunity_summary( + db_session, + forecast_hist_update_invalid_action_type, + expect_in_db=True, + expect_values_to_match=False, + ) + + metrics = transform_opportunity_summary.metrics + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_PROCESSED] == 18 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_DELETED] == 2 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_INSERTED] == 7 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_UPDATED] == 5 + assert metrics[transform_constants.Metrics.TOTAL_ERROR_COUNT] == 3 + assert metrics[transform_constants.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1 + + # Rerunning will only attempt to re-process the errors, so total+errors goes up by 3 + db_session.commit() # commit to end any existing transactions as run_subtask starts a new one + transform_opportunity_summary.run_subtask() + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_PROCESSED] == 21 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_DELETED] == 2 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_INSERTED] == 7 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_UPDATED] 
== 5 + assert metrics[transform_constants.Metrics.TOTAL_ERROR_COUNT] == 6 + assert metrics[transform_constants.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1 + + @pytest.mark.parametrize( + "is_forecast,revision_number", [(True, None), (False, None), (True, 5), (False, 10)] + ) + def test_process_opportunity_summary_delete_but_current_missing( + self, db_session, transform_opportunity_summary, is_forecast, revision_number + ): + opportunity = f.OpportunityFactory.create( + no_current_summary=True, opportunity_assistance_listings=[] + ) + delete_but_current_missing = setup_synopsis_forecast( + is_forecast=is_forecast, + revision_number=revision_number, + create_existing=False, + is_delete=True, + opportunity=opportunity, + ) + + transform_opportunity_summary.process_opportunity_summary( + delete_but_current_missing, None, opportunity + ) + + validate_opportunity_summary(db_session, delete_but_current_missing, expect_in_db=False) + assert delete_but_current_missing.transformed_at is not None + assert delete_but_current_missing.transformation_notes == "orphaned_delete_record" + + @pytest.mark.parametrize( + "is_forecast,revision_number,source_values,expected_error", + [ + (True, None, {"sendmail": "z"}, "Unexpected Y/N bool value: z"), + (False, None, {"cost_sharing": "v"}, "Unexpected Y/N bool value: v"), + (True, 5, {"action_type": "T"}, "Unexpected action type value: T"), + (False, 10, {"action_type": "5"}, "Unexpected action type value: 5"), + ], + ) + def test_process_opportunity_summary_invalid_value_errors( + self, + db_session, + transform_opportunity_summary, + is_forecast, + revision_number, + source_values, + expected_error, + ): + opportunity = f.OpportunityFactory.create( + no_current_summary=True, opportunity_assistance_listings=[] + ) + source_summary = setup_synopsis_forecast( + is_forecast=is_forecast, + revision_number=revision_number, + create_existing=False, + opportunity=opportunity, + source_values=source_values, + ) + + with pytest.raises(ValueError, match=expected_error): + transform_opportunity_summary.process_opportunity_summary( + source_summary, None, opportunity + ) + + @pytest.mark.parametrize("is_forecast", [True, False]) + def test_process_opportunity_summary_but_no_opportunity_non_hist( + self, + db_session, + transform_opportunity_summary, + is_forecast, + ): + source_record = setup_synopsis_forecast( + is_forecast=is_forecast, + revision_number=None, + create_existing=False, + opportunity=None, + source_values={"opportunity_id": 12121212}, + ) + + with pytest.raises( + ValueError, + match="Opportunity summary cannot be processed as the opportunity for it does not exist", + ): + transform_opportunity_summary.process_opportunity_summary(source_record, None, None) + + @pytest.mark.parametrize("is_forecast,revision_number", [(True, 10), (False, 9)]) + def test_process_opportunity_summary_but_no_opportunity_hist( + self, + db_session, + transform_opportunity_summary, + is_forecast, + revision_number, + ): + source_record = setup_synopsis_forecast( + is_forecast=is_forecast, + revision_number=revision_number, + create_existing=False, + opportunity=None, + source_values={"opportunity_id": 12121212}, + ) + + transform_opportunity_summary.process_opportunity_summary(source_record, None, None) + + validate_opportunity_summary(db_session, source_record, expect_in_db=False) + assert source_record.transformed_at is not None + assert source_record.transformation_notes == "orphaned_historical_record" diff --git 
a/api/tests/src/data_migration/transformation/test_transform_oracle_data_task.py b/api/tests/src/data_migration/transformation/test_transform_oracle_data_task.py index f4d07bfdd..21ca27c9a 100644 --- a/api/tests/src/data_migration/transformation/test_transform_oracle_data_task.py +++ b/api/tests/src/data_migration/transformation/test_transform_oracle_data_task.py @@ -1,2194 +1,24 @@ -from typing import Tuple - import pytest import tests.src.db.models.factories as f from src.constants.lookup_constants import ApplicantType, FundingCategory, FundingInstrument from src.data_migration.transformation.transform_oracle_data_task import TransformOracleDataTask from src.db.models import staging -from src.db.models.opportunity_models import ( - LinkOpportunitySummaryApplicantType, - LinkOpportunitySummaryFundingCategory, - LinkOpportunitySummaryFundingInstrument, - Opportunity, - OpportunityAssistanceListing, - OpportunitySummary, -) +from src.db.models.opportunity_models import Opportunity from tests.conftest import BaseTestClass - - -def setup_opportunity( - create_existing: bool, - is_delete: bool = False, - is_already_processed: bool = False, - source_values: dict | None = None, - all_fields_null: bool = False, -) -> staging.opportunity.Topportunity: - if source_values is None: - source_values = {} - - source_opportunity = f.StagingTopportunityFactory.create( - **source_values, - is_deleted=is_delete, - already_transformed=is_already_processed, - all_fields_null=all_fields_null, - cfdas=[], - ) - - if create_existing: - f.OpportunityFactory.create( - opportunity_id=source_opportunity.opportunity_id, - # set created_at/updated_at to an earlier time so it's clear - # when they were last updated - timestamps_in_past=True, - ) - - return source_opportunity - - -def setup_cfda( - create_existing: bool, - is_delete: bool = False, - is_already_processed: bool = False, - source_values: dict | None = None, - all_fields_null: bool = False, - opportunity: Opportunity | None = None, -) -> staging.opportunity.TopportunityCfda: - if source_values is None: - source_values = {} - - # If you don't provide an opportunity, you need to provide an ID - if opportunity is not None: - source_values["opportunity_id"] = opportunity.opportunity_id - - source_cfda = f.StagingTopportunityCfdaFactory.create( - **source_values, - opportunity=None, # To override the factory trying to create something - is_deleted=is_delete, - already_transformed=is_already_processed, - all_fields_null=all_fields_null, - ) - - if create_existing: - f.OpportunityAssistanceListingFactory.create( - opportunity=opportunity, - opportunity_assistance_listing_id=source_cfda.opp_cfda_id, - # set created_at/updated_at to an earlier time so it's clear - # when they were last updated - timestamps_in_past=True, - ) - - return source_cfda - - -def setup_synopsis_forecast( - is_forecast: bool, - revision_number: int | None, - create_existing: bool, - opportunity: Opportunity | None, - is_delete: bool = False, - is_already_processed: bool = False, - source_values: dict | None = None, -): - if source_values is None: - source_values = {} - - if is_forecast: - if revision_number is None: - factory_cls = f.StagingTforecastFactory - else: - factory_cls = f.StagingTforecastHistFactory - else: - if revision_number is None: - factory_cls = f.StagingTsynopsisFactory - else: - factory_cls = f.StagingTsynopsisHistFactory - - if revision_number is not None: - source_values["revision_number"] = revision_number - - if opportunity is not None: - source_values["opportunity_id"]
= opportunity.opportunity_id - - source_summary = factory_cls.create( - **source_values, - opportunity=None, # To override the factory trying to create something - is_deleted=is_delete, - already_transformed=is_already_processed, - ) - - if create_existing: - f.OpportunitySummaryFactory.create( - opportunity=opportunity, is_forecast=is_forecast, revision_number=revision_number - ) - - return source_summary - - -def setup_applicant_type( - create_existing: bool, - opportunity_summary: OpportunitySummary, - legacy_lookup_value: str, - applicant_type: ApplicantType | None = None, - is_delete: bool = False, - is_already_processed: bool = False, - source_values: dict | None = None, -): - if create_existing and is_delete is False and applicant_type is None: - raise Exception( - "If create_existing is True, is_delete is False - must provide the properly converted / mapped value for applicant_type" - ) - - if source_values is None: - source_values = {} - - if opportunity_summary.is_forecast: - source_values["forecast"] = None - if opportunity_summary.revision_number is None: - factory_cls = f.StagingTapplicanttypesForecastFactory - else: - factory_cls = f.StagingTapplicanttypesForecastHistFactory - source_values["revision_number"] = opportunity_summary.revision_number - else: - source_values["synopsis"] = None - if opportunity_summary.revision_number is None: - factory_cls = f.StagingTapplicanttypesSynopsisFactory - else: - factory_cls = f.StagingTapplicanttypesSynopsisHistFactory - source_values["revision_number"] = opportunity_summary.revision_number - - source_applicant_type = factory_cls.create( - **source_values, - opportunity_id=opportunity_summary.opportunity_id, - is_deleted=is_delete, - already_transformed=is_already_processed, - at_id=legacy_lookup_value, - ) - - if create_existing: - if opportunity_summary.is_forecast: - legacy_id = source_applicant_type.at_frcst_id - else: - legacy_id = source_applicant_type.at_syn_id - - f.LinkOpportunitySummaryApplicantTypeFactory.create( - opportunity_summary=opportunity_summary, - legacy_applicant_type_id=legacy_id, - applicant_type=applicant_type, - ) - - return source_applicant_type - - -def setup_funding_instrument( - create_existing: bool, - opportunity_summary: OpportunitySummary, - legacy_lookup_value: str, - funding_instrument: FundingInstrument | None = None, - is_delete: bool = False, - is_already_processed: bool = False, - source_values: dict | None = None, -): - if create_existing and is_delete is False and funding_instrument is None: - raise Exception( - "If create_existing is True, is_delete is False - must provide the properly converted / mapped value for funding_instrument" - ) - - if source_values is None: - source_values = {} - - if opportunity_summary.is_forecast: - source_values["forecast"] = None - if opportunity_summary.revision_number is None: - factory_cls = f.StagingTfundinstrForecastFactory - else: - factory_cls = f.StagingTfundinstrForecastHistFactory - source_values["revision_number"] = opportunity_summary.revision_number - else: - source_values["synopsis"] = None - if opportunity_summary.revision_number is None: - factory_cls = f.StagingTfundinstrSynopsisFactory - else: - factory_cls = f.StagingTfundinstrSynopsisHistFactory - source_values["revision_number"] = opportunity_summary.revision_number - - source_funding_instrument = factory_cls.create( - **source_values, - opportunity_id=opportunity_summary.opportunity_id, - is_deleted=is_delete, - already_transformed=is_already_processed, - fi_id=legacy_lookup_value, - ) - - 
if create_existing: - if opportunity_summary.is_forecast: - legacy_id = source_funding_instrument.fi_frcst_id - else: - legacy_id = source_funding_instrument.fi_syn_id - - f.LinkOpportunitySummaryFundingInstrumentFactory.create( - opportunity_summary=opportunity_summary, - legacy_funding_instrument_id=legacy_id, - funding_instrument=funding_instrument, - ) - - return source_funding_instrument - - -def setup_funding_category( - create_existing: bool, - opportunity_summary: OpportunitySummary, - legacy_lookup_value: str, - funding_category: FundingCategory | None = None, - is_delete: bool = False, - is_already_processed: bool = False, - source_values: dict | None = None, -): - if create_existing and is_delete is False and funding_category is None: - raise Exception( - "If create_existing is True, is_delete is False - must provide the properly converted / mapped value for funding_category" - ) - - if source_values is None: - source_values = {} - - if opportunity_summary.is_forecast: - source_values["forecast"] = None - if opportunity_summary.revision_number is None: - factory_cls = f.StagingTfundactcatForecastFactory - else: - factory_cls = f.StagingTfundactcatForecastHistFactory - source_values["revision_number"] = opportunity_summary.revision_number - else: - source_values["synopsis"] = None - if opportunity_summary.revision_number is None: - factory_cls = f.StagingTfundactcatSynopsisFactory - else: - factory_cls = f.StagingTfundactcatSynopsisHistFactory - source_values["revision_number"] = opportunity_summary.revision_number - - source_funding_category = factory_cls.create( - **source_values, - opportunity_id=opportunity_summary.opportunity_id, - is_deleted=is_delete, - already_transformed=is_already_processed, - fac_id=legacy_lookup_value, - ) - - if create_existing: - if opportunity_summary.is_forecast: - legacy_id = source_funding_category.fac_frcst_id - else: - legacy_id = source_funding_category.fac_syn_id - - f.LinkOpportunitySummaryFundingCategoryFactory.create( - opportunity_summary=opportunity_summary, - legacy_funding_category_id=legacy_id, - funding_category=funding_category, - ) - - return source_funding_category - - -def validate_matching_fields( - source, destination, fields: list[Tuple[str, str]], expect_all_to_match: bool -): - mismatched_fields = [] - - for source_field, destination_field in fields: - source_value = getattr(source, source_field) - destination_value = getattr(destination, destination_field) - if source_value != destination_value: - mismatched_fields.append( - f"{source_field}/{destination_field}: '{source_value}' != '{destination_value}'" - ) - - # If values weren't copied in an update - # then we should expect most things to not match, - # but randomness in the factories might cause some overlap - if expect_all_to_match: - assert ( - len(mismatched_fields) == 0 - ), f"Expected all fields to match between {source.__class__} and {destination.__class__}, but found mismatched fields: {','.join(mismatched_fields)}" - else: - assert ( - len(mismatched_fields) != 0 - ), f"Did not expect all fields to match between {source.__class__} and {destination.__class__}, but they did which means an unexpected update occurred" - - -def validate_opportunity( - db_session, - source_opportunity: staging.opportunity.Topportunity, - expect_in_db: bool = True, - expect_values_to_match: bool = True, -): - opportunity = ( - db_session.query(Opportunity) - .filter(Opportunity.opportunity_id == source_opportunity.opportunity_id) - .one_or_none() - ) - - if not expect_in_db: -
assert opportunity is None - return - - assert opportunity is not None - # For fields that we expect to match 1:1, verify that they match as expected - validate_matching_fields( - source_opportunity, - opportunity, - [ - ("oppnumber", "opportunity_number"), - ("opptitle", "opportunity_title"), - ("owningagency", "agency"), - ("category_explanation", "category_explanation"), - ("revision_number", "revision_number"), - ("modified_comments", "modified_comments"), - ("publisheruid", "publisher_user_id"), - ("publisher_profile_id", "publisher_profile_id"), - ], - expect_values_to_match, - ) - - # Validation of fields that aren't copied exactly - if expect_values_to_match: - # Deliberately validating is_draft with a different calculation - if source_opportunity.is_draft == "N": - assert opportunity.is_draft is False - else: - assert opportunity.is_draft is True - - -def validate_assistance_listing( - db_session, - source_cfda: staging.opportunity.TopportunityCfda, - expect_in_db: bool = True, - expect_values_to_match: bool = True, -): - assistance_listing = ( - db_session.query(OpportunityAssistanceListing) - .filter( - OpportunityAssistanceListing.opportunity_assistance_listing_id - == source_cfda.opp_cfda_id - ) - .one_or_none() - ) - - if not expect_in_db: - assert assistance_listing is None - return - - assert assistance_listing is not None - # For fields that we expect to match 1:1, verify that they match as expected - validate_matching_fields( - source_cfda, - assistance_listing, - [ - ("cfdanumber", "assistance_listing_number"), - ("programtitle", "program_title"), - ], - expect_values_to_match, - ) - - -def get_summary_from_source(db_session, source_summary): - revision_number = None - is_forecast = source_summary.is_forecast - if isinstance(source_summary, (staging.synopsis.TsynopsisHist, staging.forecast.TforecastHist)): - revision_number = source_summary.revision_number - - opportunity_summary = ( - db_session.query(OpportunitySummary) - .filter( - OpportunitySummary.opportunity_id == source_summary.opportunity_id, - OpportunitySummary.revision_number == revision_number, - OpportunitySummary.is_forecast == is_forecast, - # Populate existing to force it to fetch updates from the DB - ) - .execution_options(populate_existing=True) - .one_or_none() - ) - - return opportunity_summary - - -def validate_opportunity_summary( - db_session, source_summary, expect_in_db: bool = True, expect_values_to_match: bool = True -): - opportunity_summary = get_summary_from_source(db_session, source_summary) - - if not expect_in_db: - assert opportunity_summary is None - return - - matching_fields = [ - ("version_nbr", "version_number"), - ("posting_date", "post_date"), - ("archive_date", "archive_date"), - ("fd_link_url", "additional_info_url"), - ("fd_link_desc", "additional_info_url_description"), - ("modification_comments", "modification_comments"), - ("oth_cat_fa_desc", "funding_category_description"), - ("applicant_elig_desc", "applicant_eligibility_description"), - ("ac_name", "agency_name"), - ("ac_email_addr", "agency_email_address"), - ("ac_email_desc", "agency_email_address_description"), - ("publisher_profile_id", "publisher_profile_id"), - ("publisheruid", "publisher_user_id"), - ("last_upd_id", "updated_by"), - ("creator_id", "created_by"), - ] - - if isinstance(source_summary, (staging.synopsis.Tsynopsis, staging.synopsis.TsynopsisHist)): - matching_fields.extend( - [ - ("syn_desc", "summary_description"), - ("a_sa_code", "agency_code"), - ("ac_phone_number", "agency_phone_number"), - 
("agency_contact_desc", "agency_contact_description"), - ("response_date", "close_date"), - ("response_date_desc", "close_date_description"), - ("unarchive_date", "unarchive_date"), - ] - ) - else: # Forecast+ForecastHist - matching_fields.extend( - [ - ("forecast_desc", "summary_description"), - ("agency_code", "agency_code"), - ("ac_phone", "agency_phone_number"), - ("est_synopsis_posting_date", "forecasted_post_date"), - ("est_appl_response_date", "forecasted_close_date"), - ("est_appl_response_date_desc", "forecasted_close_date_description"), - ("est_award_date", "forecasted_award_date"), - ("est_project_start_date", "forecasted_project_start_date"), - ("fiscal_year", "fiscal_year"), - ] - ) - - # History only fields - is_deleted = False - if isinstance(source_summary, (staging.synopsis.TsynopsisHist, staging.forecast.TforecastHist)): - matching_fields.extend([("revision_number", "revision_number")]) - - is_deleted = source_summary.action_type == "D" - - assert opportunity_summary is not None - validate_matching_fields( - source_summary, opportunity_summary, matching_fields, expect_values_to_match - ) - - assert opportunity_summary.is_deleted == is_deleted - - -def validate_summary_and_nested( - db_session, - source_summary, - expected_applicant_types: list[ApplicantType], - expected_funding_categories: list[FundingCategory], - expected_funding_instruments: list[FundingInstrument], - expect_in_db: bool = True, - expect_values_to_match: bool = True, -): - validate_opportunity_summary(db_session, source_summary, expect_in_db, expect_values_to_match) - - if not expect_in_db: - return - - created_record = get_summary_from_source(db_session, source_summary) - - assert set(created_record.applicant_types) == set(expected_applicant_types) - assert set(created_record.funding_categories) == set(expected_funding_categories) - assert set(created_record.funding_instruments) == set(expected_funding_instruments) - - -def validate_applicant_type( - db_session, - source_applicant_type, - expect_in_db: bool = True, - expected_applicant_type: ApplicantType | None = None, - was_processed: bool = True, - expect_values_to_match: bool = True, -): - assert (source_applicant_type.transformed_at is not None) == was_processed - - # In order to properly find the link table value, need to first determine - # the opportunity summary in a subquery - opportunity_summary_id = ( - db_session.query(OpportunitySummary.opportunity_summary_id) - .filter( - OpportunitySummary.revision_number == source_applicant_type.revision_number, - OpportunitySummary.is_forecast == source_applicant_type.is_forecast, - OpportunitySummary.opportunity_id == source_applicant_type.opportunity_id, - ) - .scalar() - ) - - link_applicant_type = ( - db_session.query(LinkOpportunitySummaryApplicantType) - .filter( - LinkOpportunitySummaryApplicantType.legacy_applicant_type_id - == source_applicant_type.legacy_applicant_type_id, - LinkOpportunitySummaryApplicantType.opportunity_summary_id == opportunity_summary_id, - ) - .one_or_none() - ) - - if not expect_in_db: - assert link_applicant_type is None - return - - assert link_applicant_type is not None - assert link_applicant_type.applicant_type == expected_applicant_type - - validate_matching_fields( - source_applicant_type, - link_applicant_type, - [("creator_id", "created_by"), ("last_upd_id", "updated_by")], - expect_values_to_match, - ) - - -def validate_funding_instrument( - db_session, - source_funding_instrument, - expect_in_db: bool = True, - expected_funding_instrument: FundingInstrument | 
None = None, - was_processed: bool = True, - expect_values_to_match: bool = True, -): - assert (source_funding_instrument.transformed_at is not None) == was_processed - - # In order to properly find the link table value, need to first determine - # the opportunity summary in a subquery - opportunity_summary_id = ( - db_session.query(OpportunitySummary.opportunity_summary_id) - .filter( - OpportunitySummary.revision_number == source_funding_instrument.revision_number, - OpportunitySummary.is_forecast == source_funding_instrument.is_forecast, - OpportunitySummary.opportunity_id == source_funding_instrument.opportunity_id, - ) - .scalar() - ) - - link_funding_instrument = ( - db_session.query(LinkOpportunitySummaryFundingInstrument) - .filter( - LinkOpportunitySummaryFundingInstrument.legacy_funding_instrument_id - == source_funding_instrument.legacy_funding_instrument_id, - LinkOpportunitySummaryFundingInstrument.opportunity_summary_id - == opportunity_summary_id, - ) - .one_or_none() - ) - - if not expect_in_db: - assert link_funding_instrument is None - return - - assert link_funding_instrument is not None - assert link_funding_instrument.funding_instrument == expected_funding_instrument - - validate_matching_fields( - source_funding_instrument, - link_funding_instrument, - [("creator_id", "created_by"), ("last_upd_id", "updated_by")], - expect_values_to_match, - ) - - -def validate_funding_category( - db_session, - source_funding_category, - expect_in_db: bool = True, - expected_funding_category: FundingCategory | None = None, - was_processed: bool = True, - expect_values_to_match: bool = True, -): - assert (source_funding_category.transformed_at is not None) == was_processed - - # In order to properly find the link table value, need to first determine - # the opportunity summary in a subquery - opportunity_summary_id = ( - db_session.query(OpportunitySummary.opportunity_summary_id) - .filter( - OpportunitySummary.revision_number == source_funding_category.revision_number, - OpportunitySummary.is_forecast == source_funding_category.is_forecast, - OpportunitySummary.opportunity_id == source_funding_category.opportunity_id, - ) - .scalar() - ) - - link_funding_category = ( - db_session.query(LinkOpportunitySummaryFundingCategory) - .filter( - LinkOpportunitySummaryFundingCategory.legacy_funding_category_id - == source_funding_category.legacy_funding_category_id, - LinkOpportunitySummaryFundingCategory.opportunity_summary_id == opportunity_summary_id, - ) - .one_or_none() - ) - - if not expect_in_db: - assert link_funding_category is None - return - - assert link_funding_category is not None - assert link_funding_category.funding_category == expected_funding_category - - validate_matching_fields( - source_funding_category, - link_funding_category, - [("creator_id", "created_by"), ("last_upd_id", "updated_by")], - expect_values_to_match, - ) - - -class TestTransformOpportunity(BaseTestClass): - @pytest.fixture() - def transform_oracle_data_task( - self, db_session, enable_factory_create, truncate_opportunities - ) -> TransformOracleDataTask: - return TransformOracleDataTask(db_session) - - def test_process_opportunities(self, db_session, transform_oracle_data_task): - ordinary_delete = setup_opportunity( - create_existing=True, is_delete=True, all_fields_null=True - ) - ordinary_delete2 = setup_opportunity( - create_existing=True, is_delete=True, all_fields_null=False - ) - delete_but_current_missing = setup_opportunity(create_existing=False, is_delete=True) - - basic_insert = 
setup_opportunity(create_existing=False) - basic_insert2 = setup_opportunity(create_existing=False, all_fields_null=True) - basic_insert3 = setup_opportunity(create_existing=False) - - basic_update = setup_opportunity( - create_existing=True, - ) - basic_update2 = setup_opportunity(create_existing=True, all_fields_null=True) - basic_update3 = setup_opportunity(create_existing=True, all_fields_null=True) - basic_update4 = setup_opportunity(create_existing=True) - - # Something else deleted it - already_processed_insert = setup_opportunity( - create_existing=False, is_already_processed=True - ) - already_processed_update = setup_opportunity( - create_existing=True, is_already_processed=True - ) - - insert_that_will_fail = setup_opportunity( - create_existing=False, source_values={"oppcategory": "X"} - ) - - transform_oracle_data_task.process_opportunities() - - validate_opportunity(db_session, ordinary_delete, expect_in_db=False) - validate_opportunity(db_session, ordinary_delete2, expect_in_db=False) - validate_opportunity(db_session, delete_but_current_missing, expect_in_db=False) - - validate_opportunity(db_session, basic_insert) - validate_opportunity(db_session, basic_insert2) - validate_opportunity(db_session, basic_insert3) - - validate_opportunity(db_session, basic_update) - validate_opportunity(db_session, basic_update2) - validate_opportunity(db_session, basic_update3) - validate_opportunity(db_session, basic_update4) - - validate_opportunity(db_session, already_processed_insert, expect_in_db=False) - validate_opportunity(db_session, already_processed_update, expect_values_to_match=False) - - validate_opportunity(db_session, insert_that_will_fail, expect_in_db=False) - - metrics = transform_oracle_data_task.metrics - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_PROCESSED] == 11 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_DELETED] == 2 - # Note this insert counts the case where the category fails - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_INSERTED] == 3 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_UPDATED] == 4 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_ERROR_COUNT] == 2 - - # Rerunning does mostly nothing; it will attempt to re-process the two that errored - # but otherwise won't find anything else - transform_oracle_data_task.process_opportunities() - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_PROCESSED] == 13 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_DELETED] == 2 - # Note this insert counts the case where the category fails - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_INSERTED] == 3 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_UPDATED] == 4 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_ERROR_COUNT] == 4 - - def test_process_opportunity_delete_but_current_missing( - self, db_session, transform_oracle_data_task - ): - # Verify an error is raised when we try to delete something that doesn't exist - delete_but_current_missing = setup_opportunity(create_existing=False, is_delete=True) - - with pytest.raises( - ValueError, match="Cannot delete opportunity record as it does not exist" - ): - transform_oracle_data_task.process_opportunity(delete_but_current_missing, None) - - validate_opportunity(db_session, delete_but_current_missing, expect_in_db=False) - - def test_process_opportunity_invalid_category(self, db_session, transform_oracle_data_task): - # This will error in the transform as
that isn't a category we have configured - insert_that_will_fail = setup_opportunity( - create_existing=False, source_values={"oppcategory": "X"} - ) - - with pytest.raises(ValueError, match="Unrecognized opportunity category"): - transform_oracle_data_task.process_opportunity(insert_that_will_fail, None) - - validate_opportunity(db_session, insert_that_will_fail, expect_in_db=False) - - -class TestTransformAssistanceListing(BaseTestClass): - @pytest.fixture() - def transform_oracle_data_task( - self, db_session, enable_factory_create, truncate_opportunities - ) -> TransformOracleDataTask: - return TransformOracleDataTask(db_session) - - def test_process_opportunity_assistance_listings(self, db_session, transform_oracle_data_task): - opportunity1 = f.OpportunityFactory.create(opportunity_assistance_listings=[]) - cfda_insert1 = setup_cfda(create_existing=False, opportunity=opportunity1) - cfda_insert2 = setup_cfda(create_existing=False, opportunity=opportunity1) - cfda_update1 = setup_cfda(create_existing=True, opportunity=opportunity1) - cfda_delete1 = setup_cfda(create_existing=True, is_delete=True, opportunity=opportunity1) - cfda_update_already_processed1 = setup_cfda( - create_existing=True, is_already_processed=True, opportunity=opportunity1 - ) - - opportunity2 = f.OpportunityFactory.create(opportunity_assistance_listings=[]) - cfda_insert3 = setup_cfda(create_existing=False, opportunity=opportunity2) - cfda_update_already_processed2 = setup_cfda( - create_existing=True, is_already_processed=True, opportunity=opportunity2 - ) - cfda_delete_already_processed1 = setup_cfda( - create_existing=False, - is_already_processed=True, - is_delete=True, - opportunity=opportunity2, - ) - cfda_delete2 = setup_cfda(create_existing=True, is_delete=True, opportunity=opportunity2) - - opportunity3 = f.OpportunityFactory.create(opportunity_assistance_listings=[]) - cfda_update2 = setup_cfda(create_existing=True, opportunity=opportunity3) - cfda_delete_but_current_missing = setup_cfda( - create_existing=False, is_delete=True, opportunity=opportunity3 - ) - - cfda_insert_without_opportunity = setup_cfda( - create_existing=False, source_values={"opportunity_id": 12345678}, opportunity=None - ) - cfda_delete_without_opportunity = setup_cfda( - create_existing=False, source_values={"opportunity_id": 34567890}, opportunity=None - ) - - transform_oracle_data_task.process_assistance_listings() - - validate_assistance_listing(db_session, cfda_insert1) - validate_assistance_listing(db_session, cfda_insert2) - validate_assistance_listing(db_session, cfda_insert3) - validate_assistance_listing(db_session, cfda_update1) - validate_assistance_listing(db_session, cfda_update2) - validate_assistance_listing(db_session, cfda_delete1, expect_in_db=False) - validate_assistance_listing(db_session, cfda_delete2, expect_in_db=False) - - # Records that won't have been fetched - validate_assistance_listing( - db_session, - cfda_update_already_processed1, - expect_in_db=True, - expect_values_to_match=False, - ) - validate_assistance_listing( - db_session, - cfda_update_already_processed2, - expect_in_db=True, - expect_values_to_match=False, - ) - validate_assistance_listing(db_session, cfda_delete_already_processed1, expect_in_db=False) - - validate_assistance_listing(db_session, cfda_delete_but_current_missing, expect_in_db=False) - - validate_assistance_listing(db_session, cfda_insert_without_opportunity, expect_in_db=False) - validate_assistance_listing(db_session, cfda_delete_without_opportunity, expect_in_db=False) - - 
metrics = transform_oracle_data_task.metrics - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_PROCESSED] == 10 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_DELETED] == 2 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_INSERTED] == 3 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_UPDATED] == 2 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_ORPHANED] == 2 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1 - - # Rerunning finds nothing - no metrics update - transform_oracle_data_task.process_assistance_listings() - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_PROCESSED] == 10 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_DELETED] == 2 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_INSERTED] == 3 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_UPDATED] == 2 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_ORPHANED] == 2 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1 - - def test_process_assistance_listing_orphaned_record( - self, db_session, transform_oracle_data_task - ): - cfda_insert_without_opportunity = setup_cfda( - create_existing=False, source_values={"opportunity_id": 987654321}, opportunity=None - ) - - # Verify it gets marked as transformed - assert cfda_insert_without_opportunity.transformed_at is None - transform_oracle_data_task.process_assistance_listing( - cfda_insert_without_opportunity, None, None - ) - assert cfda_insert_without_opportunity.transformed_at is not None - assert cfda_insert_without_opportunity.transformation_notes == "orphaned_cfda" - assert ( - transform_oracle_data_task.metrics[ - transform_oracle_data_task.Metrics.TOTAL_RECORDS_ORPHANED - ] - == 1 - ) - - # Verify nothing actually gets created - opportunity = ( - db_session.query(Opportunity) - .filter(Opportunity.opportunity_id == cfda_insert_without_opportunity.opportunity_id) - .one_or_none() - ) - assert opportunity is None - assistance_listing = ( - db_session.query(OpportunityAssistanceListing) - .filter( - OpportunityAssistanceListing.opportunity_assistance_listing_id - == cfda_insert_without_opportunity.opp_cfda_id - ) - .one_or_none() - ) - assert assistance_listing is None - - def test_process_assistance_listing_delete_but_current_missing( - self, db_session, transform_oracle_data_task - ): - opportunity = f.OpportunityFactory.create(opportunity_assistance_listings=[]) - delete_but_current_missing = setup_cfda( - create_existing=False, is_delete=True, opportunity=opportunity - ) - - transform_oracle_data_task.process_assistance_listing( - delete_but_current_missing, None, opportunity - ) - - validate_assistance_listing(db_session, delete_but_current_missing, expect_in_db=False) - assert delete_but_current_missing.transformed_at is not None - assert delete_but_current_missing.transformation_notes == "orphaned_delete_record" - - -class TestTransformOpportunitySummary(BaseTestClass): - @pytest.fixture() - def transform_oracle_data_task( - self, db_session, enable_factory_create, truncate_opportunities - ) -> TransformOracleDataTask: - return TransformOracleDataTask(db_session) - - def test_process_opportunity_summaries(self, db_session, transform_oracle_data_task): - # Basic inserts - opportunity1 = f.OpportunityFactory.create( - no_current_summary=True, opportunity_assistance_listings=[] - ) - forecast_insert1 = setup_synopsis_forecast( - 
is_forecast=True, revision_number=None, create_existing=False, opportunity=opportunity1 - ) - synopsis_insert1 = setup_synopsis_forecast( - is_forecast=False, revision_number=None, create_existing=False, opportunity=opportunity1 - ) - forecast_hist_insert1 = setup_synopsis_forecast( - is_forecast=True, revision_number=1, create_existing=False, opportunity=opportunity1 - ) - synopsis_hist_insert1 = setup_synopsis_forecast( - is_forecast=False, revision_number=1, create_existing=False, opportunity=opportunity1 - ) - - # Mix of updates and inserts, somewhat resembling what happens when summary objects - # get moved to the historical table (we'd update the synopsis/forecast records, and create new historical) - opportunity2 = f.OpportunityFactory.create( - no_current_summary=True, opportunity_assistance_listings=[] - ) - forecast_update1 = setup_synopsis_forecast( - is_forecast=True, revision_number=None, create_existing=True, opportunity=opportunity2 - ) - synopsis_update1 = setup_synopsis_forecast( - is_forecast=False, revision_number=None, create_existing=True, opportunity=opportunity2 - ) - forecast_hist_update1 = setup_synopsis_forecast( - is_forecast=True, revision_number=1, create_existing=True, opportunity=opportunity2 - ) - synopsis_hist_update1 = setup_synopsis_forecast( - is_forecast=False, revision_number=1, create_existing=True, opportunity=opportunity2 - ) - forecast_hist_insert2 = setup_synopsis_forecast( - is_forecast=True, revision_number=2, create_existing=False, opportunity=opportunity2 - ) - synopsis_hist_insert2 = setup_synopsis_forecast( - is_forecast=False, revision_number=2, create_existing=False, opportunity=opportunity2 - ) - - # Mix of inserts, updates, and deletes - opportunity3 = f.OpportunityFactory.create( - no_current_summary=True, opportunity_assistance_listings=[] - ) - forecast_delete1 = setup_synopsis_forecast( - is_forecast=True, - revision_number=None, - create_existing=True, - is_delete=True, - opportunity=opportunity3, - ) - synopsis_delete1 = setup_synopsis_forecast( - is_forecast=False, - revision_number=None, - create_existing=True, - is_delete=True, - opportunity=opportunity3, - ) - forecast_hist_insert3 = setup_synopsis_forecast( - is_forecast=True, revision_number=2, create_existing=False, opportunity=opportunity3 - ) - synopsis_hist_update2 = setup_synopsis_forecast( - is_forecast=False, - revision_number=1, - create_existing=True, - source_values={"action_type": "D"}, - opportunity=opportunity3, - ) - - # A few error scenarios - opportunity4 = f.OpportunityFactory.create( - no_current_summary=True, opportunity_assistance_listings=[] - ) - forecast_delete_but_current_missing = setup_synopsis_forecast( - is_forecast=True, - revision_number=None, - create_existing=False, - is_delete=True, - opportunity=opportunity4, - ) - synopsis_update_invalid_yn_field = setup_synopsis_forecast( - is_forecast=False, - revision_number=None, - create_existing=True, - source_values={"sendmail": "E"}, - opportunity=opportunity4, - ) - synopsis_hist_insert_invalid_yn_field = setup_synopsis_forecast( - is_forecast=False, - revision_number=1, - create_existing=False, - source_values={"cost_sharing": "1"}, - opportunity=opportunity4, - ) - forecast_hist_update_invalid_action_type = setup_synopsis_forecast( - is_forecast=True, - revision_number=2, - create_existing=True, - source_values={"action_type": "X"}, - opportunity=opportunity4, - ) - - transform_oracle_data_task.process_opportunity_summaries() - - validate_opportunity_summary(db_session, forecast_insert1) - 
validate_opportunity_summary(db_session, synopsis_insert1) - validate_opportunity_summary(db_session, forecast_hist_insert1) - validate_opportunity_summary(db_session, synopsis_hist_insert1) - validate_opportunity_summary(db_session, forecast_hist_insert2) - validate_opportunity_summary(db_session, synopsis_hist_insert2) - validate_opportunity_summary(db_session, forecast_hist_insert3) - - validate_opportunity_summary(db_session, forecast_update1) - validate_opportunity_summary(db_session, synopsis_update1) - validate_opportunity_summary(db_session, forecast_hist_update1) - validate_opportunity_summary(db_session, synopsis_hist_update1) - validate_opportunity_summary(db_session, synopsis_hist_update2) - - validate_opportunity_summary(db_session, forecast_delete1, expect_in_db=False) - validate_opportunity_summary(db_session, synopsis_delete1, expect_in_db=False) - - validate_opportunity_summary( - db_session, forecast_delete_but_current_missing, expect_in_db=False - ) - validate_opportunity_summary( - db_session, - synopsis_update_invalid_yn_field, - expect_in_db=True, - expect_values_to_match=False, - ) - validate_opportunity_summary( - db_session, synopsis_hist_insert_invalid_yn_field, expect_in_db=False - ) - validate_opportunity_summary( - db_session, - forecast_hist_update_invalid_action_type, - expect_in_db=True, - expect_values_to_match=False, - ) - - metrics = transform_oracle_data_task.metrics - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_PROCESSED] == 18 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_DELETED] == 2 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_INSERTED] == 7 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_UPDATED] == 5 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_ERROR_COUNT] == 3 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1 - - # Rerunning will only attempt to re-process the errors, so total+errors goes up by 3 - transform_oracle_data_task.process_opportunity_summaries() - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_PROCESSED] == 21 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_DELETED] == 2 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_INSERTED] == 7 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_UPDATED] == 5 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_ERROR_COUNT] == 6 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1 - - @pytest.mark.parametrize( - "is_forecast,revision_number", [(True, None), (False, None), (True, 5), (False, 10)] - ) - def test_process_opportunity_summary_delete_but_current_missing( - self, db_session, transform_oracle_data_task, is_forecast, revision_number - ): - opportunity = f.OpportunityFactory.create( - no_current_summary=True, opportunity_assistance_listings=[] - ) - delete_but_current_missing = setup_synopsis_forecast( - is_forecast=is_forecast, - revision_number=revision_number, - create_existing=False, - is_delete=True, - opportunity=opportunity, - ) - - transform_oracle_data_task.process_opportunity_summary( - delete_but_current_missing, None, opportunity - ) - - validate_opportunity_summary(db_session, delete_but_current_missing, expect_in_db=False) - assert delete_but_current_missing.transformed_at is not None - assert delete_but_current_missing.transformation_notes == "orphaned_delete_record" - - @pytest.mark.parametrize( - 
"is_forecast,revision_number,source_values,expected_error", - [ - (True, None, {"sendmail": "z"}, "Unexpected Y/N bool value: z"), - (False, None, {"cost_sharing": "v"}, "Unexpected Y/N bool value: v"), - (True, 5, {"action_type": "T"}, "Unexpected action type value: T"), - (False, 10, {"action_type": "5"}, "Unexpected action type value: 5"), - ], - ) - def test_process_opportunity_summary_invalid_value_errors( - self, - db_session, - transform_oracle_data_task, - is_forecast, - revision_number, - source_values, - expected_error, - ): - opportunity = f.OpportunityFactory.create( - no_current_summary=True, opportunity_assistance_listings=[] - ) - source_summary = setup_synopsis_forecast( - is_forecast=is_forecast, - revision_number=revision_number, - create_existing=False, - opportunity=opportunity, - source_values=source_values, - ) - - with pytest.raises(ValueError, match=expected_error): - transform_oracle_data_task.process_opportunity_summary( - source_summary, None, opportunity - ) - - @pytest.mark.parametrize("is_forecast", [True, False]) - def test_process_opportunity_summary_but_no_opportunity_non_hist( - self, - db_session, - transform_oracle_data_task, - is_forecast, - ): - source_record = setup_synopsis_forecast( - is_forecast=is_forecast, - revision_number=None, - create_existing=False, - opportunity=None, - source_values={"opportunity_id": 12121212}, - ) - - with pytest.raises( - ValueError, - match="Opportunity summary cannot be processed as the opportunity for it does not exist", - ): - transform_oracle_data_task.process_opportunity_summary(source_record, None, None) - - @pytest.mark.parametrize("is_forecast,revision_number", [(True, 10), (False, 9)]) - def test_process_opportunity_summary_but_no_opportunity_hist( - self, - db_session, - transform_oracle_data_task, - is_forecast, - revision_number, - ): - source_record = setup_synopsis_forecast( - is_forecast=is_forecast, - revision_number=revision_number, - create_existing=False, - opportunity=None, - source_values={"opportunity_id": 12121212}, - ) - - transform_oracle_data_task.process_opportunity_summary(source_record, None, None) - - validate_opportunity_summary(db_session, source_record, expect_in_db=False) - assert source_record.transformed_at is not None - assert source_record.transformation_notes == "orphaned_historical_record" - - -class TestTransformApplicantType(BaseTestClass): - @pytest.fixture() - def transform_oracle_data_task( - self, db_session, enable_factory_create, truncate_opportunities - ) -> TransformOracleDataTask: - return TransformOracleDataTask(db_session) - - def test_process_applicant_types(self, db_session, transform_oracle_data_task): - opportunity_summary_forecast = f.OpportunitySummaryFactory.create( - is_forecast=True, revision_number=None, no_link_values=True - ) - forecast_insert1 = setup_applicant_type( - create_existing=False, - opportunity_summary=opportunity_summary_forecast, - legacy_lookup_value="00", - ) - forecast_update1 = setup_applicant_type( - create_existing=True, - opportunity_summary=opportunity_summary_forecast, - legacy_lookup_value="01", - applicant_type=ApplicantType.COUNTY_GOVERNMENTS, - ) - forecast_update2 = setup_applicant_type( - create_existing=True, - opportunity_summary=opportunity_summary_forecast, - legacy_lookup_value="02", - applicant_type=ApplicantType.CITY_OR_TOWNSHIP_GOVERNMENTS, - ) - forecast_delete1 = setup_applicant_type( - create_existing=True, - is_delete=True, - opportunity_summary=opportunity_summary_forecast, - legacy_lookup_value="04", - 
applicant_type=ApplicantType.SPECIAL_DISTRICT_GOVERNMENTS, - ) - forecast_delete2 = setup_applicant_type( - create_existing=True, - is_delete=True, - opportunity_summary=opportunity_summary_forecast, - legacy_lookup_value="05", - applicant_type=ApplicantType.INDEPENDENT_SCHOOL_DISTRICTS, - ) - forecast_update_already_processed = setup_applicant_type( - create_existing=True, - is_already_processed=True, - opportunity_summary=opportunity_summary_forecast, - legacy_lookup_value="06", - applicant_type=ApplicantType.PUBLIC_AND_STATE_INSTITUTIONS_OF_HIGHER_EDUCATION, - ) - - opportunity_summary_forecast_hist = f.OpportunitySummaryFactory.create( - is_forecast=True, revision_number=3, no_link_values=True - ) - forecast_hist_insert1 = setup_applicant_type( - create_existing=False, - opportunity_summary=opportunity_summary_forecast_hist, - legacy_lookup_value="07", - ) - forecast_hist_update1 = setup_applicant_type( - create_existing=True, - opportunity_summary=opportunity_summary_forecast_hist, - legacy_lookup_value="08", - applicant_type=ApplicantType.PUBLIC_AND_INDIAN_HOUSING_AUTHORITIES, - ) - forecast_hist_update2 = setup_applicant_type( - create_existing=True, - opportunity_summary=opportunity_summary_forecast_hist, - legacy_lookup_value="11", - applicant_type=ApplicantType.OTHER_NATIVE_AMERICAN_TRIBAL_ORGANIZATIONS, - ) - forecast_hist_delete1 = setup_applicant_type( - create_existing=True, - is_delete=True, - opportunity_summary=opportunity_summary_forecast_hist, - legacy_lookup_value="12", - applicant_type=ApplicantType.NONPROFITS_NON_HIGHER_EDUCATION_WITH_501C3, - ) - forecast_hist_delete_already_processed = setup_applicant_type( - create_existing=False, - is_delete=True, - is_already_processed=True, - opportunity_summary=opportunity_summary_forecast_hist, - legacy_lookup_value="13", - ) - forecast_hist_duplicate_insert = setup_applicant_type( - create_existing=False, - opportunity_summary=opportunity_summary_forecast_hist, - legacy_lookup_value="08", - ) - - opportunity_summary_syn = f.OpportunitySummaryFactory.create( - is_forecast=False, revision_number=None, no_link_values=True - ) - syn_insert1 = setup_applicant_type( - create_existing=False, - opportunity_summary=opportunity_summary_syn, - legacy_lookup_value="20", - ) - syn_insert2 = setup_applicant_type( - create_existing=False, - opportunity_summary=opportunity_summary_syn, - legacy_lookup_value="21", - ) - syn_update1 = setup_applicant_type( - create_existing=True, - opportunity_summary=opportunity_summary_syn, - legacy_lookup_value="22", - applicant_type=ApplicantType.FOR_PROFIT_ORGANIZATIONS_OTHER_THAN_SMALL_BUSINESSES, - ) - syn_update2 = setup_applicant_type( - create_existing=True, - opportunity_summary=opportunity_summary_syn, - legacy_lookup_value="23", - applicant_type=ApplicantType.SMALL_BUSINESSES, - ) - syn_delete1 = setup_applicant_type( - create_existing=True, - is_delete=True, - opportunity_summary=opportunity_summary_syn, - legacy_lookup_value="25", - applicant_type=ApplicantType.OTHER, - ) - syn_delete2 = setup_applicant_type( - create_existing=True, - is_delete=True, - opportunity_summary=opportunity_summary_syn, - legacy_lookup_value="99", - applicant_type=ApplicantType.UNRESTRICTED, - ) - syn_delete_but_current_missing = setup_applicant_type( - create_existing=False, - is_delete=True, - opportunity_summary=opportunity_summary_syn, - legacy_lookup_value="07", - ) - syn_update_already_processed = setup_applicant_type( - create_existing=True, - is_already_processed=True, - 
-            opportunity_summary=opportunity_summary_syn,
-            legacy_lookup_value="08",
-            applicant_type=ApplicantType.PUBLIC_AND_INDIAN_HOUSING_AUTHORITIES,
-        )
-
-        opportunity_summary_syn_hist = f.OpportunitySummaryFactory.create(
-            is_forecast=False, revision_number=21, no_link_values=True
-        )
-        syn_hist_insert1 = setup_applicant_type(
-            create_existing=False,
-            opportunity_summary=opportunity_summary_syn_hist,
-            legacy_lookup_value="11",
-        )
-        syn_hist_update1 = setup_applicant_type(
-            create_existing=True,
-            opportunity_summary=opportunity_summary_syn_hist,
-            legacy_lookup_value="12",
-            applicant_type=ApplicantType.NONPROFITS_NON_HIGHER_EDUCATION_WITH_501C3,
-        )
-        syn_hist_update2 = setup_applicant_type(
-            create_existing=True,
-            opportunity_summary=opportunity_summary_syn_hist,
-            legacy_lookup_value="13",
-            applicant_type=ApplicantType.NONPROFITS_NON_HIGHER_EDUCATION_WITHOUT_501C3,
-        )
-        syn_hist_delete1 = setup_applicant_type(
-            create_existing=True,
-            is_delete=True,
-            opportunity_summary=opportunity_summary_syn_hist,
-            legacy_lookup_value="25",
-            applicant_type=ApplicantType.OTHER,
-        )
-        syn_hist_delete2 = setup_applicant_type(
-            create_existing=True,
-            is_delete=True,
-            opportunity_summary=opportunity_summary_syn_hist,
-            legacy_lookup_value="99",
-            applicant_type=ApplicantType.UNRESTRICTED,
-        )
-        syn_hist_insert_invalid_type = setup_applicant_type(
-            create_existing=False,
-            opportunity_summary=opportunity_summary_syn_hist,
-            legacy_lookup_value="X",
-            applicant_type=ApplicantType.STATE_GOVERNMENTS,
-        )
-
-        transform_oracle_data_task.process_link_applicant_types()
-
-        validate_applicant_type(
-            db_session, forecast_insert1, expected_applicant_type=ApplicantType.STATE_GOVERNMENTS
-        )
-        validate_applicant_type(
-            db_session,
-            forecast_hist_insert1,
-            expected_applicant_type=ApplicantType.FEDERALLY_RECOGNIZED_NATIVE_AMERICAN_TRIBAL_GOVERNMENTS,
-        )
-        validate_applicant_type(
-            db_session,
-            syn_insert1,
-            expected_applicant_type=ApplicantType.PRIVATE_INSTITUTIONS_OF_HIGHER_EDUCATION,
-        )
-        validate_applicant_type(
-            db_session, syn_insert2, expected_applicant_type=ApplicantType.INDIVIDUALS
-        )
-        validate_applicant_type(
-            db_session,
-            syn_hist_insert1,
-            expected_applicant_type=ApplicantType.OTHER_NATIVE_AMERICAN_TRIBAL_ORGANIZATIONS,
-        )
-
-        validate_applicant_type(
-            db_session, forecast_update1, expected_applicant_type=ApplicantType.COUNTY_GOVERNMENTS
-        )
-        validate_applicant_type(
-            db_session,
-            forecast_update2,
-            expected_applicant_type=ApplicantType.CITY_OR_TOWNSHIP_GOVERNMENTS,
-        )
-        validate_applicant_type(
-            db_session,
-            forecast_hist_update1,
-            expected_applicant_type=ApplicantType.PUBLIC_AND_INDIAN_HOUSING_AUTHORITIES,
-        )
-        validate_applicant_type(
-            db_session,
-            forecast_hist_update2,
-            expected_applicant_type=ApplicantType.OTHER_NATIVE_AMERICAN_TRIBAL_ORGANIZATIONS,
-        )
-        validate_applicant_type(
-            db_session,
-            syn_update1,
-            expected_applicant_type=ApplicantType.FOR_PROFIT_ORGANIZATIONS_OTHER_THAN_SMALL_BUSINESSES,
-        )
-        validate_applicant_type(
-            db_session, syn_update2, expected_applicant_type=ApplicantType.SMALL_BUSINESSES
-        )
-        validate_applicant_type(
-            db_session,
-            syn_hist_update1,
-            expected_applicant_type=ApplicantType.NONPROFITS_NON_HIGHER_EDUCATION_WITH_501C3,
-        )
-        validate_applicant_type(
-            db_session,
-            syn_hist_update2,
-            expected_applicant_type=ApplicantType.NONPROFITS_NON_HIGHER_EDUCATION_WITHOUT_501C3,
-        )
-
-        validate_applicant_type(db_session, forecast_delete1, expect_in_db=False)
-        validate_applicant_type(db_session, forecast_delete2, expect_in_db=False)
-        validate_applicant_type(db_session, forecast_hist_delete1, expect_in_db=False)
-        validate_applicant_type(db_session, syn_delete1, expect_in_db=False)
-        validate_applicant_type(db_session, syn_delete2, expect_in_db=False)
-        validate_applicant_type(db_session, syn_hist_delete1, expect_in_db=False)
-        validate_applicant_type(db_session, syn_hist_delete2, expect_in_db=False)
-
-        validate_applicant_type(
-            db_session,
-            forecast_update_already_processed,
-            expected_applicant_type=ApplicantType.PUBLIC_AND_STATE_INSTITUTIONS_OF_HIGHER_EDUCATION,
-            expect_values_to_match=False,
-        )
-        validate_applicant_type(
-            db_session, forecast_hist_delete_already_processed, expect_in_db=False
-        )
-        validate_applicant_type(
-            db_session,
-            syn_update_already_processed,
-            expected_applicant_type=ApplicantType.PUBLIC_AND_INDIAN_HOUSING_AUTHORITIES,
-            expect_values_to_match=False,
-        )
-
-        validate_applicant_type(
-            db_session, syn_delete_but_current_missing, expect_in_db=False, was_processed=True
-        )
-        validate_applicant_type(
-            db_session, syn_hist_insert_invalid_type, expect_in_db=False, was_processed=False
-        )
-
-        validate_applicant_type(
-            db_session, forecast_hist_duplicate_insert, expect_in_db=False, was_processed=True
-        )
-
-        metrics = transform_oracle_data_task.metrics
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_PROCESSED] == 23
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_DELETED] == 7
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_INSERTED] == 5
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_UPDATED] == 8
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_ERROR_COUNT] == 1
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_DUPLICATE_RECORDS_SKIPPED] == 1
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1
-
-        # Rerunning will only attempt to re-process the errors, so total+errors goes up by 1
-        transform_oracle_data_task.process_link_applicant_types()
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_PROCESSED] == 24
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_DELETED] == 7
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_INSERTED] == 5
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_UPDATED] == 8
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_ERROR_COUNT] == 2
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_DUPLICATE_RECORDS_SKIPPED] == 1
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1
-
-    @pytest.mark.parametrize(
-        "is_forecast,revision_number", [(True, None), (False, None), (True, 5), (False, 10)]
-    )
-    def test_process_applicant_types_but_current_missing(
-        self, db_session, transform_oracle_data_task, is_forecast, revision_number
-    ):
-        opportunity_summary = f.OpportunitySummaryFactory.create(
-            is_forecast=is_forecast, revision_number=revision_number, no_link_values=True
-        )
-        delete_but_current_missing = setup_applicant_type(
-            create_existing=False,
-            opportunity_summary=opportunity_summary,
-            legacy_lookup_value="00",
-            is_delete=True,
-        )
-
-        transform_oracle_data_task.process_link_applicant_type(
-            delete_but_current_missing, None, opportunity_summary
-        )
-
-        validate_applicant_type(db_session, delete_but_current_missing, expect_in_db=False)
-        assert delete_but_current_missing.transformed_at is not None
-        assert delete_but_current_missing.transformation_notes == "orphaned_delete_record"
-
-    @pytest.mark.parametrize(
-        "is_forecast,revision_number,legacy_lookup_value",
-        [(True, None, "90"), (False, None, "xx"), (True, 5, "50"), (False, 10, "1")],
-    )
-    def test_process_applicant_types_but_invalid_lookup_value(
-        self,
-        db_session,
-        transform_oracle_data_task,
-        is_forecast,
-        revision_number,
-        legacy_lookup_value,
-    ):
-        opportunity_summary = f.OpportunitySummaryFactory.create(
-            is_forecast=is_forecast, revision_number=revision_number, no_link_values=True
-        )
-        insert_but_invalid_value = setup_applicant_type(
-            create_existing=False,
-            opportunity_summary=opportunity_summary,
-            legacy_lookup_value=legacy_lookup_value,
-        )
-
-        with pytest.raises(ValueError, match="Unrecognized applicant type"):
-            transform_oracle_data_task.process_link_applicant_type(
-                insert_but_invalid_value, None, opportunity_summary
-            )
-
-    @pytest.mark.parametrize(
-        "factory_cls",
-        [f.StagingTapplicanttypesForecastFactory, f.StagingTapplicanttypesSynopsisFactory],
-    )
-    def test_process_applicant_type_but_no_opportunity_summary_non_hist(
-        self,
-        db_session,
-        transform_oracle_data_task,
-        factory_cls,
-    ):
-        source_record = factory_cls.create(orphaned_record=True)
-
-        with pytest.raises(
-            ValueError,
-            match="Applicant type record cannot be processed as the opportunity summary for it does not exist",
-        ):
-            transform_oracle_data_task.process_link_applicant_type(source_record, None, None)
-
-    @pytest.mark.parametrize(
-        "factory_cls",
-        [f.StagingTapplicanttypesForecastHistFactory, f.StagingTapplicanttypesSynopsisHistFactory],
-    )
-    def test_process_applicant_type_but_no_opportunity_summary_hist(
-        self,
-        db_session,
-        transform_oracle_data_task,
-        factory_cls,
-    ):
-        source_record = factory_cls.create(orphaned_record=True, revision_number=12)
-        transform_oracle_data_task.process_link_applicant_type(source_record, None, None)
-        assert source_record.transformed_at is not None
-        assert source_record.transformation_notes == "orphaned_historical_record"
-
-
-class TestTransformFundingInstrument(BaseTestClass):
-    @pytest.fixture()
-    def transform_oracle_data_task(
-        self, db_session, enable_factory_create, truncate_opportunities
-    ) -> TransformOracleDataTask:
-        return TransformOracleDataTask(db_session)
-
-    def test_process_funding_instruments(self, db_session, transform_oracle_data_task):
-        opportunity_summary_forecast = f.OpportunitySummaryFactory.create(
-            is_forecast=True, revision_number=None, no_link_values=True
-        )
-        forecast_insert1 = setup_funding_instrument(
-            create_existing=False,
-            opportunity_summary=opportunity_summary_forecast,
-            legacy_lookup_value="CA",
-        )
-        forecast_update1 = setup_funding_instrument(
-            create_existing=True,
-            opportunity_summary=opportunity_summary_forecast,
-            legacy_lookup_value="G",
-            funding_instrument=FundingInstrument.GRANT,
-        )
-        forecast_delete1 = setup_funding_instrument(
-            create_existing=True,
-            is_delete=True,
-            opportunity_summary=opportunity_summary_forecast,
-            legacy_lookup_value="PC",
-            funding_instrument=FundingInstrument.PROCUREMENT_CONTRACT,
-        )
-        forecast_update_already_processed = setup_funding_instrument(
-            create_existing=True,
-            is_already_processed=True,
-            opportunity_summary=opportunity_summary_forecast,
-            legacy_lookup_value="O",
-            funding_instrument=FundingInstrument.OTHER,
-        )
-
-        opportunity_summary_forecast_hist = f.OpportunitySummaryFactory.create(
-            is_forecast=True, revision_number=3, no_link_values=True
-        )
-        forecast_hist_insert1 = setup_funding_instrument(
-            create_existing=False,
-            opportunity_summary=opportunity_summary_forecast_hist,
-            legacy_lookup_value="G",
-        )
-        forecast_hist_delete1 = setup_funding_instrument(
-            create_existing=True,
-            is_delete=True,
-            opportunity_summary=opportunity_summary_forecast_hist,
-            legacy_lookup_value="CA",
-            funding_instrument=FundingInstrument.COOPERATIVE_AGREEMENT,
-        )
-        forecast_hist_delete_already_processed = setup_funding_instrument(
-            create_existing=False,
-            is_delete=True,
-            is_already_processed=True,
-            opportunity_summary=opportunity_summary_forecast_hist,
-            legacy_lookup_value="O",
-        )
-        syn_delete_but_current_missing = setup_funding_instrument(
-            create_existing=False,
-            is_delete=True,
-            opportunity_summary=opportunity_summary_forecast_hist,
-            legacy_lookup_value="PC",
-        )
-
-        opportunity_summary_syn = f.OpportunitySummaryFactory.create(
-            is_forecast=False, revision_number=None, no_link_values=True
-        )
-        syn_insert1 = setup_funding_instrument(
-            create_existing=False,
-            opportunity_summary=opportunity_summary_syn,
-            legacy_lookup_value="O",
-        )
-        syn_insert2 = setup_funding_instrument(
-            create_existing=False,
-            opportunity_summary=opportunity_summary_syn,
-            legacy_lookup_value="G",
-        )
-        syn_delete1 = setup_funding_instrument(
-            create_existing=True,
-            is_delete=True,
-            opportunity_summary=opportunity_summary_syn,
-            legacy_lookup_value="CA",
-            funding_instrument=FundingInstrument.COOPERATIVE_AGREEMENT,
-        )
-        syn_update_already_processed = setup_funding_instrument(
-            create_existing=True,
-            is_already_processed=True,
-            opportunity_summary=opportunity_summary_syn,
-            legacy_lookup_value="PC",
-            funding_instrument=FundingInstrument.PROCUREMENT_CONTRACT,
-        )
-
-        opportunity_summary_syn_hist = f.OpportunitySummaryFactory.create(
-            is_forecast=False, revision_number=21, no_link_values=True
-        )
-        syn_hist_insert1 = setup_funding_instrument(
-            create_existing=False,
-            opportunity_summary=opportunity_summary_syn_hist,
-            legacy_lookup_value="CA",
-        )
-        syn_hist_update1 = setup_funding_instrument(
-            create_existing=True,
-            opportunity_summary=opportunity_summary_syn_hist,
-            legacy_lookup_value="O",
-            funding_instrument=FundingInstrument.OTHER,
-        )
-        syn_hist_delete1 = setup_funding_instrument(
-            create_existing=True,
-            is_delete=True,
-            opportunity_summary=opportunity_summary_syn_hist,
-            legacy_lookup_value="PC",
-            funding_instrument=FundingInstrument.PROCUREMENT_CONTRACT,
-        )
-        syn_hist_delete2 = setup_funding_instrument(
-            create_existing=True,
-            is_delete=True,
-            opportunity_summary=opportunity_summary_syn_hist,
-            legacy_lookup_value="G",
-            funding_instrument=FundingInstrument.GRANT,
-        )
-        syn_hist_insert_invalid_type = setup_funding_instrument(
-            create_existing=False,
-            opportunity_summary=opportunity_summary_syn_hist,
-            legacy_lookup_value="X",
-        )
-
-        transform_oracle_data_task.process_link_funding_instruments()
-
-        validate_funding_instrument(
-            db_session,
-            forecast_insert1,
-            expected_funding_instrument=FundingInstrument.COOPERATIVE_AGREEMENT,
-        )
-        validate_funding_instrument(
-            db_session, forecast_hist_insert1, expected_funding_instrument=FundingInstrument.GRANT
-        )
-        validate_funding_instrument(
-            db_session, syn_insert1, expected_funding_instrument=FundingInstrument.OTHER
-        )
-        validate_funding_instrument(
-            db_session, syn_insert2, expected_funding_instrument=FundingInstrument.GRANT
-        )
-        validate_funding_instrument(
-            db_session,
-            syn_hist_insert1,
-            expected_funding_instrument=FundingInstrument.COOPERATIVE_AGREEMENT,
-        )
-
-        validate_funding_instrument(
-            db_session, forecast_update1, expected_funding_instrument=FundingInstrument.GRANT
-        )
-        validate_funding_instrument(
-            db_session, syn_hist_update1, expected_funding_instrument=FundingInstrument.OTHER
-        )
-
-        validate_funding_instrument(db_session, forecast_delete1, expect_in_db=False)
-        validate_funding_instrument(db_session, forecast_hist_delete1, expect_in_db=False)
-        validate_funding_instrument(db_session, syn_delete1, expect_in_db=False)
-        validate_funding_instrument(db_session, syn_hist_delete1, expect_in_db=False)
-        validate_funding_instrument(db_session, syn_hist_delete2, expect_in_db=False)
-
-        validate_funding_instrument(
-            db_session,
-            forecast_update_already_processed,
-            expected_funding_instrument=FundingInstrument.OTHER,
-            expect_values_to_match=False,
-        )
-        validate_funding_instrument(
-            db_session, forecast_hist_delete_already_processed, expect_in_db=False
-        )
-        validate_funding_instrument(
-            db_session,
-            syn_update_already_processed,
-            expected_funding_instrument=FundingInstrument.PROCUREMENT_CONTRACT,
-            expect_values_to_match=False,
-        )
-
-        validate_funding_instrument(
-            db_session, syn_delete_but_current_missing, expect_in_db=False, was_processed=True
-        )
-        validate_funding_instrument(
-            db_session, syn_hist_insert_invalid_type, expect_in_db=False, was_processed=False
-        )
-
-        metrics = transform_oracle_data_task.metrics
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_PROCESSED] == 14
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_DELETED] == 5
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_INSERTED] == 5
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_UPDATED] == 2
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_ERROR_COUNT] == 1
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1
-
-        # Rerunning will only attempt to re-process the errors, so total+errors goes up by 1
-        transform_oracle_data_task.process_link_funding_instruments()
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_PROCESSED] == 15
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_DELETED] == 5
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_INSERTED] == 5
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_UPDATED] == 2
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_ERROR_COUNT] == 2
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1
-
-    @pytest.mark.parametrize(
-        "is_forecast,revision_number", [(True, None), (False, None), (True, 1), (False, 4)]
-    )
-    def test_process_funding_instrument_but_current_missing(
-        self, db_session, transform_oracle_data_task, is_forecast, revision_number
-    ):
-        opportunity_summary = f.OpportunitySummaryFactory.create(
-            is_forecast=is_forecast, revision_number=revision_number, no_link_values=True
-        )
-        delete_but_current_missing = setup_funding_instrument(
-            create_existing=False,
-            opportunity_summary=opportunity_summary,
-            legacy_lookup_value="G",
-            is_delete=True,
-        )
-
-        transform_oracle_data_task.process_link_funding_instrument(
-            delete_but_current_missing, None, opportunity_summary
-        )
-
-        validate_funding_instrument(db_session, delete_but_current_missing, expect_in_db=False)
-        assert delete_but_current_missing.transformed_at is not None
-        assert delete_but_current_missing.transformation_notes == "orphaned_delete_record"
-
-    @pytest.mark.parametrize(
-        "is_forecast,revision_number,legacy_lookup_value",
-        [(True, None, "X"), (False, None, "4"), (True, 5, "Y"), (False, 10, "A")],
-    )
-    def test_process_funding_instrument_but_invalid_lookup_value(
-        self,
-        db_session,
-        transform_oracle_data_task,
-        is_forecast,
-        revision_number,
-        legacy_lookup_value,
-    ):
-        opportunity_summary = f.OpportunitySummaryFactory.create(
-            is_forecast=is_forecast, revision_number=revision_number, no_link_values=True
-        )
-        insert_but_invalid_value = setup_funding_instrument(
-            create_existing=False,
-            opportunity_summary=opportunity_summary,
-            legacy_lookup_value=legacy_lookup_value,
-        )
-
-        with pytest.raises(ValueError, match="Unrecognized funding instrument"):
-            transform_oracle_data_task.process_link_funding_instrument(
-                insert_but_invalid_value, None, opportunity_summary
-            )
-
-    @pytest.mark.parametrize(
-        "factory_cls", [f.StagingTfundinstrForecastFactory, f.StagingTfundinstrSynopsisFactory]
-    )
-    def test_process_funding_instrument_but_no_opportunity_summary_non_hist(
-        self,
-        db_session,
-        transform_oracle_data_task,
-        factory_cls,
-    ):
-        source_record = factory_cls.create(orphaned_record=True)
-
-        with pytest.raises(
-            ValueError,
-            match="Funding instrument record cannot be processed as the opportunity summary for it does not exist",
-        ):
-            transform_oracle_data_task.process_link_funding_instrument(source_record, None, None)
-
-    @pytest.mark.parametrize(
-        "factory_cls",
-        [f.StagingTfundinstrForecastHistFactory, f.StagingTfundinstrSynopsisHistFactory],
-    )
-    def test_process_funding_instrument_but_no_opportunity_summary_hist(
-        self,
-        db_session,
-        transform_oracle_data_task,
-        factory_cls,
-    ):
-        source_record = factory_cls.create(orphaned_record=True, revision_number=12)
-        transform_oracle_data_task.process_link_funding_instrument(source_record, None, None)
-        assert source_record.transformed_at is not None
-        assert source_record.transformation_notes == "orphaned_historical_record"
-
-
-class TestTransformFundingCategory(BaseTestClass):
-    @pytest.fixture()
-    def transform_oracle_data_task(
-        self, db_session, enable_factory_create, truncate_opportunities
-    ) -> TransformOracleDataTask:
-        return TransformOracleDataTask(db_session)
-
-    def test_process_funding_categories(self, db_session, transform_oracle_data_task):
-        opportunity_summary_forecast = f.OpportunitySummaryFactory.create(
-            is_forecast=True, revision_number=None, no_link_values=True
-        )
-        forecast_insert1 = setup_funding_category(
-            create_existing=False,
-            opportunity_summary=opportunity_summary_forecast,
-            legacy_lookup_value="RA",
-        )
-        forecast_insert2 = setup_funding_category(
-            create_existing=False,
-            opportunity_summary=opportunity_summary_forecast,
-            legacy_lookup_value="AG",
-        )
-        forecast_update1 = setup_funding_category(
-            create_existing=True,
-            opportunity_summary=opportunity_summary_forecast,
-            legacy_lookup_value="AR",
-            funding_category=FundingCategory.ARTS,
-        )
-        forecast_delete1 = setup_funding_category(
-            create_existing=True,
-            is_delete=True,
-            opportunity_summary=opportunity_summary_forecast,
-            legacy_lookup_value="BC",
-            funding_category=FundingCategory.BUSINESS_AND_COMMERCE,
-        )
-        forecast_delete2 = setup_funding_category(
-            create_existing=True,
-            is_delete=True,
-            opportunity_summary=opportunity_summary_forecast,
-            legacy_lookup_value="CD",
-            funding_category=FundingCategory.COMMUNITY_DEVELOPMENT,
-        )
-        forecast_update_already_processed = setup_funding_category(
-            create_existing=True,
-            is_already_processed=True,
-            opportunity_summary=opportunity_summary_forecast,
-            legacy_lookup_value="CP",
-            funding_category=FundingCategory.CONSUMER_PROTECTION,
-        )
-
-        opportunity_summary_forecast_hist = f.OpportunitySummaryFactory.create(
-            is_forecast=True, revision_number=3, no_link_values=True
-        )
-        forecast_hist_insert1 = setup_funding_category(
-            create_existing=False,
-            opportunity_summary=opportunity_summary_forecast_hist,
-            legacy_lookup_value="DPR",
-        )
-        forecast_hist_insert2 = setup_funding_category(
-            create_existing=False,
-            opportunity_summary=opportunity_summary_forecast_hist,
-            legacy_lookup_value="ED",
-        )
-        forecast_hist_update1 = setup_funding_category(
-            create_existing=True,
-            opportunity_summary=opportunity_summary_forecast_hist,
-            legacy_lookup_value="ELT",
-            funding_category=FundingCategory.EMPLOYMENT_LABOR_AND_TRAINING,
-        )
-        forecast_hist_delete1 = setup_funding_category(
-            create_existing=True,
-            is_delete=True,
-            opportunity_summary=opportunity_summary_forecast_hist,
-            legacy_lookup_value="EN",
-            funding_category=FundingCategory.ENERGY,
-        )
-        forecast_hist_delete_already_processed = setup_funding_category(
-            create_existing=False,
-            is_delete=True,
-            is_already_processed=True,
-            opportunity_summary=opportunity_summary_forecast_hist,
-            legacy_lookup_value="ENV",
-        )
-
-        opportunity_summary_syn = f.OpportunitySummaryFactory.create(
-            is_forecast=False, revision_number=None, no_link_values=True
-        )
-        syn_insert1 = setup_funding_category(
-            create_existing=False,
-            opportunity_summary=opportunity_summary_syn,
-            legacy_lookup_value="FN",
-        )
-        syn_insert2 = setup_funding_category(
-            create_existing=False,
-            opportunity_summary=opportunity_summary_syn,
-            legacy_lookup_value="HL",
-        )
-        syn_update1 = setup_funding_category(
-            create_existing=True,
-            opportunity_summary=opportunity_summary_syn,
-            legacy_lookup_value="HO",
-            funding_category=FundingCategory.HOUSING,
-        )
-        syn_delete1 = setup_funding_category(
-            create_existing=True,
-            is_delete=True,
-            opportunity_summary=opportunity_summary_syn,
-            legacy_lookup_value="HU",
-            funding_category=FundingCategory.HUMANITIES,
-        )
-        syn_delete2 = setup_funding_category(
-            create_existing=True,
-            is_delete=True,
-            opportunity_summary=opportunity_summary_syn,
-            legacy_lookup_value="IIJ",
-            funding_category=FundingCategory.INFRASTRUCTURE_INVESTMENT_AND_JOBS_ACT,
-        )
-        syn_delete_but_current_missing = setup_funding_category(
-            create_existing=False,
-            is_delete=True,
-            opportunity_summary=opportunity_summary_syn,
-            legacy_lookup_value="IS",
-        )
-        syn_update_already_processed = setup_funding_category(
-            create_existing=True,
-            is_already_processed=True,
-            opportunity_summary=opportunity_summary_syn,
-            legacy_lookup_value="ISS",
-            funding_category=FundingCategory.INCOME_SECURITY_AND_SOCIAL_SERVICES,
-        )
-
-        opportunity_summary_syn_hist = f.OpportunitySummaryFactory.create(
-            is_forecast=False, revision_number=21, no_link_values=True
-        )
-        syn_hist_insert1 = setup_funding_category(
-            create_existing=False,
-            opportunity_summary=opportunity_summary_syn_hist,
-            legacy_lookup_value="LJL",
-        )
-        syn_hist_insert2 = setup_funding_category(
-            create_existing=False,
-            opportunity_summary=opportunity_summary_syn_hist,
-            legacy_lookup_value="NR",
-        )
-        syn_hist_insert3 = setup_funding_category(
-            create_existing=False,
-            opportunity_summary=opportunity_summary_syn_hist,
-            legacy_lookup_value="OZ",
-        )
-        syn_hist_update1 = setup_funding_category(
-            create_existing=True,
-            opportunity_summary=opportunity_summary_syn_hist,
-            legacy_lookup_value="RD",
-            funding_category=FundingCategory.REGIONAL_DEVELOPMENT,
-        )
-
-        syn_hist_delete1 = setup_funding_category(
-            create_existing=True,
-            is_delete=True,
-            opportunity_summary=opportunity_summary_syn_hist,
-            legacy_lookup_value="ST",
-            funding_category=FundingCategory.SCIENCE_TECHNOLOGY_AND_OTHER_RESEARCH_AND_DEVELOPMENT,
-        )
-        syn_hist_delete2 = setup_funding_category(
-            create_existing=True,
-            is_delete=True,
-            opportunity_summary=opportunity_summary_syn_hist,
-            legacy_lookup_value="T",
-            funding_category=FundingCategory.TRANSPORTATION,
-        )
-        syn_hist_insert_invalid_type = setup_funding_category(
-            create_existing=False,
-            opportunity_summary=opportunity_summary_syn_hist,
-            legacy_lookup_value="XYZ",
-            funding_category=FundingCategory.HEALTH,
-        )
-
-        transform_oracle_data_task.process_link_funding_categories()
-
-        validate_funding_category(
-            db_session, forecast_insert1, expected_funding_category=FundingCategory.RECOVERY_ACT
-        )
-        validate_funding_category(
-            db_session, forecast_insert2, expected_funding_category=FundingCategory.AGRICULTURE
-        )
-        validate_funding_category(
-            db_session,
-            forecast_hist_insert1,
-            expected_funding_category=FundingCategory.DISASTER_PREVENTION_AND_RELIEF,
-        )
-        validate_funding_category(
-            db_session, forecast_hist_insert2, expected_funding_category=FundingCategory.EDUCATION
-        )
-        validate_funding_category(
-            db_session, syn_insert1, expected_funding_category=FundingCategory.FOOD_AND_NUTRITION
-        )
-        validate_funding_category(
-            db_session, syn_insert2, expected_funding_category=FundingCategory.HEALTH
-        )
-        validate_funding_category(
-            db_session,
-            syn_hist_insert1,
-            expected_funding_category=FundingCategory.LAW_JUSTICE_AND_LEGAL_SERVICES,
-        )
-        validate_funding_category(
-            db_session,
-            syn_hist_insert2,
-            expected_funding_category=FundingCategory.NATURAL_RESOURCES,
-        )
-        validate_funding_category(
-            db_session,
-            syn_hist_insert3,
-            expected_funding_category=FundingCategory.OPPORTUNITY_ZONE_BENEFITS,
-        )
-
-        validate_funding_category(
-            db_session, forecast_update1, expected_funding_category=FundingCategory.ARTS
-        )
-        validate_funding_category(
-            db_session,
-            forecast_hist_update1,
-            expected_funding_category=FundingCategory.EMPLOYMENT_LABOR_AND_TRAINING,
-        )
-        validate_funding_category(
-            db_session, syn_update1, expected_funding_category=FundingCategory.HOUSING
-        )
-        validate_funding_category(
-            db_session,
-            syn_hist_update1,
-            expected_funding_category=FundingCategory.REGIONAL_DEVELOPMENT,
-        )
-
-        validate_funding_category(db_session, forecast_delete1, expect_in_db=False)
-        validate_funding_category(db_session, forecast_delete2, expect_in_db=False)
-        validate_funding_category(db_session, forecast_hist_delete1, expect_in_db=False)
-        validate_funding_category(db_session, syn_delete1, expect_in_db=False)
-        validate_funding_category(db_session, syn_delete2, expect_in_db=False)
-        validate_funding_category(db_session, syn_hist_delete1, expect_in_db=False)
-        validate_funding_category(db_session, syn_hist_delete2, expect_in_db=False)
-
-        validate_funding_category(
-            db_session,
-            forecast_update_already_processed,
-            expected_funding_category=FundingCategory.CONSUMER_PROTECTION,
-            expect_values_to_match=False,
-        )
-        validate_funding_category(
-            db_session, forecast_hist_delete_already_processed, expect_in_db=False
-        )
-        validate_funding_category(
-            db_session,
-            syn_update_already_processed,
-            expected_funding_category=FundingCategory.INCOME_SECURITY_AND_SOCIAL_SERVICES,
-            expect_values_to_match=False,
-        )
-
-        validate_funding_category(
-            db_session, syn_delete_but_current_missing, expect_in_db=False, was_processed=True
-        )
-        validate_funding_category(
-            db_session, syn_hist_insert_invalid_type, expect_in_db=False, was_processed=False
-        )
-
-        metrics = transform_oracle_data_task.metrics
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_PROCESSED] == 22
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_DELETED] == 7
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_INSERTED] == 9
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_UPDATED] == 4
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_ERROR_COUNT] == 1
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1
-
-        # Rerunning will only attempt to re-process the errors, so total+errors goes up by 1
-        transform_oracle_data_task.process_link_funding_categories()
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_PROCESSED] == 23
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_DELETED] == 7
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_INSERTED] == 9
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_UPDATED] == 4
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_ERROR_COUNT] == 2
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1
-
-    @pytest.mark.parametrize(
-        "is_forecast,revision_number", [(True, None), (False, None), (True, 1), (False, 70)]
-    )
-    def test_process_funding_category_but_current_missing(
-        self, db_session, transform_oracle_data_task, is_forecast, revision_number
-    ):
-        opportunity_summary = f.OpportunitySummaryFactory.create(
-            is_forecast=is_forecast, revision_number=revision_number, no_link_values=True
-        )
-        delete_but_current_missing = setup_funding_category(
-            create_existing=False,
-            opportunity_summary=opportunity_summary,
-            legacy_lookup_value="00",
-            is_delete=True,
-        )
-
-        transform_oracle_data_task.process_link_funding_category(
-            delete_but_current_missing, None, opportunity_summary
-        )
-
-        validate_funding_category(db_session, delete_but_current_missing, expect_in_db=False)
-        assert delete_but_current_missing.transformed_at is not None
-        assert delete_but_current_missing.transformation_notes == "orphaned_delete_record"
-
-    @pytest.mark.parametrize(
-        "is_forecast,revision_number,legacy_lookup_value",
-        [(True, None, "ab"), (False, None, "cd"), (True, 5, "ef"), (False, 10, "Ag")],
-    )
-    def test_process_funding_category_but_invalid_lookup_value(
-        self,
-        db_session,
-        transform_oracle_data_task,
-        is_forecast,
-        revision_number,
-        legacy_lookup_value,
-    ):
-        opportunity_summary = f.OpportunitySummaryFactory.create(
-            is_forecast=is_forecast, revision_number=revision_number, no_link_values=True
-        )
-        insert_but_invalid_value = setup_funding_category(
-            create_existing=False,
-            opportunity_summary=opportunity_summary,
-            legacy_lookup_value=legacy_lookup_value,
-        )
-
-        with pytest.raises(ValueError, match="Unrecognized funding category"):
-            transform_oracle_data_task.process_link_funding_category(
-                insert_but_invalid_value, None, opportunity_summary
-            )
-
-    @pytest.mark.parametrize(
-        "factory_cls", [f.StagingTfundactcatForecastFactory, f.StagingTfundactcatSynopsisFactory]
-    )
-    def test_process_funding_category_but_no_opportunity_summary_non_hist(
-        self,
-        db_session,
-        transform_oracle_data_task,
-        factory_cls,
-    ):
-        source_record = factory_cls.create(orphaned_record=True)
-
-        with pytest.raises(
-            ValueError,
-            match="Funding category record cannot be processed as the opportunity summary for it does not exist",
-        ):
-            transform_oracle_data_task.process_link_funding_category(source_record, None, None)
-
-    @pytest.mark.parametrize(
-        "factory_cls",
-        [f.StagingTfundactcatForecastHistFactory, f.StagingTfundactcatSynopsisHistFactory],
-    )
-    def test_process_funding_category_but_no_opportunity_summary_hist(
-        self,
-        db_session,
-        transform_oracle_data_task,
-        factory_cls,
-    ):
-        source_record = factory_cls.create(orphaned_record=True, revision_number=12)
-        transform_oracle_data_task.process_link_funding_category(source_record, None, None)
-        assert source_record.transformed_at is not None
-        assert source_record.transformation_notes == "orphaned_historical_record"
+from tests.src.data_migration.transformation.conftest import (
+    get_summary_from_source,
+    setup_cfda,
+    setup_opportunity,
+    setup_synopsis_forecast,
+    validate_applicant_type,
+    validate_assistance_listing,
+    validate_funding_category,
+    validate_funding_instrument,
+    validate_opportunity,
+    validate_opportunity_summary,
+    validate_summary_and_nested,
+)
 
 
 class TestTransformFullRunTask(BaseTestClass):
@@ -2686,7 +516,6 @@ def test_delete_opportunity_with_deleted_children(self, db_session, transform_or
         db_session.expire_all()
 
         transform_oracle_data_task.run_task()
-        print(transform_oracle_data_task.metrics)
 
         # verify everything is not in the DB
         validate_opportunity(db_session, opportunity, expect_in_db=False)
diff --git a/api/tests/src/data_migration/transformation/test_transform_util.py b/api/tests/src/data_migration/transformation/test_transform_util.py
index 05edcdf3c..52c117aef 100644
--- a/api/tests/src/data_migration/transformation/test_transform_util.py
+++ b/api/tests/src/data_migration/transformation/test_transform_util.py
@@ -93,7 +93,7 @@ def test_convert_yn_boolean_unexpected_value(value):
 
 
 @pytest.mark.parametrize(
-    "value,expected_value", [("D", True), ("U", False), ("", None), (None, None)]
+    "value,expected_value", [("D", True), ("U", False), ("", False), (None, False)]
 )
 def test_convert_action_type_to_is_deleted(value, expected_value):
    assert transform_util.convert_action_type_to_is_deleted(value) == expected_value
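
Note for reviewers: the final hunk above tightens the contract of convert_action_type_to_is_deleted so that a missing or empty action type is treated as "not deleted" (False) rather than the earlier ambiguous None. A minimal sketch of the contract those four parametrize cases encode (hypothetical; the real helper lives in api/src/data_migration/transformation/transform_util.py and may handle unrecognized values differently):

    def convert_action_type_to_is_deleted(value: str | None) -> bool:
        # "D" (delete) is the only value that marks a record as deleted;
        # "U" (update), "", and None now all map to False instead of the
        # previous ""/None -> None behavior.
        return value == "D"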