diff --git a/api/src/data_migration/transformation/__init__.py b/api/src/data_migration/transformation/__init__.py
index c3c7751b1..e69de29bb 100644
--- a/api/src/data_migration/transformation/__init__.py
+++ b/api/src/data_migration/transformation/__init__.py
@@ -1,39 +0,0 @@
-from typing import TypeAlias
-
-from src.db.models.staging.forecast import (
-    TapplicanttypesForecast,
-    TapplicanttypesForecastHist,
-    Tforecast,
-    TforecastHist,
-    TfundactcatForecast,
-    TfundactcatForecastHist,
-    TfundinstrForecast,
-    TfundinstrForecastHist,
-)
-from src.db.models.staging.synopsis import (
-    TapplicanttypesSynopsis,
-    TapplicanttypesSynopsisHist,
-    TfundactcatSynopsis,
-    TfundactcatSynopsisHist,
-    TfundinstrSynopsis,
-    TfundinstrSynopsisHist,
-    Tsynopsis,
-    TsynopsisHist,
-)
-
-SourceSummary: TypeAlias = Tforecast | Tsynopsis | TforecastHist | TsynopsisHist
-
-SourceApplicantType: TypeAlias = (
-    TapplicanttypesForecast
-    | TapplicanttypesForecastHist
-    | TapplicanttypesSynopsis
-    | TapplicanttypesSynopsisHist
-)
-
-SourceFundingCategory: TypeAlias = (
-    TfundactcatForecast | TfundactcatForecastHist | TfundactcatSynopsis | TfundactcatSynopsisHist
-)
-
-SourceFundingInstrument: TypeAlias = (
-    TfundinstrForecastHist | TfundinstrForecast | TfundinstrSynopsisHist | TfundinstrSynopsis
-)
diff --git a/api/src/data_migration/transformation/subtask/__init__.py b/api/src/data_migration/transformation/subtask/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/api/src/data_migration/transformation/subtask/abstract_transform_subtask.py b/api/src/data_migration/transformation/subtask/abstract_transform_subtask.py
new file mode 100644
index 000000000..91c12bdee
--- /dev/null
+++ b/api/src/data_migration/transformation/subtask/abstract_transform_subtask.py
@@ -0,0 +1,180 @@
+import abc
+import logging
+from datetime import datetime
+from typing import Any, Sequence, Tuple, Type, cast
+
+from sqlalchemy import and_, select
+from sqlalchemy.orm import selectinload
+
+import src.data_migration.transformation.transform_constants as transform_constants
+from src.db.models.opportunity_models import Opportunity, OpportunitySummary
+from src.task.subtask import SubTask
+from src.task.task import Task
+
+logger = logging.getLogger(__name__)
+
+
+class AbstractTransformSubTask(SubTask):
+    def __init__(self, task: Task):
+        super().__init__(task)
+
+        # This is a bit of a hacky way of making sure the task passed into this method
+        # is the TransformOracleDataTask class. We could make this init function take in that
+        # type specifically, but we'd run into circular type dependencies which are complex to resolve
+        transform_time = getattr(task, "transform_time", None)
+        if transform_time is None:
+            raise Exception("Task passed into AbstractTransformSubTask must have a transform_time")
+
+        self.transform_time: datetime = transform_time
+
+    def run_subtask(self) -> None:
+        with self.db_session.begin():
+            self.transform_records()
+            logger.info(
+                "Finished running transformations for %s - committing results", self.cls_name()
+            )
+
+        # As a safety net, expire all references in the session
+        # after running. This avoids any potential complexities in
+        # cached data between separate subtasks running.
+        # By default sessions actually do this when committing, but
+        # our db session creation logic disables it, so we expire manually here.
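For context on the expire_all() call that follows: because the session factory disables expire-on-commit, ORM objects keep serving their cached attribute values after a commit, and a later subtask could read stale state. A minimal sketch of the behavior being guarded against, assuming a standard SQLAlchemy 2.0 setup (the engine URL and query are illustrative, not part of this PR):

from sqlalchemy import create_engine, select
from sqlalchemy.orm import Session

from src.db.models.opportunity_models import Opportunity

engine = create_engine("postgresql://localhost/example")  # illustrative URL

with Session(engine, expire_on_commit=False) as session:
    with session.begin():
        opportunity = session.scalars(select(Opportunity).limit(1)).first()

    # With expire_on_commit=False, `opportunity` still serves cached column
    # values here even though the transaction committed; expire_all() marks
    # every instance stale so the next attribute access reloads from the DB.
    session.expire_all()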
+        self.db_session.expire_all()
+
+    @abc.abstractmethod
+    def transform_records(self) -> None:
+        pass
+
+    def _handle_delete(
+        self,
+        source: transform_constants.S,
+        target: transform_constants.D | None,
+        record_type: str,
+        extra: dict,
+        error_on_missing_target: bool = False,
+    ) -> None:
+        # If the target we want to delete is None, we have nothing to delete
+        if target is None:
+            # In some scenarios we want to error when this happens
+            if error_on_missing_target:
+                raise ValueError("Cannot delete %s record as it does not exist" % record_type)
+
+            # In a lot of scenarios, we actually just want to log a message as it is expected to happen
+            # For example, if we are deleting an opportunity_summary record, and already deleted the opportunity,
+            # then SQLAlchemy would have deleted the opportunity_summary for us already. When we later go to delete
+            # it, we'd hit this case, which isn't a problem.
+            logger.info("Cannot delete %s record as it does not exist", record_type, extra=extra)
+            source.transformation_notes = transform_constants.ORPHANED_DELETE_RECORD
+            self.increment(
+                transform_constants.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED, prefix=record_type
+            )
+            return
+
+        logger.info("Deleting %s record", record_type, extra=extra)
+        self.increment(transform_constants.Metrics.TOTAL_RECORDS_DELETED, prefix=record_type)
+        self.db_session.delete(target)
+
+    def _is_orphaned_historical(
+        self,
+        parent_record: Opportunity | OpportunitySummary | None,
+        source_record: transform_constants.SourceAny,
+    ) -> bool:
+        return parent_record is None and source_record.is_historical_table
+
+    def _handle_orphaned_historical(
+        self, source_record: transform_constants.SourceAny, record_type: str, extra: dict
+    ) -> None:
+        logger.warning(
+            "Historical %s does not have a corresponding parent record - cannot import, but will mark as processed",
+            record_type,
+            extra=extra,
+        )
+        self.increment(
+            transform_constants.Metrics.TOTAL_HISTORICAL_ORPHANS_SKIPPED, prefix=record_type
+        )
+        source_record.transformation_notes = transform_constants.ORPHANED_HISTORICAL_RECORD
+
+    def fetch(
+        self,
+        source_model: Type[transform_constants.S],
+        destination_model: Type[transform_constants.D],
+        join_clause: Sequence,
+    ) -> list[Tuple[transform_constants.S, transform_constants.D | None]]:
+        # The real type is: Sequence[Row[Tuple[S, D | None]]]
+        # but MyPy is weird about this and the Row+Tuple causes some
+        # confusion in the parsing so it ends up assuming everything is Any
+        # So just cast it to a simpler type that doesn't confuse anything
+        return cast(
+            list[Tuple[transform_constants.S, transform_constants.D | None]],
+            self.db_session.execute(
+                select(source_model, destination_model)
+                .join(destination_model, and_(*join_clause), isouter=True)
+                .where(source_model.transformed_at.is_(None))
+                .execution_options(yield_per=5000)
+            ),
+        )
+
+    def fetch_with_opportunity(
+        self,
+        source_model: Type[transform_constants.S],
+        destination_model: Type[transform_constants.D],
+        join_clause: Sequence,
+    ) -> list[Tuple[transform_constants.S, transform_constants.D | None, Opportunity | None]]:
+        # Similar to the above fetch function, but also grabs an opportunity record
+        # Note that this requires your source_model to have an opportunity_id field defined.
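To make the generic fetch() contract above concrete, here is a hedged usage sketch: a subtask pairs each untransformed staging row with its destination row (or None) through the LEFT OUTER JOIN. The model names come from this PR; the subclass itself is hypothetical:

from src.data_migration.transformation.subtask.abstract_transform_subtask import (
    AbstractTransformSubTask,
)
from src.db.models.opportunity_models import Opportunity
from src.db.models.staging.opportunity import Topportunity


class ExampleOpportunitySubTask(AbstractTransformSubTask):
    def transform_records(self) -> None:
        # Each row pairs a staging record with its destination record, or None
        # when nothing has been imported yet (the outer join found no match).
        for source, target in self.fetch(
            Topportunity,
            Opportunity,
            [Topportunity.opportunity_id == Opportunity.opportunity_id],
        ):
            if target is None:
                ...  # insert path
            else:
                ...  # update path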
+
+        return cast(
+            list[Tuple[transform_constants.S, transform_constants.D | None, Opportunity | None]],
+            self.db_session.execute(
+                select(source_model, destination_model, Opportunity)
+                .join(destination_model, and_(*join_clause), isouter=True)
+                .join(
+                    Opportunity,
+                    source_model.opportunity_id == Opportunity.opportunity_id,  # type: ignore[attr-defined]
+                    isouter=True,
+                )
+                .where(source_model.transformed_at.is_(None))
+                .execution_options(yield_per=5000)
+            ),
+        )
+
+    def fetch_with_opportunity_summary(
+        self,
+        source_model: Type[transform_constants.S],
+        destination_model: Type[transform_constants.D],
+        join_clause: Sequence,
+        is_forecast: bool,
+        is_historical_table: bool,
+        relationship_load_value: Any,
+    ) -> list[
+        Tuple[transform_constants.S, transform_constants.D | None, OpportunitySummary | None]
+    ]:
+        # set up the join clause for getting the opportunity summary
+
+        opportunity_summary_join_clause = [
+            source_model.opportunity_id == OpportunitySummary.opportunity_id,  # type: ignore[attr-defined]
+            OpportunitySummary.is_forecast.is_(is_forecast),
+        ]
+
+        if is_historical_table:
+            opportunity_summary_join_clause.append(
+                source_model.revision_number == OpportunitySummary.revision_number  # type: ignore[attr-defined]
+            )
+        else:
+            opportunity_summary_join_clause.append(OpportunitySummary.revision_number.is_(None))
+
+        return cast(
+            list[
+                Tuple[
+                    transform_constants.S, transform_constants.D | None, OpportunitySummary | None
+                ]
+            ],
+            self.db_session.execute(
+                select(source_model, destination_model, OpportunitySummary)
+                .join(OpportunitySummary, and_(*opportunity_summary_join_clause), isouter=True)
+                .join(destination_model, and_(*join_clause), isouter=True)
+                .where(source_model.transformed_at.is_(None))
+                .options(selectinload(relationship_load_value))
+                .execution_options(yield_per=5000, populate_existing=True)
+            ),
+        )
diff --git a/api/src/data_migration/transformation/subtask/transform_applicant_type.py b/api/src/data_migration/transformation/subtask/transform_applicant_type.py
new file mode 100644
index 000000000..fb9bb5802
--- /dev/null
+++ b/api/src/data_migration/transformation/subtask/transform_applicant_type.py
@@ -0,0 +1,189 @@
+import logging
+from typing import Sequence, Tuple
+
+import src.data_migration.transformation.transform_constants as transform_constants
+import src.data_migration.transformation.transform_util as transform_util
+from src.data_migration.transformation.subtask.abstract_transform_subtask import (
+    AbstractTransformSubTask,
+)
+from src.db.models.opportunity_models import LinkOpportunitySummaryApplicantType, OpportunitySummary
+from src.db.models.staging.forecast import TapplicanttypesForecast, TapplicanttypesForecastHist
+from src.db.models.staging.synopsis import TapplicanttypesSynopsis, TapplicanttypesSynopsisHist
+
+logger = logging.getLogger(__name__)
+
+
+class TransformApplicantType(AbstractTransformSubTask):
+    def transform_records(self) -> None:
+        link_table = LinkOpportunitySummaryApplicantType
+        relationship_load_value = OpportunitySummary.link_applicant_types
+
+        logger.info("Processing forecast applicant types")
+        forecast_applicant_type_records = self.fetch_with_opportunity_summary(
+            TapplicanttypesForecast,
+            link_table,
+            [
+                TapplicanttypesForecast.at_frcst_id
+                == LinkOpportunitySummaryApplicantType.legacy_applicant_type_id,
+                OpportunitySummary.opportunity_summary_id
+                == LinkOpportunitySummaryApplicantType.opportunity_summary_id,
+            ],
+            is_forecast=True,
+            is_historical_table=False,
+            relationship_load_value=relationship_load_value,
+        )
+        self.process_link_applicant_types_group(forecast_applicant_type_records)
+
+        logger.info("Processing historical forecast applicant types")
+        forecast_applicant_type_hist_records = self.fetch_with_opportunity_summary(
+            TapplicanttypesForecastHist,
+            link_table,
+            [
+                TapplicanttypesForecastHist.at_frcst_id
+                == LinkOpportunitySummaryApplicantType.legacy_applicant_type_id,
+                OpportunitySummary.opportunity_summary_id
+                == LinkOpportunitySummaryApplicantType.opportunity_summary_id,
+            ],
+            is_forecast=True,
+            is_historical_table=True,
+            relationship_load_value=relationship_load_value,
+        )
+        self.process_link_applicant_types_group(forecast_applicant_type_hist_records)
+
+        logger.info("Processing synopsis applicant types")
+        synopsis_applicant_type_records = self.fetch_with_opportunity_summary(
+            TapplicanttypesSynopsis,
+            link_table,
+            [
+                TapplicanttypesSynopsis.at_syn_id
+                == LinkOpportunitySummaryApplicantType.legacy_applicant_type_id,
+                OpportunitySummary.opportunity_summary_id
+                == LinkOpportunitySummaryApplicantType.opportunity_summary_id,
+            ],
+            is_forecast=False,
+            is_historical_table=False,
+            relationship_load_value=relationship_load_value,
+        )
+        self.process_link_applicant_types_group(synopsis_applicant_type_records)
+
+        logger.info("Processing historical synopsis applicant types")
+        synopsis_applicant_type_hist_records = self.fetch_with_opportunity_summary(
+            TapplicanttypesSynopsisHist,
+            link_table,
+            [
+                TapplicanttypesSynopsisHist.at_syn_id
+                == LinkOpportunitySummaryApplicantType.legacy_applicant_type_id,
+                OpportunitySummary.opportunity_summary_id
+                == LinkOpportunitySummaryApplicantType.opportunity_summary_id,
+            ],
+            is_forecast=False,
+            is_historical_table=True,
+            relationship_load_value=relationship_load_value,
+        )
+        self.process_link_applicant_types_group(synopsis_applicant_type_hist_records)
+
+    def process_link_applicant_types_group(
+        self,
+        records: Sequence[
+            Tuple[
+                transform_constants.SourceApplicantType,
+                LinkOpportunitySummaryApplicantType | None,
+                OpportunitySummary | None,
+            ]
+        ],
+    ) -> None:
+        for source_applicant_type, target_applicant_type, opportunity_summary in records:
+            try:
+                self.process_link_applicant_type(
+                    source_applicant_type, target_applicant_type, opportunity_summary
+                )
+            except ValueError:
+                self.increment(
+                    transform_constants.Metrics.TOTAL_ERROR_COUNT,
+                    prefix=transform_constants.APPLICANT_TYPE,
+                )
+                logger.exception(
+                    "Failed to process opportunity summary applicant type",
+                    extra=transform_util.get_log_extra_applicant_type(source_applicant_type),
+                )
+
+    def process_link_applicant_type(
+        self,
+        source_applicant_type: transform_constants.SourceApplicantType,
+        target_applicant_type: LinkOpportunitySummaryApplicantType | None,
+        opportunity_summary: OpportunitySummary | None,
+    ) -> None:
+        self.increment(
+            transform_constants.Metrics.TOTAL_RECORDS_PROCESSED,
+            prefix=transform_constants.APPLICANT_TYPE,
+        )
+        extra = transform_util.get_log_extra_applicant_type(source_applicant_type)
+        logger.info("Processing applicant type", extra=extra)
+
+        if source_applicant_type.is_deleted:
+            self._handle_delete(
+                source_applicant_type,
+                target_applicant_type,
+                transform_constants.APPLICANT_TYPE,
+                extra,
+            )
+
+        # Historical records are linked to other historical records, however
+        # we don't import historical opportunity records, so if the opportunity
+        # was deleted, we won't have created the opportunity summary. Whenever we do
+        # support historical opportunities, we'll have these all marked with a
+        # flag that we can use to reprocess these.
+        elif self._is_orphaned_historical(opportunity_summary, source_applicant_type):
+            self._handle_orphaned_historical(
+                source_applicant_type, transform_constants.APPLICANT_TYPE, extra
+            )
+
+        elif opportunity_summary is None:
+            # This shouldn't be possible as the incoming data has foreign keys, but as a safety net
+            # we'll make sure the opportunity actually exists
+            raise ValueError(
+                "Applicant type record cannot be processed as the opportunity summary for it does not exist"
+            )
+        else:
+            # To avoid incrementing metrics for records we fail to transform, record
+            # here whether it's an insert/update and we'll increment after transforming
+            is_insert = target_applicant_type is None
+
+            logger.info("Transforming and upserting applicant type", extra=extra)
+            transformed_applicant_type = transform_util.convert_opportunity_summary_applicant_type(
+                source_applicant_type, target_applicant_type, opportunity_summary
+            )
+
+            # Before we insert, we still have to be certain we're not adding a duplicate record
+            # because the primary key of the legacy tables is the legacy ID + lookup value + opportunity ID
+            # it's possible for the same lookup value to appear multiple times because the legacy ID is different
+            # This would hit a conflict in our DB's primary key, so we need to verify that won't happen
+            if (
+                is_insert
+                and transformed_applicant_type.applicant_type in opportunity_summary.applicant_types
+            ):
+                self.increment(
+                    transform_constants.Metrics.TOTAL_DUPLICATE_RECORDS_SKIPPED,
+                    prefix=transform_constants.APPLICANT_TYPE,
+                )
+                logger.warning(
+                    "Skipping applicant type record",
+                    extra=extra | {"applicant_type": transformed_applicant_type.applicant_type},
+                )
+            elif is_insert:
+                self.increment(
+                    transform_constants.Metrics.TOTAL_RECORDS_INSERTED,
+                    prefix=transform_constants.APPLICANT_TYPE,
+                )
+                # We append to the relationship so SQLAlchemy immediately attaches it to its cached
+                # opportunity summary object so that the above check works when we receive dupes in the same batch
+                opportunity_summary.link_applicant_types.append(transformed_applicant_type)
+            else:
+                self.increment(
+                    transform_constants.Metrics.TOTAL_RECORDS_UPDATED,
+                    prefix=transform_constants.APPLICANT_TYPE,
+                )
+                self.db_session.merge(transformed_applicant_type)
+
+        logger.info("Processed applicant type", extra=extra)
+        source_applicant_type.transformed_at = self.transform_time
diff --git a/api/src/data_migration/transformation/subtask/transform_assistance_listing.py b/api/src/data_migration/transformation/subtask/transform_assistance_listing.py
new file mode 100644
index 000000000..c429c5146
--- /dev/null
+++ b/api/src/data_migration/transformation/subtask/transform_assistance_listing.py
@@ -0,0 +1,113 @@
+import logging
+from typing import Tuple
+
+import src.data_migration.transformation.transform_constants as transform_constants
+import src.data_migration.transformation.transform_util as transform_util
+from src.data_migration.transformation.subtask.abstract_transform_subtask import (
+    AbstractTransformSubTask,
+)
+from src.db.models.opportunity_models import Opportunity, OpportunityAssistanceListing
+from src.db.models.staging.opportunity import TopportunityCfda
+
+logger = logging.getLogger(__name__)
+
+
+class TransformAssistanceListing(AbstractTransformSubTask):
+    def transform_records(self) -> None:
+        assistance_listings: list[
+            Tuple[TopportunityCfda, OpportunityAssistanceListing | None, Opportunity | None]
+        ] = self.fetch_with_opportunity(
+            TopportunityCfda,
+            OpportunityAssistanceListing,
+            [
+                TopportunityCfda.opp_cfda_id
+                == OpportunityAssistanceListing.opportunity_assistance_listing_id
+            ],
+        )
+
+        for (
+            source_assistance_listing,
+            target_assistance_listing,
+            opportunity,
+        ) in assistance_listings:
+            try:
+                self.process_assistance_listing(
+                    source_assistance_listing, target_assistance_listing, opportunity
+                )
+            except ValueError:
+                self.increment(
+                    transform_constants.Metrics.TOTAL_ERROR_COUNT,
+                    prefix=transform_constants.ASSISTANCE_LISTING,
+                )
+                logger.exception(
+                    "Failed to process assistance listing",
+                    extra={
+                        "opportunity_assistance_listing_id": source_assistance_listing.opp_cfda_id
+                    },
+                )
+
+    def process_assistance_listing(
+        self,
+        source_assistance_listing: TopportunityCfda,
+        target_assistance_listing: OpportunityAssistanceListing | None,
+        opportunity: Opportunity | None,
+    ) -> None:
+        self.increment(
+            transform_constants.Metrics.TOTAL_RECORDS_PROCESSED,
+            prefix=transform_constants.ASSISTANCE_LISTING,
+        )
+        extra = {
+            "opportunity_assistance_listing_id": source_assistance_listing.opp_cfda_id,
+            "opportunity_id": source_assistance_listing.opportunity_id,
+        }
+        logger.info("Processing assistance listing", extra=extra)
+
+        if source_assistance_listing.is_deleted:
+            self._handle_delete(
+                source_assistance_listing,
+                target_assistance_listing,
+                transform_constants.ASSISTANCE_LISTING,
+                extra,
+            )
+
+        elif opportunity is None:
+            # The Oracle system we're importing these from does not have a foreign key between
+            # the opportunity ID in the TOPPORTUNITY_CFDA table and the TOPPORTUNITY table.
+            # There are many (2306 as of writing) orphaned CFDA records, created between 2007 and 2011
+            # We don't want to continuously process these, so won't error for these, and will just
+            # mark them as transformed below.
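The orphan volume described above is easy to sanity-check: count staging CFDA rows whose opportunity_id has no match in the staging opportunity table. A minimal sketch using the staging models from this PR (session setup elided):

from sqlalchemy import func, select

from src.db.models.staging.opportunity import Topportunity, TopportunityCfda

# Count CFDA staging rows that reference a nonexistent opportunity.
orphan_count_query = (
    select(func.count())
    .select_from(TopportunityCfda)
    .join(
        Topportunity,
        TopportunityCfda.opportunity_id == Topportunity.opportunity_id,
        isouter=True,
    )
    .where(Topportunity.opportunity_id.is_(None))
)
# orphan_count = db_session.scalar(orphan_count_query)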
+            self.increment(
+                transform_constants.Metrics.TOTAL_RECORDS_ORPHANED,
+                prefix=transform_constants.ASSISTANCE_LISTING,
+            )
+            logger.info(
+                "Assistance listing is orphaned and does not connect to any opportunity",
+                extra=extra,
+            )
+            source_assistance_listing.transformation_notes = transform_constants.ORPHANED_CFDA
+
+        else:
+            # To avoid incrementing metrics for records we fail to transform, record
+            # here whether it's an insert/update and we'll increment after transforming
+            is_insert = target_assistance_listing is None
+
+            logger.info("Transforming and upserting assistance listing", extra=extra)
+            transformed_assistance_listing = transform_util.transform_assistance_listing(
+                source_assistance_listing, target_assistance_listing
+            )
+
+            if is_insert:
+                self.increment(
+                    transform_constants.Metrics.TOTAL_RECORDS_INSERTED,
+                    prefix=transform_constants.ASSISTANCE_LISTING,
+                )
+                self.db_session.add(transformed_assistance_listing)
+            else:
+                self.increment(
+                    transform_constants.Metrics.TOTAL_RECORDS_UPDATED,
+                    prefix=transform_constants.ASSISTANCE_LISTING,
+                )
+                self.db_session.merge(transformed_assistance_listing)
+
+        logger.info("Processed assistance listing", extra=extra)
+        source_assistance_listing.transformed_at = self.transform_time
diff --git a/api/src/data_migration/transformation/subtask/transform_funding_category.py b/api/src/data_migration/transformation/subtask/transform_funding_category.py
new file mode 100644
index 000000000..70461cea2
--- /dev/null
+++ b/api/src/data_migration/transformation/subtask/transform_funding_category.py
@@ -0,0 +1,196 @@
+import logging
+from typing import Sequence, Tuple
+
+import src.data_migration.transformation.transform_constants as transform_constants
+import src.data_migration.transformation.transform_util as transform_util
+from src.data_migration.transformation.subtask.abstract_transform_subtask import (
+    AbstractTransformSubTask,
+)
+from src.db.models.opportunity_models import (
+    LinkOpportunitySummaryFundingCategory,
+    OpportunitySummary,
+)
+from src.db.models.staging.forecast import TfundactcatForecast, TfundactcatForecastHist
+from src.db.models.staging.synopsis import TfundactcatSynopsis, TfundactcatSynopsisHist
+
+logger = logging.getLogger(__name__)
+
+
+class TransformFundingCategory(AbstractTransformSubTask):
+    def transform_records(self) -> None:
+        link_table = LinkOpportunitySummaryFundingCategory
+        relationship_load_value = OpportunitySummary.link_funding_categories
+
+        logger.info("Processing forecast funding categories")
+        forecast_funding_category_records = self.fetch_with_opportunity_summary(
+            TfundactcatForecast,
+            link_table,
+            [
+                TfundactcatForecast.fac_frcst_id
+                == LinkOpportunitySummaryFundingCategory.legacy_funding_category_id,
+                OpportunitySummary.opportunity_summary_id
+                == LinkOpportunitySummaryFundingCategory.opportunity_summary_id,
+            ],
+            is_forecast=True,
+            is_historical_table=False,
+            relationship_load_value=relationship_load_value,
+        )
+        self.process_link_funding_categories_group(forecast_funding_category_records)
+
+        logger.info("Processing historical forecast funding categories")
+        forecast_funding_category_hist_records = self.fetch_with_opportunity_summary(
+            TfundactcatForecastHist,
+            link_table,
+            [
+                TfundactcatForecastHist.fac_frcst_id
+                == LinkOpportunitySummaryFundingCategory.legacy_funding_category_id,
+                OpportunitySummary.opportunity_summary_id
+                == LinkOpportunitySummaryFundingCategory.opportunity_summary_id,
+            ],
+            is_forecast=True,
+            is_historical_table=True,
+            relationship_load_value=relationship_load_value,
+        )
+        self.process_link_funding_categories_group(forecast_funding_category_hist_records)
+
+        logger.info("Processing synopsis funding categories")
+        synopsis_funding_category_records = self.fetch_with_opportunity_summary(
+            TfundactcatSynopsis,
+            link_table,
+            [
+                TfundactcatSynopsis.fac_syn_id
+                == LinkOpportunitySummaryFundingCategory.legacy_funding_category_id,
+                OpportunitySummary.opportunity_summary_id
+                == LinkOpportunitySummaryFundingCategory.opportunity_summary_id,
+            ],
+            is_forecast=False,
+            is_historical_table=False,
+            relationship_load_value=relationship_load_value,
+        )
+        self.process_link_funding_categories_group(synopsis_funding_category_records)
+
+        logger.info("Processing historical synopsis funding categories")
+        synopsis_funding_category_hist_records = self.fetch_with_opportunity_summary(
+            TfundactcatSynopsisHist,
+            link_table,
+            [
+                TfundactcatSynopsisHist.fac_syn_id
+                == LinkOpportunitySummaryFundingCategory.legacy_funding_category_id,
+                OpportunitySummary.opportunity_summary_id
+                == LinkOpportunitySummaryFundingCategory.opportunity_summary_id,
+            ],
+            is_forecast=False,
+            is_historical_table=True,
+            relationship_load_value=relationship_load_value,
+        )
+        self.process_link_funding_categories_group(synopsis_funding_category_hist_records)
+
+    def process_link_funding_categories_group(
+        self,
+        records: Sequence[
+            Tuple[
+                transform_constants.SourceFundingCategory,
+                LinkOpportunitySummaryFundingCategory | None,
+                OpportunitySummary | None,
+            ]
+        ],
+    ) -> None:
+        for source_funding_category, target_funding_category, opportunity_summary in records:
+            try:
+                self.process_link_funding_category(
+                    source_funding_category, target_funding_category, opportunity_summary
+                )
+            except ValueError:
+                self.increment(
+                    transform_constants.Metrics.TOTAL_ERROR_COUNT,
+                    prefix=transform_constants.FUNDING_CATEGORY,
+                )
+                logger.exception(
+                    "Failed to process opportunity summary funding category",
+                    extra=transform_util.get_log_extra_funding_category(source_funding_category),
+                )
+
+    def process_link_funding_category(
+        self,
+        source_funding_category: transform_constants.SourceFundingCategory,
+        target_funding_category: LinkOpportunitySummaryFundingCategory | None,
+        opportunity_summary: OpportunitySummary | None,
+    ) -> None:
+        self.increment(
+            transform_constants.Metrics.TOTAL_RECORDS_PROCESSED,
+            prefix=transform_constants.FUNDING_CATEGORY,
+        )
+        extra = transform_util.get_log_extra_funding_category(source_funding_category)
+        logger.info("Processing funding category", extra=extra)
+
+        if source_funding_category.is_deleted:
+            self._handle_delete(
+                source_funding_category,
+                target_funding_category,
+                transform_constants.FUNDING_CATEGORY,
+                extra,
+            )
+
+        # Historical records are linked to other historical records, however
+        # we don't import historical opportunity records, so if the opportunity
+        # was deleted, we won't have created the opportunity summary. Whenever we do
+        # support historical opportunities, we'll have these all marked with a
+        # flag that we can use to reprocess these.
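The transformation_notes flag is what makes the deferred reprocessing mentioned above practical: if historical opportunities are ever imported, the skipped rows can be re-queued simply by clearing transformed_at. A hypothetical maintenance sketch, not part of this PR (shown for one staging table; the same shape applies to the others):

from sqlalchemy import update

import src.data_migration.transformation.transform_constants as transform_constants
from src.db.models.staging.forecast import TfundactcatForecastHist

# Hypothetical: re-queue skipped historical orphans for the next transform run.
requeue_statement = (
    update(TfundactcatForecastHist)
    .where(
        TfundactcatForecastHist.transformation_notes
        == transform_constants.ORPHANED_HISTORICAL_RECORD
    )
    .values(transformed_at=None)
)
# db_session.execute(requeue_statement)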
+        elif self._is_orphaned_historical(opportunity_summary, source_funding_category):
+            self._handle_orphaned_historical(
+                source_funding_category, transform_constants.FUNDING_CATEGORY, extra
+            )
+
+        elif opportunity_summary is None:
+            # This shouldn't be possible as the incoming data has foreign keys, but as a safety net
+            # we'll make sure the opportunity actually exists
+            raise ValueError(
+                "Funding category record cannot be processed as the opportunity summary for it does not exist"
+            )
+        else:
+            # To avoid incrementing metrics for records we fail to transform, record
+            # here whether it's an insert/update and we'll increment after transforming
+            is_insert = target_funding_category is None
+
+            logger.info("Transforming and upserting funding category", extra=extra)
+            transformed_funding_category = (
+                transform_util.convert_opportunity_summary_funding_category(
+                    source_funding_category, target_funding_category, opportunity_summary
+                )
+            )
+
+            # Before we insert, we still have to be certain we're not adding a duplicate record
+            # because the primary key of the legacy tables is the legacy ID + lookup value + opportunity ID
+            # it's possible for the same lookup value to appear multiple times because the legacy ID is different
+            # This would hit a conflict in our DB's primary key, so we need to verify that won't happen
+            if (
+                is_insert
+                and transformed_funding_category.funding_category
+                in opportunity_summary.funding_categories
+            ):
+                self.increment(
+                    transform_constants.Metrics.TOTAL_DUPLICATE_RECORDS_SKIPPED,
+                    prefix=transform_constants.FUNDING_CATEGORY,
+                )
+                logger.warning(
+                    "Skipping funding category record",
+                    extra=extra
+                    | {"funding_category": transformed_funding_category.funding_category},
+                )
+            elif is_insert:
+                self.increment(
+                    transform_constants.Metrics.TOTAL_RECORDS_INSERTED,
+                    prefix=transform_constants.FUNDING_CATEGORY,
+                )
+                # We append to the relationship so SQLAlchemy immediately attaches it to its cached
+                # opportunity summary object so that the above check works when we receive dupes in the same batch
+                opportunity_summary.link_funding_categories.append(transformed_funding_category)
+            else:
+                self.increment(
+                    transform_constants.Metrics.TOTAL_RECORDS_UPDATED,
+                    prefix=transform_constants.FUNDING_CATEGORY,
+                )
+                self.db_session.merge(transformed_funding_category)
+
+        logger.info("Processed funding category", extra=extra)
+        source_funding_category.transformed_at = self.transform_time
diff --git a/api/src/data_migration/transformation/subtask/transform_funding_instrument.py b/api/src/data_migration/transformation/subtask/transform_funding_instrument.py
new file mode 100644
index 000000000..9c15161e1
--- /dev/null
+++ b/api/src/data_migration/transformation/subtask/transform_funding_instrument.py
@@ -0,0 +1,199 @@
+import logging
+from typing import Sequence, Tuple
+
+import src.data_migration.transformation.transform_constants as transform_constants
+import src.data_migration.transformation.transform_util as transform_util
+from src.data_migration.transformation.subtask.abstract_transform_subtask import (
+    AbstractTransformSubTask,
+)
+from src.db.models.opportunity_models import (
+    LinkOpportunitySummaryFundingInstrument,
+    OpportunitySummary,
+)
+from src.db.models.staging.forecast import TfundinstrForecast, TfundinstrForecastHist
+from src.db.models.staging.synopsis import TfundinstrSynopsis, TfundinstrSynopsisHist
+
+logger = logging.getLogger(__name__)
+
+
+class TransformFundingInstrument(AbstractTransformSubTask):
+    def transform_records(self) -> None:
+        link_table = LinkOpportunitySummaryFundingInstrument
+        relationship_load_value = OpportunitySummary.link_funding_instruments
+
+        logger.info("Processing forecast funding instruments")
+        forecast_funding_instrument_records = self.fetch_with_opportunity_summary(
+            TfundinstrForecast,
+            link_table,
+            [
+                TfundinstrForecast.fi_frcst_id
+                == LinkOpportunitySummaryFundingInstrument.legacy_funding_instrument_id,
+                OpportunitySummary.opportunity_summary_id
+                == LinkOpportunitySummaryFundingInstrument.opportunity_summary_id,
+            ],
+            is_forecast=True,
+            is_historical_table=False,
+            relationship_load_value=relationship_load_value,
+        )
+        self.process_link_funding_instruments_group(forecast_funding_instrument_records)
+
+        logger.info("Processing historical forecast funding instruments")
+        forecast_funding_instrument_hist_records = self.fetch_with_opportunity_summary(
+            TfundinstrForecastHist,
+            link_table,
+            [
+                TfundinstrForecastHist.fi_frcst_id
+                == LinkOpportunitySummaryFundingInstrument.legacy_funding_instrument_id,
+                OpportunitySummary.opportunity_summary_id
+                == LinkOpportunitySummaryFundingInstrument.opportunity_summary_id,
+            ],
+            is_forecast=True,
+            is_historical_table=True,
+            relationship_load_value=relationship_load_value,
+        )
+        self.process_link_funding_instruments_group(forecast_funding_instrument_hist_records)
+
+        logger.info("Processing synopsis funding instruments")
+        synopsis_funding_instrument_records = self.fetch_with_opportunity_summary(
+            TfundinstrSynopsis,
+            link_table,
+            [
+                TfundinstrSynopsis.fi_syn_id
+                == LinkOpportunitySummaryFundingInstrument.legacy_funding_instrument_id,
+                OpportunitySummary.opportunity_summary_id
+                == LinkOpportunitySummaryFundingInstrument.opportunity_summary_id,
+            ],
+            is_forecast=False,
+            is_historical_table=False,
+            relationship_load_value=relationship_load_value,
+        )
+        self.process_link_funding_instruments_group(synopsis_funding_instrument_records)
+
+        logger.info("Processing historical synopsis funding instruments")
+        synopsis_funding_instrument_hist_records = self.fetch_with_opportunity_summary(
+            TfundinstrSynopsisHist,
+            link_table,
+            [
+                TfundinstrSynopsisHist.fi_syn_id
+                == LinkOpportunitySummaryFundingInstrument.legacy_funding_instrument_id,
+                OpportunitySummary.opportunity_summary_id
+                == LinkOpportunitySummaryFundingInstrument.opportunity_summary_id,
+            ],
+            is_forecast=False,
+            is_historical_table=True,
+            relationship_load_value=relationship_load_value,
+        )
+        self.process_link_funding_instruments_group(synopsis_funding_instrument_hist_records)
+
+    def process_link_funding_instruments_group(
+        self,
+        records: Sequence[
+            Tuple[
+                transform_constants.SourceFundingInstrument,
+                LinkOpportunitySummaryFundingInstrument | None,
+                OpportunitySummary | None,
+            ]
+        ],
+    ) -> None:
+        for source_funding_instrument, target_funding_instrument, opportunity_summary in records:
+            try:
+                self.process_link_funding_instrument(
+                    source_funding_instrument, target_funding_instrument, opportunity_summary
+                )
+            except ValueError:
+                self.increment(
+                    transform_constants.Metrics.TOTAL_ERROR_COUNT,
+                    prefix=transform_constants.FUNDING_INSTRUMENT,
+                )
+                logger.exception(
+                    "Failed to process opportunity summary funding instrument",
+                    extra=transform_util.get_log_extra_funding_instrument(
+                        source_funding_instrument
+                    ),
+                )
+
+    def process_link_funding_instrument(
+        self,
+        source_funding_instrument: transform_constants.SourceFundingInstrument,
+        target_funding_instrument: LinkOpportunitySummaryFundingInstrument | None,
+        opportunity_summary: OpportunitySummary | None,
+    ) -> None:
+        self.increment(
+            transform_constants.Metrics.TOTAL_RECORDS_PROCESSED,
+            prefix=transform_constants.FUNDING_INSTRUMENT,
+        )
+        extra = transform_util.get_log_extra_funding_instrument(source_funding_instrument)
+        logger.info("Processing funding instrument", extra=extra)
+
+        if source_funding_instrument.is_deleted:
+            self._handle_delete(
+                source_funding_instrument,
+                target_funding_instrument,
+                transform_constants.FUNDING_INSTRUMENT,
+                extra,
+            )
+
+        # Historical records are linked to other historical records, however
+        # we don't import historical opportunity records, so if the opportunity
+        # was deleted, we won't have created the opportunity summary. Whenever we do
+        # support historical opportunities, we'll have these all marked with a
+        # flag that we can use to reprocess these.
+        elif self._is_orphaned_historical(opportunity_summary, source_funding_instrument):
+            self._handle_orphaned_historical(
+                source_funding_instrument, transform_constants.FUNDING_INSTRUMENT, extra
+            )
+
+        elif opportunity_summary is None:
+            # This shouldn't be possible as the incoming data has foreign keys, but as a safety net
+            # we'll make sure the opportunity actually exists
+            raise ValueError(
+                "Funding instrument record cannot be processed as the opportunity summary for it does not exist"
+            )
+
+        else:
+            # To avoid incrementing metrics for records we fail to transform, record
+            # here whether it's an insert/update and we'll increment after transforming
+            is_insert = target_funding_instrument is None
+
+            logger.info("Transforming and upserting funding instrument", extra=extra)
+            transformed_funding_instrument = (
+                transform_util.convert_opportunity_summary_funding_instrument(
+                    source_funding_instrument, target_funding_instrument, opportunity_summary
+                )
+            )
+
+            # Before we insert, we still have to be certain we're not adding a duplicate record
+            # because the primary key of the legacy tables is the legacy ID + lookup value + opportunity ID
+            # it's possible for the same lookup value to appear multiple times because the legacy ID is different
+            # This would hit a conflict in our DB's primary key, so we need to verify that won't happen
+            if (
+                is_insert
+                and transformed_funding_instrument.funding_instrument
+                in opportunity_summary.funding_instruments
+            ):
+                self.increment(
+                    transform_constants.Metrics.TOTAL_DUPLICATE_RECORDS_SKIPPED,
+                    prefix=transform_constants.FUNDING_INSTRUMENT,
+                )
+                logger.warning(
+                    "Skipping funding instrument record",
+                    extra=extra
+                    | {"funding_instrument": transformed_funding_instrument.funding_instrument},
+                )
+            elif is_insert:
+                self.increment(
+                    transform_constants.Metrics.TOTAL_RECORDS_INSERTED,
+                    prefix=transform_constants.FUNDING_INSTRUMENT,
+                )
+                # We append to the relationship so SQLAlchemy immediately attaches it to its cached
+                # opportunity summary object so that the above check works when we receive dupes in the same batch
+                opportunity_summary.link_funding_instruments.append(transformed_funding_instrument)
+            else:
+                self.increment(
+                    transform_constants.Metrics.TOTAL_RECORDS_UPDATED,
+                    prefix=transform_constants.FUNDING_INSTRUMENT,
+                )
+                self.db_session.merge(transformed_funding_instrument)
+
+        logger.info("Processed funding instrument", extra=extra)
+        source_funding_instrument.transformed_at = self.transform_time
diff --git a/api/src/data_migration/transformation/subtask/transform_opportunity.py b/api/src/data_migration/transformation/subtask/transform_opportunity.py
new file mode 100644
index 000000000..4a354b542
--- /dev/null
+++ b/api/src/data_migration/transformation/subtask/transform_opportunity.py
@@ -0,0 +1,81 @@
+import logging
+from typing import Tuple
+
+import src.data_migration.transformation.transform_constants as transform_constants
+import src.data_migration.transformation.transform_util as transform_util
+from src.data_migration.transformation.subtask.abstract_transform_subtask import (
+    AbstractTransformSubTask,
+)
+from src.db.models.opportunity_models import Opportunity
+from src.db.models.staging.opportunity import Topportunity
+
+logger = logging.getLogger(__name__)
+
+
+class TransformOpportunity(AbstractTransformSubTask):
+    def transform_records(self) -> None:
+        # Fetch all opportunities that were modified
+        # Alongside that, grab the existing opportunity record
+        opportunities: list[Tuple[Topportunity, Opportunity | None]] = self.fetch(
+            Topportunity,
+            Opportunity,
+            [Topportunity.opportunity_id == Opportunity.opportunity_id],
+        )
+
+        for source_opportunity, target_opportunity in opportunities:
+            try:
+                self.process_opportunity(source_opportunity, target_opportunity)
+            except ValueError:
+                self.increment(
+                    transform_constants.Metrics.TOTAL_ERROR_COUNT,
+                    prefix=transform_constants.OPPORTUNITY,
+                )
+                logger.exception(
+                    "Failed to process opportunity",
+                    extra={"opportunity_id": source_opportunity.opportunity_id},
+                )
+
+    def process_opportunity(
+        self, source_opportunity: Topportunity, target_opportunity: Opportunity | None
+    ) -> None:
+        self.increment(
+            transform_constants.Metrics.TOTAL_RECORDS_PROCESSED,
+            prefix=transform_constants.OPPORTUNITY,
+        )
+        extra = {"opportunity_id": source_opportunity.opportunity_id}
+        logger.info("Processing opportunity", extra=extra)
+
+        if source_opportunity.is_deleted:
+            self._handle_delete(
+                source_opportunity,
+                target_opportunity,
+                transform_constants.OPPORTUNITY,
+                extra,
+                error_on_missing_target=True,
+            )
+
+        else:
+            # To avoid incrementing metrics for records we fail to transform, record
+            # here whether it's an insert/update and we'll increment after transforming
+            is_insert = target_opportunity is None
+
+            logger.info("Transforming and upserting opportunity", extra=extra)
+            transformed_opportunity = transform_util.transform_opportunity(
+                source_opportunity, target_opportunity
+            )
+
+            if is_insert:
+                self.increment(
+                    transform_constants.Metrics.TOTAL_RECORDS_INSERTED,
+                    prefix=transform_constants.OPPORTUNITY,
+                )
+                self.db_session.add(transformed_opportunity)
+            else:
+                self.increment(
+                    transform_constants.Metrics.TOTAL_RECORDS_UPDATED,
+                    prefix=transform_constants.OPPORTUNITY,
+                )
+                self.db_session.merge(transformed_opportunity)
+
+        logger.info("Processed opportunity", extra=extra)
+        source_opportunity.transformed_at = self.transform_time
diff --git a/api/src/data_migration/transformation/subtask/transform_opportunity_summary.py b/api/src/data_migration/transformation/subtask/transform_opportunity_summary.py
new file mode 100644
index 000000000..d6385c15b
--- /dev/null
+++ b/api/src/data_migration/transformation/subtask/transform_opportunity_summary.py
@@ -0,0 +1,145 @@
+import logging
+from typing import Sequence, Tuple
+
+import src.data_migration.transformation.transform_constants as transform_constants
+import src.data_migration.transformation.transform_util as transform_util
+from src.data_migration.transformation.subtask.abstract_transform_subtask import (
+    AbstractTransformSubTask,
+)
+from src.db.models.opportunity_models import Opportunity, OpportunitySummary
+from src.db.models.staging.forecast import Tforecast, TforecastHist
+from src.db.models.staging.synopsis import Tsynopsis, TsynopsisHist
+
+logger = logging.getLogger(__name__)
+
+
+class TransformOpportunitySummary(AbstractTransformSubTask):
+    def transform_records(self) -> None:
+        logger.info("Processing opportunity summaries")
+        logger.info("Processing synopsis records")
+        synopsis_records = self.fetch_with_opportunity(
+            Tsynopsis,
+            OpportunitySummary,
+            [
+                Tsynopsis.opportunity_id == OpportunitySummary.opportunity_id,
+                OpportunitySummary.is_forecast.is_(False),
+                OpportunitySummary.revision_number.is_(None),
+            ],
+        )
+        self.process_opportunity_summary_group(synopsis_records)
+
+        logger.info("Processing synopsis hist records")
+        synopsis_hist_records = self.fetch_with_opportunity(
+            TsynopsisHist,
+            OpportunitySummary,
+            [
+                TsynopsisHist.opportunity_id == OpportunitySummary.opportunity_id,
+                TsynopsisHist.revision_number == OpportunitySummary.revision_number,
+                OpportunitySummary.is_forecast.is_(False),
+            ],
+        )
+        self.process_opportunity_summary_group(synopsis_hist_records)
+
+        logger.info("Processing forecast records")
+        forecast_records = self.fetch_with_opportunity(
+            Tforecast,
+            OpportunitySummary,
+            [
+                Tforecast.opportunity_id == OpportunitySummary.opportunity_id,
+                OpportunitySummary.is_forecast.is_(True),
+                OpportunitySummary.revision_number.is_(None),
+            ],
+        )
+        self.process_opportunity_summary_group(forecast_records)
+
+        logger.info("Processing forecast hist records")
+        forecast_hist_records = self.fetch_with_opportunity(
+            TforecastHist,
+            OpportunitySummary,
+            [
+                TforecastHist.opportunity_id == OpportunitySummary.opportunity_id,
+                TforecastHist.revision_number == OpportunitySummary.revision_number,
+                OpportunitySummary.is_forecast.is_(True),
+            ],
+        )
+        self.process_opportunity_summary_group(forecast_hist_records)
+
+    def process_opportunity_summary_group(
+        self,
+        records: Sequence[
+            Tuple[transform_constants.SourceSummary, OpportunitySummary | None, Opportunity | None]
+        ],
+    ) -> None:
+        for source_summary, target_summary, opportunity in records:
+            try:
+                self.process_opportunity_summary(source_summary, target_summary, opportunity)
+            except ValueError:
+                self.increment(
+                    transform_constants.Metrics.TOTAL_ERROR_COUNT,
+                    prefix=transform_constants.OPPORTUNITY_SUMMARY,
+                )
+                logger.exception(
+                    "Failed to process opportunity summary",
+                    extra=transform_util.get_log_extra_summary(source_summary),
+                )
+
+    def process_opportunity_summary(
+        self,
+        source_summary: transform_constants.SourceSummary,
+        target_summary: OpportunitySummary | None,
+        opportunity: Opportunity | None,
+    ) -> None:
+        self.increment(
+            transform_constants.Metrics.TOTAL_RECORDS_PROCESSED,
+            prefix=transform_constants.OPPORTUNITY_SUMMARY,
+        )
+        extra = transform_util.get_log_extra_summary(source_summary)
+        logger.info("Processing opportunity summary", extra=extra)
+
+        if source_summary.is_deleted:
+            self._handle_delete(
+                source_summary, target_summary, transform_constants.OPPORTUNITY_SUMMARY, extra
+            )
+
+        # Historical records are linked to other historical records, however
+        # we don't import historical opportunity records, so if the opportunity
+        # was deleted, we don't have anything to link these to. Whenever we do
+        # support historical opportunities, we'll have these all marked with a
+        # flag that we can use to reprocess these.
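All four fetches above target the same opportunity_summary table; which staging table a row came from is encoded entirely by the (is_forecast, revision_number) pair, with non-historical rows carrying a NULL revision_number. A small illustrative helper spelling out that mapping (not part of this PR):

def describe_summary_variant(is_forecast: bool, revision_number: int | None) -> str:
    # Tsynopsis     -> is_forecast=False, revision_number is None
    # TsynopsisHist -> is_forecast=False, revision_number set
    # Tforecast     -> is_forecast=True,  revision_number is None
    # TforecastHist -> is_forecast=True,  revision_number set
    kind = "forecast" if is_forecast else "synopsis"
    if revision_number is None:
        return kind
    return f"historical {kind} (revision {revision_number})"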
+        elif self._is_orphaned_historical(opportunity, source_summary):
+            self._handle_orphaned_historical(
+                source_summary, transform_constants.OPPORTUNITY_SUMMARY, extra
+            )
+
+        elif opportunity is None:
+            # This shouldn't be possible as the incoming data has foreign keys, but as a safety net
+            # we'll make sure the opportunity actually exists
+            raise ValueError(
+                "Opportunity summary cannot be processed as the opportunity for it does not exist"
+            )
+
+        else:
+            # To avoid incrementing metrics for records we fail to transform, record
+            # here whether it's an insert/update and we'll increment after transforming
+            is_insert = target_summary is None
+
+            logger.info("Transforming and upserting opportunity summary", extra=extra)
+            transformed_opportunity_summary = transform_util.transform_opportunity_summary(
+                source_summary, target_summary
+            )
+
+            if is_insert:
+                self.increment(
+                    transform_constants.Metrics.TOTAL_RECORDS_INSERTED,
+                    prefix=transform_constants.OPPORTUNITY_SUMMARY,
+                )
+                self.db_session.add(transformed_opportunity_summary)
+            else:
+                self.increment(
+                    transform_constants.Metrics.TOTAL_RECORDS_UPDATED,
+                    prefix=transform_constants.OPPORTUNITY_SUMMARY,
+                )
+                self.db_session.merge(transformed_opportunity_summary)
+
+        logger.info("Processed opportunity summary", extra=extra)
+        source_summary.transformed_at = self.transform_time
diff --git a/api/src/data_migration/transformation/transform_constants.py b/api/src/data_migration/transformation/transform_constants.py
new file mode 100644
index 000000000..9d50e2069
--- /dev/null
+++ b/api/src/data_migration/transformation/transform_constants.py
@@ -0,0 +1,75 @@
+from enum import StrEnum
+from typing import TypeAlias, TypeVar
+
+from src.db.models.base import ApiSchemaTable
+from src.db.models.staging.forecast import (
+    TapplicanttypesForecast,
+    TapplicanttypesForecastHist,
+    Tforecast,
+    TforecastHist,
+    TfundactcatForecast,
+    TfundactcatForecastHist,
+    TfundinstrForecast,
+    TfundinstrForecastHist,
+)
+from src.db.models.staging.staging_base import StagingParamMixin
+from src.db.models.staging.synopsis import (
+    TapplicanttypesSynopsis,
+    TapplicanttypesSynopsisHist,
+    TfundactcatSynopsis,
+    TfundactcatSynopsisHist,
+    TfundinstrSynopsis,
+    TfundinstrSynopsisHist,
+    Tsynopsis,
+    TsynopsisHist,
+)
+
+ORPHANED_CFDA = "orphaned_cfda"
+ORPHANED_HISTORICAL_RECORD = "orphaned_historical_record"
+ORPHANED_DELETE_RECORD = "orphaned_delete_record"
+
+OPPORTUNITY = "opportunity"
+ASSISTANCE_LISTING = "assistance_listing"
+OPPORTUNITY_SUMMARY = "opportunity_summary"
+APPLICANT_TYPE = "applicant_type"
+FUNDING_CATEGORY = "funding_category"
+FUNDING_INSTRUMENT = "funding_instrument"
+
+
+class Metrics(StrEnum):
+    TOTAL_RECORDS_PROCESSED = "total_records_processed"
+    TOTAL_RECORDS_DELETED = "total_records_deleted"
+    TOTAL_RECORDS_INSERTED = "total_records_inserted"
+    TOTAL_RECORDS_UPDATED = "total_records_updated"
+    TOTAL_RECORDS_ORPHANED = "total_records_orphaned"
+    TOTAL_DUPLICATE_RECORDS_SKIPPED = "total_duplicate_records_skipped"
+    TOTAL_HISTORICAL_ORPHANS_SKIPPED = "total_historical_orphans_skipped"
+    TOTAL_DELETE_ORPHANS_SKIPPED = "total_delete_orphans_skipped"
+
+    TOTAL_ERROR_COUNT = "total_error_count"
+
+
+S = TypeVar("S", bound=StagingParamMixin)
+D = TypeVar("D", bound=ApiSchemaTable)
+
+
+SourceSummary: TypeAlias = Tforecast | Tsynopsis | TforecastHist | TsynopsisHist
+
+SourceApplicantType: TypeAlias = (
+    TapplicanttypesForecast
+    | TapplicanttypesForecastHist
+    | TapplicanttypesSynopsis
+    | TapplicanttypesSynopsisHist
+)
+
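The S and D TypeVars above are what let the generic fetch helpers stay type-checked: S is constrained to staging models and D to destination tables, so mypy tracks concrete (source, destination | None) pair types at each call site. A hedged sketch of the same pattern in isolation (the helper function is illustrative):

from typing import Tuple, TypeVar

from src.db.models.base import ApiSchemaTable
from src.db.models.staging.staging_base import StagingParamMixin

S = TypeVar("S", bound=StagingParamMixin)
D = TypeVar("D", bound=ApiSchemaTable)


def split_inserts_and_updates(
    pairs: list[Tuple[S, D | None]],
) -> tuple[list[S], list[Tuple[S, D]]]:
    # Illustrative: because S and D are bounded TypeVars, callers get
    # precisely-typed insert/update buckets for their concrete models.
    inserts = [source for source, target in pairs if target is None]
    updates = [(source, target) for source, target in pairs if target is not None]
    return inserts, updates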
+SourceFundingCategory: TypeAlias = (
+    TfundactcatForecast | TfundactcatForecastHist | TfundactcatSynopsis | TfundactcatSynopsisHist
+)
+
+SourceFundingInstrument: TypeAlias = (
+    TfundinstrForecastHist | TfundinstrForecast | TfundinstrSynopsisHist | TfundinstrSynopsis
+)
+
+SourceAny: TypeAlias = (
+    SourceSummary | SourceApplicantType | SourceFundingCategory | SourceFundingInstrument
+)
diff --git a/api/src/data_migration/transformation/transform_oracle_data_task.py b/api/src/data_migration/transformation/transform_oracle_data_task.py
index 4018f0767..ed5f33a3c 100644
--- a/api/src/data_migration/transformation/transform_oracle_data_task.py
+++ b/api/src/data_migration/transformation/transform_oracle_data_task.py
@@ -1,931 +1,78 @@
 import logging
 from datetime import datetime
-from enum import StrEnum
-from typing import Any, Sequence, Tuple, Type, TypeVar, cast
 
-from sqlalchemy import and_, select
-from sqlalchemy.orm import selectinload
+from pydantic_settings import SettingsConfigDict
 
+import src.data_migration.transformation.transform_constants as transform_constants
 from src.adapters import db
-from src.data_migration.transformation import transform_util
-from src.db.models.base import ApiSchemaTable
-from src.db.models.opportunity_models import (
-    LinkOpportunitySummaryApplicantType,
-    LinkOpportunitySummaryFundingCategory,
-    LinkOpportunitySummaryFundingInstrument,
-    Opportunity,
-    OpportunityAssistanceListing,
-    OpportunitySummary,
+from src.data_migration.transformation.subtask.transform_applicant_type import (
+    TransformApplicantType,
 )
-from src.db.models.staging.forecast import (
-    TapplicanttypesForecast,
-    TapplicanttypesForecastHist,
-    Tforecast,
-    TforecastHist,
-    TfundactcatForecast,
-    TfundactcatForecastHist,
-    TfundinstrForecast,
-    TfundinstrForecastHist,
+from src.data_migration.transformation.subtask.transform_assistance_listing import (
+    TransformAssistanceListing,
 )
-from src.db.models.staging.opportunity import Topportunity, TopportunityCfda
-from src.db.models.staging.staging_base import StagingParamMixin
-from src.db.models.staging.synopsis import (
-    TapplicanttypesSynopsis,
-    TapplicanttypesSynopsisHist,
-    TfundactcatSynopsis,
-    TfundactcatSynopsisHist,
-    TfundinstrSynopsis,
-    TfundinstrSynopsisHist,
-    Tsynopsis,
-    TsynopsisHist,
+from src.data_migration.transformation.subtask.transform_funding_category import (
+    TransformFundingCategory,
+)
+from src.data_migration.transformation.subtask.transform_funding_instrument import (
+    TransformFundingInstrument,
+)
+from src.data_migration.transformation.subtask.transform_opportunity import TransformOpportunity
+from src.data_migration.transformation.subtask.transform_opportunity_summary import (
+    TransformOpportunitySummary,
 )
 from src.task.task import Task
 from src.util import datetime_util
-
-from . import SourceApplicantType, SourceFundingCategory, SourceFundingInstrument, SourceSummary
-
-S = TypeVar("S", bound=StagingParamMixin)
-D = TypeVar("D", bound=ApiSchemaTable)
+from src.util.env_config import PydanticBaseEnvConfig
 
 logger = logging.getLogger(__name__)
 
-### Constants
-ORPHANED_CFDA = "orphaned_cfda"
-ORPHANED_HISTORICAL_RECORD = "orphaned_historical_record"
-ORPHANED_DELETE_RECORD = "orphaned_delete_record"
 
-OPPORTUNITY = "opportunity"
-ASSISTANCE_LISTING = "assistance_listing"
-OPPORTUNITY_SUMMARY = "opportunity_summary"
-APPLICANT_TYPE = "applicant_type"
-FUNDING_CATEGORY = "funding_category"
-FUNDING_INSTRUMENT = "funding_instrument"
+class TransformOracleDataTaskConfig(PydanticBaseEnvConfig):
+    model_config = SettingsConfigDict(env_prefix="TRANSFORM_ORACLE_DATA_")
 
+    enable_opportunity: bool = True  # TRANSFORM_ORACLE_DATA_ENABLE_OPPORTUNITY
+    enable_assistance_listing: bool = True  # TRANSFORM_ORACLE_DATA_ENABLE_ASSISTANCE_LISTING
+    enable_opportunity_summary: bool = True  # TRANSFORM_ORACLE_DATA_ENABLE_OPPORTUNITY_SUMMARY
+    enable_applicant_type: bool = True  # TRANSFORM_ORACLE_DATA_ENABLE_APPLICANT_TYPE
+    enable_funding_category: bool = True  # TRANSFORM_ORACLE_DATA_ENABLE_FUNDING_CATEGORY
+    enable_funding_instrument: bool = True  # TRANSFORM_ORACLE_DATA_ENABLE_FUNDING_INSTRUMENT
 
-class TransformOracleDataTask(Task):
-    class Metrics(StrEnum):
-        TOTAL_RECORDS_PROCESSED = "total_records_processed"
-        TOTAL_RECORDS_DELETED = "total_records_deleted"
-        TOTAL_RECORDS_INSERTED = "total_records_inserted"
-        TOTAL_RECORDS_UPDATED = "total_records_updated"
-        TOTAL_RECORDS_ORPHANED = "total_records_orphaned"
-        TOTAL_DUPLICATE_RECORDS_SKIPPED = "total_duplicate_records_skipped"
-        TOTAL_HISTORICAL_ORPHANS_SKIPPED = "total_historical_orphans_skipped"
-        TOTAL_DELETE_ORPHANS_SKIPPED = "total_delete_orphans_skipped"
 
-        TOTAL_ERROR_COUNT = "total_error_count"
+class TransformOracleDataTask(Task):
+    Metrics = transform_constants.Metrics
 
-    def __init__(self, db_session: db.Session, transform_time: datetime | None = None) -> None:
+    def __init__(
+        self,
+        db_session: db.Session,
+        transform_time: datetime | None = None,
+        transform_config: TransformOracleDataTaskConfig | None = None,
+    ) -> None:
         super().__init__(db_session)
 
         if transform_time is None:
             transform_time = datetime_util.utcnow()
         self.transform_time = transform_time
 
-    def run_task(self) -> None:
-        with self.db_session.begin():
-            # Opportunities
-            self.process_opportunities()
-
-            # Assistance Listings
-            self.process_assistance_listings()
-
-            # Opportunity Summary
-            self.process_opportunity_summaries()
-
-            # One-to-many lookups
-            self.process_link_applicant_types()
-            self.process_link_funding_categories()
-            self.process_link_funding_instruments()
-
-    def _handle_delete(
-        self,
-        source: S,
-        target: D | None,
-        record_type: str,
-        extra: dict,
-        error_on_missing_target: bool = False,
-    ) -> None:
-        # If the target we want to delete is None, we have nothing to delete
-        if target is None:
-            # In some scenarios we want to error when this happens
-            if error_on_missing_target:
-                raise ValueError("Cannot delete %s record as it does not exist" % record_type)
-
-            # In a lot of scenarios, we actually just want to log a message as it is expected to happen
-            # For example, if we are deleting an opportunity_summary record, and already deleted the opportunity,
-            # then SQLAlchemy would have deleted the opportunity_summary for us already. When we later go to delete
-            # it, we'd hit this case, which isn't a problem.
-            logger.info("Cannot delete %s record as it does not exist", record_type, extra=extra)
-            source.transformation_notes = ORPHANED_DELETE_RECORD
-            self.increment(self.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED, prefix=record_type)
-            return
-
-        logger.info("Deleting %s record", record_type, extra=extra)
-        self.increment(self.Metrics.TOTAL_RECORDS_DELETED, prefix=record_type)
-        self.db_session.delete(target)
-
-    def fetch(
-        self, source_model: Type[S], destination_model: Type[D], join_clause: Sequence
-    ) -> list[Tuple[S, D | None]]:
-        # The real type is: Sequence[Row[Tuple[S, D | None]]]
-        # but MyPy is weird about this and the Row+Tuple causes some
-        # confusion in the parsing so it ends up assuming everything is Any
-        # So just cast it to a simpler type that doesn't confuse anything
-        return cast(
-            list[Tuple[S, D | None]],
-            self.db_session.execute(
-                select(source_model, destination_model)
-                .join(destination_model, and_(*join_clause), isouter=True)
-                .where(source_model.transformed_at.is_(None))
-                .execution_options(yield_per=5000)
-            ),
-        )
-
-    def fetch_with_opportunity(
-        self, source_model: Type[S], destination_model: Type[D], join_clause: Sequence
-    ) -> list[Tuple[S, D | None, Opportunity | None]]:
-        # Similar to the above fetch function, but also grabs an opportunity record
-        # Note that this requires your source_model to have an opportunity_id field defined.
-
-        return cast(
-            list[Tuple[S, D | None, Opportunity | None]],
-            self.db_session.execute(
-                select(source_model, destination_model, Opportunity)
-                .join(destination_model, and_(*join_clause), isouter=True)
-                .join(
-                    Opportunity,
-                    source_model.opportunity_id == Opportunity.opportunity_id,  # type: ignore[attr-defined]
-                    isouter=True,
-                )
-                .where(source_model.transformed_at.is_(None))
-                .execution_options(yield_per=5000)
-            ),
-        )
-
-    def fetch_with_opportunity_summary(
-        self,
-        source_model: Type[S],
-        destination_model: Type[D],
-        join_clause: Sequence,
-        is_forecast: bool,
-        is_historical_table: bool,
-        relationship_load_value: Any,
-    ) -> list[Tuple[S, D | None, OpportunitySummary | None]]:
-        # setup the join clause for getting the opportunity summary
-
-        opportunity_summary_join_clause = [
-            source_model.opportunity_id == OpportunitySummary.opportunity_id,  # type: ignore[attr-defined]
-            OpportunitySummary.is_forecast.is_(is_forecast),
-        ]
-
-        if is_historical_table:
-            opportunity_summary_join_clause.append(
-                source_model.revision_number == OpportunitySummary.revision_number  # type: ignore[attr-defined]
-            )
-        else:
-            opportunity_summary_join_clause.append(OpportunitySummary.revision_number.is_(None))
-
-        return cast(
-            list[Tuple[S, D | None, OpportunitySummary | None]],
-            self.db_session.execute(
-                select(source_model, destination_model, OpportunitySummary)
-                .join(OpportunitySummary, and_(*opportunity_summary_join_clause), isouter=True)
-                .join(destination_model, and_(*join_clause), isouter=True)
-                .where(source_model.transformed_at.is_(None))
-                .options(selectinload(relationship_load_value))
-                .execution_options(yield_per=5000, populate_existing=True)
-            ),
-        )
-
-    def process_opportunities(self) -> None:
-        # Fetch all opportunities that were modified
-        # Alongside that, grab the existing opportunity record
-        opportunities: list[Tuple[Topportunity, Opportunity | None]] = self.fetch(
-            Topportunity,
-            Opportunity,
-            [Topportunity.opportunity_id == Opportunity.opportunity_id],
-        )
-
-        for source_opportunity, target_opportunity in opportunities:
-            try:
-                self.process_opportunity(source_opportunity, target_opportunity)
-            except ValueError:
-                self.increment(self.Metrics.TOTAL_ERROR_COUNT, prefix=OPPORTUNITY)
-                logger.exception(
-                    "Failed to process opportunity",
-                    extra={"opportunity_id": source_opportunity.opportunity_id},
-                )
-
-    def process_opportunity(
-        self, source_opportunity: Topportunity, target_opportunity: Opportunity | None
-    ) -> None:
-        self.increment(self.Metrics.TOTAL_RECORDS_PROCESSED, prefix=OPPORTUNITY)
-        extra = {"opportunity_id": source_opportunity.opportunity_id}
-        logger.info("Processing opportunity", extra=extra)
-
-        if source_opportunity.is_deleted:
-            self._handle_delete(
-                source_opportunity,
-                target_opportunity,
-                OPPORTUNITY,
-                extra,
-                error_on_missing_target=True,
-            )
-
-        else:
-            # To avoid incrementing metrics for records we fail to transform, record
-            # here whether it's an insert/update and we'll increment after transforming
-            is_insert = target_opportunity is None
-
-            logger.info("Transforming and upserting opportunity", extra=extra)
-            transformed_opportunity = transform_util.transform_opportunity(
-                source_opportunity, target_opportunity
-            )
-
-            if is_insert:
-                self.increment(self.Metrics.TOTAL_RECORDS_INSERTED, prefix=OPPORTUNITY)
-                self.db_session.add(transformed_opportunity)
-            else:
-                self.increment(self.Metrics.TOTAL_RECORDS_UPDATED, prefix=OPPORTUNITY)
-                self.db_session.merge(transformed_opportunity)
-
-        logger.info("Processed opportunity", extra=extra)
-        source_opportunity.transformed_at = self.transform_time
-
-    def process_assistance_listings(self) -> None:
-        assistance_listings: list[
-            Tuple[TopportunityCfda, OpportunityAssistanceListing | None, Opportunity | None]
-        ] = self.fetch_with_opportunity(
-            TopportunityCfda,
-            OpportunityAssistanceListing,
-            [
-                TopportunityCfda.opp_cfda_id
-                == OpportunityAssistanceListing.opportunity_assistance_listing_id
-            ],
-        )
-
-        for (
-            source_assistance_listing,
-            target_assistance_listing,
-            opportunity,
-        ) in assistance_listings:
-            try:
-                self.process_assistance_listing(
-                    source_assistance_listing, target_assistance_listing, opportunity
-                )
-            except ValueError:
-                self.increment(self.Metrics.TOTAL_ERROR_COUNT, prefix=ASSISTANCE_LISTING)
-                logger.exception(
-                    "Failed to process assistance listing",
-                    extra={
-                        "opportunity_assistance_listing_id": source_assistance_listing.opp_cfda_id
-                    },
-                )
-
-    def process_assistance_listing(
-        self,
-        source_assistance_listing: TopportunityCfda,
-        target_assistance_listing: OpportunityAssistanceListing | None,
-        opportunity: Opportunity | None,
-    ) -> None:
-        self.increment(self.Metrics.TOTAL_RECORDS_PROCESSED, prefix=ASSISTANCE_LISTING)
-        extra = {
-            "opportunity_assistance_listing_id": source_assistance_listing.opp_cfda_id,
-            "opportunity_id": source_assistance_listing.opportunity_id,
-        }
-        logger.info("Processing assistance listing", extra=extra)
-
-        if source_assistance_listing.is_deleted:
-            self._handle_delete(
-                source_assistance_listing, target_assistance_listing, ASSISTANCE_LISTING, extra
-            )
-
-        elif opportunity is None:
-            # The Oracle system we're importing these from does not have a foreign key between
-            # the opportunity ID in the TOPPORTUNITY_CFDA table and the TOPPORTUNITY table.
-            # There are many (2306 as of writing) orphaned CFDA records, created between 2007 and 2011
-            # We don't want to continuously process these, so won't error for these, and will just
-            # mark them as transformed below.
-            self.increment(self.Metrics.TOTAL_RECORDS_ORPHANED, prefix=ASSISTANCE_LISTING)
-            logger.info(
-                "Assistance listing is orphaned and does not connect to any opportunity",
-                extra=extra,
-            )
-            source_assistance_listing.transformation_notes = ORPHANED_CFDA
-
-        else:
-            # To avoid incrementing metrics for records we fail to transform, record
-            # here whether it's an insert/update and we'll increment after transforming
-            is_insert = target_assistance_listing is None
-
-            logger.info("Transforming and upserting assistance listing", extra=extra)
-            transformed_assistance_listing = transform_util.transform_assistance_listing(
-                source_assistance_listing, target_assistance_listing
-            )
-
-            if is_insert:
-                self.increment(self.Metrics.TOTAL_RECORDS_INSERTED, prefix=ASSISTANCE_LISTING)
-                self.db_session.add(transformed_assistance_listing)
-            else:
-                self.increment(self.Metrics.TOTAL_RECORDS_UPDATED, prefix=ASSISTANCE_LISTING)
-                self.db_session.merge(transformed_assistance_listing)
-
-        logger.info("Processed assistance listing", extra=extra)
-        source_assistance_listing.transformed_at = self.transform_time
-
-    def process_opportunity_summaries(self) -> None:
-        logger.info("Processing opportunity summaries")
-        logger.info("Processing synopsis records")
-        synopsis_records = self.fetch_with_opportunity(
-            Tsynopsis,
-            OpportunitySummary,
-            [
-                Tsynopsis.opportunity_id == OpportunitySummary.opportunity_id,
-                OpportunitySummary.is_forecast.is_(False),
-                OpportunitySummary.revision_number.is_(None),
-            ],
-        )
-        self.process_opportunity_summary_group(synopsis_records)
-
-        logger.info("Processing synopsis hist records")
-        synopsis_hist_records = self.fetch_with_opportunity(
-            TsynopsisHist,
-            OpportunitySummary,
-            [
-                TsynopsisHist.opportunity_id == OpportunitySummary.opportunity_id,
-                TsynopsisHist.revision_number == OpportunitySummary.revision_number,
-                OpportunitySummary.is_forecast.is_(False),
-            ],
-        )
-        self.process_opportunity_summary_group(synopsis_hist_records)
-
-        logger.info("Processing forecast records")
-        forecast_records = self.fetch_with_opportunity(
-            Tforecast,
-            OpportunitySummary,
-            [
-                Tforecast.opportunity_id == OpportunitySummary.opportunity_id,
-                OpportunitySummary.is_forecast.is_(True),
-                OpportunitySummary.revision_number.is_(None),
-            ],
-        )
-        self.process_opportunity_summary_group(forecast_records)
-
-        logger.info("Processing forecast hist records")
-        forecast_hist_records = self.fetch_with_opportunity(
-            TforecastHist,
-            OpportunitySummary,
-            [
-                TforecastHist.opportunity_id == OpportunitySummary.opportunity_id,
-                TforecastHist.revision_number == OpportunitySummary.revision_number,
-                OpportunitySummary.is_forecast.is_(True),
-            ],
-        )
-        self.process_opportunity_summary_group(forecast_hist_records)
-
-    def process_opportunity_summary_group(
-        self, records: Sequence[Tuple[SourceSummary, OpportunitySummary | None, Opportunity | None]]
-    ) -> None:
-        for source_summary, target_summary, opportunity in records:
-            try:
-                self.process_opportunity_summary(source_summary, target_summary, opportunity)
-            except ValueError:
-                self.increment(self.Metrics.TOTAL_ERROR_COUNT, prefix=OPPORTUNITY_SUMMARY)
-                logger.exception(
-                    "Failed to process opportunity summary",
-                    extra=transform_util.get_log_extra_summary(source_summary),
-                )
-
-    def process_opportunity_summary(
-        self,
-        source_summary: SourceSummary,
-        target_summary: OpportunitySummary | None,
-        opportunity: Opportunity | None,
-    ) -> None:
-        self.increment(self.Metrics.TOTAL_RECORDS_PROCESSED, prefix=OPPORTUNITY_SUMMARY)
-        extra = transform_util.get_log_extra_summary(source_summary)
-        logger.info("Processing opportunity summary", extra=extra)
-
-        if source_summary.is_deleted:
-            self._handle_delete(source_summary, target_summary, OPPORTUNITY_SUMMARY, extra)
-
-        # Historical records are linked to other historical records, however
-        # we don't import historical opportunity records, so if the opportunity
-        # was deleted, we don't have anything to link these to. Whenever we do
-        # support historical opportunities, we'll have these all marked with a
-        # flag that we can use to reprocess these.
-        elif opportunity is None and source_summary.is_historical_table:
-            logger.warning(
-                "Historical opportunity summary does not have a corresponding opportunity - cannot import, but will mark as processed",
-                extra=extra,
-            )
-            self.increment(
-                self.Metrics.TOTAL_HISTORICAL_ORPHANS_SKIPPED, prefix=OPPORTUNITY_SUMMARY
-            )
-            source_summary.transformation_notes = ORPHANED_HISTORICAL_RECORD
-
-        elif opportunity is None:
-            # This shouldn't be possible as the incoming data has foreign keys, but as a safety net
-            # we'll make sure the opportunity actually exists
-            raise ValueError(
-                "Opportunity summary cannot be processed as the opportunity for it does not exist"
-            )
-
-        else:
-            # To avoid incrementing metrics for records we fail to transform, record
-            # here whether it's an insert/update and we'll increment after transforming
-            is_insert = target_summary is None
-
-            logger.info("Transforming and upserting opportunity summary", extra=extra)
-            transformed_opportunity_summary = transform_util.transform_opportunity_summary(
-                source_summary, target_summary
-            )
-
-            if is_insert:
-                self.increment(self.Metrics.TOTAL_RECORDS_INSERTED, prefix=OPPORTUNITY_SUMMARY)
-                self.db_session.add(transformed_opportunity_summary)
-            else:
-                self.increment(self.Metrics.TOTAL_RECORDS_UPDATED, prefix=OPPORTUNITY_SUMMARY)
-                self.db_session.merge(transformed_opportunity_summary)
-
-        logger.info("Processed opportunity summary", extra=extra)
-        source_summary.transformed_at = self.transform_time
-
-    def process_link_applicant_types(self) -> None:
-        link_table = LinkOpportunitySummaryApplicantType
-        relationship_load_value = OpportunitySummary.link_applicant_types
-
-        forecast_applicant_type_records = self.fetch_with_opportunity_summary(
-            TapplicanttypesForecast,
-            link_table,
-            [
-                TapplicanttypesForecast.at_frcst_id
-                == LinkOpportunitySummaryApplicantType.legacy_applicant_type_id,
-                OpportunitySummary.opportunity_summary_id
-                == LinkOpportunitySummaryApplicantType.opportunity_summary_id,
-            ],
-            is_forecast=True,
-            is_historical_table=False,
-            relationship_load_value=relationship_load_value,
-        )
-        self.process_link_applicant_types_group(forecast_applicant_type_records)
-
-        forecast_applicant_type_hist_records = self.fetch_with_opportunity_summary(
-            TapplicanttypesForecastHist,
-            link_table,
-            [
-                TapplicanttypesForecastHist.at_frcst_id
-                == LinkOpportunitySummaryApplicantType.legacy_applicant_type_id,
-                OpportunitySummary.opportunity_summary_id
-                == LinkOpportunitySummaryApplicantType.opportunity_summary_id,
-            ],
-            is_forecast=True,
-            is_historical_table=True,
-            relationship_load_value=relationship_load_value,
-        )
-        self.process_link_applicant_types_group(forecast_applicant_type_hist_records)
-
-        synopsis_applicant_type_records = self.fetch_with_opportunity_summary(
-            TapplicanttypesSynopsis,
-            link_table,
-            [
-                TapplicanttypesSynopsis.at_syn_id
-                == LinkOpportunitySummaryApplicantType.legacy_applicant_type_id,
-                OpportunitySummary.opportunity_summary_id
-                ==
LinkOpportunitySummaryApplicantType.opportunity_summary_id, - ], - is_forecast=False, - is_historical_table=False, - relationship_load_value=relationship_load_value, - ) - self.process_link_applicant_types_group(synopsis_applicant_type_records) - - synopsis_applicant_type_hist_records = self.fetch_with_opportunity_summary( - TapplicanttypesSynopsisHist, - link_table, - [ - TapplicanttypesSynopsisHist.at_syn_id - == LinkOpportunitySummaryApplicantType.legacy_applicant_type_id, - OpportunitySummary.opportunity_summary_id - == LinkOpportunitySummaryApplicantType.opportunity_summary_id, - ], - is_forecast=False, - is_historical_table=True, - relationship_load_value=relationship_load_value, - ) - self.process_link_applicant_types_group(synopsis_applicant_type_hist_records) - - def process_link_applicant_types_group( - self, - records: Sequence[ - Tuple[ - SourceApplicantType, - LinkOpportunitySummaryApplicantType | None, - OpportunitySummary | None, - ] - ], - ) -> None: - for source_applicant_type, target_applicant_type, opportunity_summary in records: - try: - self.process_link_applicant_type( - source_applicant_type, target_applicant_type, opportunity_summary - ) - except ValueError: - self.increment(self.Metrics.TOTAL_ERROR_COUNT, prefix=APPLICANT_TYPE) - logger.exception( - "Failed to process opportunity summary applicant type", - extra=transform_util.get_log_extra_applicant_type(source_applicant_type), - ) - - def process_link_applicant_type( - self, - source_applicant_type: SourceApplicantType, - target_applicant_type: LinkOpportunitySummaryApplicantType | None, - opportunity_summary: OpportunitySummary | None, - ) -> None: - self.increment(self.Metrics.TOTAL_RECORDS_PROCESSED, prefix=APPLICANT_TYPE) - extra = transform_util.get_log_extra_applicant_type(source_applicant_type) - logger.info("Processing applicant type", extra=extra) - - if source_applicant_type.is_deleted: - self._handle_delete(source_applicant_type, target_applicant_type, APPLICANT_TYPE, extra) - - # Historical records are linked to other historical records, however - # we don't import historical opportunity records, so if the opportunity - # was deleted, we won't have created the opportunity summary. Whenever we do - # support historical opportunities, we'll have these all marked with a - # flag that we can use to reprocess these. 
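# [Editorial sketch - not part of this patch] One way the "flag" mentioned in
# the comment above could be used later: clear transformed_at on every row
# carrying the orphan marker so the regular fetch picks them up again. The
# update() construct is standard SQLAlchemy; the wrapper function and marker
# argument are illustrative assumptions, not code from this PR.
from sqlalchemy import update


def requeue_orphaned_historical(db_session, staging_model, orphan_marker: str) -> None:
    db_session.execute(
        update(staging_model)
        .where(staging_model.transformation_notes == orphan_marker)
        .values(transformed_at=None)  # row becomes visible to transformed_at.is_(None) again
    )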
-        elif opportunity_summary is None and source_applicant_type.is_historical_table:
-            logger.warning(
-                "Historical applicant type does not have a corresponding opportunity summary - cannot import, but will mark as processed",
-                extra=extra,
-            )
-            self.increment(self.Metrics.TOTAL_HISTORICAL_ORPHANS_SKIPPED, prefix=APPLICANT_TYPE)
-            source_applicant_type.transformation_notes = ORPHANED_HISTORICAL_RECORD
-
-        elif opportunity_summary is None:
-            # This shouldn't be possible as the incoming data has foreign keys, but as a safety net
-            # we'll make sure the opportunity actually exists
-            raise ValueError(
-                "Applicant type record cannot be processed as the opportunity summary for it does not exist"
-            )
-        else:
-            # To avoid incrementing metrics for records we fail to transform, record
-            # here whether it's an insert/update and we'll increment after transforming
-            is_insert = target_applicant_type is None
-
-            logger.info("Transforming and upserting applicant type", extra=extra)
-            transformed_applicant_type = transform_util.convert_opportunity_summary_applicant_type(
-                source_applicant_type, target_applicant_type, opportunity_summary
-            )
-
-            # Before we insert, we still have to be certain we're not adding a duplicate record:
-            # because the primary key of the legacy tables is the legacy ID + lookup value + opportunity ID,
-            # it's possible for the same lookup value to appear multiple times because the legacy ID is different.
-            # This would hit a conflict in our DB's primary key, so we need to verify that won't happen
-            if (
-                is_insert
-                and transformed_applicant_type.applicant_type in opportunity_summary.applicant_types
-            ):
-                self.increment(self.Metrics.TOTAL_DUPLICATE_RECORDS_SKIPPED, prefix=APPLICANT_TYPE)
-                logger.warning(
-                    "Skipping applicant type record",
-                    extra=extra | {"applicant_type": transformed_applicant_type.applicant_type},
-                )
-            elif is_insert:
-                self.increment(self.Metrics.TOTAL_RECORDS_INSERTED, prefix=APPLICANT_TYPE)
-                # We append to the relationship so SQLAlchemy immediately attaches it to its cached
-                # opportunity summary object so that the above check works when we receive dupes in the same batch
-                opportunity_summary.link_applicant_types.append(transformed_applicant_type)
-            else:
-                self.increment(self.Metrics.TOTAL_RECORDS_UPDATED, prefix=APPLICANT_TYPE)
-                self.db_session.merge(transformed_applicant_type)
-
-            logger.info("Processed applicant type", extra=extra)
-            source_applicant_type.transformed_at = self.transform_time
-
-    def process_link_funding_categories(self) -> None:
-        link_table = LinkOpportunitySummaryFundingCategory
-        relationship_load_value = OpportunitySummary.link_funding_categories
-
-        forecast_funding_category_records = self.fetch_with_opportunity_summary(
-            TfundactcatForecast,
-            link_table,
-            [
-                TfundactcatForecast.fac_frcst_id
-                == LinkOpportunitySummaryFundingCategory.legacy_funding_category_id,
-                OpportunitySummary.opportunity_summary_id
-                == LinkOpportunitySummaryFundingCategory.opportunity_summary_id,
-            ],
-            is_forecast=True,
-            is_historical_table=False,
-            relationship_load_value=relationship_load_value,
-        )
-        self.process_link_funding_categories_group(forecast_funding_category_records)
-
-        forecast_funding_category_hist_records = self.fetch_with_opportunity_summary(
-            TfundactcatForecastHist,
-            link_table,
-            [
-                TfundactcatForecastHist.fac_frcst_id
-                == LinkOpportunitySummaryFundingCategory.legacy_funding_category_id,
-                OpportunitySummary.opportunity_summary_id
-                == LinkOpportunitySummaryFundingCategory.opportunity_summary_id,
-            ],
-            is_forecast=True,
-            is_historical_table=True,
-            relationship_load_value=relationship_load_value,
-        )
-        self.process_link_funding_categories_group(forecast_funding_category_hist_records)
+        if transform_config is None:
+            transform_config = TransformOracleDataTaskConfig()
+        self.transform_config = transform_config
-        synopsis_funding_category_records = self.fetch_with_opportunity_summary(
-            TfundactcatSynopsis,
-            link_table,
-            [
-                TfundactcatSynopsis.fac_syn_id
-                == LinkOpportunitySummaryFundingCategory.legacy_funding_category_id,
-                OpportunitySummary.opportunity_summary_id
-                == LinkOpportunitySummaryFundingCategory.opportunity_summary_id,
-            ],
-            is_forecast=False,
-            is_historical_table=False,
-            relationship_load_value=relationship_load_value,
-        )
-        self.process_link_funding_categories_group(synopsis_funding_category_records)
-
-        synopsis_funding_category_hist_records = self.fetch_with_opportunity_summary(
-            TfundactcatSynopsisHist,
-            link_table,
-            [
-                TfundactcatSynopsisHist.fac_syn_id
-                == LinkOpportunitySummaryFundingCategory.legacy_funding_category_id,
-                OpportunitySummary.opportunity_summary_id
-                == LinkOpportunitySummaryFundingCategory.opportunity_summary_id,
-            ],
-            is_forecast=False,
-            is_historical_table=True,
-            relationship_load_value=relationship_load_value,
-        )
-        self.process_link_funding_categories_group(synopsis_funding_category_hist_records)
-
-    def process_link_funding_categories_group(
-        self,
-        records: Sequence[
-            Tuple[
-                SourceFundingCategory,
-                LinkOpportunitySummaryFundingCategory | None,
-                OpportunitySummary | None,
-            ]
-        ],
-    ) -> None:
-        for source_funding_category, target_funding_category, opportunity_summary in records:
-            try:
-                self.process_link_funding_category(
-                    source_funding_category, target_funding_category, opportunity_summary
-                )
-            except ValueError:
-                self.increment(self.Metrics.TOTAL_ERROR_COUNT, prefix=FUNDING_CATEGORY)
-                logger.exception(
-                    "Failed to process opportunity summary funding category",
-                    extra=transform_util.get_log_extra_funding_category(source_funding_category),
-                )
-
-    def process_link_funding_category(
-        self,
-        source_funding_category: SourceFundingCategory,
-        target_funding_category: LinkOpportunitySummaryFundingCategory | None,
-        opportunity_summary: OpportunitySummary | None,
-    ) -> None:
-        self.increment(self.Metrics.TOTAL_RECORDS_PROCESSED, prefix=FUNDING_CATEGORY)
-        extra = transform_util.get_log_extra_funding_category(source_funding_category)
-        logger.info("Processing funding category", extra=extra)
-
-        if source_funding_category.is_deleted:
-            self._handle_delete(
-                source_funding_category, target_funding_category, FUNDING_CATEGORY, extra
-            )
-
-        # Historical records are linked to other historical records, however
-        # we don't import historical opportunity records, so if the opportunity
-        # was deleted, we won't have created the opportunity summary. Whenever we do
-        # support historical opportunities, we'll have these all marked with a
-        # flag that we can use to reprocess these.
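# [Editorial sketch - not part of this patch] The insert/update split used by
# every process_* method in this task: is_insert is captured before
# transforming so the inserted/updated metrics only count records that
# transformed successfully, then new rows are add()-ed and existing ones
# merge()-d by primary key. The helper below is an illustration, not this PR's code.
def upsert(db_session, transformed_record, is_insert: bool) -> None:
    if is_insert:
        db_session.add(transformed_record)  # brand-new row
    else:
        db_session.merge(transformed_record)  # reconcile with the existing row by PK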
-        elif opportunity_summary is None and source_funding_category.is_historical_table:
-            logger.warning(
-                "Historical funding category does not have a corresponding opportunity summary - cannot import, but will mark as processed",
-                extra=extra,
-            )
-            self.increment(self.Metrics.TOTAL_HISTORICAL_ORPHANS_SKIPPED, prefix=FUNDING_CATEGORY)
-            source_funding_category.transformation_notes = ORPHANED_HISTORICAL_RECORD
-
-        elif opportunity_summary is None:
-            # This shouldn't be possible as the incoming data has foreign keys, but as a safety net
-            # we'll make sure the opportunity actually exists
-            raise ValueError(
-                "Funding category record cannot be processed as the opportunity summary for it does not exist"
-            )
-        else:
-            # To avoid incrementing metrics for records we fail to transform, record
-            # here whether it's an insert/update and we'll increment after transforming
-            is_insert = target_funding_category is None
-
-            logger.info("Transforming and upserting funding category", extra=extra)
-            transformed_funding_category = (
-                transform_util.convert_opportunity_summary_funding_category(
-                    source_funding_category, target_funding_category, opportunity_summary
-                )
-            )
-
-            # Before we insert, we still have to be certain we're not adding a duplicate record:
-            # because the primary key of the legacy tables is the legacy ID + lookup value + opportunity ID,
-            # it's possible for the same lookup value to appear multiple times because the legacy ID is different.
-            # This would hit a conflict in our DB's primary key, so we need to verify that won't happen
-            if (
-                is_insert
-                and transformed_funding_category.funding_category
-                in opportunity_summary.funding_categories
-            ):
-                self.increment(
-                    self.Metrics.TOTAL_DUPLICATE_RECORDS_SKIPPED, prefix=FUNDING_CATEGORY
-                )
-                logger.warning(
-                    "Skipping funding category record",
-                    extra=extra
-                    | {"funding_category": transformed_funding_category.funding_category},
-                )
-            elif is_insert:
-                self.increment(self.Metrics.TOTAL_RECORDS_INSERTED, prefix=FUNDING_CATEGORY)
-                # We append to the relationship so SQLAlchemy immediately attaches it to its cached
-                # opportunity summary object so that the above check works when we receive dupes in the same batch
-                opportunity_summary.link_funding_categories.append(transformed_funding_category)
-            else:
-                self.increment(self.Metrics.TOTAL_RECORDS_UPDATED, prefix=FUNDING_CATEGORY)
-                self.db_session.merge(transformed_funding_category)
-
-            logger.info("Processed funding category", extra=extra)
-            source_funding_category.transformed_at = self.transform_time
-
-    def process_link_funding_instruments(self) -> None:
-        link_table = LinkOpportunitySummaryFundingInstrument
-        relationship_load_value = OpportunitySummary.link_funding_instruments
-
-        forecast_funding_instrument_records = self.fetch_with_opportunity_summary(
-            TfundinstrForecast,
-            link_table,
-            [
-                TfundinstrForecast.fi_frcst_id
-                == LinkOpportunitySummaryFundingInstrument.legacy_funding_instrument_id,
-                OpportunitySummary.opportunity_summary_id
-                == LinkOpportunitySummaryFundingInstrument.opportunity_summary_id,
-            ],
-            is_forecast=True,
-            is_historical_table=False,
-            relationship_load_value=relationship_load_value,
-        )
-        self.process_link_funding_instruments_group(forecast_funding_instrument_records)
-
-        forecast_funding_instrument_hist_records = self.fetch_with_opportunity_summary(
-            TfundinstrForecastHist,
-            link_table,
-            [
-                TfundinstrForecastHist.fi_frcst_id
-                == LinkOpportunitySummaryFundingInstrument.legacy_funding_instrument_id,
-                OpportunitySummary.opportunity_summary_id
-                == LinkOpportunitySummaryFundingInstrument.opportunity_summary_id,
-            ],
-            is_forecast=True,
-            is_historical_table=True,
-            relationship_load_value=relationship_load_value,
-        )
-        self.process_link_funding_instruments_group(forecast_funding_instrument_hist_records)
-
-        synopsis_funding_instrument_records = self.fetch_with_opportunity_summary(
-            TfundinstrSynopsis,
-            link_table,
-            [
-                TfundinstrSynopsis.fi_syn_id
-                == LinkOpportunitySummaryFundingInstrument.legacy_funding_instrument_id,
-                OpportunitySummary.opportunity_summary_id
-                == LinkOpportunitySummaryFundingInstrument.opportunity_summary_id,
-            ],
-            is_forecast=False,
-            is_historical_table=False,
-            relationship_load_value=relationship_load_value,
-        )
-        self.process_link_funding_instruments_group(synopsis_funding_instrument_records)
-
-        synopsis_funding_instrument_hist_records = self.fetch_with_opportunity_summary(
-            TfundinstrSynopsisHist,
-            link_table,
-            [
-                TfundinstrSynopsisHist.fi_syn_id
-                == LinkOpportunitySummaryFundingInstrument.legacy_funding_instrument_id,
-                OpportunitySummary.opportunity_summary_id
-                == LinkOpportunitySummaryFundingInstrument.opportunity_summary_id,
-            ],
-            is_forecast=False,
-            is_historical_table=True,
-            relationship_load_value=relationship_load_value,
-        )
-        self.process_link_funding_instruments_group(synopsis_funding_instrument_hist_records)
-
-    def process_link_funding_instruments_group(
-        self,
-        records: Sequence[
-            Tuple[
-                SourceFundingInstrument,
-                LinkOpportunitySummaryFundingInstrument | None,
-                OpportunitySummary | None,
-            ]
-        ],
-    ) -> None:
-        for source_funding_instrument, target_funding_instrument, opportunity_summary in records:
-            try:
-                self.process_link_funding_instrument(
-                    source_funding_instrument, target_funding_instrument, opportunity_summary
-                )
-            except ValueError:
-                self.increment(self.Metrics.TOTAL_ERROR_COUNT, prefix=FUNDING_INSTRUMENT)
-                logger.exception(
-                    "Failed to process opportunity summary funding instrument",
-                    extra=transform_util.get_log_extra_funding_instrument(
-                        source_funding_instrument
-                    ),
-                )
-
-    def process_link_funding_instrument(
-        self,
-        source_funding_instrument: SourceFundingInstrument,
-        target_funding_instrument: LinkOpportunitySummaryFundingInstrument | None,
-        opportunity_summary: OpportunitySummary | None,
-    ) -> None:
-        self.increment(self.Metrics.TOTAL_RECORDS_PROCESSED, prefix=FUNDING_INSTRUMENT)
-        extra = transform_util.get_log_extra_funding_instrument(source_funding_instrument)
-        logger.info("Processing funding instrument", extra=extra)
-
-        if source_funding_instrument.is_deleted:
-            self._handle_delete(
-                source_funding_instrument, target_funding_instrument, FUNDING_INSTRUMENT, extra
-            )
-
-        # Historical records are linked to other historical records, however
-        # we don't import historical opportunity records, so if the opportunity
-        # was deleted, we won't have created the opportunity summary. Whenever we do
-        # support historical opportunities, we'll have these all marked with a
-        # flag that we can use to reprocess these.
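# [Editorial sketch - not part of this patch] The duplicate guard used a few
# lines below. The legacy primary key is (legacy ID, lookup value, opportunity
# ID), so two legacy rows can map to the same lookup value; checking the
# in-memory relationship, and appending to it on insert, also catches a second
# duplicate arriving in the same batch. Names follow this diff; the helper is
# an illustration only.
def insert_funding_instrument_unless_duplicate(opportunity_summary, link_row) -> bool:
    if link_row.funding_instrument in opportunity_summary.funding_instruments:
        return False  # same lookup value already present for this summary - skip it
    # Appending keeps SQLAlchemy's cached relationship current for later checks
    opportunity_summary.link_funding_instruments.append(link_row)
    return True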
-        elif opportunity_summary is None and source_funding_instrument.is_historical_table:
-            logger.warning(
-                "Historical funding instrument does not have a corresponding opportunity summary - cannot import, but will mark as processed",
-                extra=extra,
-            )
-            self.increment(self.Metrics.TOTAL_HISTORICAL_ORPHANS_SKIPPED, prefix=FUNDING_INSTRUMENT)
-            source_funding_instrument.transformation_notes = ORPHANED_HISTORICAL_RECORD
+
+    def run_task(self) -> None:
+        if self.transform_config.enable_opportunity:
+            TransformOpportunity(self).run()
-        elif opportunity_summary is None:
-            # This shouldn't be possible as the incoming data has foreign keys, but as a safety net
-            # we'll make sure the opportunity actually exists
-            raise ValueError(
-                "Funding instrument record cannot be processed as the opportunity summary for it does not exist"
-            )
+        if self.transform_config.enable_assistance_listing:
+            TransformAssistanceListing(self).run()
-        else:
-            # To avoid incrementing metrics for records we fail to transform, record
-            # here whether it's an insert/update and we'll increment after transforming
-            is_insert = target_funding_instrument is None
+        if self.transform_config.enable_opportunity_summary:
+            TransformOpportunitySummary(self).run()
-            logger.info("Transforming and upserting funding instrument", extra=extra)
-            transformed_funding_instrument = (
-                transform_util.convert_opportunity_summary_funding_instrument(
-                    source_funding_instrument, target_funding_instrument, opportunity_summary
-                )
-            )
+        if self.transform_config.enable_applicant_type:
+            TransformApplicantType(self).run()
-            # Before we insert, we still have to be certain we're not adding a duplicate record:
-            # because the primary key of the legacy tables is the legacy ID + lookup value + opportunity ID,
-            # it's possible for the same lookup value to appear multiple times because the legacy ID is different.
-            # This would hit a conflict in our DB's primary key, so we need to verify that won't happen
-            if (
-                is_insert
-                and transformed_funding_instrument.funding_instrument
-                in opportunity_summary.funding_instruments
-            ):
-                self.increment(
-                    self.Metrics.TOTAL_DUPLICATE_RECORDS_SKIPPED, prefix=FUNDING_INSTRUMENT
-                )
-                logger.warning(
-                    "Skipping funding instrument record",
-                    extra=extra
-                    | {"funding_instrument": transformed_funding_instrument.funding_instrument},
-                )
-            elif is_insert:
-                self.increment(self.Metrics.TOTAL_RECORDS_INSERTED, prefix=FUNDING_INSTRUMENT)
-                # We append to the relationship so SQLAlchemy immediately attaches it to its cached
-                # opportunity summary object so that the above check works when we receive dupes in the same batch
-                opportunity_summary.link_funding_instruments.append(transformed_funding_instrument)
-            else:
-                self.increment(self.Metrics.TOTAL_RECORDS_UPDATED, prefix=FUNDING_INSTRUMENT)
-                self.db_session.merge(transformed_funding_instrument)
+        if self.transform_config.enable_funding_category:
+            TransformFundingCategory(self).run()
-            logger.info("Processed funding instrument", extra=extra)
-            source_funding_instrument.transformed_at = self.transform_time
+        if self.transform_config.enable_funding_instrument:
+            TransformFundingInstrument(self).run()
diff --git a/api/src/data_migration/transformation/transform_util.py b/api/src/data_migration/transformation/transform_util.py
index 3dc183f9e..d8bf58a1b 100644
--- a/api/src/data_migration/transformation/transform_util.py
+++ b/api/src/data_migration/transformation/transform_util.py
@@ -7,6 +7,12 @@
     FundingInstrument,
     OpportunityCategory,
 )
+from
src.data_migration.transformation.transform_constants import ( + SourceApplicantType, + SourceFundingCategory, + SourceFundingInstrument, + SourceSummary, +) from src.db.models.base import TimestampMixin from src.db.models.opportunity_models import ( LinkOpportunitySummaryApplicantType, @@ -16,14 +22,10 @@ OpportunityAssistanceListing, OpportunitySummary, ) -from src.db.models.staging.forecast import TforecastHist from src.db.models.staging.opportunity import Topportunity, TopportunityCfda from src.db.models.staging.staging_base import StagingBase -from src.db.models.staging.synopsis import Tsynopsis, TsynopsisHist from src.util import datetime_util -from . import SourceApplicantType, SourceFundingCategory, SourceFundingInstrument, SourceSummary - logger = logging.getLogger(__name__) OPPORTUNITY_CATEGORY_MAP = { @@ -194,15 +196,15 @@ def transform_opportunity_summary( if incoming_summary is None: logger.info("Creating new opportunity summary record", extra=log_extra) + # These values are a part of a unique key for identifying across tables, we don't + # ever want to modify them once created target_summary = OpportunitySummary( opportunity_id=source_summary.opportunity_id, is_forecast=source_summary.is_forecast, - revision_number=None, + # Revision number is only found in the historical table, use getattr + # to avoid type checking + revision_number=getattr(source_summary, "revision_number", None), ) - - # Revision number is only found in the historical table - if isinstance(source_summary, (TsynopsisHist, TforecastHist)): - target_summary.revision_number = source_summary.revision_number else: # We create a new summary object and merge it outside this function # that way if any modifications occur on the object and then it errors @@ -238,39 +240,32 @@ def transform_opportunity_summary( target_summary.updated_by = source_summary.last_upd_id target_summary.created_by = source_summary.creator_id - # Some fields either are named different in synopsis/forecast - # or only come from one of those tables, so handle those here - if isinstance(source_summary, (Tsynopsis, TsynopsisHist)): - target_summary.summary_description = source_summary.syn_desc - target_summary.agency_code = source_summary.a_sa_code - target_summary.agency_phone_number = source_summary.ac_phone_number - - # Synopsis only fields - target_summary.agency_contact_description = source_summary.agency_contact_desc - target_summary.close_date = source_summary.response_date - target_summary.close_date_description = source_summary.response_date_desc - target_summary.unarchive_date = source_summary.unarchive_date - - else: # TForecast & TForecastHist - target_summary.summary_description = source_summary.forecast_desc - target_summary.agency_code = source_summary.agency_code - target_summary.agency_phone_number = source_summary.ac_phone - - # Forecast only fields - target_summary.forecasted_post_date = source_summary.est_synopsis_posting_date - target_summary.forecasted_close_date = source_summary.est_appl_response_date - target_summary.forecasted_close_date_description = ( - source_summary.est_appl_response_date_desc - ) - target_summary.forecasted_award_date = source_summary.est_award_date - target_summary.forecasted_project_start_date = source_summary.est_project_start_date - target_summary.fiscal_year = source_summary.fiscal_year + target_summary.summary_description = source_summary.description + target_summary.agency_code = source_summary.agency_code + target_summary.agency_phone_number = source_summary.agency_phone_number + + # These 
fields are only on synopsis records, use getattr to avoid isinstance + target_summary.agency_contact_description = getattr(source_summary, "agency_contact_desc", None) + target_summary.close_date = getattr(source_summary, "response_date", None) + target_summary.close_date_description = getattr(source_summary, "response_date_desc", None) + target_summary.unarchive_date = getattr(source_summary, "unarchive_date", None) + + # These fields are only on forecast records, use getattr to avoid isinstance + target_summary.forecasted_post_date = getattr(source_summary, "est_synopsis_posting_date", None) + target_summary.forecasted_close_date = getattr(source_summary, "est_appl_response_date", None) + target_summary.forecasted_close_date_description = getattr( + source_summary, "est_appl_response_date_desc", None + ) + target_summary.forecasted_award_date = getattr(source_summary, "est_award_date", None) + target_summary.forecasted_project_start_date = getattr( + source_summary, "est_project_start_date", None + ) + target_summary.fiscal_year = getattr(source_summary, "fiscal_year", None) - # Historical only - if isinstance(source_summary, (TsynopsisHist, TforecastHist)): - target_summary.is_deleted = convert_action_type_to_is_deleted(source_summary.action_type) - else: - target_summary.is_deleted = False + # Set whether it is deleted based on action_type, which only appears on the historical records + target_summary.is_deleted = convert_action_type_to_is_deleted( + getattr(source_summary, "action_type", None) + ) transform_update_create_timestamp(source_summary, target_summary, log_extra=log_extra) @@ -436,9 +431,13 @@ def convert_yn_bool(value: str | None) -> bool | None: raise ValueError("Unexpected Y/N bool value: %s" % value) -def convert_action_type_to_is_deleted(value: str | None) -> bool | None: +def convert_action_type_to_is_deleted(value: str | None) -> bool: + # Action type can be U (update) or D (delete) + # however many older records seem to not have this set at all + # The legacy system looks like it treats anything that isn't D + # the same, so we'll go with that assumption as well. 
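# [Editorial illustration - not part of this patch] The mapping this helper
# implements, given the comment above. The "U" branch is not visible in this
# hunk, so treating it like any other non-"D" value is an assumption.
assert convert_action_type_to_is_deleted(None) is False  # unset on older records
assert convert_action_type_to_is_deleted("") is False
assert convert_action_type_to_is_deleted("D") is True  # D = Delete
assert convert_action_type_to_is_deleted("U") is False  # assumed: U (update) is not a delete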
if value is None or value == "": - return None + return False if value == "D": # D = Delete return True diff --git a/api/src/db/models/staging/forecast.py b/api/src/db/models/staging/forecast.py index 2030d2329..4c9ddc61d 100644 --- a/api/src/db/models/staging/forecast.py +++ b/api/src/db/models/staging/forecast.py @@ -24,6 +24,14 @@ def is_forecast(self) -> bool: def is_historical_table(self) -> bool: return False + @property + def description(self) -> str | None: + return self.forecast_desc + + @property + def agency_phone_number(self) -> str | None: + return self.ac_phone + class TforecastHist(StagingBase, forecast_mixin.TforecastHistMixin, StagingParamMixin): __tablename__ = "tforecast_hist" @@ -43,6 +51,14 @@ def is_forecast(self) -> bool: def is_historical_table(self) -> bool: return True + @property + def description(self) -> str | None: + return self.forecast_desc + + @property + def agency_phone_number(self) -> str | None: + return self.ac_phone + class TapplicanttypesForecast( StagingBase, forecast_mixin.TapplicanttypesForecastMixin, StagingParamMixin diff --git a/api/src/db/models/staging/synopsis.py b/api/src/db/models/staging/synopsis.py index 2ad20d7c9..3fc59ab18 100644 --- a/api/src/db/models/staging/synopsis.py +++ b/api/src/db/models/staging/synopsis.py @@ -24,6 +24,18 @@ def is_forecast(self) -> bool: def is_historical_table(self) -> bool: return False + @property + def description(self) -> str | None: + return self.syn_desc + + @property + def agency_code(self) -> str | None: + return self.a_sa_code + + @property + def agency_phone_number(self) -> str | None: + return self.ac_phone_number + class TsynopsisHist(StagingBase, synopsis_mixin.TsynopsisHistMixin, StagingParamMixin): __tablename__ = "tsynopsis_hist" @@ -43,6 +55,18 @@ def is_forecast(self) -> bool: def is_historical_table(self) -> bool: return True + @property + def description(self) -> str | None: + return self.syn_desc + + @property + def agency_code(self) -> str | None: + return self.a_sa_code + + @property + def agency_phone_number(self) -> str | None: + return self.ac_phone_number + class TapplicanttypesSynopsis( StagingBase, synopsis_mixin.TapplicanttypesSynopsisMixin, StagingParamMixin diff --git a/api/src/task/subtask.py b/api/src/task/subtask.py new file mode 100644 index 000000000..178b067b0 --- /dev/null +++ b/api/src/task/subtask.py @@ -0,0 +1,69 @@ +import abc +import logging +import time +from typing import Any + +import src.adapters.db as db +from src.task.task import Task + +logger = logging.getLogger(__name__) + + +class SubTask(abc.ABC, metaclass=abc.ABCMeta): + """ + A SubTask is a class that defines a set of behavior + that can be seen as a subset of a Task. + + This object has access to the same internal metrics + and reporting attributes as its Task, but can be defined + as a separate class which can help with organizing large + complex tasks that can't be easily broken down. 
+ """ + + def __init__(self, task: Task): + self.task = task + + def run(self) -> None: + try: + logger.info("Starting subtask %s", self.cls_name()) + start = time.perf_counter() + + # Run the actual subtask + self.run_subtask() + + # Calculate and set a duration + end = time.perf_counter() + duration = round((end - start), 3) + self.set_metrics({f"{self.cls_name()}_subtask_duration_sec": duration}) + + logger.info("Completed subtask %s in %s seconds", self.cls_name(), duration) + + except Exception: + logger.exception("Failed to run subtask %s", self.cls_name()) + raise + + def set_metrics(self, metrics: dict[str, Any]) -> None: + # Passthrough method to the task set_metrics function + self.task.set_metrics(metrics) + + def increment(self, name: str, value: int = 1, prefix: str | None = None) -> None: + # Passthrough method to the task increment function + self.task.increment(name, value, prefix) + + def cls_name(self) -> str: + return self.__class__.__name__ + + @abc.abstractmethod + def run_subtask(self) -> None: + """Override to define the subtask logic""" + pass + + @property + def db_session(self) -> db.Session: + # Property to make it so the subtask can reference the db_session + # as if it were the task itself + return self.task.db_session + + @property + def metrics(self) -> dict[str, Any]: + return self.task.metrics diff --git a/api/tests/src/data_migration/transformation/conftest.py b/api/tests/src/data_migration/transformation/conftest.py new file mode 100644 index 000000000..443c113b6 --- /dev/null +++ b/api/tests/src/data_migration/transformation/conftest.py @@ -0,0 +1,659 @@ +from datetime import date, datetime +from typing import Tuple + +import pytest + +import tests.src.db.models.factories as f +from src.constants.lookup_constants import ApplicantType, FundingCategory, FundingInstrument +from src.data_migration.transformation.transform_oracle_data_task import TransformOracleDataTask +from src.db.models import staging +from src.db.models.opportunity_models import ( + LinkOpportunitySummaryApplicantType, + LinkOpportunitySummaryFundingCategory, + LinkOpportunitySummaryFundingInstrument, + Opportunity, + OpportunityAssistanceListing, + OpportunitySummary, +) +from tests.conftest import BaseTestClass + + +class BaseTransformTestClass(BaseTestClass): + @pytest.fixture() + def transform_oracle_data_task( + self, db_session, enable_factory_create, truncate_opportunities + ) -> TransformOracleDataTask: + return TransformOracleDataTask(db_session) + + +def setup_opportunity( + create_existing: bool, + is_delete: bool = False, + is_already_processed: bool = False, + source_values: dict | None = None, + all_fields_null: bool = False, +) -> staging.opportunity.Topportunity: + if source_values is None: + source_values = {} + + source_opportunity = f.StagingTopportunityFactory.create( + **source_values, + is_deleted=is_delete, + already_transformed=is_already_processed, + all_fields_null=all_fields_null, + cfdas=[], + ) + + if create_existing: + f.OpportunityFactory.create( + opportunity_id=source_opportunity.opportunity_id, + # set created_at/updated_at to an earlier time so its clear + # when they were last updated + timestamps_in_past=True, + ) + + return source_opportunity + + +def setup_cfda( + create_existing: bool, + is_delete: bool = False, + is_already_processed: bool = False, + source_values: dict | None = None, + all_fields_null: bool = False, + opportunity: Opportunity | None = None, +) -> staging.opportunity.TopportunityCfda: + if source_values is None: + source_values = 
{} + + # If you don't provide an opportunity, you need to provide an ID + if opportunity is not None: + source_values["opportunity_id"] = opportunity.opportunity_id + + source_cfda = f.StagingTopportunityCfdaFactory.create( + **source_values, + opportunity=None, # To override the factory trying to create something + is_deleted=is_delete, + already_transformed=is_already_processed, + all_fields_null=all_fields_null, + ) + + if create_existing: + f.OpportunityAssistanceListingFactory.create( + opportunity=opportunity, + opportunity_assistance_listing_id=source_cfda.opp_cfda_id, + # set created_at/updated_at to an earlier time so its clear + # when they were last updated + timestamps_in_past=True, + ) + + return source_cfda + + +def setup_synopsis_forecast( + is_forecast: bool, + revision_number: int | None, + create_existing: bool, + opportunity: Opportunity | None, + is_delete: bool = False, + is_already_processed: bool = False, + source_values: dict | None = None, +): + if source_values is None: + source_values = {} + + if is_forecast: + if revision_number is None: + factory_cls = f.StagingTforecastFactory + else: + factory_cls = f.StagingTforecastHistFactory + else: + if revision_number is None: + factory_cls = f.StagingTsynopsisFactory + else: + factory_cls = f.StagingTsynopsisHistFactory + + if revision_number is not None: + source_values["revision_number"] = revision_number + + if opportunity is not None: + source_values["opportunity_id"] = opportunity.opportunity_id + + source_summary = factory_cls.create( + **source_values, + opportunity=None, # To override the factory trying to create something + is_deleted=is_delete, + already_transformed=is_already_processed, + ) + + if create_existing: + f.OpportunitySummaryFactory.create( + opportunity=opportunity, is_forecast=is_forecast, revision_number=revision_number + ) + + return source_summary + + +def setup_applicant_type( + create_existing: bool, + opportunity_summary: OpportunitySummary, + legacy_lookup_value: str, + applicant_type: ApplicantType | None = None, + is_delete: bool = False, + is_already_processed: bool = False, + source_values: dict | None = None, +): + if create_existing and is_delete is False and applicant_type is None: + raise Exception( + "If create_existing is True, is_delete is False - must provide the properly converted / mapped value for applicant_type" + ) + + if source_values is None: + source_values = {} + + if opportunity_summary.is_forecast: + source_values["forecast"] = None + if opportunity_summary.revision_number is None: + factory_cls = f.StagingTapplicanttypesForecastFactory + else: + factory_cls = f.StagingTapplicanttypesForecastHistFactory + source_values["revision_number"] = opportunity_summary.revision_number + else: + source_values["synopsis"] = None + if opportunity_summary.revision_number is None: + factory_cls = f.StagingTapplicanttypesSynopsisFactory + else: + factory_cls = f.StagingTapplicanttypesSynopsisHistFactory + source_values["revision_number"] = opportunity_summary.revision_number + + source_applicant_type = factory_cls.create( + **source_values, + opportunity_id=opportunity_summary.opportunity_id, + is_deleted=is_delete, + already_transformed=is_already_processed, + at_id=legacy_lookup_value, + ) + + if create_existing: + if opportunity_summary.is_forecast: + legacy_id = source_applicant_type.at_frcst_id + else: + legacy_id = source_applicant_type.at_syn_id + + f.LinkOpportunitySummaryApplicantTypeFactory.create( + opportunity_summary=opportunity_summary, + 
legacy_applicant_type_id=legacy_id, + applicant_type=applicant_type, + ) + + return source_applicant_type + + +def setup_funding_instrument( + create_existing: bool, + opportunity_summary: OpportunitySummary, + legacy_lookup_value: str, + funding_instrument: FundingInstrument | None = None, + is_delete: bool = False, + is_already_processed: bool = False, + source_values: dict | None = None, +): + if create_existing and is_delete is False and funding_instrument is None: + raise Exception( + "If create_existing is True, is_delete is False - must provide the properly converted / mapped value for funding_instrument" + ) + + if source_values is None: + source_values = {} + + if opportunity_summary.is_forecast: + source_values["forecast"] = None + if opportunity_summary.revision_number is None: + factory_cls = f.StagingTfundinstrForecastFactory + else: + factory_cls = f.StagingTfundinstrForecastHistFactory + source_values["revision_number"] = opportunity_summary.revision_number + else: + source_values["synopsis"] = None + if opportunity_summary.revision_number is None: + factory_cls = f.StagingTfundinstrSynopsisFactory + else: + factory_cls = f.StagingTfundinstrSynopsisHistFactory + source_values["revision_number"] = opportunity_summary.revision_number + + source_funding_instrument = factory_cls.create( + **source_values, + opportunity_id=opportunity_summary.opportunity_id, + is_deleted=is_delete, + already_transformed=is_already_processed, + fi_id=legacy_lookup_value, + ) + + if create_existing: + if opportunity_summary.is_forecast: + legacy_id = source_funding_instrument.fi_frcst_id + else: + legacy_id = source_funding_instrument.fi_syn_id + + f.LinkOpportunitySummaryFundingInstrumentFactory.create( + opportunity_summary=opportunity_summary, + legacy_funding_instrument_id=legacy_id, + funding_instrument=funding_instrument, + ) + + return source_funding_instrument + + +def setup_funding_category( + create_existing: bool, + opportunity_summary: OpportunitySummary, + legacy_lookup_value: str, + funding_category: FundingCategory | None = None, + is_delete: bool = False, + is_already_processed: bool = False, + source_values: dict | None = None, +): + if create_existing and is_delete is False and funding_category is None: + raise Exception( + "If create_existing is True, is_delete is False - must provide the properly converted / mapped value for funding_category" + ) + + if source_values is None: + source_values = {} + + if opportunity_summary.is_forecast: + source_values["forecast"] = None + if opportunity_summary.revision_number is None: + factory_cls = f.StagingTfundactcatForecastFactory + else: + factory_cls = f.StagingTfundactcatForecastHistFactory + source_values["revision_number"] = opportunity_summary.revision_number + else: + source_values["synopsis"] = None + if opportunity_summary.revision_number is None: + factory_cls = f.StagingTfundactcatSynopsisFactory + else: + factory_cls = f.StagingTfundactcatSynopsisHistFactory + source_values["revision_number"] = opportunity_summary.revision_number + + source_funding_category = factory_cls.create( + **source_values, + opportunity_id=opportunity_summary.opportunity_id, + is_deleted=is_delete, + already_transformed=is_already_processed, + fac_id=legacy_lookup_value, + ) + + if create_existing: + if opportunity_summary.is_forecast: + legacy_id = source_funding_category.fac_frcst_id + else: + legacy_id = source_funding_category.fac_syn_id + + f.LinkOpportunitySummaryFundingCategoryFactory.create( + opportunity_summary=opportunity_summary, + 
+            legacy_funding_category_id=legacy_id,
+            funding_category=funding_category,
+        )
+
+    return source_funding_category
+
+
+def validate_matching_fields(
+    source, destination, fields: list[Tuple[str, str]], expect_all_to_match: bool
+):
+    mismatched_fields = []
+
+    for source_field, destination_field in fields:
+        source_value = getattr(source, source_field)
+        destination_value = getattr(destination, destination_field)
+
+        # Some fields that we copy in are datetime typed (although they behave as dates, and we convert them as such)
+        # If so, we need to make sure they're both dates for the purposes of comparison
+        if isinstance(source_value, datetime) and isinstance(destination_value, date):
+            source_value = source_value.date()
+
+        if source_value != destination_value:
+            mismatched_fields.append(
+                f"{source_field}/{destination_field}: '{source_value}' != '{destination_value}'"
+            )
+
+    # If values weren't copied in an update,
+    # then we should expect most things to not match,
+    # but randomness in the factories might cause some overlap
+    if expect_all_to_match:
+        assert (
+            len(mismatched_fields) == 0
+        ), f"Expected all fields to match between {source.__class__} and {destination.__class__}, but found mismatched fields: {','.join(mismatched_fields)}"
+    else:
+        assert (
+            len(mismatched_fields) != 0
+        ), f"Did not expect all fields to match between {source.__class__} and {destination.__class__}, but they did, which means an unexpected update occurred"
+
+
+def validate_opportunity(
+    db_session,
+    source_opportunity: staging.opportunity.Topportunity,
+    expect_in_db: bool = True,
+    expect_values_to_match: bool = True,
+):
+    opportunity = (
+        db_session.query(Opportunity)
+        .filter(Opportunity.opportunity_id == source_opportunity.opportunity_id)
+        .one_or_none()
+    )
+
+    if not expect_in_db:
+        assert opportunity is None
+        return
+
+    assert opportunity is not None
+    # For fields that we expect to match 1:1, verify that they match as expected
+    validate_matching_fields(
+        source_opportunity,
+        opportunity,
+        [
+            ("oppnumber", "opportunity_number"),
+            ("opptitle", "opportunity_title"),
+            ("owningagency", "agency"),
+            ("category_explanation", "category_explanation"),
+            ("revision_number", "revision_number"),
+            ("modified_comments", "modified_comments"),
+            ("publisheruid", "publisher_user_id"),
+            ("publisher_profile_id", "publisher_profile_id"),
+        ],
+        expect_values_to_match,
+    )
+
+    # Validation of fields that aren't copied exactly
+    if expect_values_to_match:
+        # Deliberately validating is_draft with a different calculation
+        if source_opportunity.is_draft == "N":
+            assert opportunity.is_draft is False
+        else:
+            assert opportunity.is_draft is True
+
+
+def validate_assistance_listing(
+    db_session,
+    source_cfda: staging.opportunity.TopportunityCfda,
+    expect_in_db: bool = True,
+    expect_values_to_match: bool = True,
+):
+    assistance_listing = (
+        db_session.query(OpportunityAssistanceListing)
+        .filter(
+            OpportunityAssistanceListing.opportunity_assistance_listing_id
+            == source_cfda.opp_cfda_id
+        )
+        .one_or_none()
+    )
+
+    if not expect_in_db:
+        assert assistance_listing is None
+        return
+
+    assert assistance_listing is not None
+    # For fields that we expect to match 1:1, verify that they match as expected
+    validate_matching_fields(
+        source_cfda,
+        assistance_listing,
+        [
+            ("cfdanumber", "assistance_listing_number"),
+            ("programtitle", "program_title"),
+        ],
+        expect_values_to_match,
+    )
+
+
+def get_summary_from_source(db_session, source_summary):
+    revision_number = None
+    is_forecast =
source_summary.is_forecast + if isinstance(source_summary, (staging.synopsis.TsynopsisHist, staging.forecast.TforecastHist)): + revision_number = source_summary.revision_number + + opportunity_summary = ( + db_session.query(OpportunitySummary) + .filter( + OpportunitySummary.opportunity_id == source_summary.opportunity_id, + OpportunitySummary.revision_number == revision_number, + OpportunitySummary.is_forecast == is_forecast, + # Populate existing to force it to fetch updates from the DB + ) + .execution_options(populate_existing=True) + .one_or_none() + ) + + return opportunity_summary + + +def validate_opportunity_summary( + db_session, source_summary, expect_in_db: bool = True, expect_values_to_match: bool = True +): + opportunity_summary = get_summary_from_source(db_session, source_summary) + + if not expect_in_db: + assert opportunity_summary is None + return + + matching_fields = [ + ("version_nbr", "version_number"), + ("posting_date", "post_date"), + ("archive_date", "archive_date"), + ("fd_link_url", "additional_info_url"), + ("fd_link_desc", "additional_info_url_description"), + ("modification_comments", "modification_comments"), + ("oth_cat_fa_desc", "funding_category_description"), + ("applicant_elig_desc", "applicant_eligibility_description"), + ("ac_name", "agency_name"), + ("ac_email_addr", "agency_email_address"), + ("ac_email_desc", "agency_email_address_description"), + ("publisher_profile_id", "publisher_profile_id"), + ("publisheruid", "publisher_user_id"), + ("last_upd_id", "updated_by"), + ("creator_id", "created_by"), + ] + + if isinstance(source_summary, (staging.synopsis.Tsynopsis, staging.synopsis.TsynopsisHist)): + matching_fields.extend( + [ + ("syn_desc", "summary_description"), + ("a_sa_code", "agency_code"), + ("ac_phone_number", "agency_phone_number"), + ("agency_contact_desc", "agency_contact_description"), + ("response_date", "close_date"), + ("response_date_desc", "close_date_description"), + ("unarchive_date", "unarchive_date"), + ] + ) + else: # Forecast+ForecastHist + matching_fields.extend( + [ + ("forecast_desc", "summary_description"), + ("agency_code", "agency_code"), + ("ac_phone", "agency_phone_number"), + ("est_synopsis_posting_date", "forecasted_post_date"), + ("est_appl_response_date", "forecasted_close_date"), + ("est_appl_response_date_desc", "forecasted_close_date_description"), + ("est_award_date", "forecasted_award_date"), + ("est_project_start_date", "forecasted_project_start_date"), + ("fiscal_year", "fiscal_year"), + ] + ) + + # History only fields + is_deleted = False + if isinstance(source_summary, (staging.synopsis.TsynopsisHist, staging.forecast.TforecastHist)): + matching_fields.extend([("revision_number", "revision_number")]) + + is_deleted = source_summary.action_type == "D" + + assert opportunity_summary is not None + validate_matching_fields( + source_summary, opportunity_summary, matching_fields, expect_values_to_match + ) + + assert opportunity_summary.is_deleted == is_deleted + + +def validate_summary_and_nested( + db_session, + source_summary, + expected_applicant_types: list[ApplicantType], + expected_funding_categories: list[FundingCategory], + expected_funding_instruments: list[FundingInstrument], + expect_in_db: bool = True, + expect_values_to_match: bool = True, +): + validate_opportunity_summary(db_session, source_summary, expect_in_db, expect_values_to_match) + + if not expect_in_db: + return + + created_record = get_summary_from_source(db_session, source_summary) + + assert set(created_record.applicant_types) == 
set(expected_applicant_types) + assert set(created_record.funding_categories) == set(expected_funding_categories) + assert set(created_record.funding_instruments) == set(expected_funding_instruments) + + +def validate_applicant_type( + db_session, + source_applicant_type, + expect_in_db: bool = True, + expected_applicant_type: ApplicantType | None = None, + was_processed: bool = True, + expect_values_to_match: bool = True, +): + assert (source_applicant_type.transformed_at is not None) == was_processed + + # In order to properly find the link table value, need to first determine + # the opportunity summary in a subquery + opportunity_summary_id = ( + db_session.query(OpportunitySummary.opportunity_summary_id) + .filter( + OpportunitySummary.revision_number == source_applicant_type.revision_number, + OpportunitySummary.is_forecast == source_applicant_type.is_forecast, + OpportunitySummary.opportunity_id == source_applicant_type.opportunity_id, + ) + .scalar() + ) + + link_applicant_type = ( + db_session.query(LinkOpportunitySummaryApplicantType) + .filter( + LinkOpportunitySummaryApplicantType.legacy_applicant_type_id + == source_applicant_type.legacy_applicant_type_id, + LinkOpportunitySummaryApplicantType.opportunity_summary_id == opportunity_summary_id, + ) + .one_or_none() + ) + + if not expect_in_db: + assert link_applicant_type is None + return + + assert link_applicant_type is not None + assert link_applicant_type.applicant_type == expected_applicant_type + + validate_matching_fields( + source_applicant_type, + link_applicant_type, + [("creator_id", "created_by"), ("last_upd_id", "updated_by")], + expect_values_to_match, + ) + + +def validate_funding_instrument( + db_session, + source_funding_instrument, + expect_in_db: bool = True, + expected_funding_instrument: FundingInstrument | None = None, + was_processed: bool = True, + expect_values_to_match: bool = True, +): + assert (source_funding_instrument.transformed_at is not None) == was_processed + + # In order to properly find the link table value, need to first determine + # the opportunity summary in a subquery + opportunity_summary_id = ( + db_session.query(OpportunitySummary.opportunity_summary_id) + .filter( + OpportunitySummary.revision_number == source_funding_instrument.revision_number, + OpportunitySummary.is_forecast == source_funding_instrument.is_forecast, + OpportunitySummary.opportunity_id == source_funding_instrument.opportunity_id, + ) + .scalar() + ) + + link_funding_instrument = ( + db_session.query(LinkOpportunitySummaryFundingInstrument) + .filter( + LinkOpportunitySummaryFundingInstrument.legacy_funding_instrument_id + == source_funding_instrument.legacy_funding_instrument_id, + LinkOpportunitySummaryFundingInstrument.opportunity_summary_id + == opportunity_summary_id, + ) + .one_or_none() + ) + + if not expect_in_db: + assert link_funding_instrument is None + return + + assert link_funding_instrument is not None + assert link_funding_instrument.funding_instrument == expected_funding_instrument + + validate_matching_fields( + source_funding_instrument, + link_funding_instrument, + [("creator_id", "created_by"), ("last_upd_id", "updated_by")], + expect_values_to_match, + ) + + +def validate_funding_category( + db_session, + source_funding_category, + expect_in_db: bool = True, + expected_funding_category: FundingCategory | None = None, + was_processed: bool = True, + expect_values_to_match: bool = True, +): + assert (source_funding_category.transformed_at is not None) == was_processed + + # In order to properly 
find the link table value, need to first determine + # the opportunity summary in a subquery + opportunity_summary_id = ( + db_session.query(OpportunitySummary.opportunity_summary_id) + .filter( + OpportunitySummary.revision_number == source_funding_category.revision_number, + OpportunitySummary.is_forecast == source_funding_category.is_forecast, + OpportunitySummary.opportunity_id == source_funding_category.opportunity_id, + ) + .scalar() + ) + + link_funding_category = ( + db_session.query(LinkOpportunitySummaryFundingCategory) + .filter( + LinkOpportunitySummaryFundingCategory.legacy_funding_category_id + == source_funding_category.legacy_funding_category_id, + LinkOpportunitySummaryFundingCategory.opportunity_summary_id == opportunity_summary_id, + ) + .one_or_none() + ) + + if not expect_in_db: + assert link_funding_category is None + return + + assert link_funding_category is not None + assert link_funding_category.funding_category == expected_funding_category + + validate_matching_fields( + source_funding_category, + link_funding_category, + [("creator_id", "created_by"), ("last_upd_id", "updated_by")], + expect_values_to_match, + ) diff --git a/api/tests/src/data_migration/transformation/subtask/__init__.py b/api/tests/src/data_migration/transformation/subtask/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/api/tests/src/data_migration/transformation/subtask/test_transform_applicant_type.py b/api/tests/src/data_migration/transformation/subtask/test_transform_applicant_type.py new file mode 100644 index 000000000..1f4949ce8 --- /dev/null +++ b/api/tests/src/data_migration/transformation/subtask/test_transform_applicant_type.py @@ -0,0 +1,395 @@ +import pytest + +import src.data_migration.transformation.transform_constants as transform_constants +import tests.src.db.models.factories as f +from src.constants.lookup_constants import ApplicantType +from src.data_migration.transformation.subtask.transform_applicant_type import ( + TransformApplicantType, +) +from tests.src.data_migration.transformation.conftest import ( + BaseTransformTestClass, + setup_applicant_type, + validate_applicant_type, +) + + +class TestTransformApplicantType(BaseTransformTestClass): + @pytest.fixture() + def transform_applicant_type(self, transform_oracle_data_task): + return TransformApplicantType(transform_oracle_data_task) + + def test_process_applicant_types(self, db_session, transform_applicant_type): + opportunity_summary_forecast = f.OpportunitySummaryFactory.create( + is_forecast=True, revision_number=None, no_link_values=True + ) + forecast_insert1 = setup_applicant_type( + create_existing=False, + opportunity_summary=opportunity_summary_forecast, + legacy_lookup_value="00", + ) + forecast_update1 = setup_applicant_type( + create_existing=True, + opportunity_summary=opportunity_summary_forecast, + legacy_lookup_value="01", + applicant_type=ApplicantType.COUNTY_GOVERNMENTS, + ) + forecast_update2 = setup_applicant_type( + create_existing=True, + opportunity_summary=opportunity_summary_forecast, + legacy_lookup_value="02", + applicant_type=ApplicantType.CITY_OR_TOWNSHIP_GOVERNMENTS, + ) + forecast_delete1 = setup_applicant_type( + create_existing=True, + is_delete=True, + opportunity_summary=opportunity_summary_forecast, + legacy_lookup_value="04", + applicant_type=ApplicantType.SPECIAL_DISTRICT_GOVERNMENTS, + ) + forecast_delete2 = setup_applicant_type( + create_existing=True, + is_delete=True, + opportunity_summary=opportunity_summary_forecast, + legacy_lookup_value="05", + 
applicant_type=ApplicantType.INDEPENDENT_SCHOOL_DISTRICTS, + ) + forecast_update_already_processed = setup_applicant_type( + create_existing=True, + is_already_processed=True, + opportunity_summary=opportunity_summary_forecast, + legacy_lookup_value="06", + applicant_type=ApplicantType.PUBLIC_AND_STATE_INSTITUTIONS_OF_HIGHER_EDUCATION, + ) + + opportunity_summary_forecast_hist = f.OpportunitySummaryFactory.create( + is_forecast=True, revision_number=3, no_link_values=True + ) + forecast_hist_insert1 = setup_applicant_type( + create_existing=False, + opportunity_summary=opportunity_summary_forecast_hist, + legacy_lookup_value="07", + ) + forecast_hist_update1 = setup_applicant_type( + create_existing=True, + opportunity_summary=opportunity_summary_forecast_hist, + legacy_lookup_value="08", + applicant_type=ApplicantType.PUBLIC_AND_INDIAN_HOUSING_AUTHORITIES, + ) + forecast_hist_update2 = setup_applicant_type( + create_existing=True, + opportunity_summary=opportunity_summary_forecast_hist, + legacy_lookup_value="11", + applicant_type=ApplicantType.OTHER_NATIVE_AMERICAN_TRIBAL_ORGANIZATIONS, + ) + forecast_hist_delete1 = setup_applicant_type( + create_existing=True, + is_delete=True, + opportunity_summary=opportunity_summary_forecast_hist, + legacy_lookup_value="12", + applicant_type=ApplicantType.NONPROFITS_NON_HIGHER_EDUCATION_WITH_501C3, + ) + forecast_hist_delete_already_processed = setup_applicant_type( + create_existing=False, + is_delete=True, + is_already_processed=True, + opportunity_summary=opportunity_summary_forecast_hist, + legacy_lookup_value="13", + ) + forecast_hist_duplicate_insert = setup_applicant_type( + create_existing=False, + opportunity_summary=opportunity_summary_forecast_hist, + legacy_lookup_value="08", + ) + + opportunity_summary_syn = f.OpportunitySummaryFactory.create( + is_forecast=False, revision_number=None, no_link_values=True + ) + syn_insert1 = setup_applicant_type( + create_existing=False, + opportunity_summary=opportunity_summary_syn, + legacy_lookup_value="20", + ) + syn_insert2 = setup_applicant_type( + create_existing=False, + opportunity_summary=opportunity_summary_syn, + legacy_lookup_value="21", + ) + syn_update1 = setup_applicant_type( + create_existing=True, + opportunity_summary=opportunity_summary_syn, + legacy_lookup_value="22", + applicant_type=ApplicantType.FOR_PROFIT_ORGANIZATIONS_OTHER_THAN_SMALL_BUSINESSES, + ) + syn_update2 = setup_applicant_type( + create_existing=True, + opportunity_summary=opportunity_summary_syn, + legacy_lookup_value="23", + applicant_type=ApplicantType.SMALL_BUSINESSES, + ) + syn_delete1 = setup_applicant_type( + create_existing=True, + is_delete=True, + opportunity_summary=opportunity_summary_syn, + legacy_lookup_value="25", + applicant_type=ApplicantType.OTHER, + ) + syn_delete2 = setup_applicant_type( + create_existing=True, + is_delete=True, + opportunity_summary=opportunity_summary_syn, + legacy_lookup_value="99", + applicant_type=ApplicantType.UNRESTRICTED, + ) + syn_delete_but_current_missing = setup_applicant_type( + create_existing=False, + is_delete=True, + opportunity_summary=opportunity_summary_syn, + legacy_lookup_value="07", + ) + syn_update_already_processed = setup_applicant_type( + create_existing=True, + is_already_processed=True, + opportunity_summary=opportunity_summary_syn, + legacy_lookup_value="08", + applicant_type=ApplicantType.PUBLIC_AND_INDIAN_HOUSING_AUTHORITIES, + ) + + opportunity_summary_syn_hist = f.OpportunitySummaryFactory.create( + is_forecast=False, revision_number=21, 
no_link_values=True + ) + syn_hist_insert1 = setup_applicant_type( + create_existing=False, + opportunity_summary=opportunity_summary_syn_hist, + legacy_lookup_value="11", + ) + syn_hist_update1 = setup_applicant_type( + create_existing=True, + opportunity_summary=opportunity_summary_syn_hist, + legacy_lookup_value="12", + applicant_type=ApplicantType.NONPROFITS_NON_HIGHER_EDUCATION_WITH_501C3, + ) + syn_hist_update2 = setup_applicant_type( + create_existing=True, + opportunity_summary=opportunity_summary_syn_hist, + legacy_lookup_value="13", + applicant_type=ApplicantType.NONPROFITS_NON_HIGHER_EDUCATION_WITHOUT_501C3, + ) + syn_hist_delete1 = setup_applicant_type( + create_existing=True, + is_delete=True, + opportunity_summary=opportunity_summary_syn_hist, + legacy_lookup_value="25", + applicant_type=ApplicantType.OTHER, + ) + syn_hist_delete2 = setup_applicant_type( + create_existing=True, + is_delete=True, + opportunity_summary=opportunity_summary_syn_hist, + legacy_lookup_value="99", + applicant_type=ApplicantType.UNRESTRICTED, + ) + syn_hist_insert_invalid_type = setup_applicant_type( + create_existing=False, + opportunity_summary=opportunity_summary_syn_hist, + legacy_lookup_value="X", + applicant_type=ApplicantType.STATE_GOVERNMENTS, + ) + + transform_applicant_type.run_subtask() + + validate_applicant_type( + db_session, forecast_insert1, expected_applicant_type=ApplicantType.STATE_GOVERNMENTS + ) + validate_applicant_type( + db_session, + forecast_hist_insert1, + expected_applicant_type=ApplicantType.FEDERALLY_RECOGNIZED_NATIVE_AMERICAN_TRIBAL_GOVERNMENTS, + ) + validate_applicant_type( + db_session, + syn_insert1, + expected_applicant_type=ApplicantType.PRIVATE_INSTITUTIONS_OF_HIGHER_EDUCATION, + ) + validate_applicant_type( + db_session, syn_insert2, expected_applicant_type=ApplicantType.INDIVIDUALS + ) + validate_applicant_type( + db_session, + syn_hist_insert1, + expected_applicant_type=ApplicantType.OTHER_NATIVE_AMERICAN_TRIBAL_ORGANIZATIONS, + ) + + validate_applicant_type( + db_session, forecast_update1, expected_applicant_type=ApplicantType.COUNTY_GOVERNMENTS + ) + validate_applicant_type( + db_session, + forecast_update2, + expected_applicant_type=ApplicantType.CITY_OR_TOWNSHIP_GOVERNMENTS, + ) + validate_applicant_type( + db_session, + forecast_hist_update1, + expected_applicant_type=ApplicantType.PUBLIC_AND_INDIAN_HOUSING_AUTHORITIES, + ) + validate_applicant_type( + db_session, + forecast_hist_update2, + expected_applicant_type=ApplicantType.OTHER_NATIVE_AMERICAN_TRIBAL_ORGANIZATIONS, + ) + validate_applicant_type( + db_session, + syn_update1, + expected_applicant_type=ApplicantType.FOR_PROFIT_ORGANIZATIONS_OTHER_THAN_SMALL_BUSINESSES, + ) + validate_applicant_type( + db_session, syn_update2, expected_applicant_type=ApplicantType.SMALL_BUSINESSES + ) + validate_applicant_type( + db_session, + syn_hist_update1, + expected_applicant_type=ApplicantType.NONPROFITS_NON_HIGHER_EDUCATION_WITH_501C3, + ) + validate_applicant_type( + db_session, + syn_hist_update2, + expected_applicant_type=ApplicantType.NONPROFITS_NON_HIGHER_EDUCATION_WITHOUT_501C3, + ) + + validate_applicant_type(db_session, forecast_delete1, expect_in_db=False) + validate_applicant_type(db_session, forecast_delete2, expect_in_db=False) + validate_applicant_type(db_session, forecast_hist_delete1, expect_in_db=False) + validate_applicant_type(db_session, syn_delete1, expect_in_db=False) + validate_applicant_type(db_session, syn_delete2, expect_in_db=False) + validate_applicant_type(db_session, 
syn_hist_delete1, expect_in_db=False) + validate_applicant_type(db_session, syn_hist_delete2, expect_in_db=False) + + validate_applicant_type( + db_session, + forecast_update_already_processed, + expected_applicant_type=ApplicantType.PUBLIC_AND_STATE_INSTITUTIONS_OF_HIGHER_EDUCATION, + expect_values_to_match=False, + ) + validate_applicant_type( + db_session, forecast_hist_delete_already_processed, expect_in_db=False + ) + validate_applicant_type( + db_session, + syn_update_already_processed, + expected_applicant_type=ApplicantType.PUBLIC_AND_INDIAN_HOUSING_AUTHORITIES, + expect_values_to_match=False, + ) + + validate_applicant_type( + db_session, syn_delete_but_current_missing, expect_in_db=False, was_processed=True + ) + validate_applicant_type( + db_session, syn_hist_insert_invalid_type, expect_in_db=False, was_processed=False + ) + + validate_applicant_type( + db_session, forecast_hist_duplicate_insert, expect_in_db=False, was_processed=True + ) + + metrics = transform_applicant_type.metrics + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_PROCESSED] == 23 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_DELETED] == 7 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_INSERTED] == 5 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_UPDATED] == 8 + assert metrics[transform_constants.Metrics.TOTAL_ERROR_COUNT] == 1 + assert metrics[transform_constants.Metrics.TOTAL_DUPLICATE_RECORDS_SKIPPED] == 1 + assert metrics[transform_constants.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1 + + # Rerunning will only attempt to re-process the errors, so total+errors goes up by 1 + db_session.commit() # commit to end any existing transactions as run_subtask starts a new one + transform_applicant_type.run_subtask() + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_PROCESSED] == 24 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_DELETED] == 7 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_INSERTED] == 5 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_UPDATED] == 8 + assert metrics[transform_constants.Metrics.TOTAL_ERROR_COUNT] == 2 + assert metrics[transform_constants.Metrics.TOTAL_DUPLICATE_RECORDS_SKIPPED] == 1 + assert metrics[transform_constants.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1 + + @pytest.mark.parametrize( + "is_forecast,revision_number", [(True, None), (False, None), (True, 5), (False, 10)] + ) + def test_process_applicant_types_but_current_missing( + self, db_session, transform_applicant_type, is_forecast, revision_number + ): + opportunity_summary = f.OpportunitySummaryFactory.create( + is_forecast=is_forecast, revision_number=revision_number, no_link_values=True + ) + delete_but_current_missing = setup_applicant_type( + create_existing=False, + opportunity_summary=opportunity_summary, + legacy_lookup_value="00", + is_delete=True, + ) + + transform_applicant_type.process_link_applicant_type( + delete_but_current_missing, None, opportunity_summary + ) + + validate_applicant_type(db_session, delete_but_current_missing, expect_in_db=False) + assert delete_but_current_missing.transformed_at is not None + assert delete_but_current_missing.transformation_notes == "orphaned_delete_record" + + @pytest.mark.parametrize( + "is_forecast,revision_number,legacy_lookup_value", + [(True, None, "90"), (False, None, "xx"), (True, 5, "50"), (False, 10, "1")], + ) + def test_process_applicant_types_but_invalid_lookup_value( + self, + db_session, + transform_applicant_type, + is_forecast, + revision_number, + legacy_lookup_value, 
+ ): + opportunity_summary = f.OpportunitySummaryFactory.create( + is_forecast=is_forecast, revision_number=revision_number, no_link_values=True + ) + insert_but_invalid_value = setup_applicant_type( + create_existing=False, + opportunity_summary=opportunity_summary, + legacy_lookup_value=legacy_lookup_value, + ) + + with pytest.raises(ValueError, match="Unrecognized applicant type"): + transform_applicant_type.process_link_applicant_type( + insert_but_invalid_value, None, opportunity_summary + ) + + @pytest.mark.parametrize( + "factory_cls", + [f.StagingTapplicanttypesForecastFactory, f.StagingTapplicanttypesSynopsisFactory], + ) + def test_process_applicant_type_but_no_opportunity_summary_non_hist( + self, + db_session, + transform_applicant_type, + factory_cls, + ): + source_record = factory_cls.create(orphaned_record=True) + + with pytest.raises( + ValueError, + match="Applicant type record cannot be processed as the opportunity summary for it does not exist", + ): + transform_applicant_type.process_link_applicant_type(source_record, None, None) + + @pytest.mark.parametrize( + "factory_cls", + [f.StagingTapplicanttypesForecastHistFactory, f.StagingTapplicanttypesSynopsisHistFactory], + ) + def test_process_applicant_type_but_no_opportunity_summary_hist( + self, + db_session, + transform_applicant_type, + factory_cls, + ): + source_record = factory_cls.create(orphaned_record=True, revision_number=12) + transform_applicant_type.process_link_applicant_type(source_record, None, None) + assert source_record.transformed_at is not None + assert source_record.transformation_notes == "orphaned_historical_record" diff --git a/api/tests/src/data_migration/transformation/subtask/test_transform_assistance_listing.py b/api/tests/src/data_migration/transformation/subtask/test_transform_assistance_listing.py new file mode 100644 index 000000000..91c6572e6 --- /dev/null +++ b/api/tests/src/data_migration/transformation/subtask/test_transform_assistance_listing.py @@ -0,0 +1,157 @@ +import pytest + +import src.data_migration.transformation.transform_constants as transform_constants +import tests.src.db.models.factories as f +from src.data_migration.transformation.subtask.transform_assistance_listing import ( + TransformAssistanceListing, +) +from src.db.models.opportunity_models import Opportunity, OpportunityAssistanceListing +from tests.src.data_migration.transformation.conftest import ( + BaseTransformTestClass, + setup_cfda, + validate_assistance_listing, +) + + +class TestTransformAssistanceListing(BaseTransformTestClass): + @pytest.fixture() + def transform_assistance_listing(self, transform_oracle_data_task): + return TransformAssistanceListing(transform_oracle_data_task) + + def test_process_opportunity_assistance_listings( + self, db_session, transform_assistance_listing + ): + opportunity1 = f.OpportunityFactory.create(opportunity_assistance_listings=[]) + cfda_insert1 = setup_cfda(create_existing=False, opportunity=opportunity1) + cfda_insert2 = setup_cfda(create_existing=False, opportunity=opportunity1) + cfda_update1 = setup_cfda(create_existing=True, opportunity=opportunity1) + cfda_delete1 = setup_cfda(create_existing=True, is_delete=True, opportunity=opportunity1) + cfda_update_already_processed1 = setup_cfda( + create_existing=True, is_already_processed=True, opportunity=opportunity1 + ) + + opportunity2 = f.OpportunityFactory.create(opportunity_assistance_listings=[]) + cfda_insert3 = setup_cfda(create_existing=False, opportunity=opportunity2) + cfda_update_already_processed2 = 
setup_cfda( + create_existing=True, is_already_processed=True, opportunity=opportunity2 + ) + cfda_delete_already_processed1 = setup_cfda( + create_existing=False, + is_already_processed=True, + is_delete=True, + opportunity=opportunity2, + ) + cfda_delete2 = setup_cfda(create_existing=True, is_delete=True, opportunity=opportunity2) + + opportunity3 = f.OpportunityFactory.create(opportunity_assistance_listings=[]) + cfda_update2 = setup_cfda(create_existing=True, opportunity=opportunity3) + cfda_delete_but_current_missing = setup_cfda( + create_existing=False, is_delete=True, opportunity=opportunity3 + ) + + cfda_insert_without_opportunity = setup_cfda( + create_existing=False, source_values={"opportunity_id": 12345678}, opportunity=None + ) + cfda_delete_without_opportunity = setup_cfda( + create_existing=False, source_values={"opportunity_id": 34567890}, opportunity=None + ) + + transform_assistance_listing.run_subtask() + + validate_assistance_listing(db_session, cfda_insert1) + validate_assistance_listing(db_session, cfda_insert2) + validate_assistance_listing(db_session, cfda_insert3) + validate_assistance_listing(db_session, cfda_update1) + validate_assistance_listing(db_session, cfda_update2) + validate_assistance_listing(db_session, cfda_delete1, expect_in_db=False) + validate_assistance_listing(db_session, cfda_delete2, expect_in_db=False) + + # Records that won't have been fetched + validate_assistance_listing( + db_session, + cfda_update_already_processed1, + expect_in_db=True, + expect_values_to_match=False, + ) + validate_assistance_listing( + db_session, + cfda_update_already_processed2, + expect_in_db=True, + expect_values_to_match=False, + ) + validate_assistance_listing(db_session, cfda_delete_already_processed1, expect_in_db=False) + + validate_assistance_listing(db_session, cfda_delete_but_current_missing, expect_in_db=False) + + validate_assistance_listing(db_session, cfda_insert_without_opportunity, expect_in_db=False) + validate_assistance_listing(db_session, cfda_delete_without_opportunity, expect_in_db=False) + + metrics = transform_assistance_listing.metrics + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_PROCESSED] == 10 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_DELETED] == 2 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_INSERTED] == 3 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_UPDATED] == 2 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_ORPHANED] == 2 + assert metrics[transform_constants.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1 + + # Rerunning finds nothing - no metrics update + db_session.commit() # commit to end any existing transactions as run_subtask starts a new one + transform_assistance_listing.run_subtask() + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_PROCESSED] == 10 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_DELETED] == 2 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_INSERTED] == 3 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_UPDATED] == 2 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_ORPHANED] == 2 + assert metrics[transform_constants.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1 + + def test_process_assistance_listing_orphaned_record( + self, db_session, transform_assistance_listing + ): + cfda_insert_without_opportunity = setup_cfda( + create_existing=False, source_values={"opportunity_id": 987654321}, opportunity=None + ) + + # Verify it gets marked as transformed + assert 
cfda_insert_without_opportunity.transformed_at is None + transform_assistance_listing.process_assistance_listing( + cfda_insert_without_opportunity, None, None + ) + assert cfda_insert_without_opportunity.transformed_at is not None + assert cfda_insert_without_opportunity.transformation_notes == "orphaned_cfda" + assert ( + transform_assistance_listing.metrics[transform_constants.Metrics.TOTAL_RECORDS_ORPHANED] + == 1 + ) + + # Verify nothing actually gets created + opportunity = ( + db_session.query(Opportunity) + .filter(Opportunity.opportunity_id == cfda_insert_without_opportunity.opportunity_id) + .one_or_none() + ) + assert opportunity is None + assistance_listing = ( + db_session.query(OpportunityAssistanceListing) + .filter( + OpportunityAssistanceListing.opportunity_assistance_listing_id + == cfda_insert_without_opportunity.opp_cfda_id + ) + .one_or_none() + ) + assert assistance_listing is None + + def test_process_assistance_listing_delete_but_current_missing( + self, db_session, transform_assistance_listing + ): + opportunity = f.OpportunityFactory.create(opportunity_assistance_listings=[]) + delete_but_current_missing = setup_cfda( + create_existing=False, is_delete=True, opportunity=opportunity + ) + + transform_assistance_listing.process_assistance_listing( + delete_but_current_missing, None, opportunity + ) + + validate_assistance_listing(db_session, delete_but_current_missing, expect_in_db=False) + assert delete_but_current_missing.transformed_at is not None + assert delete_but_current_missing.transformation_notes == "orphaned_delete_record" diff --git a/api/tests/src/data_migration/transformation/subtask/test_transform_funding_category.py b/api/tests/src/data_migration/transformation/subtask/test_transform_funding_category.py new file mode 100644 index 000000000..c0aa04e71 --- /dev/null +++ b/api/tests/src/data_migration/transformation/subtask/test_transform_funding_category.py @@ -0,0 +1,374 @@ +import pytest + +import src.data_migration.transformation.transform_constants as transform_constants +import tests.src.db.models.factories as f +from src.constants.lookup_constants import FundingCategory +from src.data_migration.transformation.subtask.transform_funding_category import ( + TransformFundingCategory, +) +from tests.src.data_migration.transformation.conftest import ( + BaseTransformTestClass, + setup_funding_category, + validate_funding_category, +) + + +class TestTransformFundingCategory(BaseTransformTestClass): + @pytest.fixture() + def transform_funding_category(self, transform_oracle_data_task): + return TransformFundingCategory(transform_oracle_data_task) + + def test_process_funding_categories(self, db_session, transform_funding_category): + opportunity_summary_forecast = f.OpportunitySummaryFactory.create( + is_forecast=True, revision_number=None, no_link_values=True + ) + forecast_insert1 = setup_funding_category( + create_existing=False, + opportunity_summary=opportunity_summary_forecast, + legacy_lookup_value="RA", + ) + forecast_insert2 = setup_funding_category( + create_existing=False, + opportunity_summary=opportunity_summary_forecast, + legacy_lookup_value="AG", + ) + forecast_update1 = setup_funding_category( + create_existing=True, + opportunity_summary=opportunity_summary_forecast, + legacy_lookup_value="AR", + funding_category=FundingCategory.ARTS, + ) + forecast_delete1 = setup_funding_category( + create_existing=True, + is_delete=True, + opportunity_summary=opportunity_summary_forecast, + legacy_lookup_value="BC", + 
funding_category=FundingCategory.BUSINESS_AND_COMMERCE, + ) + forecast_delete2 = setup_funding_category( + create_existing=True, + is_delete=True, + opportunity_summary=opportunity_summary_forecast, + legacy_lookup_value="CD", + funding_category=FundingCategory.COMMUNITY_DEVELOPMENT, + ) + forecast_update_already_processed = setup_funding_category( + create_existing=True, + is_already_processed=True, + opportunity_summary=opportunity_summary_forecast, + legacy_lookup_value="CP", + funding_category=FundingCategory.CONSUMER_PROTECTION, + ) + + opportunity_summary_forecast_hist = f.OpportunitySummaryFactory.create( + is_forecast=True, revision_number=3, no_link_values=True + ) + forecast_hist_insert1 = setup_funding_category( + create_existing=False, + opportunity_summary=opportunity_summary_forecast_hist, + legacy_lookup_value="DPR", + ) + forecast_hist_insert2 = setup_funding_category( + create_existing=False, + opportunity_summary=opportunity_summary_forecast_hist, + legacy_lookup_value="ED", + ) + forecast_hist_update1 = setup_funding_category( + create_existing=True, + opportunity_summary=opportunity_summary_forecast_hist, + legacy_lookup_value="ELT", + funding_category=FundingCategory.EMPLOYMENT_LABOR_AND_TRAINING, + ) + forecast_hist_delete1 = setup_funding_category( + create_existing=True, + is_delete=True, + opportunity_summary=opportunity_summary_forecast_hist, + legacy_lookup_value="EN", + funding_category=FundingCategory.ENERGY, + ) + forecast_hist_delete_already_processed = setup_funding_category( + create_existing=False, + is_delete=True, + is_already_processed=True, + opportunity_summary=opportunity_summary_forecast_hist, + legacy_lookup_value="ENV", + ) + + opportunity_summary_syn = f.OpportunitySummaryFactory.create( + is_forecast=False, revision_number=None, no_link_values=True + ) + syn_insert1 = setup_funding_category( + create_existing=False, + opportunity_summary=opportunity_summary_syn, + legacy_lookup_value="FN", + ) + syn_insert2 = setup_funding_category( + create_existing=False, + opportunity_summary=opportunity_summary_syn, + legacy_lookup_value="HL", + ) + syn_update1 = setup_funding_category( + create_existing=True, + opportunity_summary=opportunity_summary_syn, + legacy_lookup_value="HO", + funding_category=FundingCategory.HOUSING, + ) + syn_delete1 = setup_funding_category( + create_existing=True, + is_delete=True, + opportunity_summary=opportunity_summary_syn, + legacy_lookup_value="HU", + funding_category=FundingCategory.HUMANITIES, + ) + syn_delete2 = setup_funding_category( + create_existing=True, + is_delete=True, + opportunity_summary=opportunity_summary_syn, + legacy_lookup_value="IIJ", + funding_category=FundingCategory.INFRASTRUCTURE_INVESTMENT_AND_JOBS_ACT, + ) + syn_delete_but_current_missing = setup_funding_category( + create_existing=False, + is_delete=True, + opportunity_summary=opportunity_summary_syn, + legacy_lookup_value="IS", + ) + syn_update_already_processed = setup_funding_category( + create_existing=True, + is_already_processed=True, + opportunity_summary=opportunity_summary_syn, + legacy_lookup_value="ISS", + funding_category=FundingCategory.INCOME_SECURITY_AND_SOCIAL_SERVICES, + ) + + opportunity_summary_syn_hist = f.OpportunitySummaryFactory.create( + is_forecast=False, revision_number=21, no_link_values=True + ) + syn_hist_insert1 = setup_funding_category( + create_existing=False, + opportunity_summary=opportunity_summary_syn_hist, + legacy_lookup_value="LJL", + ) + syn_hist_insert2 = setup_funding_category( + create_existing=False, + 
opportunity_summary=opportunity_summary_syn_hist, + legacy_lookup_value="NR", + ) + syn_hist_insert3 = setup_funding_category( + create_existing=False, + opportunity_summary=opportunity_summary_syn_hist, + legacy_lookup_value="OZ", + ) + syn_hist_update1 = setup_funding_category( + create_existing=True, + opportunity_summary=opportunity_summary_syn_hist, + legacy_lookup_value="RD", + funding_category=FundingCategory.REGIONAL_DEVELOPMENT, + ) + + syn_hist_delete1 = setup_funding_category( + create_existing=True, + is_delete=True, + opportunity_summary=opportunity_summary_syn_hist, + legacy_lookup_value="ST", + funding_category=FundingCategory.SCIENCE_TECHNOLOGY_AND_OTHER_RESEARCH_AND_DEVELOPMENT, + ) + syn_hist_delete2 = setup_funding_category( + create_existing=True, + is_delete=True, + opportunity_summary=opportunity_summary_syn_hist, + legacy_lookup_value="T", + funding_category=FundingCategory.TRANSPORTATION, + ) + syn_hist_insert_invalid_type = setup_funding_category( + create_existing=False, + opportunity_summary=opportunity_summary_syn_hist, + legacy_lookup_value="XYZ", + funding_category=FundingCategory.HEALTH, + ) + + transform_funding_category.run_subtask() + + validate_funding_category( + db_session, forecast_insert1, expected_funding_category=FundingCategory.RECOVERY_ACT + ) + validate_funding_category( + db_session, forecast_insert2, expected_funding_category=FundingCategory.AGRICULTURE + ) + validate_funding_category( + db_session, + forecast_hist_insert1, + expected_funding_category=FundingCategory.DISASTER_PREVENTION_AND_RELIEF, + ) + validate_funding_category( + db_session, forecast_hist_insert2, expected_funding_category=FundingCategory.EDUCATION + ) + validate_funding_category( + db_session, syn_insert1, expected_funding_category=FundingCategory.FOOD_AND_NUTRITION + ) + validate_funding_category( + db_session, syn_insert2, expected_funding_category=FundingCategory.HEALTH + ) + validate_funding_category( + db_session, + syn_hist_insert1, + expected_funding_category=FundingCategory.LAW_JUSTICE_AND_LEGAL_SERVICES, + ) + validate_funding_category( + db_session, + syn_hist_insert2, + expected_funding_category=FundingCategory.NATURAL_RESOURCES, + ) + validate_funding_category( + db_session, + syn_hist_insert3, + expected_funding_category=FundingCategory.OPPORTUNITY_ZONE_BENEFITS, + ) + + validate_funding_category( + db_session, forecast_update1, expected_funding_category=FundingCategory.ARTS + ) + validate_funding_category( + db_session, + forecast_hist_update1, + expected_funding_category=FundingCategory.EMPLOYMENT_LABOR_AND_TRAINING, + ) + validate_funding_category( + db_session, syn_update1, expected_funding_category=FundingCategory.HOUSING + ) + validate_funding_category( + db_session, + syn_hist_update1, + expected_funding_category=FundingCategory.REGIONAL_DEVELOPMENT, + ) + + validate_funding_category(db_session, forecast_delete1, expect_in_db=False) + validate_funding_category(db_session, forecast_delete2, expect_in_db=False) + validate_funding_category(db_session, forecast_hist_delete1, expect_in_db=False) + validate_funding_category(db_session, syn_delete1, expect_in_db=False) + validate_funding_category(db_session, syn_delete2, expect_in_db=False) + validate_funding_category(db_session, syn_hist_delete1, expect_in_db=False) + validate_funding_category(db_session, syn_hist_delete2, expect_in_db=False) + + validate_funding_category( + db_session, + forecast_update_already_processed, + expected_funding_category=FundingCategory.CONSUMER_PROTECTION, + 
expect_values_to_match=False, + ) + validate_funding_category( + db_session, forecast_hist_delete_already_processed, expect_in_db=False + ) + validate_funding_category( + db_session, + syn_update_already_processed, + expected_funding_category=FundingCategory.INCOME_SECURITY_AND_SOCIAL_SERVICES, + expect_values_to_match=False, + ) + + validate_funding_category( + db_session, syn_delete_but_current_missing, expect_in_db=False, was_processed=True + ) + validate_funding_category( + db_session, syn_hist_insert_invalid_type, expect_in_db=False, was_processed=False + ) + + metrics = transform_funding_category.metrics + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_PROCESSED] == 22 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_DELETED] == 7 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_INSERTED] == 9 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_UPDATED] == 4 + assert metrics[transform_constants.Metrics.TOTAL_ERROR_COUNT] == 1 + assert metrics[transform_constants.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1 + + # Rerunning will only attempt to re-process the errors, so total+errors goes up by 1 + db_session.commit() # commit to end any existing transactions as run_subtask starts a new one + transform_funding_category.run_subtask() + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_PROCESSED] == 23 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_DELETED] == 7 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_INSERTED] == 9 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_UPDATED] == 4 + assert metrics[transform_constants.Metrics.TOTAL_ERROR_COUNT] == 2 + assert metrics[transform_constants.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1 + + @pytest.mark.parametrize( + "is_forecast,revision_number", [(True, None), (False, None), (True, 1), (False, 70)] + ) + def test_process_funding_category_but_current_missing( + self, db_session, transform_funding_category, is_forecast, revision_number + ): + opportunity_summary = f.OpportunitySummaryFactory.create( + is_forecast=is_forecast, revision_number=revision_number, no_link_values=True + ) + delete_but_current_missing = setup_funding_category( + create_existing=False, + opportunity_summary=opportunity_summary, + legacy_lookup_value="00", + is_delete=True, + ) + + transform_funding_category.process_link_funding_category( + delete_but_current_missing, None, opportunity_summary + ) + + validate_funding_category(db_session, delete_but_current_missing, expect_in_db=False) + assert delete_but_current_missing.transformed_at is not None + assert delete_but_current_missing.transformation_notes == "orphaned_delete_record" + + @pytest.mark.parametrize( + "is_forecast,revision_number,legacy_lookup_value", + [(True, None, "ab"), (False, None, "cd"), (True, 5, "ef"), (False, 10, "Ag")], + ) + def test_process_funding_category_but_invalid_lookup_value( + self, + db_session, + transform_funding_category, + is_forecast, + revision_number, + legacy_lookup_value, + ): + opportunity_summary = f.OpportunitySummaryFactory.create( + is_forecast=is_forecast, revision_number=revision_number, no_link_values=True + ) + insert_but_invalid_value = setup_funding_category( + create_existing=False, + opportunity_summary=opportunity_summary, + legacy_lookup_value=legacy_lookup_value, + ) + + with pytest.raises(ValueError, match="Unrecognized funding category"): + transform_funding_category.process_link_funding_category( + insert_but_invalid_value, None, opportunity_summary + ) + + @pytest.mark.parametrize( + 
"factory_cls", [f.StagingTfundactcatForecastFactory, f.StagingTfundactcatSynopsisFactory] + ) + def test_process_funding_category_but_no_opportunity_summary_non_hist( + self, + db_session, + transform_funding_category, + factory_cls, + ): + source_record = factory_cls.create(orphaned_record=True) + + with pytest.raises( + ValueError, + match="Funding category record cannot be processed as the opportunity summary for it does not exist", + ): + transform_funding_category.process_link_funding_category(source_record, None, None) + + @pytest.mark.parametrize( + "factory_cls", + [f.StagingTfundactcatForecastHistFactory, f.StagingTfundactcatSynopsisHistFactory], + ) + def test_process_funding_category_but_no_opportunity_summary_hist( + self, + db_session, + transform_funding_category, + factory_cls, + ): + source_record = factory_cls.create(orphaned_record=True, revision_number=12) + transform_funding_category.process_link_funding_category(source_record, None, None) + assert source_record.transformed_at is not None + assert source_record.transformation_notes == "orphaned_historical_record" diff --git a/api/tests/src/data_migration/transformation/subtask/test_transform_funding_instrument.py b/api/tests/src/data_migration/transformation/subtask/test_transform_funding_instrument.py new file mode 100644 index 000000000..6a85d9b2a --- /dev/null +++ b/api/tests/src/data_migration/transformation/subtask/test_transform_funding_instrument.py @@ -0,0 +1,298 @@ +import pytest + +import src.data_migration.transformation.transform_constants as transform_constants +import tests.src.db.models.factories as f +from src.constants.lookup_constants import FundingInstrument +from src.data_migration.transformation.subtask.transform_funding_instrument import ( + TransformFundingInstrument, +) +from tests.src.data_migration.transformation.conftest import ( + BaseTransformTestClass, + setup_funding_instrument, + validate_funding_instrument, +) + + +class TestTransformFundingInstrument(BaseTransformTestClass): + @pytest.fixture() + def transform_funding_instrument(self, transform_oracle_data_task): + return TransformFundingInstrument(transform_oracle_data_task) + + def test_process_funding_instruments(self, db_session, transform_funding_instrument): + opportunity_summary_forecast = f.OpportunitySummaryFactory.create( + is_forecast=True, revision_number=None, no_link_values=True + ) + forecast_insert1 = setup_funding_instrument( + create_existing=False, + opportunity_summary=opportunity_summary_forecast, + legacy_lookup_value="CA", + ) + forecast_update1 = setup_funding_instrument( + create_existing=True, + opportunity_summary=opportunity_summary_forecast, + legacy_lookup_value="G", + funding_instrument=FundingInstrument.GRANT, + ) + forecast_delete1 = setup_funding_instrument( + create_existing=True, + is_delete=True, + opportunity_summary=opportunity_summary_forecast, + legacy_lookup_value="PC", + funding_instrument=FundingInstrument.PROCUREMENT_CONTRACT, + ) + forecast_update_already_processed = setup_funding_instrument( + create_existing=True, + is_already_processed=True, + opportunity_summary=opportunity_summary_forecast, + legacy_lookup_value="O", + funding_instrument=FundingInstrument.OTHER, + ) + + opportunity_summary_forecast_hist = f.OpportunitySummaryFactory.create( + is_forecast=True, revision_number=3, no_link_values=True + ) + forecast_hist_insert1 = setup_funding_instrument( + create_existing=False, + opportunity_summary=opportunity_summary_forecast_hist, + legacy_lookup_value="G", + ) + 
forecast_hist_delete1 = setup_funding_instrument( + create_existing=True, + is_delete=True, + opportunity_summary=opportunity_summary_forecast_hist, + legacy_lookup_value="CA", + funding_instrument=FundingInstrument.COOPERATIVE_AGREEMENT, + ) + forecast_hist_delete_already_processed = setup_funding_instrument( + create_existing=False, + is_delete=True, + is_already_processed=True, + opportunity_summary=opportunity_summary_forecast_hist, + legacy_lookup_value="O", + ) + syn_delete_but_current_missing = setup_funding_instrument( + create_existing=False, + is_delete=True, + opportunity_summary=opportunity_summary_forecast_hist, + legacy_lookup_value="PC", + ) + + opportunity_summary_syn = f.OpportunitySummaryFactory.create( + is_forecast=False, revision_number=None, no_link_values=True + ) + syn_insert1 = setup_funding_instrument( + create_existing=False, + opportunity_summary=opportunity_summary_syn, + legacy_lookup_value="O", + ) + syn_insert2 = setup_funding_instrument( + create_existing=False, + opportunity_summary=opportunity_summary_syn, + legacy_lookup_value="G", + ) + syn_delete1 = setup_funding_instrument( + create_existing=True, + is_delete=True, + opportunity_summary=opportunity_summary_syn, + legacy_lookup_value="CA", + funding_instrument=FundingInstrument.COOPERATIVE_AGREEMENT, + ) + syn_update_already_processed = setup_funding_instrument( + create_existing=True, + is_already_processed=True, + opportunity_summary=opportunity_summary_syn, + legacy_lookup_value="PC", + funding_instrument=FundingInstrument.PROCUREMENT_CONTRACT, + ) + + opportunity_summary_syn_hist = f.OpportunitySummaryFactory.create( + is_forecast=False, revision_number=21, no_link_values=True + ) + syn_hist_insert1 = setup_funding_instrument( + create_existing=False, + opportunity_summary=opportunity_summary_syn_hist, + legacy_lookup_value="CA", + ) + syn_hist_update1 = setup_funding_instrument( + create_existing=True, + opportunity_summary=opportunity_summary_syn_hist, + legacy_lookup_value="O", + funding_instrument=FundingInstrument.OTHER, + ) + syn_hist_delete1 = setup_funding_instrument( + create_existing=True, + is_delete=True, + opportunity_summary=opportunity_summary_syn_hist, + legacy_lookup_value="PC", + funding_instrument=FundingInstrument.PROCUREMENT_CONTRACT, + ) + syn_hist_delete2 = setup_funding_instrument( + create_existing=True, + is_delete=True, + opportunity_summary=opportunity_summary_syn_hist, + legacy_lookup_value="G", + funding_instrument=FundingInstrument.GRANT, + ) + syn_hist_insert_invalid_type = setup_funding_instrument( + create_existing=False, + opportunity_summary=opportunity_summary_syn_hist, + legacy_lookup_value="X", + ) + + transform_funding_instrument.run_subtask() + + validate_funding_instrument( + db_session, + forecast_insert1, + expected_funding_instrument=FundingInstrument.COOPERATIVE_AGREEMENT, + ) + validate_funding_instrument( + db_session, forecast_hist_insert1, expected_funding_instrument=FundingInstrument.GRANT + ) + validate_funding_instrument( + db_session, syn_insert1, expected_funding_instrument=FundingInstrument.OTHER + ) + validate_funding_instrument( + db_session, syn_insert2, expected_funding_instrument=FundingInstrument.GRANT + ) + validate_funding_instrument( + db_session, + syn_hist_insert1, + expected_funding_instrument=FundingInstrument.COOPERATIVE_AGREEMENT, + ) + + validate_funding_instrument( + db_session, forecast_update1, expected_funding_instrument=FundingInstrument.GRANT + ) + validate_funding_instrument( + db_session, syn_hist_update1, 
expected_funding_instrument=FundingInstrument.OTHER + ) + + validate_funding_instrument(db_session, forecast_delete1, expect_in_db=False) + validate_funding_instrument(db_session, forecast_hist_delete1, expect_in_db=False) + validate_funding_instrument(db_session, syn_delete1, expect_in_db=False) + validate_funding_instrument(db_session, syn_hist_delete1, expect_in_db=False) + validate_funding_instrument(db_session, syn_hist_delete2, expect_in_db=False) + + validate_funding_instrument( + db_session, + forecast_update_already_processed, + expected_funding_instrument=FundingInstrument.OTHER, + expect_values_to_match=False, + ) + validate_funding_instrument( + db_session, forecast_hist_delete_already_processed, expect_in_db=False + ) + validate_funding_instrument( + db_session, + syn_update_already_processed, + expected_funding_instrument=FundingInstrument.PROCUREMENT_CONTRACT, + expect_values_to_match=False, + ) + + validate_funding_instrument( + db_session, syn_delete_but_current_missing, expect_in_db=False, was_processed=True + ) + validate_funding_instrument( + db_session, syn_hist_insert_invalid_type, expect_in_db=False, was_processed=False + ) + + metrics = transform_funding_instrument.metrics + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_PROCESSED] == 14 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_DELETED] == 5 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_INSERTED] == 5 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_UPDATED] == 2 + assert metrics[transform_constants.Metrics.TOTAL_ERROR_COUNT] == 1 + assert metrics[transform_constants.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1 + + # Rerunning will only attempt to re-process the errors, so total+errors goes up by 1 + db_session.commit() # commit to end any existing transactions as run_subtask starts a new one + transform_funding_instrument.run_subtask() + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_PROCESSED] == 15 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_DELETED] == 5 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_INSERTED] == 5 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_UPDATED] == 2 + assert metrics[transform_constants.Metrics.TOTAL_ERROR_COUNT] == 2 + assert metrics[transform_constants.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1 + + @pytest.mark.parametrize( + "is_forecast,revision_number", [(True, None), (False, None), (True, 1), (False, 4)] + ) + def test_process_funding_instrument_but_current_missing( + self, db_session, transform_funding_instrument, is_forecast, revision_number + ): + opportunity_summary = f.OpportunitySummaryFactory.create( + is_forecast=is_forecast, revision_number=revision_number, no_link_values=True + ) + delete_but_current_missing = setup_funding_instrument( + create_existing=False, + opportunity_summary=opportunity_summary, + legacy_lookup_value="G", + is_delete=True, + ) + + transform_funding_instrument.process_link_funding_instrument( + delete_but_current_missing, None, opportunity_summary + ) + + validate_funding_instrument(db_session, delete_but_current_missing, expect_in_db=False) + assert delete_but_current_missing.transformed_at is not None + assert delete_but_current_missing.transformation_notes == "orphaned_delete_record" + + @pytest.mark.parametrize( + "is_forecast,revision_number,legacy_lookup_value", + [(True, None, "X"), (False, None, "4"), (True, 5, "Y"), (False, 10, "A")], + ) + def test_process_funding_instrument_but_invalid_lookup_value( + self, + db_session,
transform_funding_instrument, + is_forecast, + revision_number, + legacy_lookup_value, + ): + opportunity_summary = f.OpportunitySummaryFactory.create( + is_forecast=is_forecast, revision_number=revision_number, no_link_values=True + ) + insert_but_invalid_value = setup_funding_instrument( + create_existing=False, + opportunity_summary=opportunity_summary, + legacy_lookup_value=legacy_lookup_value, + ) + + with pytest.raises(ValueError, match="Unrecognized funding instrument"): + transform_funding_instrument.process_link_funding_instrument( + insert_but_invalid_value, None, opportunity_summary + ) + + @pytest.mark.parametrize( + "factory_cls", [f.StagingTfundinstrForecastFactory, f.StagingTfundinstrSynopsisFactory] + ) + def test_process_funding_instrument_but_no_opportunity_summary_non_hist( + self, + db_session, + transform_funding_instrument, + factory_cls, + ): + source_record = factory_cls.create(orphaned_record=True) + + with pytest.raises( + ValueError, + match="Funding instrument record cannot be processed as the opportunity summary for it does not exist", + ): + transform_funding_instrument.process_link_funding_instrument(source_record, None, None) + + @pytest.mark.parametrize( + "factory_cls", + [f.StagingTfundinstrForecastHistFactory, f.StagingTfundinstrSynopsisHistFactory], + ) + def test_process_funding_instrument_but_no_opportunity_summary_hist( + self, + db_session, + transform_funding_instrument, + factory_cls, + ): + source_record = factory_cls.create(orphaned_record=True, revision_number=12) + transform_funding_instrument.process_link_funding_instrument(source_record, None, None) + assert source_record.transformed_at is not None + assert source_record.transformation_notes == "orphaned_historical_record" diff --git a/api/tests/src/data_migration/transformation/subtask/test_transform_opportunity.py b/api/tests/src/data_migration/transformation/subtask/test_transform_opportunity.py new file mode 100644 index 000000000..ae89652ba --- /dev/null +++ b/api/tests/src/data_migration/transformation/subtask/test_transform_opportunity.py @@ -0,0 +1,110 @@ +import pytest + +import src.data_migration.transformation.transform_constants as transform_constants +from src.data_migration.transformation.subtask.transform_opportunity import TransformOpportunity +from tests.src.data_migration.transformation.conftest import ( + BaseTransformTestClass, + setup_opportunity, + validate_opportunity, +) + + +class TestTransformOpportunity(BaseTransformTestClass): + @pytest.fixture() + def transform_opportunity(self, transform_oracle_data_task): + return TransformOpportunity(transform_oracle_data_task) + + def test_process_opportunities(self, db_session, transform_opportunity): + ordinary_delete = setup_opportunity( + create_existing=True, is_delete=True, all_fields_null=True + ) + ordinary_delete2 = setup_opportunity( + create_existing=True, is_delete=True, all_fields_null=False + ) + delete_but_current_missing = setup_opportunity(create_existing=False, is_delete=True) + + basic_insert = setup_opportunity(create_existing=False) + basic_insert2 = setup_opportunity(create_existing=False, all_fields_null=True) + basic_insert3 = setup_opportunity(create_existing=False) + + basic_update = setup_opportunity( + create_existing=True, + ) + basic_update2 = setup_opportunity(create_existing=True, all_fields_null=True) + basic_update3 = setup_opportunity(create_existing=True, all_fields_null=True) + basic_update4 = setup_opportunity(create_existing=True) + + # Something else deleted it + 
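+ # (the staging row is marked as already transformed but no destination record exists - + # presumably a prior run inserted it and something else has since deleted it, so this + # run should skip the record rather than recreate it)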
already_processed_insert = setup_opportunity( + create_existing=False, is_already_processed=True + ) + already_processed_update = setup_opportunity( + create_existing=True, is_already_processed=True + ) + + insert_that_will_fail = setup_opportunity( + create_existing=False, source_values={"oppcategory": "X"} + ) + + transform_opportunity.run_subtask() + + validate_opportunity(db_session, ordinary_delete, expect_in_db=False) + validate_opportunity(db_session, ordinary_delete2, expect_in_db=False) + validate_opportunity(db_session, delete_but_current_missing, expect_in_db=False) + + validate_opportunity(db_session, basic_insert) + validate_opportunity(db_session, basic_insert2) + validate_opportunity(db_session, basic_insert3) + + validate_opportunity(db_session, basic_update) + validate_opportunity(db_session, basic_update2) + validate_opportunity(db_session, basic_update3) + validate_opportunity(db_session, basic_update4) + + validate_opportunity(db_session, already_processed_insert, expect_in_db=False) + validate_opportunity(db_session, already_processed_update, expect_values_to_match=False) + + validate_opportunity(db_session, insert_that_will_fail, expect_in_db=False) + + metrics = transform_opportunity.metrics + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_PROCESSED] == 11 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_DELETED] == 2 + # Note this insert counts the case where the category fails + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_INSERTED] == 3 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_UPDATED] == 4 + assert metrics[transform_constants.Metrics.TOTAL_ERROR_COUNT] == 2 + + # Rerunning does mostly nothing, it will attempt to re-process the two that errored + # but otherwise won't find anything else + db_session.commit() # commit to end any existing transactions as run_subtask starts a new one + transform_opportunity.run_subtask() + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_PROCESSED] == 13 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_DELETED] == 2 + # Note this insert counts the case where the category fails + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_INSERTED] == 3 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_UPDATED] == 4 + assert metrics[transform_constants.Metrics.TOTAL_ERROR_COUNT] == 4 + + def test_process_opportunity_delete_but_current_missing( + self, db_session, transform_opportunity + ): + # Verify an error is raised when we try to delete something that doesn't exist + delete_but_current_missing = setup_opportunity(create_existing=False, is_delete=True) + + with pytest.raises( + ValueError, match="Cannot delete opportunity record as it does not exist" + ): + transform_opportunity.process_opportunity(delete_but_current_missing, None) + + validate_opportunity(db_session, delete_but_current_missing, expect_in_db=False) + + def test_process_opportunity_invalid_category(self, db_session, transform_opportunity): + # This will error in the transform as that isn't a category we have configured + insert_that_will_fail = setup_opportunity( + create_existing=False, source_values={"oppcategory": "X"} + ) + + with pytest.raises(ValueError, match="Unrecognized opportunity category"): + transform_opportunity.process_opportunity(insert_that_will_fail, None) + + validate_opportunity(db_session, insert_that_will_fail, expect_in_db=False) diff --git a/api/tests/src/data_migration/transformation/subtask/test_transform_opportunity_summary.py 
b/api/tests/src/data_migration/transformation/subtask/test_transform_opportunity_summary.py new file mode 100644 index 000000000..5dcec4f56 --- /dev/null +++ b/api/tests/src/data_migration/transformation/subtask/test_transform_opportunity_summary.py @@ -0,0 +1,280 @@ +import pytest + +import src.data_migration.transformation.transform_constants as transform_constants +import tests.src.db.models.factories as f +from src.data_migration.transformation.subtask.transform_opportunity_summary import ( + TransformOpportunitySummary, +) +from tests.src.data_migration.transformation.conftest import ( + BaseTransformTestClass, + setup_synopsis_forecast, + validate_opportunity_summary, +) + + +class TestTransformOpportunitySummary(BaseTransformTestClass): + @pytest.fixture() + def transform_opportunity_summary(self, transform_oracle_data_task): + return TransformOpportunitySummary(transform_oracle_data_task) + + def test_process_opportunity_summaries(self, db_session, transform_opportunity_summary): + # Basic inserts + opportunity1 = f.OpportunityFactory.create( + no_current_summary=True, opportunity_assistance_listings=[] + ) + forecast_insert1 = setup_synopsis_forecast( + is_forecast=True, revision_number=None, create_existing=False, opportunity=opportunity1 + ) + synopsis_insert1 = setup_synopsis_forecast( + is_forecast=False, revision_number=None, create_existing=False, opportunity=opportunity1 + ) + forecast_hist_insert1 = setup_synopsis_forecast( + is_forecast=True, revision_number=1, create_existing=False, opportunity=opportunity1 + ) + synopsis_hist_insert1 = setup_synopsis_forecast( + is_forecast=False, revision_number=1, create_existing=False, opportunity=opportunity1 + ) + + # Mix of updates and inserts, somewhat resembling what happens when summary objects + # get moved to the historical table (we'd update the synopsis/forecast records, and create new historical) + opportunity2 = f.OpportunityFactory.create( + no_current_summary=True, opportunity_assistance_listings=[] + ) + forecast_update1 = setup_synopsis_forecast( + is_forecast=True, revision_number=None, create_existing=True, opportunity=opportunity2 + ) + synopsis_update1 = setup_synopsis_forecast( + is_forecast=False, revision_number=None, create_existing=True, opportunity=opportunity2 + ) + forecast_hist_update1 = setup_synopsis_forecast( + is_forecast=True, revision_number=1, create_existing=True, opportunity=opportunity2 + ) + synopsis_hist_update1 = setup_synopsis_forecast( + is_forecast=False, revision_number=1, create_existing=True, opportunity=opportunity2 + ) + forecast_hist_insert2 = setup_synopsis_forecast( + is_forecast=True, revision_number=2, create_existing=False, opportunity=opportunity2 + ) + synopsis_hist_insert2 = setup_synopsis_forecast( + is_forecast=False, revision_number=2, create_existing=False, opportunity=opportunity2 + ) + + # Mix of inserts, updates, and deletes + opportunity3 = f.OpportunityFactory.create( + no_current_summary=True, opportunity_assistance_listings=[] + ) + forecast_delete1 = setup_synopsis_forecast( + is_forecast=True, + revision_number=None, + create_existing=True, + is_delete=True, + opportunity=opportunity3, + ) + synopsis_delete1 = setup_synopsis_forecast( + is_forecast=False, + revision_number=None, + create_existing=True, + is_delete=True, + opportunity=opportunity3, + ) + forecast_hist_insert3 = setup_synopsis_forecast( + is_forecast=True, revision_number=2, create_existing=False, opportunity=opportunity3 + ) + synopsis_hist_update2 = setup_synopsis_forecast( + is_forecast=False, + 
revision_number=1, + create_existing=True, + source_values={"action_type": "D"}, + opportunity=opportunity3, + ) + + # A few error scenarios + opportunity4 = f.OpportunityFactory.create( + no_current_summary=True, opportunity_assistance_listings=[] + ) + forecast_delete_but_current_missing = setup_synopsis_forecast( + is_forecast=True, + revision_number=None, + create_existing=False, + is_delete=True, + opportunity=opportunity4, + ) + synopsis_update_invalid_yn_field = setup_synopsis_forecast( + is_forecast=False, + revision_number=None, + create_existing=True, + source_values={"sendmail": "E"}, + opportunity=opportunity4, + ) + synopsis_hist_insert_invalid_yn_field = setup_synopsis_forecast( + is_forecast=False, + revision_number=1, + create_existing=False, + source_values={"cost_sharing": "1"}, + opportunity=opportunity4, + ) + forecast_hist_update_invalid_action_type = setup_synopsis_forecast( + is_forecast=True, + revision_number=2, + create_existing=True, + source_values={"action_type": "X"}, + opportunity=opportunity4, + ) + + transform_opportunity_summary.run_subtask() + + validate_opportunity_summary(db_session, forecast_insert1) + validate_opportunity_summary(db_session, synopsis_insert1) + validate_opportunity_summary(db_session, forecast_hist_insert1) + validate_opportunity_summary(db_session, synopsis_hist_insert1) + validate_opportunity_summary(db_session, forecast_hist_insert2) + validate_opportunity_summary(db_session, synopsis_hist_insert2) + validate_opportunity_summary(db_session, forecast_hist_insert3) + + validate_opportunity_summary(db_session, forecast_update1) + validate_opportunity_summary(db_session, synopsis_update1) + validate_opportunity_summary(db_session, forecast_hist_update1) + validate_opportunity_summary(db_session, synopsis_hist_update1) + validate_opportunity_summary(db_session, synopsis_hist_update2) + + validate_opportunity_summary(db_session, forecast_delete1, expect_in_db=False) + validate_opportunity_summary(db_session, synopsis_delete1, expect_in_db=False) + + validate_opportunity_summary( + db_session, forecast_delete_but_current_missing, expect_in_db=False + ) + validate_opportunity_summary( + db_session, + synopsis_update_invalid_yn_field, + expect_in_db=True, + expect_values_to_match=False, + ) + validate_opportunity_summary( + db_session, synopsis_hist_insert_invalid_yn_field, expect_in_db=False + ) + validate_opportunity_summary( + db_session, + forecast_hist_update_invalid_action_type, + expect_in_db=True, + expect_values_to_match=False, + ) + + metrics = transform_opportunity_summary.metrics + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_PROCESSED] == 18 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_DELETED] == 2 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_INSERTED] == 7 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_UPDATED] == 5 + assert metrics[transform_constants.Metrics.TOTAL_ERROR_COUNT] == 3 + assert metrics[transform_constants.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1 + + # Rerunning will only attempt to re-process the errors, so total+errors goes up by 3 + db_session.commit() # commit to end any existing transactions as run_subtask starts a new one + transform_opportunity_summary.run_subtask() + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_PROCESSED] == 21 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_DELETED] == 2 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_INSERTED] == 7 + assert metrics[transform_constants.Metrics.TOTAL_RECORDS_UPDATED] 
== 5 + assert metrics[transform_constants.Metrics.TOTAL_ERROR_COUNT] == 6 + assert metrics[transform_constants.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1 + + @pytest.mark.parametrize( + "is_forecast,revision_number", [(True, None), (False, None), (True, 5), (False, 10)] + ) + def test_process_opportunity_summary_delete_but_current_missing( + self, db_session, transform_opportunity_summary, is_forecast, revision_number + ): + opportunity = f.OpportunityFactory.create( + no_current_summary=True, opportunity_assistance_listings=[] + ) + delete_but_current_missing = setup_synopsis_forecast( + is_forecast=is_forecast, + revision_number=revision_number, + create_existing=False, + is_delete=True, + opportunity=opportunity, + ) + + transform_opportunity_summary.process_opportunity_summary( + delete_but_current_missing, None, opportunity + ) + + validate_opportunity_summary(db_session, delete_but_current_missing, expect_in_db=False) + assert delete_but_current_missing.transformed_at is not None + assert delete_but_current_missing.transformation_notes == "orphaned_delete_record" + + @pytest.mark.parametrize( + "is_forecast,revision_number,source_values,expected_error", + [ + (True, None, {"sendmail": "z"}, "Unexpected Y/N bool value: z"), + (False, None, {"cost_sharing": "v"}, "Unexpected Y/N bool value: v"), + (True, 5, {"action_type": "T"}, "Unexpected action type value: T"), + (False, 10, {"action_type": "5"}, "Unexpected action type value: 5"), + ], + ) + def test_process_opportunity_summary_invalid_value_errors( + self, + db_session, + transform_opportunity_summary, + is_forecast, + revision_number, + source_values, + expected_error, + ): + opportunity = f.OpportunityFactory.create( + no_current_summary=True, opportunity_assistance_listings=[] + ) + source_summary = setup_synopsis_forecast( + is_forecast=is_forecast, + revision_number=revision_number, + create_existing=False, + opportunity=opportunity, + source_values=source_values, + ) + + with pytest.raises(ValueError, match=expected_error): + transform_opportunity_summary.process_opportunity_summary( + source_summary, None, opportunity + ) + + @pytest.mark.parametrize("is_forecast", [True, False]) + def test_process_opportunity_summary_but_no_opportunity_non_hist( + self, + db_session, + transform_opportunity_summary, + is_forecast, + ): + source_record = setup_synopsis_forecast( + is_forecast=is_forecast, + revision_number=None, + create_existing=False, + opportunity=None, + source_values={"opportunity_id": 12121212}, + ) + + with pytest.raises( + ValueError, + match="Opportunity summary cannot be processed as the opportunity for it does not exist", + ): + transform_opportunity_summary.process_opportunity_summary(source_record, None, None) + + @pytest.mark.parametrize("is_forecast,revision_number", [(True, 10), (False, 9)]) + def test_process_opportunity_summary_but_no_opportunity_hist( + self, + db_session, + transform_opportunity_summary, + is_forecast, + revision_number, + ): + source_record = setup_synopsis_forecast( + is_forecast=is_forecast, + revision_number=revision_number, + create_existing=False, + opportunity=None, + source_values={"opportunity_id": 12121212}, + ) + + transform_opportunity_summary.process_opportunity_summary(source_record, None, None) + + validate_opportunity_summary(db_session, source_record, expect_in_db=False) + assert source_record.transformed_at is not None + assert source_record.transformation_notes == "orphaned_historical_record" diff --git 
a/api/tests/src/data_migration/transformation/test_transform_oracle_data_task.py b/api/tests/src/data_migration/transformation/test_transform_oracle_data_task.py index f4d07bfdd..21ca27c9a 100644 --- a/api/tests/src/data_migration/transformation/test_transform_oracle_data_task.py +++ b/api/tests/src/data_migration/transformation/test_transform_oracle_data_task.py @@ -1,2194 +1,24 @@ -from typing import Tuple - import pytest import tests.src.db.models.factories as f from src.constants.lookup_constants import ApplicantType, FundingCategory, FundingInstrument from src.data_migration.transformation.transform_oracle_data_task import TransformOracleDataTask from src.db.models import staging -from src.db.models.opportunity_models import ( - LinkOpportunitySummaryApplicantType, - LinkOpportunitySummaryFundingCategory, - LinkOpportunitySummaryFundingInstrument, - Opportunity, - OpportunityAssistanceListing, - OpportunitySummary, -) +from src.db.models.opportunity_models import Opportunity from tests.conftest import BaseTestClass - - -def setup_opportunity( - create_existing: bool, - is_delete: bool = False, - is_already_processed: bool = False, - source_values: dict | None = None, - all_fields_null: bool = False, -) -> staging.opportunity.Topportunity: - if source_values is None: - source_values = {} - - source_opportunity = f.StagingTopportunityFactory.create( - **source_values, - is_deleted=is_delete, - already_transformed=is_already_processed, - all_fields_null=all_fields_null, - cfdas=[], - ) - - if create_existing: - f.OpportunityFactory.create( - opportunity_id=source_opportunity.opportunity_id, - # set created_at/updated_at to an earlier time so it's clear - # when they were last updated - timestamps_in_past=True, - ) - - return source_opportunity - - -def setup_cfda( - create_existing: bool, - is_delete: bool = False, - is_already_processed: bool = False, - source_values: dict | None = None, - all_fields_null: bool = False, - opportunity: Opportunity | None = None, -) -> staging.opportunity.TopportunityCfda: - if source_values is None: - source_values = {} - - # If you don't provide an opportunity, you need to provide an ID - if opportunity is not None: - source_values["opportunity_id"] = opportunity.opportunity_id - - source_cfda = f.StagingTopportunityCfdaFactory.create( - **source_values, - opportunity=None, # To override the factory trying to create something - is_deleted=is_delete, - already_transformed=is_already_processed, - all_fields_null=all_fields_null, - ) - - if create_existing: - f.OpportunityAssistanceListingFactory.create( - opportunity=opportunity, - opportunity_assistance_listing_id=source_cfda.opp_cfda_id, - # set created_at/updated_at to an earlier time so it's clear - # when they were last updated - timestamps_in_past=True, - ) - - return source_cfda - - -def setup_synopsis_forecast( - is_forecast: bool, - revision_number: int | None, - create_existing: bool, - opportunity: Opportunity | None, - is_delete: bool = False, - is_already_processed: bool = False, - source_values: dict | None = None, -): - if source_values is None: - source_values = {} - - if is_forecast: - if revision_number is None: - factory_cls = f.StagingTforecastFactory - else: - factory_cls = f.StagingTforecastHistFactory - else: - if revision_number is None: - factory_cls = f.StagingTsynopsisFactory - else: - factory_cls = f.StagingTsynopsisHistFactory - - if revision_number is not None: - source_values["revision_number"] = revision_number - - if opportunity is not None: - source_values["opportunity_id"]
= opportunity.opportunity_id - - source_summary = factory_cls.create( - **source_values, - opportunity=None, # To override the factory trying to create something - is_deleted=is_delete, - already_transformed=is_already_processed, - ) - - if create_existing: - f.OpportunitySummaryFactory.create( - opportunity=opportunity, is_forecast=is_forecast, revision_number=revision_number - ) - - return source_summary - - -def setup_applicant_type( - create_existing: bool, - opportunity_summary: OpportunitySummary, - legacy_lookup_value: str, - applicant_type: ApplicantType | None = None, - is_delete: bool = False, - is_already_processed: bool = False, - source_values: dict | None = None, -): - if create_existing and is_delete is False and applicant_type is None: - raise Exception( - "If create_existing is True, is_delete is False - must provide the properly converted / mapped value for applicant_type" - ) - - if source_values is None: - source_values = {} - - if opportunity_summary.is_forecast: - source_values["forecast"] = None - if opportunity_summary.revision_number is None: - factory_cls = f.StagingTapplicanttypesForecastFactory - else: - factory_cls = f.StagingTapplicanttypesForecastHistFactory - source_values["revision_number"] = opportunity_summary.revision_number - else: - source_values["synopsis"] = None - if opportunity_summary.revision_number is None: - factory_cls = f.StagingTapplicanttypesSynopsisFactory - else: - factory_cls = f.StagingTapplicanttypesSynopsisHistFactory - source_values["revision_number"] = opportunity_summary.revision_number - - source_applicant_type = factory_cls.create( - **source_values, - opportunity_id=opportunity_summary.opportunity_id, - is_deleted=is_delete, - already_transformed=is_already_processed, - at_id=legacy_lookup_value, - ) - - if create_existing: - if opportunity_summary.is_forecast: - legacy_id = source_applicant_type.at_frcst_id - else: - legacy_id = source_applicant_type.at_syn_id - - f.LinkOpportunitySummaryApplicantTypeFactory.create( - opportunity_summary=opportunity_summary, - legacy_applicant_type_id=legacy_id, - applicant_type=applicant_type, - ) - - return source_applicant_type - - -def setup_funding_instrument( - create_existing: bool, - opportunity_summary: OpportunitySummary, - legacy_lookup_value: str, - funding_instrument: FundingInstrument | None = None, - is_delete: bool = False, - is_already_processed: bool = False, - source_values: dict | None = None, -): - if create_existing and is_delete is False and funding_instrument is None: - raise Exception( - "If create_existing is True, is_delete is False - must provide the properly converted / mapped value for funding_instrument" - ) - - if source_values is None: - source_values = {} - - if opportunity_summary.is_forecast: - source_values["forecast"] = None - if opportunity_summary.revision_number is None: - factory_cls = f.StagingTfundinstrForecastFactory - else: - factory_cls = f.StagingTfundinstrForecastHistFactory - source_values["revision_number"] = opportunity_summary.revision_number - else: - source_values["synopsis"] = None - if opportunity_summary.revision_number is None: - factory_cls = f.StagingTfundinstrSynopsisFactory - else: - factory_cls = f.StagingTfundinstrSynopsisHistFactory - source_values["revision_number"] = opportunity_summary.revision_number - - source_funding_instrument = factory_cls.create( - **source_values, - opportunity_id=opportunity_summary.opportunity_id, - is_deleted=is_delete, - already_transformed=is_already_processed, - fi_id=legacy_lookup_value, - ) - - 
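# Illustrative sketch (helper name assumed, not part of this diff): every
# setup_* helper above dispatches to one of four staging factories the same
# way, keyed on is_forecast and whether revision_number is None (None selects
# the current table's factory, a number selects the corresponding _hist one).
def _pick_funding_instrument_factory(is_forecast: bool, revision_number: int | None):
    # `f` is tests.src.db.models.factories, as imported at the top of this file
    if is_forecast:
        if revision_number is None:
            return f.StagingTfundinstrForecastFactory
        return f.StagingTfundinstrForecastHistFactory
    if revision_number is None:
        return f.StagingTfundinstrSynopsisFactory
    return f.StagingTfundinstrSynopsisHistFactory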
if create_existing: - if opportunity_summary.is_forecast: - legacy_id = source_funding_instrument.fi_frcst_id - else: - legacy_id = source_funding_instrument.fi_syn_id - - f.LinkOpportunitySummaryFundingInstrumentFactory.create( - opportunity_summary=opportunity_summary, - legacy_funding_instrument_id=legacy_id, - funding_instrument=funding_instrument, - ) - - return source_funding_instrument - - -def setup_funding_category( - create_existing: bool, - opportunity_summary: OpportunitySummary, - legacy_lookup_value: str, - funding_category: FundingCategory | None = None, - is_delete: bool = False, - is_already_processed: bool = False, - source_values: dict | None = None, -): - if create_existing and is_delete is False and funding_category is None: - raise Exception( - "If create_existing is True, is_delete is False - must provide the properly converted / mapped value for funding_category" - ) - - if source_values is None: - source_values = {} - - if opportunity_summary.is_forecast: - source_values["forecast"] = None - if opportunity_summary.revision_number is None: - factory_cls = f.StagingTfundactcatForecastFactory - else: - factory_cls = f.StagingTfundactcatForecastHistFactory - source_values["revision_number"] = opportunity_summary.revision_number - else: - source_values["synopsis"] = None - if opportunity_summary.revision_number is None: - factory_cls = f.StagingTfundactcatSynopsisFactory - else: - factory_cls = f.StagingTfundactcatSynopsisHistFactory - source_values["revision_number"] = opportunity_summary.revision_number - - source_funding_category = factory_cls.create( - **source_values, - opportunity_id=opportunity_summary.opportunity_id, - is_deleted=is_delete, - already_transformed=is_already_processed, - fac_id=legacy_lookup_value, - ) - - if create_existing: - if opportunity_summary.is_forecast: - legacy_id = source_funding_category.fac_frcst_id - else: - legacy_id = source_funding_category.fac_syn_id - - f.LinkOpportunitySummaryFundingCategoryFactory.create( - opportunity_summary=opportunity_summary, - legacy_funding_category_id=legacy_id, - funding_category=funding_category, - ) - - return source_funding_category - - -def validate_matching_fields( - source, destination, fields: list[Tuple[str, str]], expect_all_to_match: bool -): - mismatched_fields = [] - - for source_field, destination_field in fields: - source_value = getattr(source, source_field) - destination_value = getattr(destination, destination_field) - if source_value != destination_value: - mismatched_fields.append( - f"{source_field}/{destination_field}: '{source_value}' != '{destination_value}'" - ) - - # If values weren't copied in an update - # then we should expect most things to not match, - # but randomness in the factories might cause some overlap - if expect_all_to_match: - assert ( - len(mismatched_fields) == 0 - ), f"Expected all fields to match between {source.__class__} and {destination.__class__}, but found mismatched fields: {','.join(mismatched_fields)}" - else: - assert ( - len(mismatched_fields) != 0 - ), f"Did not expect all fields to match between {source.__class__} and {destination.__class__}, but they did which means an unexpected update occurred" - - -def validate_opportunity( - db_session, - source_opportunity: staging.opportunity.Topportunity, - expect_in_db: bool = True, - expect_values_to_match: bool = True, -): - opportunity = ( - db_session.query(Opportunity) - .filter(Opportunity.opportunity_id == source_opportunity.opportunity_id) - .one_or_none() - ) - - if not expect_in_db: -
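# For scenarios that expect the record to be gone (deletes, orphaned records,
# failed inserts), the absence of a destination row is itself the pass
# condition, so we assert None and return before any field-level comparisons.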
assert opportunity is None - return - - assert opportunity is not None - # For fields that we expect to match 1:1, verify that they match as expected - validate_matching_fields( - source_opportunity, - opportunity, - [ - ("oppnumber", "opportunity_number"), - ("opptitle", "opportunity_title"), - ("owningagency", "agency"), - ("category_explanation", "category_explanation"), - ("revision_number", "revision_number"), - ("modified_comments", "modified_comments"), - ("publisheruid", "publisher_user_id"), - ("publisher_profile_id", "publisher_profile_id"), - ], - expect_values_to_match, - ) - - # Validation of fields that aren't copied exactly - if expect_values_to_match: - # Deliberately validating is_draft with a different calculation - if source_opportunity.is_draft == "N": - assert opportunity.is_draft is False - else: - assert opportunity.is_draft is True - - -def validate_assistance_listing( - db_session, - source_cfda: staging.opportunity.TopportunityCfda, - expect_in_db: bool = True, - expect_values_to_match: bool = True, -): - assistance_listing = ( - db_session.query(OpportunityAssistanceListing) - .filter( - OpportunityAssistanceListing.opportunity_assistance_listing_id - == source_cfda.opp_cfda_id - ) - .one_or_none() - ) - - if not expect_in_db: - assert assistance_listing is None - return - - assert assistance_listing is not None - # For fields that we expect to match 1:1, verify that they match as expected - validate_matching_fields( - source_cfda, - assistance_listing, - [ - ("cfdanumber", "assistance_listing_number"), - ("programtitle", "program_title"), - ], - expect_values_to_match, - ) - - -def get_summary_from_source(db_session, source_summary): - revision_number = None - is_forecast = source_summary.is_forecast - if isinstance(source_summary, (staging.synopsis.TsynopsisHist, staging.forecast.TforecastHist)): - revision_number = source_summary.revision_number - - opportunity_summary = ( - db_session.query(OpportunitySummary) - .filter( - OpportunitySummary.opportunity_id == source_summary.opportunity_id, - OpportunitySummary.revision_number == revision_number, - OpportunitySummary.is_forecast == is_forecast, - # Populate existing to force it to fetch updates from the DB - ) - .execution_options(populate_existing=True) - .one_or_none() - ) - - return opportunity_summary - - -def validate_opportunity_summary( - db_session, source_summary, expect_in_db: bool = True, expect_values_to_match: bool = True -): - opportunity_summary = get_summary_from_source(db_session, source_summary) - - if not expect_in_db: - assert opportunity_summary is None - return - - matching_fields = [ - ("version_nbr", "version_number"), - ("posting_date", "post_date"), - ("archive_date", "archive_date"), - ("fd_link_url", "additional_info_url"), - ("fd_link_desc", "additional_info_url_description"), - ("modification_comments", "modification_comments"), - ("oth_cat_fa_desc", "funding_category_description"), - ("applicant_elig_desc", "applicant_eligibility_description"), - ("ac_name", "agency_name"), - ("ac_email_addr", "agency_email_address"), - ("ac_email_desc", "agency_email_address_description"), - ("publisher_profile_id", "publisher_profile_id"), - ("publisheruid", "publisher_user_id"), - ("last_upd_id", "updated_by"), - ("creator_id", "created_by"), - ] - - if isinstance(source_summary, (staging.synopsis.Tsynopsis, staging.synopsis.TsynopsisHist)): - matching_fields.extend( - [ - ("syn_desc", "summary_description"), - ("a_sa_code", "agency_code"), - ("ac_phone_number", "agency_phone_number"), - 
("agency_contact_desc", "agency_contact_description"), - ("response_date", "close_date"), - ("response_date_desc", "close_date_description"), - ("unarchive_date", "unarchive_date"), - ] - ) - else: # Forecast+ForecastHist - matching_fields.extend( - [ - ("forecast_desc", "summary_description"), - ("agency_code", "agency_code"), - ("ac_phone", "agency_phone_number"), - ("est_synopsis_posting_date", "forecasted_post_date"), - ("est_appl_response_date", "forecasted_close_date"), - ("est_appl_response_date_desc", "forecasted_close_date_description"), - ("est_award_date", "forecasted_award_date"), - ("est_project_start_date", "forecasted_project_start_date"), - ("fiscal_year", "fiscal_year"), - ] - ) - - # History only fields - is_deleted = False - if isinstance(source_summary, (staging.synopsis.TsynopsisHist, staging.forecast.TforecastHist)): - matching_fields.extend([("revision_number", "revision_number")]) - - is_deleted = source_summary.action_type == "D" - - assert opportunity_summary is not None - validate_matching_fields( - source_summary, opportunity_summary, matching_fields, expect_values_to_match - ) - - assert opportunity_summary.is_deleted == is_deleted - - -def validate_summary_and_nested( - db_session, - source_summary, - expected_applicant_types: list[ApplicantType], - expected_funding_categories: list[FundingCategory], - expected_funding_instruments: list[FundingInstrument], - expect_in_db: bool = True, - expect_values_to_match: bool = True, -): - validate_opportunity_summary(db_session, source_summary, expect_in_db, expect_values_to_match) - - if not expect_in_db: - return - - created_record = get_summary_from_source(db_session, source_summary) - - assert set(created_record.applicant_types) == set(expected_applicant_types) - assert set(created_record.funding_categories) == set(expected_funding_categories) - assert set(created_record.funding_instruments) == set(expected_funding_instruments) - - -def validate_applicant_type( - db_session, - source_applicant_type, - expect_in_db: bool = True, - expected_applicant_type: ApplicantType | None = None, - was_processed: bool = True, - expect_values_to_match: bool = True, -): - assert (source_applicant_type.transformed_at is not None) == was_processed - - # In order to properly find the link table value, need to first determine - # the opportunity summary in a subquery - opportunity_summary_id = ( - db_session.query(OpportunitySummary.opportunity_summary_id) - .filter( - OpportunitySummary.revision_number == source_applicant_type.revision_number, - OpportunitySummary.is_forecast == source_applicant_type.is_forecast, - OpportunitySummary.opportunity_id == source_applicant_type.opportunity_id, - ) - .scalar() - ) - - link_applicant_type = ( - db_session.query(LinkOpportunitySummaryApplicantType) - .filter( - LinkOpportunitySummaryApplicantType.legacy_applicant_type_id - == source_applicant_type.legacy_applicant_type_id, - LinkOpportunitySummaryApplicantType.opportunity_summary_id == opportunity_summary_id, - ) - .one_or_none() - ) - - if not expect_in_db: - assert link_applicant_type is None - return - - assert link_applicant_type is not None - assert link_applicant_type.applicant_type == expected_applicant_type - - validate_matching_fields( - source_applicant_type, - link_applicant_type, - [("creator_id", "created_by"), ("last_upd_id", "updated_by")], - expect_values_to_match, - ) - - -def validate_funding_instrument( - db_session, - source_funding_instrument, - expect_in_db: bool = True, - expected_funding_instrument: FundingInstrument | 
None = None, - was_processed: bool = True, - expect_values_to_match: bool = True, -): - assert (source_funding_instrument.transformed_at is not None) == was_processed - - # In order to properly find the link table value, need to first determine - # the opportunity summary in a subquery - opportunity_summary_id = ( - db_session.query(OpportunitySummary.opportunity_summary_id) - .filter( - OpportunitySummary.revision_number == source_funding_instrument.revision_number, - OpportunitySummary.is_forecast == source_funding_instrument.is_forecast, - OpportunitySummary.opportunity_id == source_funding_instrument.opportunity_id, - ) - .scalar() - ) - - link_funding_instrument = ( - db_session.query(LinkOpportunitySummaryFundingInstrument) - .filter( - LinkOpportunitySummaryFundingInstrument.legacy_funding_instrument_id - == source_funding_instrument.legacy_funding_instrument_id, - LinkOpportunitySummaryFundingInstrument.opportunity_summary_id - == opportunity_summary_id, - ) - .one_or_none() - ) - - if not expect_in_db: - assert link_funding_instrument is None - return - - assert link_funding_instrument is not None - assert link_funding_instrument.funding_instrument == expected_funding_instrument - - validate_matching_fields( - source_funding_instrument, - link_funding_instrument, - [("creator_id", "created_by"), ("last_upd_id", "updated_by")], - expect_values_to_match, - ) - - -def validate_funding_category( - db_session, - source_funding_category, - expect_in_db: bool = True, - expected_funding_category: FundingCategory | None = None, - was_processed: bool = True, - expect_values_to_match: bool = True, -): - assert (source_funding_category.transformed_at is not None) == was_processed - - # In order to properly find the link table value, need to first determine - # the opportunity summary in a subquery - opportunity_summary_id = ( - db_session.query(OpportunitySummary.opportunity_summary_id) - .filter( - OpportunitySummary.revision_number == source_funding_category.revision_number, - OpportunitySummary.is_forecast == source_funding_category.is_forecast, - OpportunitySummary.opportunity_id == source_funding_category.opportunity_id, - ) - .scalar() - ) - - link_funding_category = ( - db_session.query(LinkOpportunitySummaryFundingCategory) - .filter( - LinkOpportunitySummaryFundingCategory.legacy_funding_category_id - == source_funding_category.legacy_funding_category_id, - LinkOpportunitySummaryFundingCategory.opportunity_summary_id == opportunity_summary_id, - ) - .one_or_none() - ) - - if not expect_in_db: - assert link_funding_category is None - return - - assert link_funding_category is not None - assert link_funding_category.funding_category == expected_funding_category - - validate_matching_fields( - source_funding_category, - link_funding_category, - [("creator_id", "created_by"), ("last_upd_id", "updated_by")], - expect_values_to_match, - ) - - -class TestTransformOpportunity(BaseTestClass): - @pytest.fixture() - def transform_oracle_data_task( - self, db_session, enable_factory_create, truncate_opportunities - ) -> TransformOracleDataTask: - return TransformOracleDataTask(db_session) - - def test_process_opportunities(self, db_session, transform_oracle_data_task): - ordinary_delete = setup_opportunity( - create_existing=True, is_delete=True, all_fields_null=True - ) - ordinary_delete2 = setup_opportunity( - create_existing=True, is_delete=True, all_fields_null=False - ) - delete_but_current_missing = setup_opportunity(create_existing=False, is_delete=True) - - basic_insert = 
setup_opportunity(create_existing=False) - basic_insert2 = setup_opportunity(create_existing=False, all_fields_null=True) - basic_insert3 = setup_opportunity(create_existing=False) - - basic_update = setup_opportunity( - create_existing=True, - ) - basic_update2 = setup_opportunity(create_existing=True, all_fields_null=True) - basic_update3 = setup_opportunity(create_existing=True, all_fields_null=True) - basic_update4 = setup_opportunity(create_existing=True) - - # Something else deleted it - already_processed_insert = setup_opportunity( - create_existing=False, is_already_processed=True - ) - already_processed_update = setup_opportunity( - create_existing=True, is_already_processed=True - ) - - insert_that_will_fail = setup_opportunity( - create_existing=False, source_values={"oppcategory": "X"} - ) - - transform_oracle_data_task.process_opportunities() - - validate_opportunity(db_session, ordinary_delete, expect_in_db=False) - validate_opportunity(db_session, ordinary_delete2, expect_in_db=False) - validate_opportunity(db_session, delete_but_current_missing, expect_in_db=False) - - validate_opportunity(db_session, basic_insert) - validate_opportunity(db_session, basic_insert2) - validate_opportunity(db_session, basic_insert3) - - validate_opportunity(db_session, basic_update) - validate_opportunity(db_session, basic_update2) - validate_opportunity(db_session, basic_update3) - validate_opportunity(db_session, basic_update4) - - validate_opportunity(db_session, already_processed_insert, expect_in_db=False) - validate_opportunity(db_session, already_processed_update, expect_values_to_match=False) - - validate_opportunity(db_session, insert_that_will_fail, expect_in_db=False) - - metrics = transform_oracle_data_task.metrics - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_PROCESSED] == 11 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_DELETED] == 2 - # Note this insert counts the case where the category fails - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_INSERTED] == 3 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_UPDATED] == 4 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_ERROR_COUNT] == 2 - - # Rerunning does mostly nothing, it will attempt to re-process the two that errored - # but otherwise won't find anything else - transform_oracle_data_task.process_opportunities() - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_PROCESSED] == 13 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_DELETED] == 2 - # Note this insert counts the case where the category fails - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_INSERTED] == 3 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_UPDATED] == 4 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_ERROR_COUNT] == 4 - - def test_process_opportunity_delete_but_current_missing( - self, db_session, transform_oracle_data_task - ): - # Verify an error is raised when we try to delete something that doesn't exist - delete_but_current_missing = setup_opportunity(create_existing=False, is_delete=True) - - with pytest.raises( - ValueError, match="Cannot delete opportunity record as it does not exist" - ): - transform_oracle_data_task.process_opportunity(delete_but_current_missing, None) - - validate_opportunity(db_session, delete_but_current_missing, expect_in_db=False) - - def test_process_opportunity_invalid_category(self, db_session, transform_oracle_data_task): - # This will error in the transform as 
that isn't a category we have configured - insert_that_will_fail = setup_opportunity( - create_existing=False, source_values={"oppcategory": "X"} - ) - - with pytest.raises(ValueError, match="Unrecognized opportunity category"): - transform_oracle_data_task.process_opportunity(insert_that_will_fail, None) - - validate_opportunity(db_session, insert_that_will_fail, expect_in_db=False) - - -class TestTransformAssistanceListing(BaseTestClass): - @pytest.fixture() - def transform_oracle_data_task( - self, db_session, enable_factory_create, truncate_opportunities - ) -> TransformOracleDataTask: - return TransformOracleDataTask(db_session) - - def test_process_opportunity_assistance_listings(self, db_session, transform_oracle_data_task): - opportunity1 = f.OpportunityFactory.create(opportunity_assistance_listings=[]) - cfda_insert1 = setup_cfda(create_existing=False, opportunity=opportunity1) - cfda_insert2 = setup_cfda(create_existing=False, opportunity=opportunity1) - cfda_update1 = setup_cfda(create_existing=True, opportunity=opportunity1) - cfda_delete1 = setup_cfda(create_existing=True, is_delete=True, opportunity=opportunity1) - cfda_update_already_processed1 = setup_cfda( - create_existing=True, is_already_processed=True, opportunity=opportunity1 - ) - - opportunity2 = f.OpportunityFactory.create(opportunity_assistance_listings=[]) - cfda_insert3 = setup_cfda(create_existing=False, opportunity=opportunity2) - cfda_update_already_processed2 = setup_cfda( - create_existing=True, is_already_processed=True, opportunity=opportunity2 - ) - cfda_delete_already_processed1 = setup_cfda( - create_existing=False, - is_already_processed=True, - is_delete=True, - opportunity=opportunity2, - ) - cfda_delete2 = setup_cfda(create_existing=True, is_delete=True, opportunity=opportunity2) - - opportunity3 = f.OpportunityFactory.create(opportunity_assistance_listings=[]) - cfda_update2 = setup_cfda(create_existing=True, opportunity=opportunity3) - cfda_delete_but_current_missing = setup_cfda( - create_existing=False, is_delete=True, opportunity=opportunity3 - ) - - cfda_insert_without_opportunity = setup_cfda( - create_existing=False, source_values={"opportunity_id": 12345678}, opportunity=None - ) - cfda_delete_without_opportunity = setup_cfda( - create_existing=False, source_values={"opportunity_id": 34567890}, opportunity=None - ) - - transform_oracle_data_task.process_assistance_listings() - - validate_assistance_listing(db_session, cfda_insert1) - validate_assistance_listing(db_session, cfda_insert2) - validate_assistance_listing(db_session, cfda_insert3) - validate_assistance_listing(db_session, cfda_update1) - validate_assistance_listing(db_session, cfda_update2) - validate_assistance_listing(db_session, cfda_delete1, expect_in_db=False) - validate_assistance_listing(db_session, cfda_delete2, expect_in_db=False) - - # Records that won't have been fetched - validate_assistance_listing( - db_session, - cfda_update_already_processed1, - expect_in_db=True, - expect_values_to_match=False, - ) - validate_assistance_listing( - db_session, - cfda_update_already_processed2, - expect_in_db=True, - expect_values_to_match=False, - ) - validate_assistance_listing(db_session, cfda_delete_already_processed1, expect_in_db=False) - - validate_assistance_listing(db_session, cfda_delete_but_current_missing, expect_in_db=False) - - validate_assistance_listing(db_session, cfda_insert_without_opportunity, expect_in_db=False) - validate_assistance_listing(db_session, cfda_delete_without_opportunity, expect_in_db=False) - - 
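# Editorial tally (illustrative, not part of the test): how the scenarios above
# produce the TOTAL_RECORDS_PROCESSED total asserted below. The three
# already-processed records are never fetched again, so they contribute nothing.
assert 3 + 2 + 2 + 2 + 1 == 10  # inserts + updates + deletes + orphans + skipped orphan delete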
metrics = transform_oracle_data_task.metrics - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_PROCESSED] == 10 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_DELETED] == 2 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_INSERTED] == 3 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_UPDATED] == 2 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_ORPHANED] == 2 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1 - - # Rerunning finds nothing - no metrics update - transform_oracle_data_task.process_assistance_listings() - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_PROCESSED] == 10 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_DELETED] == 2 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_INSERTED] == 3 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_UPDATED] == 2 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_ORPHANED] == 2 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1 - - def test_process_assistance_listing_orphaned_record( - self, db_session, transform_oracle_data_task - ): - cfda_insert_without_opportunity = setup_cfda( - create_existing=False, source_values={"opportunity_id": 987654321}, opportunity=None - ) - - # Verify it gets marked as transformed - assert cfda_insert_without_opportunity.transformed_at is None - transform_oracle_data_task.process_assistance_listing( - cfda_insert_without_opportunity, None, None - ) - assert cfda_insert_without_opportunity.transformed_at is not None - assert cfda_insert_without_opportunity.transformation_notes == "orphaned_cfda" - assert ( - transform_oracle_data_task.metrics[ - transform_oracle_data_task.Metrics.TOTAL_RECORDS_ORPHANED - ] - == 1 - ) - - # Verify nothing actually gets created - opportunity = ( - db_session.query(Opportunity) - .filter(Opportunity.opportunity_id == cfda_insert_without_opportunity.opportunity_id) - .one_or_none() - ) - assert opportunity is None - assistance_listing = ( - db_session.query(OpportunityAssistanceListing) - .filter( - OpportunityAssistanceListing.opportunity_assistance_listing_id - == cfda_insert_without_opportunity.opp_cfda_id - ) - .one_or_none() - ) - assert assistance_listing is None - - def test_process_assistance_listing_delete_but_current_missing( - self, db_session, transform_oracle_data_task - ): - opportunity = f.OpportunityFactory.create(opportunity_assistance_listings=[]) - delete_but_current_missing = setup_cfda( - create_existing=False, is_delete=True, opportunity=opportunity - ) - - transform_oracle_data_task.process_assistance_listing( - delete_but_current_missing, None, opportunity - ) - - validate_assistance_listing(db_session, delete_but_current_missing, expect_in_db=False) - assert delete_but_current_missing.transformed_at is not None - assert delete_but_current_missing.transformation_notes == "orphaned_delete_record" - - -class TestTransformOpportunitySummary(BaseTestClass): - @pytest.fixture() - def transform_oracle_data_task( - self, db_session, enable_factory_create, truncate_opportunities - ) -> TransformOracleDataTask: - return TransformOracleDataTask(db_session) - - def test_process_opportunity_summaries(self, db_session, transform_oracle_data_task): - # Basic inserts - opportunity1 = f.OpportunityFactory.create( - no_current_summary=True, opportunity_assistance_listings=[] - ) - forecast_insert1 = setup_synopsis_forecast( - 
is_forecast=True, revision_number=None, create_existing=False, opportunity=opportunity1 - ) - synopsis_insert1 = setup_synopsis_forecast( - is_forecast=False, revision_number=None, create_existing=False, opportunity=opportunity1 - ) - forecast_hist_insert1 = setup_synopsis_forecast( - is_forecast=True, revision_number=1, create_existing=False, opportunity=opportunity1 - ) - synopsis_hist_insert1 = setup_synopsis_forecast( - is_forecast=False, revision_number=1, create_existing=False, opportunity=opportunity1 - ) - - # Mix of updates and inserts, somewhat resembling what happens when summary objects - # get moved to the historical table (we'd update the synopsis/forecast records, and create new historical) - opportunity2 = f.OpportunityFactory.create( - no_current_summary=True, opportunity_assistance_listings=[] - ) - forecast_update1 = setup_synopsis_forecast( - is_forecast=True, revision_number=None, create_existing=True, opportunity=opportunity2 - ) - synopsis_update1 = setup_synopsis_forecast( - is_forecast=False, revision_number=None, create_existing=True, opportunity=opportunity2 - ) - forecast_hist_update1 = setup_synopsis_forecast( - is_forecast=True, revision_number=1, create_existing=True, opportunity=opportunity2 - ) - synopsis_hist_update1 = setup_synopsis_forecast( - is_forecast=False, revision_number=1, create_existing=True, opportunity=opportunity2 - ) - forecast_hist_insert2 = setup_synopsis_forecast( - is_forecast=True, revision_number=2, create_existing=False, opportunity=opportunity2 - ) - synopsis_hist_insert2 = setup_synopsis_forecast( - is_forecast=False, revision_number=2, create_existing=False, opportunity=opportunity2 - ) - - # Mix of inserts, updates, and deletes - opportunity3 = f.OpportunityFactory.create( - no_current_summary=True, opportunity_assistance_listings=[] - ) - forecast_delete1 = setup_synopsis_forecast( - is_forecast=True, - revision_number=None, - create_existing=True, - is_delete=True, - opportunity=opportunity3, - ) - synopsis_delete1 = setup_synopsis_forecast( - is_forecast=False, - revision_number=None, - create_existing=True, - is_delete=True, - opportunity=opportunity3, - ) - forecast_hist_insert3 = setup_synopsis_forecast( - is_forecast=True, revision_number=2, create_existing=False, opportunity=opportunity3 - ) - synopsis_hist_update2 = setup_synopsis_forecast( - is_forecast=False, - revision_number=1, - create_existing=True, - source_values={"action_type": "D"}, - opportunity=opportunity3, - ) - - # A few error scenarios - opportunity4 = f.OpportunityFactory.create( - no_current_summary=True, opportunity_assistance_listings=[] - ) - forecast_delete_but_current_missing = setup_synopsis_forecast( - is_forecast=True, - revision_number=None, - create_existing=False, - is_delete=True, - opportunity=opportunity4, - ) - synopsis_update_invalid_yn_field = setup_synopsis_forecast( - is_forecast=False, - revision_number=None, - create_existing=True, - source_values={"sendmail": "E"}, - opportunity=opportunity4, - ) - synopsis_hist_insert_invalid_yn_field = setup_synopsis_forecast( - is_forecast=False, - revision_number=1, - create_existing=False, - source_values={"cost_sharing": "1"}, - opportunity=opportunity4, - ) - forecast_hist_update_invalid_action_type = setup_synopsis_forecast( - is_forecast=True, - revision_number=2, - create_existing=True, - source_values={"action_type": "X"}, - opportunity=opportunity4, - ) - - transform_oracle_data_task.process_opportunity_summaries() - - validate_opportunity_summary(db_session, forecast_insert1) - 
validate_opportunity_summary(db_session, synopsis_insert1) - validate_opportunity_summary(db_session, forecast_hist_insert1) - validate_opportunity_summary(db_session, synopsis_hist_insert1) - validate_opportunity_summary(db_session, forecast_hist_insert2) - validate_opportunity_summary(db_session, synopsis_hist_insert2) - validate_opportunity_summary(db_session, forecast_hist_insert3) - - validate_opportunity_summary(db_session, forecast_update1) - validate_opportunity_summary(db_session, synopsis_update1) - validate_opportunity_summary(db_session, forecast_hist_update1) - validate_opportunity_summary(db_session, synopsis_hist_update1) - validate_opportunity_summary(db_session, synopsis_hist_update2) - - validate_opportunity_summary(db_session, forecast_delete1, expect_in_db=False) - validate_opportunity_summary(db_session, synopsis_delete1, expect_in_db=False) - - validate_opportunity_summary( - db_session, forecast_delete_but_current_missing, expect_in_db=False - ) - validate_opportunity_summary( - db_session, - synopsis_update_invalid_yn_field, - expect_in_db=True, - expect_values_to_match=False, - ) - validate_opportunity_summary( - db_session, synopsis_hist_insert_invalid_yn_field, expect_in_db=False - ) - validate_opportunity_summary( - db_session, - forecast_hist_update_invalid_action_type, - expect_in_db=True, - expect_values_to_match=False, - ) - - metrics = transform_oracle_data_task.metrics - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_PROCESSED] == 18 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_DELETED] == 2 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_INSERTED] == 7 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_UPDATED] == 5 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_ERROR_COUNT] == 3 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1 - - # Rerunning will only attempt to re-process the errors, so total+errors goes up by 3 - transform_oracle_data_task.process_opportunity_summaries() - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_PROCESSED] == 21 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_DELETED] == 2 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_INSERTED] == 7 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_UPDATED] == 5 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_ERROR_COUNT] == 6 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1 - - @pytest.mark.parametrize( - "is_forecast,revision_number", [(True, None), (False, None), (True, 5), (False, 10)] - ) - def test_process_opportunity_summary_delete_but_current_missing( - self, db_session, transform_oracle_data_task, is_forecast, revision_number - ): - opportunity = f.OpportunityFactory.create( - no_current_summary=True, opportunity_assistance_listings=[] - ) - delete_but_current_missing = setup_synopsis_forecast( - is_forecast=is_forecast, - revision_number=revision_number, - create_existing=False, - is_delete=True, - opportunity=opportunity, - ) - - transform_oracle_data_task.process_opportunity_summary( - delete_but_current_missing, None, opportunity - ) - - validate_opportunity_summary(db_session, delete_but_current_missing, expect_in_db=False) - assert delete_but_current_missing.transformed_at is not None - assert delete_but_current_missing.transformation_notes == "orphaned_delete_record" - - @pytest.mark.parametrize( - 
"is_forecast,revision_number,source_values,expected_error", - [ - (True, None, {"sendmail": "z"}, "Unexpected Y/N bool value: z"), - (False, None, {"cost_sharing": "v"}, "Unexpected Y/N bool value: v"), - (True, 5, {"action_type": "T"}, "Unexpected action type value: T"), - (False, 10, {"action_type": "5"}, "Unexpected action type value: 5"), - ], - ) - def test_process_opportunity_summary_invalid_value_errors( - self, - db_session, - transform_oracle_data_task, - is_forecast, - revision_number, - source_values, - expected_error, - ): - opportunity = f.OpportunityFactory.create( - no_current_summary=True, opportunity_assistance_listings=[] - ) - source_summary = setup_synopsis_forecast( - is_forecast=is_forecast, - revision_number=revision_number, - create_existing=False, - opportunity=opportunity, - source_values=source_values, - ) - - with pytest.raises(ValueError, match=expected_error): - transform_oracle_data_task.process_opportunity_summary( - source_summary, None, opportunity - ) - - @pytest.mark.parametrize("is_forecast", [True, False]) - def test_process_opportunity_summary_but_no_opportunity_non_hist( - self, - db_session, - transform_oracle_data_task, - is_forecast, - ): - source_record = setup_synopsis_forecast( - is_forecast=is_forecast, - revision_number=None, - create_existing=False, - opportunity=None, - source_values={"opportunity_id": 12121212}, - ) - - with pytest.raises( - ValueError, - match="Opportunity summary cannot be processed as the opportunity for it does not exist", - ): - transform_oracle_data_task.process_opportunity_summary(source_record, None, None) - - @pytest.mark.parametrize("is_forecast,revision_number", [(True, 10), (False, 9)]) - def test_process_opportunity_summary_but_no_opportunity_hist( - self, - db_session, - transform_oracle_data_task, - is_forecast, - revision_number, - ): - source_record = setup_synopsis_forecast( - is_forecast=is_forecast, - revision_number=revision_number, - create_existing=False, - opportunity=None, - source_values={"opportunity_id": 12121212}, - ) - - transform_oracle_data_task.process_opportunity_summary(source_record, None, None) - - validate_opportunity_summary(db_session, source_record, expect_in_db=False) - assert source_record.transformed_at is not None - assert source_record.transformation_notes == "orphaned_historical_record" - - -class TestTransformApplicantType(BaseTestClass): - @pytest.fixture() - def transform_oracle_data_task( - self, db_session, enable_factory_create, truncate_opportunities - ) -> TransformOracleDataTask: - return TransformOracleDataTask(db_session) - - def test_process_applicant_types(self, db_session, transform_oracle_data_task): - opportunity_summary_forecast = f.OpportunitySummaryFactory.create( - is_forecast=True, revision_number=None, no_link_values=True - ) - forecast_insert1 = setup_applicant_type( - create_existing=False, - opportunity_summary=opportunity_summary_forecast, - legacy_lookup_value="00", - ) - forecast_update1 = setup_applicant_type( - create_existing=True, - opportunity_summary=opportunity_summary_forecast, - legacy_lookup_value="01", - applicant_type=ApplicantType.COUNTY_GOVERNMENTS, - ) - forecast_update2 = setup_applicant_type( - create_existing=True, - opportunity_summary=opportunity_summary_forecast, - legacy_lookup_value="02", - applicant_type=ApplicantType.CITY_OR_TOWNSHIP_GOVERNMENTS, - ) - forecast_delete1 = setup_applicant_type( - create_existing=True, - is_delete=True, - opportunity_summary=opportunity_summary_forecast, - legacy_lookup_value="04", - 
applicant_type=ApplicantType.SPECIAL_DISTRICT_GOVERNMENTS, - ) - forecast_delete2 = setup_applicant_type( - create_existing=True, - is_delete=True, - opportunity_summary=opportunity_summary_forecast, - legacy_lookup_value="05", - applicant_type=ApplicantType.INDEPENDENT_SCHOOL_DISTRICTS, - ) - forecast_update_already_processed = setup_applicant_type( - create_existing=True, - is_already_processed=True, - opportunity_summary=opportunity_summary_forecast, - legacy_lookup_value="06", - applicant_type=ApplicantType.PUBLIC_AND_STATE_INSTITUTIONS_OF_HIGHER_EDUCATION, - ) - - opportunity_summary_forecast_hist = f.OpportunitySummaryFactory.create( - is_forecast=True, revision_number=3, no_link_values=True - ) - forecast_hist_insert1 = setup_applicant_type( - create_existing=False, - opportunity_summary=opportunity_summary_forecast_hist, - legacy_lookup_value="07", - ) - forecast_hist_update1 = setup_applicant_type( - create_existing=True, - opportunity_summary=opportunity_summary_forecast_hist, - legacy_lookup_value="08", - applicant_type=ApplicantType.PUBLIC_AND_INDIAN_HOUSING_AUTHORITIES, - ) - forecast_hist_update2 = setup_applicant_type( - create_existing=True, - opportunity_summary=opportunity_summary_forecast_hist, - legacy_lookup_value="11", - applicant_type=ApplicantType.OTHER_NATIVE_AMERICAN_TRIBAL_ORGANIZATIONS, - ) - forecast_hist_delete1 = setup_applicant_type( - create_existing=True, - is_delete=True, - opportunity_summary=opportunity_summary_forecast_hist, - legacy_lookup_value="12", - applicant_type=ApplicantType.NONPROFITS_NON_HIGHER_EDUCATION_WITH_501C3, - ) - forecast_hist_delete_already_processed = setup_applicant_type( - create_existing=False, - is_delete=True, - is_already_processed=True, - opportunity_summary=opportunity_summary_forecast_hist, - legacy_lookup_value="13", - ) - forecast_hist_duplicate_insert = setup_applicant_type( - create_existing=False, - opportunity_summary=opportunity_summary_forecast_hist, - legacy_lookup_value="08", - ) - - opportunity_summary_syn = f.OpportunitySummaryFactory.create( - is_forecast=False, revision_number=None, no_link_values=True - ) - syn_insert1 = setup_applicant_type( - create_existing=False, - opportunity_summary=opportunity_summary_syn, - legacy_lookup_value="20", - ) - syn_insert2 = setup_applicant_type( - create_existing=False, - opportunity_summary=opportunity_summary_syn, - legacy_lookup_value="21", - ) - syn_update1 = setup_applicant_type( - create_existing=True, - opportunity_summary=opportunity_summary_syn, - legacy_lookup_value="22", - applicant_type=ApplicantType.FOR_PROFIT_ORGANIZATIONS_OTHER_THAN_SMALL_BUSINESSES, - ) - syn_update2 = setup_applicant_type( - create_existing=True, - opportunity_summary=opportunity_summary_syn, - legacy_lookup_value="23", - applicant_type=ApplicantType.SMALL_BUSINESSES, - ) - syn_delete1 = setup_applicant_type( - create_existing=True, - is_delete=True, - opportunity_summary=opportunity_summary_syn, - legacy_lookup_value="25", - applicant_type=ApplicantType.OTHER, - ) - syn_delete2 = setup_applicant_type( - create_existing=True, - is_delete=True, - opportunity_summary=opportunity_summary_syn, - legacy_lookup_value="99", - applicant_type=ApplicantType.UNRESTRICTED, - ) - syn_delete_but_current_missing = setup_applicant_type( - create_existing=False, - is_delete=True, - opportunity_summary=opportunity_summary_syn, - legacy_lookup_value="07", - ) - syn_update_already_processed = setup_applicant_type( - create_existing=True, - is_already_processed=True, - 
opportunity_summary=opportunity_summary_syn, - legacy_lookup_value="08", - applicant_type=ApplicantType.PUBLIC_AND_INDIAN_HOUSING_AUTHORITIES, - ) - - opportunity_summary_syn_hist = f.OpportunitySummaryFactory.create( - is_forecast=False, revision_number=21, no_link_values=True - ) - syn_hist_insert1 = setup_applicant_type( - create_existing=False, - opportunity_summary=opportunity_summary_syn_hist, - legacy_lookup_value="11", - ) - syn_hist_update1 = setup_applicant_type( - create_existing=True, - opportunity_summary=opportunity_summary_syn_hist, - legacy_lookup_value="12", - applicant_type=ApplicantType.NONPROFITS_NON_HIGHER_EDUCATION_WITH_501C3, - ) - syn_hist_update2 = setup_applicant_type( - create_existing=True, - opportunity_summary=opportunity_summary_syn_hist, - legacy_lookup_value="13", - applicant_type=ApplicantType.NONPROFITS_NON_HIGHER_EDUCATION_WITHOUT_501C3, - ) - syn_hist_delete1 = setup_applicant_type( - create_existing=True, - is_delete=True, - opportunity_summary=opportunity_summary_syn_hist, - legacy_lookup_value="25", - applicant_type=ApplicantType.OTHER, - ) - syn_hist_delete2 = setup_applicant_type( - create_existing=True, - is_delete=True, - opportunity_summary=opportunity_summary_syn_hist, - legacy_lookup_value="99", - applicant_type=ApplicantType.UNRESTRICTED, - ) - syn_hist_insert_invalid_type = setup_applicant_type( - create_existing=False, - opportunity_summary=opportunity_summary_syn_hist, - legacy_lookup_value="X", - applicant_type=ApplicantType.STATE_GOVERNMENTS, - ) - - transform_oracle_data_task.process_link_applicant_types() - - validate_applicant_type( - db_session, forecast_insert1, expected_applicant_type=ApplicantType.STATE_GOVERNMENTS - ) - validate_applicant_type( - db_session, - forecast_hist_insert1, - expected_applicant_type=ApplicantType.FEDERALLY_RECOGNIZED_NATIVE_AMERICAN_TRIBAL_GOVERNMENTS, - ) - validate_applicant_type( - db_session, - syn_insert1, - expected_applicant_type=ApplicantType.PRIVATE_INSTITUTIONS_OF_HIGHER_EDUCATION, - ) - validate_applicant_type( - db_session, syn_insert2, expected_applicant_type=ApplicantType.INDIVIDUALS - ) - validate_applicant_type( - db_session, - syn_hist_insert1, - expected_applicant_type=ApplicantType.OTHER_NATIVE_AMERICAN_TRIBAL_ORGANIZATIONS, - ) - - validate_applicant_type( - db_session, forecast_update1, expected_applicant_type=ApplicantType.COUNTY_GOVERNMENTS - ) - validate_applicant_type( - db_session, - forecast_update2, - expected_applicant_type=ApplicantType.CITY_OR_TOWNSHIP_GOVERNMENTS, - ) - validate_applicant_type( - db_session, - forecast_hist_update1, - expected_applicant_type=ApplicantType.PUBLIC_AND_INDIAN_HOUSING_AUTHORITIES, - ) - validate_applicant_type( - db_session, - forecast_hist_update2, - expected_applicant_type=ApplicantType.OTHER_NATIVE_AMERICAN_TRIBAL_ORGANIZATIONS, - ) - validate_applicant_type( - db_session, - syn_update1, - expected_applicant_type=ApplicantType.FOR_PROFIT_ORGANIZATIONS_OTHER_THAN_SMALL_BUSINESSES, - ) - validate_applicant_type( - db_session, syn_update2, expected_applicant_type=ApplicantType.SMALL_BUSINESSES - ) - validate_applicant_type( - db_session, - syn_hist_update1, - expected_applicant_type=ApplicantType.NONPROFITS_NON_HIGHER_EDUCATION_WITH_501C3, - ) - validate_applicant_type( - db_session, - syn_hist_update2, - expected_applicant_type=ApplicantType.NONPROFITS_NON_HIGHER_EDUCATION_WITHOUT_501C3, - ) - - validate_applicant_type(db_session, forecast_delete1, expect_in_db=False) - validate_applicant_type(db_session, forecast_delete2, expect_in_db=False) 
- validate_applicant_type(db_session, forecast_hist_delete1, expect_in_db=False) - validate_applicant_type(db_session, syn_delete1, expect_in_db=False) - validate_applicant_type(db_session, syn_delete2, expect_in_db=False) - validate_applicant_type(db_session, syn_hist_delete1, expect_in_db=False) - validate_applicant_type(db_session, syn_hist_delete2, expect_in_db=False) - - validate_applicant_type( - db_session, - forecast_update_already_processed, - expected_applicant_type=ApplicantType.PUBLIC_AND_STATE_INSTITUTIONS_OF_HIGHER_EDUCATION, - expect_values_to_match=False, - ) - validate_applicant_type( - db_session, forecast_hist_delete_already_processed, expect_in_db=False - ) - validate_applicant_type( - db_session, - syn_update_already_processed, - expected_applicant_type=ApplicantType.PUBLIC_AND_INDIAN_HOUSING_AUTHORITIES, - expect_values_to_match=False, - ) - - validate_applicant_type( - db_session, syn_delete_but_current_missing, expect_in_db=False, was_processed=True - ) - validate_applicant_type( - db_session, syn_hist_insert_invalid_type, expect_in_db=False, was_processed=False - ) - - validate_applicant_type( - db_session, forecast_hist_duplicate_insert, expect_in_db=False, was_processed=True - ) - - metrics = transform_oracle_data_task.metrics - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_PROCESSED] == 23 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_DELETED] == 7 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_INSERTED] == 5 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_UPDATED] == 8 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_ERROR_COUNT] == 1 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_DUPLICATE_RECORDS_SKIPPED] == 1 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1 - - # Rerunning will only attempt to re-process the errors, so total+errors goes up by 1 - transform_oracle_data_task.process_link_applicant_types() - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_PROCESSED] == 24 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_DELETED] == 7 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_INSERTED] == 5 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_UPDATED] == 8 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_ERROR_COUNT] == 2 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_DUPLICATE_RECORDS_SKIPPED] == 1 - assert metrics[transform_oracle_data_task.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1 - - @pytest.mark.parametrize( - "is_forecast,revision_number", [(True, None), (False, None), (True, 5), (False, 10)] - ) - def test_process_applicant_types_but_current_missing( - self, db_session, transform_oracle_data_task, is_forecast, revision_number - ): - opportunity_summary = f.OpportunitySummaryFactory.create( - is_forecast=is_forecast, revision_number=revision_number, no_link_values=True - ) - delete_but_current_missing = setup_applicant_type( - create_existing=False, - opportunity_summary=opportunity_summary, - legacy_lookup_value="00", - is_delete=True, - ) - - transform_oracle_data_task.process_link_applicant_type( - delete_but_current_missing, None, opportunity_summary - ) - - validate_applicant_type(db_session, delete_but_current_missing, expect_in_db=False) - assert delete_but_current_missing.transformed_at is not None - assert delete_but_current_missing.transformation_notes == "orphaned_delete_record" - - @pytest.mark.parametrize( - 
"is_forecast,revision_number,legacy_lookup_value", - [(True, None, "90"), (False, None, "xx"), (True, 5, "50"), (False, 10, "1")], - ) - def test_process_applicant_types_but_invalid_lookup_value( - self, - db_session, - transform_oracle_data_task, - is_forecast, - revision_number, - legacy_lookup_value, - ): - opportunity_summary = f.OpportunitySummaryFactory.create( - is_forecast=is_forecast, revision_number=revision_number, no_link_values=True - ) - insert_but_invalid_value = setup_applicant_type( - create_existing=False, - opportunity_summary=opportunity_summary, - legacy_lookup_value=legacy_lookup_value, - ) - - with pytest.raises(ValueError, match="Unrecognized applicant type"): - transform_oracle_data_task.process_link_applicant_type( - insert_but_invalid_value, None, opportunity_summary - ) - - @pytest.mark.parametrize( - "factory_cls", - [f.StagingTapplicanttypesForecastFactory, f.StagingTapplicanttypesSynopsisFactory], - ) - def test_process_applicant_type_but_no_opportunity_summary_non_hist( - self, - db_session, - transform_oracle_data_task, - factory_cls, - ): - source_record = factory_cls.create(orphaned_record=True) - - with pytest.raises( - ValueError, - match="Applicant type record cannot be processed as the opportunity summary for it does not exist", - ): - transform_oracle_data_task.process_link_applicant_type(source_record, None, None) - - @pytest.mark.parametrize( - "factory_cls", - [f.StagingTapplicanttypesForecastHistFactory, f.StagingTapplicanttypesSynopsisHistFactory], - ) - def test_process_applicant_type_but_no_opportunity_summary_hist( - self, - db_session, - transform_oracle_data_task, - factory_cls, - ): - source_record = factory_cls.create(orphaned_record=True, revision_number=12) - transform_oracle_data_task.process_link_applicant_type(source_record, None, None) - assert source_record.transformed_at is not None - assert source_record.transformation_notes == "orphaned_historical_record" - - -class TestTransformFundingInstrument(BaseTestClass): - @pytest.fixture() - def transform_oracle_data_task( - self, db_session, enable_factory_create, truncate_opportunities - ) -> TransformOracleDataTask: - return TransformOracleDataTask(db_session) - - def test_process_funding_instruments(self, db_session, transform_oracle_data_task): - opportunity_summary_forecast = f.OpportunitySummaryFactory.create( - is_forecast=True, revision_number=None, no_link_values=True - ) - forecast_insert1 = setup_funding_instrument( - create_existing=False, - opportunity_summary=opportunity_summary_forecast, - legacy_lookup_value="CA", - ) - forecast_update1 = setup_funding_instrument( - create_existing=True, - opportunity_summary=opportunity_summary_forecast, - legacy_lookup_value="G", - funding_instrument=FundingInstrument.GRANT, - ) - forecast_delete1 = setup_funding_instrument( - create_existing=True, - is_delete=True, - opportunity_summary=opportunity_summary_forecast, - legacy_lookup_value="PC", - funding_instrument=FundingInstrument.PROCUREMENT_CONTRACT, - ) - forecast_update_already_processed = setup_funding_instrument( - create_existing=True, - is_already_processed=True, - opportunity_summary=opportunity_summary_forecast, - legacy_lookup_value="O", - funding_instrument=FundingInstrument.OTHER, - ) - - opportunity_summary_forecast_hist = f.OpportunitySummaryFactory.create( - is_forecast=True, revision_number=3, no_link_values=True - ) - forecast_hist_insert1 = setup_funding_instrument( - create_existing=False, - opportunity_summary=opportunity_summary_forecast_hist, - 
legacy_lookup_value="G", - ) - forecast_hist_delete1 = setup_funding_instrument( - create_existing=True, - is_delete=True, - opportunity_summary=opportunity_summary_forecast_hist, - legacy_lookup_value="CA", - funding_instrument=FundingInstrument.COOPERATIVE_AGREEMENT, - ) - forecast_hist_delete_already_processed = setup_funding_instrument( - create_existing=False, - is_delete=True, - is_already_processed=True, - opportunity_summary=opportunity_summary_forecast_hist, - legacy_lookup_value="O", - ) - syn_delete_but_current_missing = setup_funding_instrument( - create_existing=False, - is_delete=True, - opportunity_summary=opportunity_summary_forecast_hist, - legacy_lookup_value="PC", - ) - - opportunity_summary_syn = f.OpportunitySummaryFactory.create( - is_forecast=False, revision_number=None, no_link_values=True - ) - syn_insert1 = setup_funding_instrument( - create_existing=False, - opportunity_summary=opportunity_summary_syn, - legacy_lookup_value="O", - ) - syn_insert2 = setup_funding_instrument( - create_existing=False, - opportunity_summary=opportunity_summary_syn, - legacy_lookup_value="G", - ) - syn_delete1 = setup_funding_instrument( - create_existing=True, - is_delete=True, - opportunity_summary=opportunity_summary_syn, - legacy_lookup_value="CA", - funding_instrument=FundingInstrument.COOPERATIVE_AGREEMENT, - ) - syn_update_already_processed = setup_funding_instrument( - create_existing=True, - is_already_processed=True, - opportunity_summary=opportunity_summary_syn, - legacy_lookup_value="PC", - funding_instrument=FundingInstrument.PROCUREMENT_CONTRACT, - ) - - opportunity_summary_syn_hist = f.OpportunitySummaryFactory.create( - is_forecast=False, revision_number=21, no_link_values=True - ) - syn_hist_insert1 = setup_funding_instrument( - create_existing=False, - opportunity_summary=opportunity_summary_syn_hist, - legacy_lookup_value="CA", - ) - syn_hist_update1 = setup_funding_instrument( - create_existing=True, - opportunity_summary=opportunity_summary_syn_hist, - legacy_lookup_value="O", - funding_instrument=FundingInstrument.OTHER, - ) - syn_hist_delete1 = setup_funding_instrument( - create_existing=True, - is_delete=True, - opportunity_summary=opportunity_summary_syn_hist, - legacy_lookup_value="PC", - funding_instrument=FundingInstrument.PROCUREMENT_CONTRACT, - ) - syn_hist_delete2 = setup_funding_instrument( - create_existing=True, - is_delete=True, - opportunity_summary=opportunity_summary_syn_hist, - legacy_lookup_value="G", - funding_instrument=FundingInstrument.GRANT, - ) - syn_hist_insert_invalid_type = setup_funding_instrument( - create_existing=False, - opportunity_summary=opportunity_summary_syn_hist, - legacy_lookup_value="X", - ) - - transform_oracle_data_task.process_link_funding_instruments() - - validate_funding_instrument( - db_session, - forecast_insert1, - expected_funding_instrument=FundingInstrument.COOPERATIVE_AGREEMENT, - ) - validate_funding_instrument( - db_session, forecast_hist_insert1, expected_funding_instrument=FundingInstrument.GRANT - ) - validate_funding_instrument( - db_session, syn_insert1, expected_funding_instrument=FundingInstrument.OTHER - ) - validate_funding_instrument( - db_session, syn_insert2, expected_funding_instrument=FundingInstrument.GRANT - ) - validate_funding_instrument( - db_session, - syn_hist_insert1, - expected_funding_instrument=FundingInstrument.COOPERATIVE_AGREEMENT, - ) - - validate_funding_instrument( - db_session, forecast_update1, expected_funding_instrument=FundingInstrument.GRANT - ) - 
-        validate_funding_instrument(
-            db_session, syn_hist_update1, expected_funding_instrument=FundingInstrument.OTHER
-        )
-
-        validate_funding_instrument(db_session, forecast_delete1, expect_in_db=False)
-        validate_funding_instrument(db_session, forecast_hist_delete1, expect_in_db=False)
-        validate_funding_instrument(db_session, syn_delete1, expect_in_db=False)
-        validate_funding_instrument(db_session, syn_hist_delete1, expect_in_db=False)
-        validate_funding_instrument(db_session, syn_hist_delete2, expect_in_db=False)
-
-        validate_funding_instrument(
-            db_session,
-            forecast_update_already_processed,
-            expected_funding_instrument=FundingInstrument.OTHER,
-            expect_values_to_match=False,
-        )
-        validate_funding_instrument(
-            db_session, forecast_hist_delete_already_processed, expect_in_db=False
-        )
-        validate_funding_instrument(
-            db_session,
-            syn_update_already_processed,
-            expected_funding_instrument=FundingInstrument.PROCUREMENT_CONTRACT,
-            expect_values_to_match=False,
-        )
-
-        validate_funding_instrument(
-            db_session, syn_delete_but_current_missing, expect_in_db=False, was_processed=True
-        )
-        validate_funding_instrument(
-            db_session, syn_hist_insert_invalid_type, expect_in_db=False, was_processed=False
-        )
-
-        metrics = transform_oracle_data_task.metrics
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_PROCESSED] == 14
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_DELETED] == 5
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_INSERTED] == 5
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_UPDATED] == 2
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_ERROR_COUNT] == 1
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1
-
-        # Rerunning will only attempt to re-process the errors, so total+errors goes up by 1
-        transform_oracle_data_task.process_link_funding_instruments()
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_PROCESSED] == 15
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_DELETED] == 5
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_INSERTED] == 5
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_UPDATED] == 2
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_ERROR_COUNT] == 2
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1
-
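The assertions above pin down the legacy-code-to-enum conversion for funding instruments: "CA" maps to COOPERATIVE_AGREEMENT, "G" to GRANT, "PC" to PROCUREMENT_CONTRACT, "O" to OTHER, and anything else raises. As a rough sketch of what the transformation layer presumably does (the map and helper names below are illustrative, not code from this diff, and the import path is assumed):

from src.constants.lookup_constants import FundingInstrument  # import path assumed

# Illustrative only: the mapping implied by the test expectations above
FUNDING_INSTRUMENT_MAP = {
    "CA": FundingInstrument.COOPERATIVE_AGREEMENT,
    "G": FundingInstrument.GRANT,
    "PC": FundingInstrument.PROCUREMENT_CONTRACT,
    "O": FundingInstrument.OTHER,
}


def convert_funding_instrument(legacy_lookup_value: str) -> FundingInstrument:
    if legacy_lookup_value not in FUNDING_INSTRUMENT_MAP:
        # The tests expect exactly this error message prefix
        raise ValueError("Unrecognized funding instrument: %s" % legacy_lookup_value)
    return FUNDING_INSTRUMENT_MAP[legacy_lookup_value]

A plain dict lookup keeps the happy path trivial and makes an unknown legacy code fail loudly (counted under TOTAL_ERROR_COUNT) rather than silently dropping the record.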
-    @pytest.mark.parametrize(
-        "is_forecast,revision_number", [(True, None), (False, None), (True, 1), (False, 4)]
-    )
-    def test_process_funding_instrument_but_current_missing(
-        self, db_session, transform_oracle_data_task, is_forecast, revision_number
-    ):
-        opportunity_summary = f.OpportunitySummaryFactory.create(
-            is_forecast=is_forecast, revision_number=revision_number, no_link_values=True
-        )
-        delete_but_current_missing = setup_funding_instrument(
-            create_existing=False,
-            opportunity_summary=opportunity_summary,
-            legacy_lookup_value="G",
-            is_delete=True,
-        )
-
-        transform_oracle_data_task.process_link_funding_instrument(
-            delete_but_current_missing, None, opportunity_summary
-        )
-
-        validate_funding_instrument(db_session, delete_but_current_missing, expect_in_db=False)
-        assert delete_but_current_missing.transformed_at is not None
-        assert delete_but_current_missing.transformation_notes == "orphaned_delete_record"
-
-    @pytest.mark.parametrize(
-        "is_forecast,revision_number,legacy_lookup_value",
-        [(True, None, "X"), (False, None, "4"), (True, 5, "Y"), (False, 10, "A")],
-    )
-    def test_process_funding_instrument_but_invalid_lookup_value(
-        self,
-        db_session,
-        transform_oracle_data_task,
-        is_forecast,
-        revision_number,
-        legacy_lookup_value,
-    ):
-        opportunity_summary = f.OpportunitySummaryFactory.create(
-            is_forecast=is_forecast, revision_number=revision_number, no_link_values=True
-        )
-        insert_but_invalid_value = setup_funding_instrument(
-            create_existing=False,
-            opportunity_summary=opportunity_summary,
-            legacy_lookup_value=legacy_lookup_value,
-        )
-
-        with pytest.raises(ValueError, match="Unrecognized funding instrument"):
-            transform_oracle_data_task.process_link_funding_instrument(
-                insert_but_invalid_value, None, opportunity_summary
-            )
-
-    @pytest.mark.parametrize(
-        "factory_cls", [f.StagingTfundinstrForecastFactory, f.StagingTfundinstrSynopsisFactory]
-    )
-    def test_process_funding_instrument_but_no_opportunity_summary_non_hist(
-        self,
-        db_session,
-        transform_oracle_data_task,
-        factory_cls,
-    ):
-        source_record = factory_cls.create(orphaned_record=True)
-
-        with pytest.raises(
-            ValueError,
-            match="Funding instrument record cannot be processed as the opportunity summary for it does not exist",
-        ):
-            transform_oracle_data_task.process_link_funding_instrument(source_record, None, None)
-
-    @pytest.mark.parametrize(
-        "factory_cls",
-        [f.StagingTfundinstrForecastHistFactory, f.StagingTfundinstrSynopsisHistFactory],
-    )
-    def test_process_funding_instrument_but_no_opportunity_summary_hist(
-        self,
-        db_session,
-        transform_oracle_data_task,
-        factory_cls,
-    ):
-        source_record = factory_cls.create(orphaned_record=True, revision_number=12)
-        transform_oracle_data_task.process_link_funding_instrument(source_record, None, None)
-        assert source_record.transformed_at is not None
-        assert source_record.transformation_notes == "orphaned_historical_record"
-
-
-class TestTransformFundingCategory(BaseTestClass):
-    @pytest.fixture()
-    def transform_oracle_data_task(
-        self, db_session, enable_factory_create, truncate_opportunities
-    ) -> TransformOracleDataTask:
-        return TransformOracleDataTask(db_session)
-
-    def test_process_funding_categories(self, db_session, transform_oracle_data_task):
-        opportunity_summary_forecast = f.OpportunitySummaryFactory.create(
-            is_forecast=True, revision_number=None, no_link_values=True
-        )
-        forecast_insert1 = setup_funding_category(
-            create_existing=False,
-            opportunity_summary=opportunity_summary_forecast,
-            legacy_lookup_value="RA",
-        )
-        forecast_insert2 = setup_funding_category(
-            create_existing=False,
-            opportunity_summary=opportunity_summary_forecast,
-            legacy_lookup_value="AG",
-        )
-        forecast_update1 = setup_funding_category(
-            create_existing=True,
-            opportunity_summary=opportunity_summary_forecast,
-            legacy_lookup_value="AR",
-            funding_category=FundingCategory.ARTS,
-        )
-        forecast_delete1 = setup_funding_category(
-            create_existing=True,
-            is_delete=True,
-            opportunity_summary=opportunity_summary_forecast,
-            legacy_lookup_value="BC",
-            funding_category=FundingCategory.BUSINESS_AND_COMMERCE,
-        )
-        forecast_delete2 = setup_funding_category(
-            create_existing=True,
-            is_delete=True,
-            opportunity_summary=opportunity_summary_forecast,
-            legacy_lookup_value="CD",
-            funding_category=FundingCategory.COMMUNITY_DEVELOPMENT,
-        )
-        forecast_update_already_processed = setup_funding_category(
-            create_existing=True,
-            is_already_processed=True,
-            opportunity_summary=opportunity_summary_forecast,
-            legacy_lookup_value="CP",
-            funding_category=FundingCategory.CONSUMER_PROTECTION,
-        )
-
-        opportunity_summary_forecast_hist = f.OpportunitySummaryFactory.create(
-            is_forecast=True, revision_number=3, no_link_values=True
-        )
-        forecast_hist_insert1 = setup_funding_category(
-            create_existing=False,
-            opportunity_summary=opportunity_summary_forecast_hist,
-            legacy_lookup_value="DPR",
-        )
-        forecast_hist_insert2 = setup_funding_category(
-            create_existing=False,
-            opportunity_summary=opportunity_summary_forecast_hist,
-            legacy_lookup_value="ED",
-        )
-        forecast_hist_update1 = setup_funding_category(
-            create_existing=True,
-            opportunity_summary=opportunity_summary_forecast_hist,
-            legacy_lookup_value="ELT",
-            funding_category=FundingCategory.EMPLOYMENT_LABOR_AND_TRAINING,
-        )
-        forecast_hist_delete1 = setup_funding_category(
-            create_existing=True,
-            is_delete=True,
-            opportunity_summary=opportunity_summary_forecast_hist,
-            legacy_lookup_value="EN",
-            funding_category=FundingCategory.ENERGY,
-        )
-        forecast_hist_delete_already_processed = setup_funding_category(
-            create_existing=False,
-            is_delete=True,
-            is_already_processed=True,
-            opportunity_summary=opportunity_summary_forecast_hist,
-            legacy_lookup_value="ENV",
-        )
-
-        opportunity_summary_syn = f.OpportunitySummaryFactory.create(
-            is_forecast=False, revision_number=None, no_link_values=True
-        )
-        syn_insert1 = setup_funding_category(
-            create_existing=False,
-            opportunity_summary=opportunity_summary_syn,
-            legacy_lookup_value="FN",
-        )
-        syn_insert2 = setup_funding_category(
-            create_existing=False,
-            opportunity_summary=opportunity_summary_syn,
-            legacy_lookup_value="HL",
-        )
-        syn_update1 = setup_funding_category(
-            create_existing=True,
-            opportunity_summary=opportunity_summary_syn,
-            legacy_lookup_value="HO",
-            funding_category=FundingCategory.HOUSING,
-        )
-        syn_delete1 = setup_funding_category(
-            create_existing=True,
-            is_delete=True,
-            opportunity_summary=opportunity_summary_syn,
-            legacy_lookup_value="HU",
-            funding_category=FundingCategory.HUMANITIES,
-        )
-        syn_delete2 = setup_funding_category(
-            create_existing=True,
-            is_delete=True,
-            opportunity_summary=opportunity_summary_syn,
-            legacy_lookup_value="IIJ",
-            funding_category=FundingCategory.INFRASTRUCTURE_INVESTMENT_AND_JOBS_ACT,
-        )
-        syn_delete_but_current_missing = setup_funding_category(
-            create_existing=False,
-            is_delete=True,
-            opportunity_summary=opportunity_summary_syn,
-            legacy_lookup_value="IS",
-        )
-        syn_update_already_processed = setup_funding_category(
-            create_existing=True,
-            is_already_processed=True,
-            opportunity_summary=opportunity_summary_syn,
-            legacy_lookup_value="ISS",
-            funding_category=FundingCategory.INCOME_SECURITY_AND_SOCIAL_SERVICES,
-        )
-
-        opportunity_summary_syn_hist = f.OpportunitySummaryFactory.create(
-            is_forecast=False, revision_number=21, no_link_values=True
-        )
-        syn_hist_insert1 = setup_funding_category(
-            create_existing=False,
-            opportunity_summary=opportunity_summary_syn_hist,
-            legacy_lookup_value="LJL",
-        )
-        syn_hist_insert2 = setup_funding_category(
-            create_existing=False,
-            opportunity_summary=opportunity_summary_syn_hist,
-            legacy_lookup_value="NR",
-        )
-        syn_hist_insert3 = setup_funding_category(
-            create_existing=False,
-            opportunity_summary=opportunity_summary_syn_hist,
-            legacy_lookup_value="OZ",
-        )
-        syn_hist_update1 = setup_funding_category(
-            create_existing=True,
-            opportunity_summary=opportunity_summary_syn_hist,
-            legacy_lookup_value="RD",
-            funding_category=FundingCategory.REGIONAL_DEVELOPMENT,
-        )
-
-        syn_hist_delete1 = setup_funding_category(
-            create_existing=True,
-            is_delete=True,
-            opportunity_summary=opportunity_summary_syn_hist,
-            legacy_lookup_value="ST",
-            funding_category=FundingCategory.SCIENCE_TECHNOLOGY_AND_OTHER_RESEARCH_AND_DEVELOPMENT,
-        )
-        syn_hist_delete2 = setup_funding_category(
-            create_existing=True,
-            is_delete=True,
-            opportunity_summary=opportunity_summary_syn_hist,
-            legacy_lookup_value="T",
-            funding_category=FundingCategory.TRANSPORTATION,
-        )
-        syn_hist_insert_invalid_type = setup_funding_category(
-            create_existing=False,
-            opportunity_summary=opportunity_summary_syn_hist,
-            legacy_lookup_value="XYZ",
-            funding_category=FundingCategory.HEALTH,
-        )
-
-        transform_oracle_data_task.process_link_funding_categories()
-
-        validate_funding_category(
-            db_session, forecast_insert1, expected_funding_category=FundingCategory.RECOVERY_ACT
-        )
-        validate_funding_category(
-            db_session, forecast_insert2, expected_funding_category=FundingCategory.AGRICULTURE
-        )
-        validate_funding_category(
-            db_session,
-            forecast_hist_insert1,
-            expected_funding_category=FundingCategory.DISASTER_PREVENTION_AND_RELIEF,
-        )
-        validate_funding_category(
-            db_session, forecast_hist_insert2, expected_funding_category=FundingCategory.EDUCATION
-        )
-        validate_funding_category(
-            db_session, syn_insert1, expected_funding_category=FundingCategory.FOOD_AND_NUTRITION
-        )
-        validate_funding_category(
-            db_session, syn_insert2, expected_funding_category=FundingCategory.HEALTH
-        )
-        validate_funding_category(
-            db_session,
-            syn_hist_insert1,
-            expected_funding_category=FundingCategory.LAW_JUSTICE_AND_LEGAL_SERVICES,
-        )
-        validate_funding_category(
-            db_session,
-            syn_hist_insert2,
-            expected_funding_category=FundingCategory.NATURAL_RESOURCES,
-        )
-        validate_funding_category(
-            db_session,
-            syn_hist_insert3,
-            expected_funding_category=FundingCategory.OPPORTUNITY_ZONE_BENEFITS,
-        )
-
-        validate_funding_category(
-            db_session, forecast_update1, expected_funding_category=FundingCategory.ARTS
-        )
-        validate_funding_category(
-            db_session,
-            forecast_hist_update1,
-            expected_funding_category=FundingCategory.EMPLOYMENT_LABOR_AND_TRAINING,
-        )
-        validate_funding_category(
-            db_session, syn_update1, expected_funding_category=FundingCategory.HOUSING
-        )
-        validate_funding_category(
-            db_session,
-            syn_hist_update1,
-            expected_funding_category=FundingCategory.REGIONAL_DEVELOPMENT,
-        )
-
-        validate_funding_category(db_session, forecast_delete1, expect_in_db=False)
-        validate_funding_category(db_session, forecast_delete2, expect_in_db=False)
-        validate_funding_category(db_session, forecast_hist_delete1, expect_in_db=False)
-        validate_funding_category(db_session, syn_delete1, expect_in_db=False)
-        validate_funding_category(db_session, syn_delete2, expect_in_db=False)
-        validate_funding_category(db_session, syn_hist_delete1, expect_in_db=False)
-        validate_funding_category(db_session, syn_hist_delete2, expect_in_db=False)
-
-        validate_funding_category(
-            db_session,
-            forecast_update_already_processed,
-            expected_funding_category=FundingCategory.CONSUMER_PROTECTION,
-            expect_values_to_match=False,
-        )
-        validate_funding_category(
-            db_session, forecast_hist_delete_already_processed, expect_in_db=False
-        )
-        validate_funding_category(
-            db_session,
-            syn_update_already_processed,
-            expected_funding_category=FundingCategory.INCOME_SECURITY_AND_SOCIAL_SERVICES,
-            expect_values_to_match=False,
-        )
-
-        validate_funding_category(
-            db_session, syn_delete_but_current_missing, expect_in_db=False, was_processed=True
-        )
-        validate_funding_category(
-            db_session, syn_hist_insert_invalid_type, expect_in_db=False, was_processed=False
-        )
-
-        metrics = transform_oracle_data_task.metrics
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_PROCESSED] == 22
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_DELETED] == 7
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_INSERTED] == 9
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_UPDATED] == 4
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_ERROR_COUNT] == 1
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1
-
-        # Rerunning will only attempt to re-process the errors, so total+errors goes up by 1
-        transform_oracle_data_task.process_link_funding_categories()
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_PROCESSED] == 23
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_DELETED] == 7
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_INSERTED] == 9
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_RECORDS_UPDATED] == 4
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_ERROR_COUNT] == 2
-        assert metrics[transform_oracle_data_task.Metrics.TOTAL_DELETE_ORPHANS_SKIPPED] == 1
-
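To make the asserted totals easier to audit, the 22 processed records decompose by source table as follows (already-processed records are excluded, since they are never fetched again):

- forecast: 2 inserts + 1 update + 2 deletes = 5
- forecast hist: 2 inserts + 1 update + 1 delete = 4
- synopsis: 2 inserts + 1 update + 2 deletes + 1 orphaned delete = 6
- synopsis hist: 3 inserts + 1 update + 2 deletes + 1 invalid lookup (the error) = 7

That gives 5 + 4 + 6 + 7 = 22 processed, with 9 inserts, 4 updates, 7 deletes, 1 error, and 1 skipped orphaned delete, matching the per-metric assertions above.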
-    @pytest.mark.parametrize(
-        "is_forecast,revision_number", [(True, None), (False, None), (True, 1), (False, 70)]
-    )
-    def test_process_funding_category_but_current_missing(
-        self, db_session, transform_oracle_data_task, is_forecast, revision_number
-    ):
-        opportunity_summary = f.OpportunitySummaryFactory.create(
-            is_forecast=is_forecast, revision_number=revision_number, no_link_values=True
-        )
-        delete_but_current_missing = setup_funding_category(
-            create_existing=False,
-            opportunity_summary=opportunity_summary,
-            legacy_lookup_value="00",
-            is_delete=True,
-        )
-
-        transform_oracle_data_task.process_link_funding_category(
-            delete_but_current_missing, None, opportunity_summary
-        )
-
-        validate_funding_category(db_session, delete_but_current_missing, expect_in_db=False)
-        assert delete_but_current_missing.transformed_at is not None
-        assert delete_but_current_missing.transformation_notes == "orphaned_delete_record"
-
-    @pytest.mark.parametrize(
-        "is_forecast,revision_number,legacy_lookup_value",
-        [(True, None, "ab"), (False, None, "cd"), (True, 5, "ef"), (False, 10, "Ag")],
-    )
-    def test_process_funding_category_but_invalid_lookup_value(
-        self,
-        db_session,
-        transform_oracle_data_task,
-        is_forecast,
-        revision_number,
-        legacy_lookup_value,
-    ):
-        opportunity_summary = f.OpportunitySummaryFactory.create(
-            is_forecast=is_forecast, revision_number=revision_number, no_link_values=True
-        )
-        insert_but_invalid_value = setup_funding_category(
-            create_existing=False,
-            opportunity_summary=opportunity_summary,
-            legacy_lookup_value=legacy_lookup_value,
-        )
-
-        with pytest.raises(ValueError, match="Unrecognized funding category"):
-            transform_oracle_data_task.process_link_funding_category(
-                insert_but_invalid_value, None, opportunity_summary
-            )
-
-    @pytest.mark.parametrize(
-        "factory_cls", [f.StagingTfundactcatForecastFactory, f.StagingTfundactcatSynopsisFactory]
-    )
-    def test_process_funding_category_but_no_opportunity_summary_non_hist(
-        self,
-        db_session,
-        transform_oracle_data_task,
-        factory_cls,
-    ):
-        source_record = factory_cls.create(orphaned_record=True)
-
-        with pytest.raises(
-            ValueError,
-            match="Funding category record cannot be processed as the opportunity summary for it does not exist",
-        ):
-            transform_oracle_data_task.process_link_funding_category(source_record, None, None)
-
-    @pytest.mark.parametrize(
-        "factory_cls",
-        [f.StagingTfundactcatForecastHistFactory, f.StagingTfundactcatSynopsisHistFactory],
-    )
-    def test_process_funding_category_but_no_opportunity_summary_hist(
-        self,
-        db_session,
-        transform_oracle_data_task,
-        factory_cls,
-    ):
-        source_record = factory_cls.create(orphaned_record=True, revision_number=12)
-        transform_oracle_data_task.process_link_funding_category(source_record, None, None)
-        assert source_record.transformed_at is not None
-        assert source_record.transformation_notes == "orphaned_historical_record"
+from tests.src.data_migration.transformation.conftest import (
+    get_summary_from_source,
+    setup_cfda,
+    setup_opportunity,
+    setup_synopsis_forecast,
+    validate_applicant_type,
+    validate_assistance_listing,
+    validate_funding_category,
+    validate_funding_instrument,
+    validate_opportunity,
+    validate_opportunity_summary,
+    validate_summary_and_nested,
+)


 class TestTransformFullRunTask(BaseTestClass):
@@ -2686,7 +516,6 @@ def test_delete_opportunity_with_deleted_children(self, db_session, transform_or
         db_session.expire_all()

         transform_oracle_data_task.run_task()
-        print(transform_oracle_data_task.metrics)

         # verify everything is not in the DB
         validate_opportunity(db_session, opportunity, expect_in_db=False)
diff --git a/api/tests/src/data_migration/transformation/test_transform_util.py b/api/tests/src/data_migration/transformation/test_transform_util.py
index 05edcdf3c..52c117aef 100644
--- a/api/tests/src/data_migration/transformation/test_transform_util.py
+++ b/api/tests/src/data_migration/transformation/test_transform_util.py
@@ -93,7 +93,7 @@ def test_convert_yn_boolean_unexpected_value(value):


 @pytest.mark.parametrize(
-    "value,expected_value", [("D", True), ("U", False), ("", None), (None, None)]
+    "value,expected_value", [("D", True), ("U", False), ("", False), (None, False)]
 )
 def test_convert_action_type_to_is_deleted(value, expected_value):
     assert transform_util.convert_action_type_to_is_deleted(value) == expected_value
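The parametrize change above tightens convert_action_type_to_is_deleted: an empty or missing action type now maps to False rather than None, so callers always receive a real boolean. A minimal sketch consistent with the new expectations (illustrative only, not the body of transform_util; the raise for other values is an assumption by analogy with the yn-boolean helper's unexpected-value test, and is not covered by this diff):

def convert_action_type_to_is_deleted(value: str | None) -> bool:
    if value is None or value == "":
        return False  # updated behavior: an absent action type means "not deleted"
    if value == "D":  # D = delete
        return True
    if value == "U":  # U = update
        return False
    # Assumed, mirroring convert_yn_boolean's handling of unexpected values
    raise ValueError("Unexpected action type value: %s" % value)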