Skip to content
This repository has been archived by the owner on Sep 18, 2024. It is now read-only.

Commit

Permalink
[Issue #106] Transform agency data
Browse files Browse the repository at this point in the history
  • Loading branch information
chouinar committed Jul 11, 2024
1 parent 1648b09 commit 203b1ec
Show file tree
Hide file tree
Showing 10 changed files with 895 additions and 35 deletions.
422 changes: 422 additions & 0 deletions api/src/data_migration/transformation/subtask/transform_agency.py

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
APPLICANT_TYPE = "applicant_type"
FUNDING_CATEGORY = "funding_category"
FUNDING_INSTRUMENT = "funding_instrument"
AGENCY = "agency"


class Metrics(StrEnum):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import src.data_migration.transformation.transform_constants as transform_constants
from src.adapters import db
from src.data_migration.transformation.subtask.transform_agency import TransformAgency
from src.data_migration.transformation.subtask.transform_applicant_type import (
TransformApplicantType,
)
Expand Down Expand Up @@ -37,6 +38,7 @@ class TransformOracleDataTaskConfig(PydanticBaseEnvConfig):
enable_applicant_type: bool = True # TRANSFORM_ORACLE_DATA_ENABLE_APPLICANT_TYPE
enable_funding_category: bool = True # TRANSFORM_ORACLE_DATA_ENABLE_FUNDING_CATEGORY
enable_funding_instrument: bool = True # TRANSFORM_ORACLE_DATA_ENABLE_FUNDING_INSTRUMENT
enable_agency: bool = False # TRANSFORM_ORACLE_DATA_ENABLE_AGENCY


class TransformOracleDataTask(Task):
Expand Down Expand Up @@ -76,3 +78,6 @@ def run_task(self) -> None:

if self.transform_config.enable_funding_instrument:
TransformFundingInstrument(self).run()

if self.transform_config.enable_agency:
TransformAgency(self).run()
69 changes: 48 additions & 21 deletions api/src/data_migration/transformation/transform_util.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
from datetime import datetime
from typing import Tuple

from src.constants.lookup_constants import (
ApplicantType,
Expand Down Expand Up @@ -377,38 +378,47 @@ def convert_est_timestamp_to_utc(timestamp: datetime | None) -> datetime | None:
return datetime_util.adjust_timezone(aware_timestamp, "UTC")


def get_create_update_timestamps(
    source_created_date: datetime | None,
    source_last_upd_date: datetime | None,
    log_extra: dict | None = None,
) -> Tuple[datetime, datetime]:
    """Convert legacy EST created/updated timestamps to UTC, filling in defaults.

    Args:
        source_created_date: created timestamp from the legacy record (EST, may be None)
        source_last_upd_date: last-updated timestamp from the legacy record (EST, may be None)
        log_extra: extra context attached to the warning log when a default is used

    Returns:
        A (created_timestamp, updated_timestamp) pair, both non-None and in UTC.
    """
    created_timestamp = convert_est_timestamp_to_utc(source_created_date)
    updated_timestamp = convert_est_timestamp_to_utc(source_last_upd_date)

    # This is incredibly rare, but possible - because our system requires
    # we set something, we'll default to the current time and log a warning.
    if created_timestamp is None:
        if log_extra is None:
            log_extra = {}

        logger.warning(
            "Record does not have a created_date timestamp set, assuming value to be now.",
            extra=log_extra,
        )
        created_timestamp = datetime_util.utcnow()

    if updated_timestamp is None:
        # In the legacy system, they don't set whether something was updated
        # until it receives an update. We always set the value, and on initial insert
        # want it to be the same as the created_at.
        updated_timestamp = created_timestamp

    return created_timestamp, updated_timestamp


def transform_update_create_timestamp(
    source: StagingBase, target: TimestampMixin, log_extra: dict | None = None
) -> None:
    """Copy the legacy created/updated timestamps from source onto target in UTC.

    Note: the type ignores are because created_date/last_upd_date are added
    on the individual class definitions, not the base class - due to how
    we need to maintain the column order of the legacy system.
    Every legacy table does have these columns.
    """
    timestamps = get_create_update_timestamps(source.created_date, source.last_upd_date, log_extra)  # type: ignore[attr-defined]
    target.created_at, target.updated_at = timestamps


TRUTHY = {"Y", "Yes"}
Expand All @@ -431,6 +441,23 @@ def convert_yn_bool(value: str | None) -> bool | None:
raise ValueError("Unexpected Y/N bool value: %s" % value)


def convert_true_false_bool(value: str | None) -> bool | None:
if value is None or value == "":
return None

return value == "TRUE"


def convert_null_like_to_none(value: str | None) -> str | None:
if value is None:
return None

if value.lower() == "null":
return None

return value


def convert_action_type_to_is_deleted(value: str | None) -> bool:
# Action type can be U (update) or D (delete)
# however many older records seem to not have this set at all
Expand Down
12 changes: 6 additions & 6 deletions api/src/db/models/agency_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,12 @@ class Agency(ApiSchemaTable, TimestampMixin):
ldap_group: Mapped[str]
description: Mapped[str]
label: Mapped[str]
is_multilevel_agency: Mapped[bool]
is_multiproject: Mapped[bool]
has_system_to_system_certificate: Mapped[bool]
can_view_packages_in_grace_period: Mapped[bool]
is_image_workspace_enabled: Mapped[bool]
is_validation_workspace_enabled: Mapped[bool]
is_multilevel_agency: Mapped[bool] = mapped_column(default=False)
is_multiproject: Mapped[bool] = mapped_column(default=False)
has_system_to_system_certificate: Mapped[bool] = mapped_column(default=False)
can_view_packages_in_grace_period: Mapped[bool] = mapped_column(default=False)
is_image_workspace_enabled: Mapped[bool] = mapped_column(default=False)
is_validation_workspace_enabled: Mapped[bool] = mapped_column(default=False)

link_agency_download_file_types: Mapped[list["LinkAgencyDownloadFileType"]] = relationship(
back_populates="agency", uselist=True, cascade="all, delete-orphan"
Expand Down
4 changes: 4 additions & 0 deletions api/src/db/models/staging/staging_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,7 @@ class StagingParamMixin:
)

transformation_notes: Mapped[str | None]

@property
def is_modified(self) -> bool:
    # A record counts as "modified" (i.e. still needing processing) when it
    # has not yet been transformed. NOTE(review): assumes transformed_at is
    # set exactly once when transformation completes — confirm against the
    # transformation task.
    return self.transformed_at is None
11 changes: 11 additions & 0 deletions api/src/db/models/staging/tgroups.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,14 @@

class Tgroups(StagingBase, tgroups_mixin.TGroupsMixin, StagingParamMixin):
    __tablename__ = "tgroups"

    def get_agency_code(self) -> str:
        """Extract the agency code from the keyfield.

        Keyfields are formatted as ``Agency-<AGENCY CODE>-<field name>``.
        The agency code itself may contain dashes, so we strip the leading
        "Agency" prefix and the trailing field name and keep what remains.
        """
        _, _, remainder = self.keyfield.partition("-")
        agency_code, _, _ = remainder.rpartition("-")
        return agency_code

    def get_field_name(self) -> str:
        """Return the field name - everything after the last dash in the keyfield."""
        return self.keyfield.rpartition("-")[-1]
92 changes: 91 additions & 1 deletion api/tests/src/data_migration/transformation/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from src.constants.lookup_constants import ApplicantType, FundingCategory, FundingInstrument
from src.data_migration.transformation.transform_oracle_data_task import TransformOracleDataTask
from src.db.models import staging
from src.db.models.agency_models import Agency
from src.db.models.opportunity_models import (
LinkOpportunitySummaryApplicantType,
LinkOpportunitySummaryFundingCategory,
Expand Down Expand Up @@ -299,13 +300,42 @@ def setup_funding_category(
return source_funding_category


def setup_agency(
    agency_code: str,
    create_existing: bool,
    is_already_processed: bool = False,
    deleted_fields: set | None = None,
    already_processed_fields: set | None = None,
    source_values: dict | None = None,
):
    """Build staging tgroups rows for an agency; optionally pre-create the Agency record.

    Returns the created tgroups staging records.
    """
    # Empty dict when no source overrides were supplied.
    overrides = source_values if source_values is not None else {}

    tgroups = f.create_tgroups_agency(
        agency_code,
        is_already_processed=is_already_processed,
        deleted_fields=deleted_fields,
        already_processed_fields=already_processed_fields,
        **overrides,
    )

    if create_existing:
        f.AgencyFactory.create(agency_code=agency_code)

    return tgroups


def validate_matching_fields(
source, destination, fields: list[Tuple[str, str]], expect_all_to_match: bool
):
mismatched_fields = []

for source_field, destination_field in fields:
source_value = getattr(source, source_field)
if isinstance(source, dict):
source_value = source.get(source_field)
else:
source_value = getattr(source, source_field)

destination_value = getattr(destination, destination_field)

# Some fields that we copy in are datetime typed (although behave as dates and we convert as such)
Expand Down Expand Up @@ -657,3 +687,63 @@ def validate_funding_category(
[("creator_id", "created_by"), ("last_upd_id", "updated_by")],
expect_values_to_match,
)


# (tgroup field name, Agency model attribute) pairs checked by validate_agency.
AGENCY_FIELD_MAPPING = [
    ("AgencyName", "agency_name"),
    ("AgencyCode", "sub_agency_code"),
    ("AgencyCFDA", "assistance_listing_number"),
    ("ldapGp", "ldap_group"),
    ("description", "description"),
    ("label", "label"),
]

# (tgroup field name, agency contact info attribute) pairs checked by validate_agency.
AGENCY_CONTACT_FIELD_MAPPING = [
    ("AgencyContactName", "contact_name"),
    ("AgencyContactAddress1", "address_line_1"),
    ("AgencyContactCity", "city"),
    ("AgencyContactState", "state"),
    ("AgencyContactZipCode", "zip_code"),
    ("AgencyContactTelephone", "phone_number"),
    ("AgencyContactEMail", "primary_email"),
]


def validate_agency(
    db_session,
    source_tgroups: list[staging.tgroups.Tgroups],
    expect_in_db: bool = True,
    expect_values_to_match: bool = True,
    is_test_agency: bool = False,
    non_matching_fields: set | None = None,
):
    """Assert the Agency built from the given tgroups rows looks as expected.

    Looks the agency up by the code derived from the first tgroup row, then
    compares both the agency fields and its contact-info fields against the
    staging values, skipping any fields listed in non_matching_fields.
    """
    agency_code = source_tgroups[0].get_agency_code()
    agency = db_session.query(Agency).filter(Agency.agency_code == agency_code).one_or_none()

    if not expect_in_db:
        assert agency is None
        return

    assert agency is not None

    # Restructure the tgroups rows into a field-name -> value lookup.
    tgroup_map = {row.get_field_name(): row.value for row in source_tgroups}

    def _without_skipped(mapping):
        # Drop any (source, destination) pairs the caller excluded from comparison.
        if non_matching_fields is None:
            return mapping
        return [pair for pair in mapping if pair[0] not in non_matching_fields]

    validate_matching_fields(
        tgroup_map, agency, _without_skipped(AGENCY_FIELD_MAPPING), expect_values_to_match
    )
    assert agency.is_test_agency == is_test_agency

    validate_matching_fields(
        tgroup_map,
        agency.agency_contact_info,
        _without_skipped(AGENCY_CONTACT_FIELD_MAPPING),
        expect_values_to_match,
    )
Loading

0 comments on commit 203b1ec

Please sign in to comment.