Skip to content

Commit

Permalink
Merge pull request #61 from CCRI-POPROX/karl/feature/onboarding-export
Browse files Browse the repository at this point in the history
Add demographics and onboarding interests to dataset exports
  • Loading branch information
karlhigley authored Nov 14, 2024
2 parents aeffa81 + aa21665 commit 938afb7
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 2 deletions.
3 changes: 2 additions & 1 deletion src/poprox_storage/repositories/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from poprox_storage.repositories.account_interest_log import DbAccountInterestRepository
from poprox_storage.repositories.account_interest_log import DbAccountInterestRepository, S3AccountInterestRepository
from poprox_storage.repositories.accounts import DbAccountRepository
from poprox_storage.repositories.articles import (
DbArticleRepository,
Expand Down Expand Up @@ -56,6 +56,7 @@ def wrapper(event, context):
"DbNewsletterRepository",
"DbPlacementRepository",
"DbQualtricsSurveyRepository",
"S3AccountInterestRepository",
"S3ArticleRepository",
"S3ClicksRepository",
"S3DemographicsRepository",
Expand Down
30 changes: 30 additions & 0 deletions src/poprox_storage/repositories/account_interest_log.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import logging
from datetime import datetime
from uuid import UUID

from sqlalchemy import Connection, func, select

from poprox_concepts.domain import AccountInterest
from poprox_storage.repositories.data_stores.db import DatabaseRepository
from poprox_storage.repositories.data_stores.s3 import S3Repository

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
Expand Down Expand Up @@ -82,3 +84,31 @@ def fetch_topic_preferences(self, account_id: UUID) -> list[AccountInterest]:
for row in results
]
return results


class S3AccountInterestRepository(S3Repository):
    """Repository built on ``S3Repository`` that exports account interests as Parquet."""

    def store_as_parquet(
        self,
        interests: list[AccountInterest],
        bucket_name: str,
        file_prefix: str,
        start_time: datetime = None,
    ):
        """Flatten ``interests`` into plain records and pass them to the Parquet writer.

        Args:
            interests: Account interests to export.
            bucket_name: Forwarded unchanged to ``_write_records_as_parquet``.
            file_prefix: Forwarded unchanged to ``_write_records_as_parquet``.
            start_time: Forwarded unchanged to ``_write_records_as_parquet``;
                defaults to ``None``.

        Returns:
            Whatever ``_write_records_as_parquet`` returns.
        """
        # NOTE(review): _write_records_as_parquet is not defined in this class;
        # presumably it is inherited from S3Repository -- confirm.
        return self._write_records_as_parquet(
            convert_to_records(interests),
            bucket_name,
            file_prefix,
            start_time,
        )


def convert_to_records(interests: list[AccountInterest]) -> list[dict]:
    """Turn account interests into flat dictionaries, one per interest.

    ``account_id`` and ``entity_id`` are passed through ``str()``; the
    remaining fields are copied as-is. Input order is preserved.

    Args:
        interests: The interests to convert.

    Returns:
        A list of dicts with the keys ``account_id``, ``entity_id``,
        ``entity_name``, ``preference`` and ``frequency``.
    """
    return [
        {
            "account_id": str(item.account_id),
            "entity_id": str(item.entity_id),
            "entity_name": item.entity_name,
            "preference": item.preference,
            "frequency": item.frequency,
        }
        for item in interests
    ]
3 changes: 2 additions & 1 deletion src/poprox_storage/repositories/demographics.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,11 +101,12 @@ def convert_to_records(demographics: list[Demographics]) -> list[dict]:
for demo in demographics:
records.append(
{
"account_id": demo.account_id,
"account_id": str(demo.account_id),
"birth_year": demo.birth_year,
"education": demo.education,
"gender": demo.gender,
"race": demo.race,
"zip3": demo.zip3,
}
)

Expand Down

0 comments on commit 938afb7

Please sign in to comment.