def add_first_issuance_and_retirement_dates(
    *, credits_data: pd.DataFrame, projects_data: pd.DataFrame
) -> pd.DataFrame:
    """
    Add the first issuance and first retirement dates of carbon credits to each project.

    Parameters
    ----------
    credits_data : pd.DataFrame
        A pandas DataFrame containing credit transaction data with columns 'project_id',
        'transaction_date', and 'transaction_type'.
    projects_data : pd.DataFrame
        A pandas DataFrame containing project data with a 'project_id' column.

    Returns
    -------
    projects_data : pd.DataFrame
        A new DataFrame holding the original projects columns plus 'first_issuance_at'
        (earliest 'transaction_date' among the project's 'issuance' transactions) and
        'first_retirement_at' (earliest 'transaction_date' among the project's other
        transactions). Projects without a matching transaction get a missing value.

    Notes
    -----
    Every transaction whose 'transaction_type' is not exactly 'issuance' is counted as a
    retirement when computing 'first_retirement_at'.
    """

    def _earliest_per_project(rows: pd.DataFrame, column_name: str) -> pd.DataFrame:
        # Name the aggregate up front so the merges below never depend on the
        # automatic '_x' / '_y' suffixes, which get attached to the wrong columns
        # when projects_data already carries its own 'transaction_date' column.
        return (
            rows.groupby('project_id')['transaction_date']
            .min()
            .rename(column_name)
            .reset_index()
        )

    first_issuance = _earliest_per_project(
        credits_data[credits_data['transaction_type'] == 'issuance'],
        'first_issuance_at',
    )
    first_retirement = _earliest_per_project(
        credits_data[credits_data['transaction_type'] != 'issuance'],
        'first_retirement_at',
    )

    # Left merges keep every project row, including those with no transactions.
    projects_with_dates = pd.merge(projects_data, first_issuance, on='project_id', how='left')
    projects_with_dates = pd.merge(
        projects_with_dates, first_retirement, on='project_id', how='left'
    )

    return projects_with_dates