Skip to content
This repository has been archived by the owner on Sep 18, 2024. It is now read-only.

Commit

Permalink
[Task]: Finish adding Postgres Integration to Analytics Library (#72)
Browse files Browse the repository at this point in the history
Fixes #45

* update `config.py` database url
* add function in `cli.py`
* updated packages in `poetry.lock`

N/A

Row created manually in the database alongside a row created via
`test_connection`
![Screen Shot 2024-06-11 at 1 49 53 PM](https://github.com/navapbc/simpler-grants-gov/assets/37313082/b83afad8-5fe1-404f-adf3-c94945740bbe)
  • Loading branch information
aplybeah authored and acouch committed Sep 18, 2024
1 parent c3503b7 commit 1055985
Show file tree
Hide file tree
Showing 7 changed files with 804 additions and 500 deletions.
3 changes: 3 additions & 0 deletions analytics/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
For more information visit: https://www.dynaconf.com/
"""

from dynaconf import Dynaconf, Validator, ValidationError

settings = Dynaconf(
Expand All @@ -16,6 +17,8 @@
# looks for config vars in the following files
# with vars in .secrets.toml overriding vars in settings.toml
settings_files=["settings.toml", ".secrets.toml"],
# merge the settings found in all files
merge_enabled= True,
# add validators for our required config vars
validators=[
Validator("SLACK_BOT_TOKEN", must_exist=True),
Expand Down
1,159 changes: 660 additions & 499 deletions analytics/poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions analytics/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ pydantic = "^2.0.3"
python = "^3.11"
slack-sdk = "^3.23.0"
typer = { extras = ["all"], version = "^0.9.0" }
sqlalchemy = "^2.0.30"

[tool.poetry.group.dev.dependencies]
black = "^23.7.0"
Expand Down
4 changes: 4 additions & 0 deletions analytics/settings.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Default (non-secret) Postgres connection settings for the analytics package.
# NOTE(review): no POSTGRES_PASSWORD is defined in this file — presumably it is
# supplied through .secrets.toml or an environment variable; confirm.
POSTGRES_NAME = "app"
POSTGRES_HOST = "0.0.0.0"
POSTGRES_USER = "app"
POSTGRES_PORT = 5432
29 changes: 28 additions & 1 deletion analytics/src/analytics/cli.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,23 @@
# pylint: disable=C0415
"""Expose a series of CLI entrypoints for the analytics package."""
import logging
from pathlib import Path
from typing import Annotated, Optional

import typer
from slack_sdk import WebClient
from sqlalchemy import text

from analytics.datasets.deliverable_tasks import DeliverableTasks
from analytics.datasets.sprint_board import SprintBoard
from analytics.integrations import github, slack
from analytics.integrations import db, github, slack
from analytics.metrics.base import BaseMetric, Unit
from analytics.metrics.burndown import SprintBurndown
from analytics.metrics.burnup import SprintBurnup
from analytics.metrics.percent_complete import DeliverablePercentComplete

logger = logging.getLogger(__name__)

# fmt: off
# Instantiate typer options with help text for the commands below
SPRINT_FILE_ARG = typer.Option(help="Path to file with exported sprint data")
Expand Down Expand Up @@ -122,6 +126,29 @@ def calculate_sprint_burnup(
)


@export_app.command(name="test_connection")
def test_connection() -> None:
    """
    Test function that ensures the DB connection works.

    Inserts a fixed test row into the `audit_log` table, selects every row
    with the same `user_id`, prints each selected row, and then commits the
    transaction so the inserted row is persisted.

    **Raises:**
    * Any exception raised by SQLAlchemy while connecting or executing the
    statements. The connection is released in every case.
    """
    engine = db.get_db()

    insert_statement = text(
        "INSERT INTO audit_log (topic,timestamp, end_timestamp, user_id, details)"
        "VALUES('test','2024-06-11 10:41:15','2024-06-11 10:54:15',87654,'test from command');",
    )
    select_statement = text("SELECT * FROM audit_log WHERE user_id=87654;")

    # Using the connection as a context manager guarantees it is closed
    # (and returned to the pool) even if one of the statements raises.
    with engine.connect() as connection:
        # Test INSERT INTO action
        connection.execute(insert_statement)
        # Test SELECT action
        result = connection.execute(select_statement)
        for row in result:
            print(row)
        result.close()
        # commits the transaction to the db
        connection.commit()


@metrics_app.command(name="deliverable_percent_complete")
def calculate_deliverable_percent_complete(
sprint_file: Annotated[str, SPRINT_FILE_ARG],
Expand Down
79 changes: 79 additions & 0 deletions analytics/src/analytics/datasets/base.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
# ruff: noqa: E501
# pylint: disable=C0301
"""Base class for all datasets which provides an interface for metrics."""
from pathlib import Path
from typing import Self

import pandas as pd
from sqlalchemy import Engine


class BaseDataset:
Expand All @@ -22,6 +25,82 @@ def from_dict(cls, data: list[dict]) -> Self:
"""Load the dataset from a list of python dictionaries representing records."""
return cls(df=pd.DataFrame(data))

def to_sql(
    self,
    output_table: str,
    engine: Engine,
    *,
    replace_table: bool = True,
) -> None:
    """
    Persist this dataset's DataFrame (`self.df`) to a SQL table.

    The write is delegated to `pandas.DataFrame.to_sql` with `index=False`,
    so the DataFrame index is not written as a column.

    **Parameters:**
    * output_table (str, required): Name of the destination table.
    * engine (sqlalchemy.engine.Engine, required): SQLAlchemy Engine used to
    reach the database.
    * replace_table (bool, keyword-only, default=True):
    * True: the existing table is replaced with the DataFrame contents
    (if_exists="replace").
    * False: the DataFrame rows are appended to the existing table
    (if_exists="append").

    **Returns:**
    * None

    **Raises:**
    * Whatever the underlying `pandas.DataFrame.to_sql` call raises, e.g.
    database connection errors or data type mismatches.
    """
    # Translate the boolean flag into the pandas `if_exists` mode
    write_mode = "replace" if replace_table else "append"
    self.df.to_sql(output_table, engine, if_exists=write_mode, index=False)

@classmethod
def from_sql(
    cls,
    source_table: str,
    engine: Engine,
) -> Self:
    """
    Build an instance of this class from the contents of a SQL table.

    `source_table` and `engine` are passed straight to `pandas.read_sql`, and
    the resulting DataFrame is handed to the class constructor as `df`.

    **Parameters:**
    * source_table (str, required): Name of the table to read from.
    * engine (sqlalchemy.engine.Engine, required): SQLAlchemy Engine used to
    reach the database.

    **Returns:**
    * Self: A new instance of `cls` wrapping the DataFrame that was read.

    **Raises:**
    * Whatever the underlying `pandas.read_sql` call raises, e.g. database
    connection errors or data type mismatches.
    """
    table_df = pd.read_sql(source_table, engine)
    return cls(df=table_df)

def to_csv(
self,
output_file: Path,
Expand Down
29 changes: 29 additions & 0 deletions analytics/src/analytics/integrations/db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# pylint: disable=invalid-name, line-too-long
"""Get a connection to the database using a SQLAlchemy engine object."""

from sqlalchemy import URL, Engine, create_engine

from config import settings


# The variables used in the connection url are set in settings.toml and
# .secrets.toml. Each one can be overridden with an environment variable that
# uses the custom prefix defined in config.py ("ANALYTICS"),
# e.g. `export ANALYTICS_POSTGRES_USER=new_usr`.
# Docs: https://www.dynaconf.com/envvars/
def get_db() -> Engine:
    """
    Create a SQLAlchemy engine for the Postgres database.

    The user, password, host, port and database name are read from the
    dynaconf `settings` object and assembled into a connection URL for the
    `postgresql+psycopg` driver.

    Returns
    -------
    sqlalchemy.engine.Engine
        A SQLAlchemy engine object representing the connection to the database.

    """
    # URL.create escapes special characters (e.g. "@" or "/") in the
    # credentials, which plain string interpolation would leave unescaped
    # and thereby produce a malformed URL.
    connection_url = URL.create(
        drivername="postgresql+psycopg",
        username=settings.postgres_user,
        password=settings.postgres_password,
        host=settings.postgres_host,
        port=settings.postgres_port,
        # Name the target database explicitly instead of relying on the
        # server-side default for the connecting user.
        database=settings.postgres_name,
    )
    return create_engine(
        connection_url,
        pool_pre_ping=True,
        hide_parameters=True,
    )

0 comments on commit 1055985

Please sign in to comment.