Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DH5669/store db dialect in database connection collection #447

Merged
merged 7 commits into from
Apr 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions dataherald/scripts/populate_dialect_db_connection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import dataherald.config
from dataherald.config import System
from dataherald.db import DB
from dataherald.sql_database.models.types import DatabaseConnection
from dataherald.utils.encrypt import FernetEncrypt

if __name__ == "__main__":
    # One-off backfill: for every stored database connection that has no
    # "dialect" value yet, derive it from the decrypted connection URI and
    # write it back to the "database_connections" collection.
    settings = dataherald.config.Settings()
    system = System(settings)
    system.start()
    storage = system.instance(DB)
    fernet_encrypt = FernetEncrypt()

    for database_connection in storage.find_all("database_connections"):
        # Records that already carry a dialect are left untouched, so the
        # script can be re-run safely.
        if database_connection.get("dialect"):
            continue

        # `connection_uri` is optional on the model; a missing/null value
        # cannot be decrypted, so skip the record instead of aborting the
        # whole backfill part-way through.
        encrypted_uri = database_connection.get("connection_uri")
        if not encrypted_uri:
            continue

        decrypted_uri = fernet_encrypt.decrypt(encrypted_uri)
        dialect_prefix = DatabaseConnection.get_dialect(decrypted_uri)
        dialect = DatabaseConnection.set_dialect(dialect_prefix)
        storage.update_or_create(
            "database_connections",
            {"_id": database_connection["_id"]},
            {"dialect": dialect},
        )
33 changes: 28 additions & 5 deletions dataherald/sql_database/models/types.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import re
from datetime import datetime
from enum import Enum
from typing import Any

from pydantic import BaseModel, BaseSettings, Extra, Field, validator
Expand Down Expand Up @@ -75,9 +76,23 @@ class InvalidURIFormatError(Exception):
pass


class SupportedDialects(Enum):
    """Dialect identifiers that can be stored on a database connection.

    Each member's value is the lowercase string that is looked for inside
    the scheme portion of a connection URI when the dialect is derived.
    """

    POSTGRES = "postgresql"
    MYSQL = "mysql"
    MSSQL = "mssql"
    DATABRICKS = "databricks"
    SNOWFLAKE = "snowflake"
    CLICKHOUSE = "clickhouse"
    AWSATHENA = "awsathena"
    DUCKDB = "duckdb"
    BIGQUERY = "bigquery"
    SQLITE = "sqlite"


class DatabaseConnection(BaseModel):
id: str | None
alias: str
dialect: SupportedDialects | None
use_ssh: bool = False
connection_uri: str | None
path_to_credentials_file: str | None
Expand All @@ -88,21 +103,29 @@ class DatabaseConnection(BaseModel):
created_at: datetime = Field(default_factory=datetime.now)

@classmethod
def validate_uri(cls, input_string):
def get_dialect(cls, input_string):
    """Return the leading scheme portion of a connection URI.

    The URI must look like ``<scheme>:/<segment>/<segment>`` (one or more
    slashes after the colon); the text before the first ``:`` is returned
    unchanged, including any ``+driver`` suffix.

    Raises:
        InvalidURIFormatError: if ``input_string`` does not start with
            that shape.
    """
    parsed = re.match(r"([^:/]+):/+([^/]+)/([^/]+)", input_string)
    if parsed is None:
        raise InvalidURIFormatError(f"Invalid URI format: {input_string}")
    # Only the first capture group (everything before the colon) is needed.
    return parsed[1]

@classmethod
def set_dialect(cls, input_string):
    """Map a URI scheme prefix to a supported dialect value.

    Walks ``SupportedDialects`` in declaration order and returns the value
    of the first member whose value occurs as a substring of
    ``input_string``; returns ``None`` when no member matches.
    """
    matching_values = (
        candidate.value
        for candidate in SupportedDialects
        if candidate.value in input_string
    )
    return next(matching_values, None)

@validator("connection_uri", pre=True, always=True)
def connection_uri_format(cls, value: str):
def connection_uri_format(cls, value: str, values):
MohammadrezaPourreza marked this conversation as resolved.
Show resolved Hide resolved
fernet_encrypt = FernetEncrypt()
try:
fernet_encrypt.decrypt(value)
return value
except Exception:
cls.validate_uri(value)
return fernet_encrypt.encrypt(value)
dialect_prefix = cls.get_dialect(value)
values["dialect"] = cls.set_dialect(dialect_prefix)
value = fernet_encrypt.encrypt(value)
return value

@validator("llm_api_key", pre=True, always=True)
Expand Down
1 change: 1 addition & 0 deletions docs/api.create_database_connection.rst
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ HTTP 201 code response
{
"id": "64f251ce9614e0e94b0520bc",
"alias": "string_999",
"dialect": "postgresql",
"use_ssh": true,
"connection_uri": "gAAAAABk8lHQNAUn5XARb94Q8H1OfHpVzOtzP3b2LCpwxUsNCe7LGkwkN8FX-IF3t65oI5mTzgDMR0BY2lzvx55gO0rxlQxRDA==",
"path_to_credentials_file": "string",
Expand Down
2 changes: 2 additions & 0 deletions docs/api.list_database_connections.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ HTTP 200 code response
{
"id": "64dfa0e103f5134086f7090c",
"alias": "databricks",
"dialect": "databricks",
"use_ssh": false,
"connection_uri": "foooAABk91Q4wjoR2h07GR7_72BdQnxi8Rm6i_EjyS-mzz_o2c3RAWaEqnlUvkK5eGD5kUfE5xheyivl1Wfbk_EM7CgV4SvdLmOOt7FJV-3kG4zAbar=",
"path_to_credentials_file": null,
Expand All @@ -27,6 +28,7 @@ HTTP 200 code response
{
"id": "64e52c5f7d6dc4bc510d6d28",
"alias": "postgres",
"dialect": "postgres",
"use_ssh": true,
"connection_uri": null,
"path_to_credentials_file": "bar-LWxPdFcjQw9lU7CeK_2ELR3jGBq0G_uQ7E2rfPLk2RcFR4aDO9e2HmeAQtVpdvtrsQ_0zjsy9q7asdsadXExYJ0g==",
Expand Down
1 change: 1 addition & 0 deletions docs/api.update_database_connection.rst
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ HTTP 200 code response
{
"id": "64f251ce9614e0e94b0520bc",
"alias": "string_999",
"dialect": "sqlite",
"use_ssh": false,
"connection_uri": "gAAAAABk8lHQNAUn5XARb94Q8H1OfHpVzOtzP3b2LCpwxUsNCe7LGkwkN8FX-IF3t65oI5mTzgDMR0BY2lzvx55gO0rxlQxRDA==",
"path_to_credentials_file": "string",
Expand Down
Loading