Skip to content

Commit

Permalink
[DH-5457] Add MS SQL Server support (#422)
Browse files Browse the repository at this point in the history
  • Loading branch information
jcjc712 authored and DishenWang2023 committed May 7, 2024
1 parent 2230e78 commit 396eb5f
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 2 deletions.
3 changes: 2 additions & 1 deletion dataherald/db_scanner/repository/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def get_all_tables_by_db(self, query: dict) -> List[TableDescription]:
def save_table_info(self, table_info: TableDescription) -> TableDescription:
table_info_dict = table_info.dict(exclude={"id"})
table_info_dict["db_connection_id"] = str(table_info.db_connection_id)
table_info_dict["table_name"] = table_info.table_name.lower()
table_info_dict = {
k: v for k, v in table_info_dict.items() if v is not None and v != []
}
Expand All @@ -57,7 +58,7 @@ def save_table_info(self, table_info: TableDescription) -> TableDescription:
DB_COLLECTION,
{
"db_connection_id": table_info_dict["db_connection_id"],
"table_name": table_info.table_name,
"table_name": table_info_dict["table_name"],
},
table_info_dict,
)
Expand Down
38 changes: 38 additions & 0 deletions dataherald/db_scanner/services/sql_server_scanner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from overrides import override
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.sql.schema import Column

from dataherald.db_scanner.models.types import QueryHistory
from dataherald.db_scanner.services.abstract_scanner import AbstractScanner
from dataherald.sql_database.base import SQLDatabase

# Bounds on a column's approximate distinct-value count: cardinality_values
# only returns the distinct values when MIN < count <= MAX.
MIN_CATEGORY_VALUE = 1
MAX_CATEGORY_VALUE = 100
# NOTE(review): not referenced anywhere in this module as shown — confirm
# whether it is still needed.
MAX_LOGS = 5_000


class SqlServerScanner(AbstractScanner):
    """Scanner service for Microsoft SQL Server (T-SQL) databases."""

    @staticmethod
    def _quote_identifier(name: str) -> str:
        """Return *name* as a bracket-delimited T-SQL identifier.

        A closing bracket inside the name is doubled, which is how T-SQL
        escapes ``]`` within a delimited identifier. Quoting keeps names that
        contain spaces, reserved words or special characters from breaking
        (or altering) the generated statement.
        """
        return "[" + str(name).replace("]", "]]") + "]"

    @override
    def cardinality_values(self, column: Column, db_engine: SQLDatabase) -> list | None:
        """Return the distinct values of a low-cardinality column.

        Args:
            column: Column whose distinct values are wanted; its ``name`` and
                ``table.name`` are used to build the queries.
            db_engine: Database wrapper whose ``engine`` executes the queries.

        Returns:
            The distinct values as strings when the approximate distinct count
            is greater than ``MIN_CATEGORY_VALUE`` and at most
            ``MAX_CATEGORY_VALUE``; ``None`` otherwise, or when the count
            query raises ``SQLAlchemyError``.

        Raises:
            SQLAlchemyError: If the query fetching the distinct values fails
                (only the count query is guarded).
        """
        # Quote both identifiers once and reuse them in both statements so the
        # count query and the values query always target the same objects.
        column_name = self._quote_identifier(column.name)
        table_name = self._quote_identifier(column.table.name)

        try:
            # NOTE: APPROX_COUNT_DISTINCT is not available on every SQL Server
            # version; where it is missing the error is caught below and the
            # column is simply reported as having no categories.
            count_query = f"SELECT APPROX_COUNT_DISTINCT({column_name}) FROM {table_name}"  # noqa: S608
            rs = db_engine.engine.execute(count_query).fetchall()
        except SQLAlchemyError:
            return None

        if (
            len(rs) > 0
            and len(rs[0]) > 0
            and MIN_CATEGORY_VALUE < rs[0][0] <= MAX_CATEGORY_VALUE
        ):
            # The count above is approximate, so cap the result set explicitly.
            cardinality_query = f"SELECT TOP 101 {column_name} FROM (SELECT DISTINCT {column_name} FROM {table_name}) AS subquery;"  # noqa: E501 S608
            cardinality = db_engine.engine.execute(cardinality_query).fetchall()
            return [str(category[0]) for category in cardinality]

        return None

    @override
    def get_logs(
        self, table: str, db_engine: SQLDatabase, db_connection_id: str  # noqa: ARG002
    ) -> list[QueryHistory]:
        """Return query history for *table*; always an empty list here."""
        return []
2 changes: 2 additions & 0 deletions dataherald/db_scanner/sqlalchemy.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from dataherald.db_scanner.services.big_query_scanner import BigQueryScanner
from dataherald.db_scanner.services.postgre_sql_scanner import PostgreSqlScanner
from dataherald.db_scanner.services.snowflake_scanner import SnowflakeScanner
from dataherald.db_scanner.services.sql_server_scanner import SqlServerScanner
from dataherald.sql_database.base import SQLDatabase
from dataherald.types import ScannerRequest

Expand Down Expand Up @@ -273,6 +274,7 @@ def scan(
"snowflake": SnowflakeScanner,
"bigquery": BigQueryScanner,
"psycopg2": PostgreSqlScanner,
"pymssql": SqlServerScanner,
}
self.scanner_service = BaseScanner()
if db_engine.engine.driver in services.keys():
Expand Down
2 changes: 1 addition & 1 deletion dataherald/sql_database/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ def get_tables_and_views(self) -> List[str]:
rows = inspector.get_table_names() + inspector.get_view_names()
if len(rows) == 0:
raise EmptyDBError("The db is empty it could be a permission issue")
return rows
return [row.lower() for row in rows]

@property
def dialect(self) -> str:
Expand Down
15 changes: 15 additions & 0 deletions docs/api.create_database_connection.rst
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,21 @@ Example::

"connection_uri": mysql+pymysql://admin:[email protected]:3306/my-database

Microsoft SQL Server
^^^^^^^^^^^^^^^^^^^^

Uri structure::

"connection_uri": mssql+pymssql://<user>:<password>@<host>:<port>/<db-name>

Example::

"connection_uri": mssql+pymssql://admin:[email protected]:1433/my-database

To use a schema other than the default ``dbo``, change the connecting user's default schema by running the following command on the SQL Server database::

ALTER USER <your_username> WITH DEFAULT_SCHEMA = <your_schema_name>;

Databricks
^^^^^^^^^^^^

Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,4 @@ duckdb==0.9.1
PyMySQL==1.1.0
clickhouse-sqlalchemy==0.2.5
astrapy==0.7.6
pymssql==2.2.11

0 comments on commit 396eb5f

Please sign in to comment.