Skip to content

Commit

Permalink
DH-4608 Refactor Dataherald Engine API and resources (#151)
Browse files Browse the repository at this point in the history
* DH-4608 Replace db_alias by db_connection_id

* Improve database-connections endpoints

* Improve table-descriptions endpoints

* Improve queries endpoints

* Add filters for table descriptions endpoint

* Fix nl-query-responses endpoints and sql-query-executions endpoint

* Use DatabaseConnectionRepository

* Fix nl-query-responses POST endpoint

* Fix tests

* Add script to update version

* Update documentation and readme file

* DH-4598 ported changes in other branch for docs into the refactor branch

* remove reference to contributing from modules

---------

Co-authored-by: Amir A. Zohrenejad <[email protected]>
  • Loading branch information
jcjc712 and aazo11 authored Sep 14, 2023
1 parent c167a43 commit 86467b9
Show file tree
Hide file tree
Showing 52 changed files with 1,122 additions and 360 deletions.
16 changes: 9 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ Dataherald is a natural language-to-SQL engine built for enterprise-level questi

This project is undergoing swift development, and as such, the API may be subject to change at any time.

If you would like to learn more, you can join the <a href="https://discord.gg/A59Uxyy2k9" target="_blank">Discord</a> or <a href="https://dataherald.readthedocs.io/" target="_blank">read the docs</a>.

## Overview

### Background
Expand Down Expand Up @@ -162,7 +164,7 @@ curl -X 'POST' \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
"db_alias": "my_db_alias_identifier",
"alias": "my_db_alias",
"use_ssh": false,
"connection_uri": "sqlite:///mydb.db",
"path_to_credentials_file": "my-folder/my-secret.json" # Required for bigquery
Expand All @@ -176,7 +178,7 @@ curl -X 'POST' \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
"db_alias": "my_db_alias_identifier",
"alias": "my_db_alias",
"use_ssh": true,
"ssh_settings": {
"db_name": "db_name",
Expand Down Expand Up @@ -254,7 +256,7 @@ curl -X 'POST' \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
"db_alias": "db_name",
"db_connection_id": "db_connection_id",
"table_name": "table_name"
}'
```
Expand All @@ -267,7 +269,7 @@ Once a database was scanned you can use this endpoint to retrieve the tables nam

```
curl -X 'GET' \
'<host>/api/v1/scanned-databases?db_alias=databricks' \
'<host>/api/v1/scanned-databases?db_connection_id=64dfa0e103f5134086f7090c' \
-H 'accept: application/json'
```

Expand All @@ -289,11 +291,11 @@ curl -X 'POST' \
```

#### Adding string descriptions
In addition to database table_info and golden_sql, you can add strings describing tables and/or columns to the context store manually from the `PATCH /api/v1/scanned-db/{db_name}/{table_name}` endpoint
In addition to database table_info and golden_sql, you can manually add strings describing tables and/or columns to the context store using the `PATCH /api/v1/scanned-db/{db_connection_id}/{table_name}` endpoint:

```
curl -X 'PATCH' \
'<host>/api/v1/scanned-db/db_name/table_name' \
'<host>/api/v1/scanned-db/db_connection_id/table_name' \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
Expand Down Expand Up @@ -322,7 +324,7 @@ curl -X 'POST' \
-H 'Content-Type: application/json' \
-d '{
"question": "Your question in natural language",
"db_alias": "db_name"
"db_connection_id": "db_connection_id"
}'
```

Expand Down
46 changes: 30 additions & 16 deletions dataherald/api/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from abc import ABC, abstractmethod
from typing import Any, List
from typing import List

from dataherald.api.types import Query
from dataherald.config import Component
from dataherald.eval import Evaluation
from dataherald.db_scanner.models.types import TableSchemaDetail
from dataherald.sql_database.models.types import DatabaseConnection, SSHSettings
from dataherald.types import (
DatabaseConnectionRequest,
Expand All @@ -12,7 +12,6 @@
GoldenRecordRequest,
NLQueryResponse,
QuestionRequest,
ScannedDBResponse,
ScannerRequest,
TableDescriptionRequest,
UpdateQueryRequest,
Expand All @@ -34,42 +33,57 @@ def answer_question(self, question_request: QuestionRequest) -> NLQueryResponse:
pass

@abstractmethod
def connect_database(
def create_database_connection(
self, database_connection_request: DatabaseConnectionRequest
) -> DatabaseConnection:
pass

@abstractmethod
def add_description(
def list_database_connections(self) -> list[DatabaseConnection]:
pass

@abstractmethod
def update_database_connection(
self,
db_connection_id: str,
database_connection_request: DatabaseConnectionRequest,
) -> DatabaseConnection:
pass

@abstractmethod
def update_table_description(
self,
db_name: str,
table_name: str,
table_description_id: str,
table_description_request: TableDescriptionRequest,
) -> bool:
) -> TableSchemaDetail:
pass

@abstractmethod
def add_golden_records(
self, golden_records: List[GoldenRecordRequest]
) -> List[GoldenRecord]:
def list_table_descriptions(
self, db_connection_id: str | None = None, table_name: str | None = None
) -> list[TableSchemaDetail]:
pass

@abstractmethod
def execute_query(self, query: Query) -> tuple[str, dict]:
def add_golden_records(
self, golden_records: List[GoldenRecordRequest]
) -> List[GoldenRecord]:
pass

@abstractmethod
def update_query(self, query_id: str, query: UpdateQueryRequest) -> NLQueryResponse:
def execute_sql_query(self, query: Query) -> tuple[str, dict]:
pass

@abstractmethod
def execute_temp_query(
self, query_id: str, query: ExecuteTempQueryRequest
def update_nl_query_response(
self, query_id: str, query: UpdateQueryRequest
) -> NLQueryResponse:
pass

@abstractmethod
def get_scanned_databases(self, db_alias: str) -> ScannedDBResponse:
def get_nl_query_response(
self, query_request: ExecuteTempQueryRequest
) -> NLQueryResponse:
pass

@abstractmethod
Expand Down
Loading

0 comments on commit 86467b9

Please sign in to comment.