From 7a188c41c333205cee58bad494131dc84c6d0644 Mon Sep 17 00:00:00 2001 From: Krishna Gopal Date: Tue, 30 Jan 2024 13:35:38 -0600 Subject: [PATCH] Add table column statistics to text2sql query info (#1402) * Add table column statistics to text2sql query info --- package.json | 2 +- .../lib/ai_assistant/tools/table_schema.py | 7 ++++++- querybook/server/models/metastore.py | 16 ++++++++++------ 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/package.json b/package.json index ddadce730..43a574e77 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "querybook", - "version": "3.29.1", + "version": "3.30.0", "description": "A Big Data Webapp", "private": true, "scripts": { diff --git a/querybook/server/lib/ai_assistant/tools/table_schema.py b/querybook/server/lib/ai_assistant/tools/table_schema.py index f3d2cf218..a9726e60d 100644 --- a/querybook/server/lib/ai_assistant/tools/table_schema.py +++ b/querybook/server/lib/ai_assistant/tools/table_schema.py @@ -1,9 +1,9 @@ from typing import Callable from app.db import with_session +from lib.vector_store import get_vector_store from logic import metastore as m_logic from models.metastore import DataTable, DataTableColumn -from lib.vector_store import get_vector_store def get_table_documentation(table: DataTable) -> str: @@ -33,6 +33,11 @@ def _get_column(column: DataTableColumn) -> dict[str, str]: column_json["description"] = column.data_elements[0].description column_json["data_element"] = column.data_elements[0].name + if len(column.statistics): + column_json["statistics"] = { + stat.key: stat.value for stat in column.statistics if stat.value is not None + } + return column_json diff --git a/querybook/server/models/metastore.py b/querybook/server/models/metastore.py index 221455d7a..bdde3d514 100644 --- a/querybook/server/models/metastore.py +++ b/querybook/server/models/metastore.py @@ -1,18 +1,17 @@ import sqlalchemy as sql -from sqlalchemy.orm import backref, relationship - from app import db from const.db import ( - utf8mb4_name_length, - name_length, - now, description_length, - url_length, mediumtext_length, + name_length, + now, type_length, + url_length, + utf8mb4_name_length, ) from const.metastore import DataTableWarningSeverity from lib.sqlalchemy import CRUDMixin, TruncateString +from sqlalchemy.orm import backref, relationship Base = db.Base @@ -301,6 +300,11 @@ class DataTableColumn(TruncateString("name", "type", "comment"), Base): data_elements = relationship( "DataElement", secondary="data_element_association", uselist=True, viewonly=True ) + statistics = relationship( + "DataTableColumnStatistics", + uselist=True, + viewonly=True, + ) def to_dict(self, include_table=False): column_dict = {