diff --git a/lib/dl_connector_ydb/LICENSE b/lib/dl_connector_ydb/LICENSE new file mode 100644 index 000000000..74ba5f6c7 --- /dev/null +++ b/lib/dl_connector_ydb/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2023 YANDEX LLC + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/lib/dl_connector_ydb/README.md b/lib/dl_connector_ydb/README.md new file mode 100644 index 000000000..0b5a93cef --- /dev/null +++ b/lib/dl_connector_ydb/README.md @@ -0,0 +1 @@ +# dl_connector_ydb diff --git a/lib/dl_connector_ydb/dl_connector_ydb/__init__.py b/lib/dl_connector_ydb/dl_connector_ydb/__init__.py new file mode 100644 index 000000000..0bfdfc461 --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/__init__.py @@ -0,0 +1,6 @@ +try: + from ydb_proto_stubs_import import init_ydb_stubs + + init_ydb_stubs() +except ImportError: + pass # stubs will be initialized from the ydb package diff --git a/lib/dl_connector_ydb/dl_connector_ydb/api/__init__.py b/lib/dl_connector_ydb/dl_connector_ydb/api/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/lib/dl_connector_ydb/dl_connector_ydb/api/ydb/__init__.py b/lib/dl_connector_ydb/dl_connector_ydb/api/ydb/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/lib/dl_connector_ydb/dl_connector_ydb/api/ydb/api_schema/__init__.py b/lib/dl_connector_ydb/dl_connector_ydb/api/ydb/api_schema/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/lib/dl_connector_ydb/dl_connector_ydb/api/ydb/api_schema/connection.py b/lib/dl_connector_ydb/dl_connector_ydb/api/ydb/api_schema/connection.py new file mode 100644 index 000000000..7973fe6bc --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/api/ydb/api_schema/connection.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +from marshmallow import fields as ma_fields + +from dl_api_connector.api_schema.connection_base import ConnectionSchema +from dl_api_connector.api_schema.connection_base_fields import ( + cache_ttl_field, + secret_string_field, +) +from dl_api_connector.api_schema.connection_mixins import RawSQLLevelMixin +from dl_api_connector.api_schema.connection_sql import DBHostField +from dl_api_connector.api_schema.extras import FieldExtra + +from dl_connector_ydb.core.ydb.us_connection import YDBConnection + + +class YDBConnectionSchema(RawSQLLevelMixin, ConnectionSchema): + TARGET_CLS = YDBConnection + + host = DBHostField(attribute="data.host", required=True, bi_extra=FieldExtra(editable=True)) + port = ma_fields.Integer(attribute="data.port", required=True, bi_extra=FieldExtra(editable=True)) + db_name = ma_fields.String(attribute="data.db_name", required=True, bi_extra=FieldExtra(editable=True)) + + token = secret_string_field(attribute="data.token", required=False, allow_none=True) + cache_ttl_sec = cache_ttl_field(attribute="data.cache_ttl_sec") diff --git a/lib/dl_connector_ydb/dl_connector_ydb/api/ydb/connection_form/__init__.py b/lib/dl_connector_ydb/dl_connector_ydb/api/ydb/connection_form/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/lib/dl_connector_ydb/dl_connector_ydb/api/ydb/connection_form/form_config.py b/lib/dl_connector_ydb/dl_connector_ydb/api/ydb/connection_form/form_config.py new file mode 100644 index 000000000..653e13bcf --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/api/ydb/connection_form/form_config.py @@ -0,0 +1,128 @@ +from __future__ import annotations + +from enum import Enum +from typing import ( + Optional, + Sequence, + Type, +) + +from dl_api_commons.base_models import TenantDef +from dl_api_connector.form_config.models.api_schema import ( + FormActionApiSchema, + FormApiSchema, + FormFieldApiSchema, +) +from dl_api_connector.form_config.models.base import ( + ConnectionForm, + ConnectionFormFactory, + ConnectionFormMode, +) +from dl_api_connector.form_config.models.common import ( + CommonFieldName, + OAuthApplication, +) +import dl_api_connector.form_config.models.rows as C +from dl_api_connector.form_config.models.rows.base import FormRow +from dl_api_connector.form_config.models.shortcuts.rows import RowConstructor +from dl_configs.connectors_settings import ConnectorSettingsBase + +from dl_connector_ydb.api.ydb.connection_info import YDBConnectionInfoProvider +from dl_connector_ydb.core.ydb.settings import YDBConnectorSettings + + +class YDBOAuthApplication(OAuthApplication): + ydb = "ydb" + + +class YDBConnectionFormFactory(ConnectionFormFactory): + def _get_base_common_api_schema_items(self, names_source: Type[Enum]) -> list[FormFieldApiSchema]: + return [ + FormFieldApiSchema(name=names_source.host, required=True), + FormFieldApiSchema(name=names_source.port, required=True), + FormFieldApiSchema(name=names_source.db_name, required=True), + ] + + def _get_default_db_section(self, rc: RowConstructor, connector_settings: YDBConnectorSettings) -> list[FormRow]: + return [ + C.OAuthTokenRow( + name=CommonFieldName.token, + fake_value="******" if self.mode == ConnectionFormMode.edit else None, + application=YDBOAuthApplication.ydb, + ), + rc.host_row(default_value=connector_settings.DEFAULT_HOST_VALUE), + rc.port_row(default_value="2135"), + rc.db_name_row(), + ] + + def _get_base_edit_api_schema(self) -> FormActionApiSchema: + return FormActionApiSchema( + items=[ + FormFieldApiSchema(name=CommonFieldName.cache_ttl_sec, nullable=True), + FormFieldApiSchema(name=CommonFieldName.raw_sql_level), + ] + ) + + def _get_base_create_api_schema(self, edit_api_schema: FormActionApiSchema) -> FormActionApiSchema: + return FormActionApiSchema( + items=[ + *edit_api_schema.items, + *self._get_top_level_create_api_schema_items(), + ] + ) + + def _get_base_check_api_schema(self, common_api_schema_items: list[FormFieldApiSchema]) -> FormActionApiSchema: + return FormActionApiSchema( + items=[ + *common_api_schema_items, + *self._get_top_level_check_api_schema_items(), + ] + ) + + def _get_base_form_config( + self, + db_section_rows: Sequence[FormRow], + create_api_schema: FormActionApiSchema, + edit_api_schema: FormActionApiSchema, + check_api_schema: FormActionApiSchema, + rc: RowConstructor, + ) -> ConnectionForm: + return ConnectionForm( + title=YDBConnectionInfoProvider.get_title(self._localizer), + rows=[ + *db_section_rows, + C.CacheTTLRow(name=CommonFieldName.cache_ttl_sec), + rc.raw_sql_level_row(), + ], + api_schema=FormApiSchema( + create=create_api_schema if self.mode == ConnectionFormMode.create else None, + edit=edit_api_schema if self.mode == ConnectionFormMode.edit else None, + check=check_api_schema, + ), + ) + + def get_form_config( + self, + connector_settings: Optional[ConnectorSettingsBase], + tenant: Optional[TenantDef], + ) -> ConnectionForm: + assert connector_settings is not None and isinstance(connector_settings, YDBConnectorSettings) + rc = RowConstructor(localizer=self._localizer) + + edit_api_schema = self._get_base_edit_api_schema() + common_api_schema_items = self._get_base_common_api_schema_items(names_source=CommonFieldName) + db_section_rows = self._get_default_db_section(rc=rc, connector_settings=connector_settings) + common_api_schema_items.append( + FormFieldApiSchema(name=CommonFieldName.token, required=self.mode == ConnectionFormMode.create) + ) + edit_api_schema.items.extend(common_api_schema_items) + + create_api_schema = self._get_base_create_api_schema(edit_api_schema=edit_api_schema) + check_api_schema = self._get_base_check_api_schema(common_api_schema_items=common_api_schema_items) + return self._get_base_form_config( + db_section_rows=db_section_rows, + create_api_schema=create_api_schema, + edit_api_schema=edit_api_schema, + check_api_schema=check_api_schema, + rc=rc, + ) diff --git a/lib/dl_connector_ydb/dl_connector_ydb/api/ydb/connection_info.py b/lib/dl_connector_ydb/dl_connector_ydb/api/ydb/connection_info.py new file mode 100644 index 000000000..0c6f84a4d --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/api/ydb/connection_info.py @@ -0,0 +1,7 @@ +from dl_api_connector.connection_info import ConnectionInfoProvider + +from dl_connector_ydb.api.ydb.i18n.localizer import Translatable + + +class YDBConnectionInfoProvider(ConnectionInfoProvider): + title_translatable = Translatable("label_connector-ydb") diff --git a/lib/dl_connector_ydb/dl_connector_ydb/api/ydb/connector.py b/lib/dl_connector_ydb/dl_connector_ydb/api/ydb/connector.py new file mode 100644 index 000000000..dd945884e --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/api/ydb/connector.py @@ -0,0 +1,53 @@ +from dl_api_connector.api_schema.source_base import ( + SQLDataSourceSchema, + SQLDataSourceTemplateSchema, + SubselectDataSourceSchema, + SubselectDataSourceTemplateSchema, +) +from dl_api_connector.connector import ( + ApiConnectionDefinition, + ApiConnector, + ApiSourceDefinition, +) + +from dl_connector_ydb.api.ydb.api_schema.connection import YDBConnectionSchema +from dl_connector_ydb.api.ydb.connection_form.form_config import YDBConnectionFormFactory +from dl_connector_ydb.api.ydb.connection_info import YDBConnectionInfoProvider +from dl_connector_ydb.api.ydb.i18n.localizer import CONFIGS +from dl_connector_ydb.core.ydb.connector import ( + YDBCoreConnectionDefinition, + YDBCoreConnector, + YDBCoreSourceDefinition, + YDBCoreSubselectSourceDefinition, +) +from dl_connector_ydb.formula.constants import DIALECT_NAME_YDB + + +class YDBApiTableSourceDefinition(ApiSourceDefinition): + core_source_def_cls = YDBCoreSourceDefinition + api_schema_cls = SQLDataSourceSchema + template_api_schema_cls = SQLDataSourceTemplateSchema + + +class YDBApiSubselectSourceDefinition(ApiSourceDefinition): + core_source_def_cls = YDBCoreSubselectSourceDefinition + api_schema_cls = SubselectDataSourceSchema + template_api_schema_cls = SubselectDataSourceTemplateSchema + + +class YDBApiConnectionDefinition(ApiConnectionDefinition): + core_conn_def_cls = YDBCoreConnectionDefinition + api_generic_schema_cls = YDBConnectionSchema + info_provider_cls = YDBConnectionInfoProvider + form_factory_cls = YDBConnectionFormFactory + + +class YDBApiConnector(ApiConnector): + core_connector_cls = YDBCoreConnector + connection_definitions = (YDBApiConnectionDefinition,) + source_definitions = ( + YDBApiTableSourceDefinition, + YDBApiSubselectSourceDefinition, + ) + formula_dialect_name = DIALECT_NAME_YDB + translation_configs = frozenset(CONFIGS) diff --git a/lib/dl_connector_ydb/dl_connector_ydb/api/ydb/i18n/__init__.py b/lib/dl_connector_ydb/dl_connector_ydb/api/ydb/i18n/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/lib/dl_connector_ydb/dl_connector_ydb/api/ydb/i18n/localizer.py b/lib/dl_connector_ydb/dl_connector_ydb/api/ydb/i18n/localizer.py new file mode 100644 index 000000000..d9dca6626 --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/api/ydb/i18n/localizer.py @@ -0,0 +1,28 @@ +import os + +import attr + +from dl_i18n.localizer_base import Translatable as BaseTranslatable +from dl_i18n.localizer_base import TranslationConfig + +import dl_connector_ydb as package + + +DOMAIN = f"{package.__name__}" +CONFIGS = [ + TranslationConfig( + path=os.path.relpath(os.path.join(os.path.dirname(__file__), "../../../locales")), + domain=DOMAIN, + locale="en", + ), + TranslationConfig( + path=os.path.relpath(os.path.join(os.path.dirname(__file__), "../../../locales")), + domain=DOMAIN, + locale="ru", + ), +] + + +@attr.s +class Translatable(BaseTranslatable): + domain: str = attr.ib(default=DOMAIN) diff --git a/lib/dl_connector_ydb/dl_connector_ydb/core/__init__.py b/lib/dl_connector_ydb/dl_connector_ydb/core/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/lib/dl_connector_ydb/dl_connector_ydb/core/base/__init__.py b/lib/dl_connector_ydb/dl_connector_ydb/core/base/__init__.py new file mode 100644 index 000000000..acd7a25f4 --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/core/base/__init__.py @@ -0,0 +1 @@ +""" Shared connector logic for YDB-YQL-ScanQuery and YQ connectors """ diff --git a/lib/dl_connector_ydb/dl_connector_ydb/core/base/adapter.py b/lib/dl_connector_ydb/dl_connector_ydb/core/base/adapter.py new file mode 100644 index 000000000..f82f4c59a --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/core/base/adapter.py @@ -0,0 +1,126 @@ +from __future__ import annotations + +import datetime +import logging +from typing import ( + TYPE_CHECKING, + Any, + Callable, + ClassVar, + Optional, + Tuple, + Type, + TypeVar, +) + +import attr +import sqlalchemy as sa + +from dl_core import exc +from dl_core.connection_executors.adapters.adapters_base_sa_classic import BaseClassicAdapter +from dl_core.connection_models import TableIdent + + +if TYPE_CHECKING: + from dl_core.connection_executors.models.connection_target_dto_base import BaseSQLConnTargetDTO # noqa: F401 + from dl_core.connection_executors.models.db_adapter_data import ExecutionStepCursorInfo + from dl_core.connection_models import DBIdent + from dl_core.connectors.base.error_transformer import DBExcKWArgs + from dl_core.db.native_type import SATypeSpec + + +LOGGER = logging.getLogger(__name__) + +_DBA_YQL_BASE_DTO_TV = TypeVar("_DBA_YQL_BASE_DTO_TV", bound="BaseSQLConnTargetDTO") + + +@attr.s +class YQLAdapterBase(BaseClassicAdapter[_DBA_YQL_BASE_DTO_TV]): + def _get_db_version(self, db_ident: DBIdent) -> Optional[str]: + # Not useful. + return None + + def _is_table_exists(self, table_ident: TableIdent) -> bool: + # TODO?: use get_columns for this. + return True + + _type_code_to_sa = { + None: sa.TEXT, # fallback + "Int8": sa.INTEGER, + "Int16": sa.INTEGER, + "Int32": sa.INTEGER, + "Int64": sa.INTEGER, + "Uint8": sa.INTEGER, + "Uint16": sa.INTEGER, + "Uint32": sa.INTEGER, + "Uint64": sa.INTEGER, + "Float": sa.FLOAT, + "Double": sa.FLOAT, + "String": sa.TEXT, + "Utf8": sa.TEXT, + "Json": sa.TEXT, + "Yson": sa.TEXT, + "Uuid": sa.TEXT, + "Date": sa.DATE, + "Datetime": sa.DATETIME, + "Timestamp": sa.DATETIME, + "Interval": sa.INTEGER, + "Bool": sa.BOOLEAN, + } + _type_code_to_sa = { + **_type_code_to_sa, + # Nullable types: + **{name + "?": sa_type for name, sa_type in _type_code_to_sa.items() if name}, + } + _type_code_to_sa_prefixes = { + "Decimal(": sa.FLOAT, + } + + def _cursor_column_to_sa(self, cursor_col: Tuple[Any, ...], require: bool = True) -> Optional[SATypeSpec]: + result = super()._cursor_column_to_sa(cursor_col) + if result is not None: + return result + # Fallback: prefix + type_code = cursor_col[1] + for type_prefix, sa_type in self._type_code_to_sa_prefixes.items(): + if type_code.startswith(type_prefix): + return sa_type + if require: + raise ValueError(f"Unknown type_code: {type_code!r}") + return None + + _subselect_cursor_info_where_false: ClassVar[bool] = False + + @staticmethod + def _convert_bytes(value: bytes) -> str: + return value.decode("utf-8", errors="replace") + + @staticmethod + def _convert_ts(value: int) -> datetime.datetime: + return datetime.datetime.utcfromtimestamp(value / 1e6).replace(tzinfo=datetime.timezone.utc) + + def _get_row_converters(self, cursor_info: ExecutionStepCursorInfo) -> Tuple[Optional[Callable[[Any], Any]], ...]: + type_names_norm = [col[1].lower().strip("?") for col in cursor_info.raw_cursor_description] + return tuple( + self._convert_bytes + if type_name_norm == "string" + else self._convert_ts + if type_name_norm == "timestamp" + else None + for type_name_norm in type_names_norm + ) + + @classmethod + def make_exc( # TODO: Move to ErrorTransformer + cls, wrapper_exc: Exception, orig_exc: Optional[Exception], debug_compiled_query: Optional[str] + ) -> Tuple[Type[exc.DatabaseQueryError], DBExcKWArgs]: + exc_cls, kw = super().make_exc(wrapper_exc, orig_exc, debug_compiled_query) + + try: + message = wrapper_exc.message + except Exception: + pass + else: + kw["db_message"] = kw.get("db_message") or message + + return exc_cls, kw diff --git a/lib/dl_connector_ydb/dl_connector_ydb/core/base/data_source.py b/lib/dl_connector_ydb/dl_connector_ydb/core/base/data_source.py new file mode 100644 index 000000000..93692788a --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/core/base/data_source.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from typing import Optional + +from dl_core.data_source.sql import BaseSQLDataSource + + +class YQLDataSourceMixin(BaseSQLDataSource): + @property + def db_version(self) -> Optional[str]: + return None # not expecting anything useful diff --git a/lib/dl_connector_ydb/dl_connector_ydb/core/base/query_compiler.py b/lib/dl_connector_ydb/dl_connector_ydb/core/base/query_compiler.py new file mode 100644 index 000000000..d8b94d678 --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/core/base/query_compiler.py @@ -0,0 +1,12 @@ +from __future__ import annotations + +from typing import ClassVar + +from dl_core.connectors.base.query_compiler import ( + QueryCompiler, + SectionAliasMode, +) + + +class YQLQueryCompiler(QueryCompiler): + groupby_alias_mode: ClassVar[SectionAliasMode] = SectionAliasMode.by_alias_in_select diff --git a/lib/dl_connector_ydb/dl_connector_ydb/core/base/type_transformer.py b/lib/dl_connector_ydb/dl_connector_ydb/core/base/type_transformer.py new file mode 100644 index 000000000..5fb79f6dd --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/core/base/type_transformer.py @@ -0,0 +1,84 @@ +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + ClassVar, + Dict, + Tuple, +) + +import sqlalchemy as sa +import ydb.sqlalchemy as ydb_sa + +from dl_constants.enums import ( + ConnectionType, + UserDataType, +) +from dl_core.db.conversion_base import ( + TypeTransformer, + make_native_type, +) + + +if TYPE_CHECKING: + from dl_core.db.native_type import SATypeSpec + + +class YQLTypeTransformerBase(TypeTransformer): + conn_type: ClassVar[ConnectionType] + + _base_type_map: Dict[UserDataType, Tuple[SATypeSpec, ...]] = { + # Note: first SA type is used as the default. + UserDataType.integer: ( + sa.BIGINT, + sa.SMALLINT, + sa.INTEGER, + ydb_sa.types.UInt32, + ydb_sa.types.UInt64, + ydb_sa.types.UInt8, + ), + UserDataType.float: ( + sa.FLOAT, + sa.REAL, + sa.NUMERIC, + # see also: DOUBLE_PRECISION, + ), + UserDataType.boolean: (sa.BOOLEAN,), + UserDataType.string: ( + sa.TEXT, + sa.CHAR, + sa.VARCHAR, + # see also: ENUM, + ), + # see also: UUID + UserDataType.date: (sa.DATE,), + UserDataType.datetime: ( + sa.DATETIME, + sa.TIMESTAMP, + ), + UserDataType.genericdatetime: ( + sa.DATETIME, + sa.TIMESTAMP, + ), + UserDataType.unsupported: (sa.sql.sqltypes.NullType,), # Actually the default, so should not matter much. + } + _extra_type_map: Dict[UserDataType, SATypeSpec] = { # user-to-native only + UserDataType.geopoint: sa.TEXT, + UserDataType.geopolygon: sa.TEXT, + UserDataType.uuid: sa.TEXT, # see also: UUID + UserDataType.markup: sa.TEXT, + } + + native_to_user_map = { + make_native_type(ConnectionType.unknown, sa_type): bi_type + for bi_type, sa_types in _base_type_map.items() + for sa_type in sa_types + if bi_type != UserDataType.datetime + } + user_to_native_map = { + **{ + bi_type: make_native_type(ConnectionType.unknown, sa_types[0]) + for bi_type, sa_types in _base_type_map.items() + }, + **{bi_type: make_native_type(ConnectionType.unknown, sa_type) for bi_type, sa_type in _extra_type_map.items()}, + } diff --git a/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/__init__.py b/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/adapter.py b/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/adapter.py new file mode 100644 index 000000000..1945c63eb --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/adapter.py @@ -0,0 +1,117 @@ +from __future__ import annotations + +import logging +from typing import ( + TYPE_CHECKING, + ClassVar, + Iterable, + TypeVar, +) + +import attr +import grpc +import ydb.dbapi as ydb_dbapi +import ydb.issues as ydb_cli_err + +from dl_constants.enums import ConnectionType +from dl_core import exc +from dl_core.connection_models import TableIdent + +from dl_connector_ydb.core.base.adapter import YQLAdapterBase +from dl_connector_ydb.core.ydb.constants import CONNECTION_TYPE_YDB +from dl_connector_ydb.core.ydb.target_dto import YDBConnTargetDTO + + +if TYPE_CHECKING: + from dl_core.connection_models import SchemaIdent + + +LOGGER = logging.getLogger(__name__) + + +_DBA_YDB_BASE_DTO_TV = TypeVar("_DBA_YDB_BASE_DTO_TV", bound=YDBConnTargetDTO) + + +@attr.s +class YDBAdapterBase(YQLAdapterBase[_DBA_YDB_BASE_DTO_TV]): + conn_type: ClassVar[ConnectionType] = CONNECTION_TYPE_YDB + dsn_template: ClassVar[str] = "{dialect}:///ydb/" # 'yql:///ydb/' + + proto_schema: ClassVar[str] = "grpc" + + def _update_connect_args(self, args: dict) -> None: + args.update(auth_token=self._target_dto.password) + + def get_connect_args(self) -> dict: + target_dto = self._target_dto + args = dict( + endpoint="{}://{}:{}".format( + self.proto_schema, + target_dto.host, + target_dto.port, + ), + database=target_dto.db_name, + ) + self._update_connect_args(args) + return args + + EXTRA_EXC_CLS = (ydb_dbapi.Error, ydb_cli_err.Error, grpc.RpcError) + + def _list_table_names_i(self, db_name: str, show_dot: bool = False) -> Iterable[str]: + assert db_name, "db_name is required here" + db_engine = self.get_db_engine(db_name) + connection = db_engine.connect() + try: + # SA db_engine -> SA connection -> DBAPI connection -> YDB driver + driver = connection.connection.driver + assert driver + + queue = [db_name] + # Relative paths in `select` are also valid (i.e. "... from `some_dir/some_table`"), + # so, for visual convenience, remove the db prefix. + unprefix = db_name.rstrip("/") + "/" + while queue: + path = queue.pop(0) + resp = driver.scheme_client.async_list_directory(path) + res = resp.result() + children = [ + ( + "{}/{}".format(path, child.name), + child, + ) + for child in res.children + if show_dot or not child.name.startswith(".") + ] + children.sort() + for full_path, child in children: + if child.is_any_table(): + yield full_path.removeprefix(unprefix) + elif child.is_directory(): + queue.append(full_path) + finally: + connection.close() + + def _list_table_names(self, db_name: str, show_dot: bool = False) -> Iterable[str]: + driver_excs = self.EXTRA_EXC_CLS + try: + result = self._list_table_names_i(db_name=db_name, show_dot=show_dot) + for item in result: + yield item + except driver_excs as err: + raise exc.DatabaseQueryError(db_message=str(err), query="list_directory()") + + def _get_tables(self, schema_ident: SchemaIdent) -> list[TableIdent]: + db_name = schema_ident.db_name + assert db_name is not None + return [ + TableIdent( + schema_name=None, + db_name=db_name, + table_name=name, + ) + for name in self._list_table_names(db_name) + ] + + +class YDBAdapter(YDBAdapterBase[YDBConnTargetDTO]): + pass diff --git a/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/connection_executors.py b/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/connection_executors.py new file mode 100644 index 000000000..c47d15682 --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/connection_executors.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Sequence, +) + +import attr + +from dl_core.connection_executors.async_sa_executors import DefaultSqlAlchemyConnExecutor + +from dl_connector_ydb.core.ydb.adapter import YDBAdapter +from dl_connector_ydb.core.ydb.target_dto import YDBConnTargetDTO + + +if TYPE_CHECKING: + from dl_connector_ydb.core.ydb.dto import YDBConnDTO + + +@attr.s(cmp=False, hash=False) +class YDBAsyncAdapterConnExecutor(DefaultSqlAlchemyConnExecutor[YDBAdapter]): + TARGET_ADAPTER_CLS = YDBAdapter + + _conn_dto: YDBConnDTO = attr.ib() + + async def _make_target_conn_dto_pool(self) -> Sequence[YDBConnTargetDTO]: + return [ + YDBConnTargetDTO( + conn_id=self._conn_dto.conn_id, + pass_db_messages_to_user=self._conn_options.pass_db_messages_to_user, + pass_db_query_to_user=self._conn_options.pass_db_query_to_user, + host=self._conn_dto.host, + port=self._conn_dto.port, + db_name=self._conn_dto.db_name, + username=self._conn_dto.username or "", + password=self._conn_dto.password or "", + ) + ] diff --git a/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/connector.py b/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/connector.py new file mode 100644 index 000000000..12f1c080d --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/connector.py @@ -0,0 +1,73 @@ +from ydb.sqlalchemy import register_dialect as yql_register_dialect + +from dl_core.connectors.base.connector import ( + CoreConnectionDefinition, + CoreConnector, + CoreSourceDefinition, +) +from dl_core.data_source_spec.sql import ( + StandardSQLDataSourceSpec, + SubselectDataSourceSpec, +) +from dl_core.us_manager.storage_schemas.data_source_spec_base import ( + SQLDataSourceSpecStorageSchema, + SubselectDataSourceSpecStorageSchema, +) + +from dl_connector_ydb.core.base.query_compiler import YQLQueryCompiler +from dl_connector_ydb.core.ydb.adapter import YDBAdapter +from dl_connector_ydb.core.ydb.connection_executors import YDBAsyncAdapterConnExecutor +from dl_connector_ydb.core.ydb.constants import ( + BACKEND_TYPE_YDB, + CONNECTION_TYPE_YDB, + SOURCE_TYPE_YDB_SUBSELECT, + SOURCE_TYPE_YDB_TABLE, +) +from dl_connector_ydb.core.ydb.data_source import ( + YDBSubselectDataSource, + YDBTableDataSource, +) +from dl_connector_ydb.core.ydb.settings import YDBSettingDefinition +from dl_connector_ydb.core.ydb.storage_schemas.connection import YDBConnectionDataStorageSchema +from dl_connector_ydb.core.ydb.type_transformer import YDBTypeTransformer +from dl_connector_ydb.core.ydb.us_connection import YDBConnection + + +class YDBCoreConnectionDefinition(CoreConnectionDefinition): + conn_type = CONNECTION_TYPE_YDB + connection_cls = YDBConnection + us_storage_schema_cls = YDBConnectionDataStorageSchema + type_transformer_cls = YDBTypeTransformer + sync_conn_executor_cls = YDBAsyncAdapterConnExecutor + async_conn_executor_cls = YDBAsyncAdapterConnExecutor + dialect_string = "yql" + settings_definition = YDBSettingDefinition + + +class YDBCoreSourceDefinition(CoreSourceDefinition): + source_type = SOURCE_TYPE_YDB_TABLE + source_cls = YDBTableDataSource + source_spec_cls = StandardSQLDataSourceSpec + us_storage_schema_cls = SQLDataSourceSpecStorageSchema + + +class YDBCoreSubselectSourceDefinition(CoreSourceDefinition): + source_type = SOURCE_TYPE_YDB_SUBSELECT + source_cls = YDBSubselectDataSource + source_spec_cls = SubselectDataSourceSpec + us_storage_schema_cls = SubselectDataSourceSpecStorageSchema + + +class YDBCoreConnector(CoreConnector): + backend_type = BACKEND_TYPE_YDB + connection_definitions = (YDBCoreConnectionDefinition,) + source_definitions = ( + YDBCoreSourceDefinition, + YDBCoreSubselectSourceDefinition, + ) + rqe_adapter_classes = frozenset({YDBAdapter}) + compiler_cls = YQLQueryCompiler + + @classmethod + def registration_hook(cls) -> None: + yql_register_dialect() diff --git a/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/constants.py b/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/constants.py new file mode 100644 index 000000000..85cbbfb00 --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/constants.py @@ -0,0 +1,13 @@ +from dl_constants.enums import ( + ConnectionType, + DataSourceType, + SourceBackendType, +) + + +BACKEND_TYPE_YDB = SourceBackendType.declare("YDB") + +CONNECTION_TYPE_YDB = ConnectionType.declare("ydb") + +SOURCE_TYPE_YDB_TABLE = DataSourceType.declare("YDB_TABLE") +SOURCE_TYPE_YDB_SUBSELECT = DataSourceType.declare("YDB_SUBSELECT") diff --git a/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/data_source.py b/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/data_source.py new file mode 100644 index 000000000..443bac7ac --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/data_source.py @@ -0,0 +1,47 @@ +from __future__ import annotations + +from typing import ( + Any, + Optional, +) + +from dl_constants.enums import DataSourceType +from dl_core.data_source.sql import ( + StandardSQLDataSource, + SubselectDataSource, + require_table_name, +) +from dl_core.utils import sa_plain_text + +from dl_connector_ydb.core.base.data_source import YQLDataSourceMixin +from dl_connector_ydb.core.ydb.constants import ( + CONNECTION_TYPE_YDB, + SOURCE_TYPE_YDB_SUBSELECT, + SOURCE_TYPE_YDB_TABLE, +) + + +class YDBDataSourceMixin(YQLDataSourceMixin): + conn_type = CONNECTION_TYPE_YDB + + @classmethod + def is_compatible_with_type(cls, source_type: DataSourceType) -> bool: + return source_type in (SOURCE_TYPE_YDB_TABLE, SOURCE_TYPE_YDB_SUBSELECT) + + +class YDBTableDataSource(YDBDataSourceMixin, StandardSQLDataSource): + """YDB table""" + + @require_table_name + def get_sql_source(self, alias: Optional[str] = None) -> Any: + # cross-db joins are not supported + assert not self.db_name or self.db_name == self.connection.db_name + + # Unlike `super()`, not adding the database name here. + q = self.quote + alias_str = "" if alias is None else f" AS {q(alias)}" + return sa_plain_text(f"{q(self.table_name)}{alias_str}") + + +class YDBSubselectDataSource(YDBDataSourceMixin, SubselectDataSource): + """YDB subselect""" diff --git a/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/dto.py b/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/dto.py new file mode 100644 index 000000000..c292aa569 --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/dto.py @@ -0,0 +1,12 @@ +from __future__ import annotations + +import attr + +from dl_core.connection_models.dto_defs import DefaultSQLDTO + +from dl_connector_ydb.core.ydb.constants import CONNECTION_TYPE_YDB + + +@attr.s(frozen=True) +class YDBConnDTO(DefaultSQLDTO): + conn_type = CONNECTION_TYPE_YDB diff --git a/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/settings.py b/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/settings.py new file mode 100644 index 000000000..7d32efa0a --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/settings.py @@ -0,0 +1,24 @@ +from typing import Optional + +import attr + +from dl_configs.connectors_settings import ( + ConnectorsConfigType, + ConnectorSettingsBase, +) +from dl_configs.settings_loaders.meta_definition import s_attrib +from dl_core.connectors.settings.primitives import ConnectorSettingsDefinition + + +@attr.s(frozen=True) +class YDBConnectorSettings(ConnectorSettingsBase): + DEFAULT_HOST_VALUE: Optional[str] = s_attrib("DEFAULT_HOST_VALUE", missing=None) # type: ignore + + +def ydb_settings_fallback(full_cfg: ConnectorsConfigType) -> dict[str, ConnectorSettingsBase]: + return dict(YDB=YDBConnectorSettings()) + + +class YDBSettingDefinition(ConnectorSettingsDefinition): + settings_class = YDBConnectorSettings + fallback = ydb_settings_fallback diff --git a/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/storage_schemas/__init__.py b/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/storage_schemas/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/storage_schemas/connection.py b/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/storage_schemas/connection.py new file mode 100644 index 000000000..192a484ac --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/storage_schemas/connection.py @@ -0,0 +1,14 @@ +from marshmallow import fields as ma_fields + +from dl_core.us_manager.storage_schemas.connection import ConnectionSQLDataStorageSchema + +from dl_connector_ydb.core.ydb.us_connection import YDBConnection + + +class YDBConnectionDataStorageSchema(ConnectionSQLDataStorageSchema[YDBConnection.DataModel]): + TARGET_CLS = YDBConnection.DataModel + + token = ma_fields.String(required=False, allow_none=True, dump_default=None, load_default=None) + + username = ma_fields.String(required=False, allow_none=True, dump_default=None, load_default=None) + password = ma_fields.String(required=False, allow_none=True, dump_default=None, load_default=None) diff --git a/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/target_dto.py b/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/target_dto.py new file mode 100644 index 000000000..7bef7e77e --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/target_dto.py @@ -0,0 +1,10 @@ +from __future__ import annotations + +import attr + +from dl_core.connection_executors.models.connection_target_dto_base import BaseSQLConnTargetDTO + + +@attr.s(frozen=True) +class YDBConnTargetDTO(BaseSQLConnTargetDTO): + """""" diff --git a/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/type_transformer.py b/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/type_transformer.py new file mode 100644 index 000000000..918d5d205 --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/type_transformer.py @@ -0,0 +1,15 @@ +from __future__ import annotations + +from dl_connector_ydb.core.base.type_transformer import YQLTypeTransformerBase +from dl_connector_ydb.core.ydb.constants import CONNECTION_TYPE_YDB + + +class YDBTypeTransformer(YQLTypeTransformerBase): + conn_type = CONNECTION_TYPE_YDB + + native_to_user_map = { + nt.clone(conn_type=CONNECTION_TYPE_YDB): bi_t for nt, bi_t in YQLTypeTransformerBase.native_to_user_map.items() + } + user_to_native_map = { + bi_t: nt.clone(conn_type=CONNECTION_TYPE_YDB) for bi_t, nt in YQLTypeTransformerBase.user_to_native_map.items() + } diff --git a/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/us_connection.py b/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/us_connection.py new file mode 100644 index 000000000..764bb4170 --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/us_connection.py @@ -0,0 +1,110 @@ +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Callable, + ClassVar, + Optional, +) + +import attr + +from dl_core.connection_executors.sync_base import SyncConnExecutorBase +from dl_core.us_connection_base import ( + ClassicConnectionSQL, + ConnectionBase, + DataSourceTemplate, +) +from dl_core.utils import secrepr +from dl_i18n.localizer_base import Localizer +from dl_utils.utils import DataKey + +from dl_connector_ydb.api.ydb.i18n.localizer import Translatable +from dl_connector_ydb.core.ydb.constants import ( + SOURCE_TYPE_YDB_SUBSELECT, + SOURCE_TYPE_YDB_TABLE, +) +from dl_connector_ydb.core.ydb.dto import YDBConnDTO + + +if TYPE_CHECKING: + from dl_core.connection_models.common_models import TableIdent + + +class YDBConnection(ClassicConnectionSQL): + allow_cache: ClassVar[bool] = True + is_always_user_source: ClassVar[bool] = True + allow_dashsql: ClassVar[bool] = True + + source_type = SOURCE_TYPE_YDB_TABLE + + @attr.s(kw_only=True) + class DataModel(ClassicConnectionSQL.DataModel): + token: Optional[str] = attr.ib(default=None, repr=secrepr) + + username = None # type: ignore # not applicable + password = None # type: ignore # -> 'token' + + @classmethod + def get_secret_keys(cls) -> set[DataKey]: + return { + *super().get_secret_keys(), + DataKey(parts=("token",)), + } + + def get_conn_dto(self) -> YDBConnDTO: + assert self.data.db_name + return YDBConnDTO( + conn_id=self.uuid, + host=self.data.host, + multihosts=(), + port=self.data.port, + db_name=self.data.db_name, + username="", # not applicable + password=self.data.token, + ) + + def get_data_source_template_templates(self, localizer: Localizer) -> list[DataSourceTemplate]: + return [ + DataSourceTemplate( + title="YDB table", + tab_title=localizer.translate(Translatable("source_templates-tab_title-table")), + source_type=SOURCE_TYPE_YDB_TABLE, + parameters=dict(), + form=[ + { + "name": "table_name", + "input_type": "text", + "default": "", + "required": True, + "title": localizer.translate(Translatable("source_templates-label-ydb_table")), + "field_doc_key": "YDB_TABLE/table_name", + }, + ], + group=[], + connection_id=self.uuid, # type: ignore # TODO: fix + ), + ] + self._make_subselect_templates( + title="Subselect over YDB", + source_type=SOURCE_TYPE_YDB_SUBSELECT, + localizer=localizer, + ) + + def get_tables( + self, + conn_executor_factory: Callable[[ConnectionBase], SyncConnExecutorBase], + db_name: Optional[str] = None, + schema_name: Optional[str] = None, + ) -> list[TableIdent]: + if db_name is None: + # Only current-database listing is feasible here. + db_name = self.data.db_name + return super().get_tables( + conn_executor_factory=conn_executor_factory, + db_name=db_name, + schema_name=schema_name, + ) + + @property + def allow_public_usage(self) -> bool: + return True diff --git a/lib/dl_connector_ydb/dl_connector_ydb/db_testing/__init__.py b/lib/dl_connector_ydb/dl_connector_ydb/db_testing/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/lib/dl_connector_ydb/dl_connector_ydb/db_testing/connector.py b/lib/dl_connector_ydb/dl_connector_ydb/db_testing/connector.py new file mode 100644 index 000000000..940d3f307 --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/db_testing/connector.py @@ -0,0 +1,7 @@ +from dl_db_testing.connectors.base.connector import DbTestingConnector + +from dl_connector_ydb.db_testing.engine_wrapper import YQLEngineWrapper + + +class YQLDbTestingConnector(DbTestingConnector): + engine_wrapper_classes = (YQLEngineWrapper,) diff --git a/lib/dl_connector_ydb/dl_connector_ydb/db_testing/engine_wrapper.py b/lib/dl_connector_ydb/dl_connector_ydb/db_testing/engine_wrapper.py new file mode 100644 index 000000000..71fc68a1f --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/db_testing/engine_wrapper.py @@ -0,0 +1,144 @@ +from __future__ import annotations + +import os +from typing import ( + Any, + Callable, + NamedTuple, + Optional, + Sequence, + Type, +) + +import shortuuid +import sqlalchemy as sa +from sqlalchemy.types import TypeEngine +import ydb + +from dl_db_testing.database.engine_wrapper import EngineWrapperBase + + +class YdbTypeSpec(NamedTuple): + type: ydb.PrimitiveType + to_sql_str: Callable[[Any], str] + + +SA_TYPE_TO_YDB_TYPE: dict[Type[TypeEngine], YdbTypeSpec] = { + sa.SmallInteger: YdbTypeSpec(type=ydb.PrimitiveType.Uint8, to_sql_str=str), + sa.Integer: YdbTypeSpec(type=ydb.PrimitiveType.Int32, to_sql_str=str), + sa.BigInteger: YdbTypeSpec(type=ydb.PrimitiveType.Int64, to_sql_str=str), + sa.Float: YdbTypeSpec(type=ydb.PrimitiveType.Double, to_sql_str=str), + sa.Boolean: YdbTypeSpec(type=ydb.PrimitiveType.Bool, to_sql_str=lambda x: str(bool(x))), + sa.String: YdbTypeSpec(type=ydb.PrimitiveType.String, to_sql_str=lambda x: f'"{x}"'), + sa.Unicode: YdbTypeSpec(type=ydb.PrimitiveType.Utf8, to_sql_str=lambda x: f'"{x}"'), + sa.Date: YdbTypeSpec(type=ydb.PrimitiveType.Date, to_sql_str=lambda x: f'DateTime::MakeDate($date_parse("{x}"))'), + sa.DateTime: YdbTypeSpec( + ydb.PrimitiveType.Datetime, to_sql_str=lambda x: f'DateTime::MakeDatetime($datetime_parse("{x}"))' + ), + sa.TIMESTAMP: YdbTypeSpec( + ydb.PrimitiveType.Timestamp, to_sql_str=lambda x: f'DateTime::MakeTimestamp($datetime_parse("{x}"))' + ), +} + + +class YQLEngineWrapper(EngineWrapperBase): + URL_PREFIX = "yql" + + def get_conn_credentials(self, full: bool = False) -> dict: + return dict( + endpoint=self.engine.url.query["endpoint"], + db_name=self.engine.url.query["database"], + ) + + def get_version(self) -> Optional[str]: + return None + + def _generate_table_description(self, columns: Sequence[sa.Column]) -> ydb.TableDescription: + table = ydb.TableDescription().with_columns( + *[ydb.Column(col.name, ydb.OptionalType(SA_TYPE_TO_YDB_TYPE[type(col.type)].type)) for col in columns] + ) + primary_keys = [col.name for col in columns if False] # if primary_key] # FIXME + if not primary_keys: + primary_keys = [columns[0].name] + return table.with_primary_keys(*primary_keys) + + def _get_table_path(self, table: sa.Table) -> str: + return os.path.join(self.engine.url.query["database"], table.name) + + def _get_connection_params(self) -> ydb.DriverConfig: + return ydb.DriverConfig( + endpoint=self.engine.url.query["endpoint"], + database=self.engine.url.query["database"], + ) + + def table_from_columns( + self, + columns: Sequence[sa.Column], + *, + schema: Optional[str] = None, + table_name: Optional[str] = None, + ) -> sa.Table: + table_name = table_name or f"test_table_{shortuuid.uuid()[:10]}" + table = sa.Table(table_name, sa.MetaData(), *columns, schema=schema) + return table + + def create_table(self, table: sa.Table) -> None: + table_description = self._generate_table_description(table.columns) + table_path = self._get_table_path(table) + connection_params = self._get_connection_params() + driver = ydb.Driver(connection_params) + driver.wait(timeout=5) + session = driver.table_client.session().create() + session.create_table(table_path, table_description) + driver.stop(timeout=5) + + def insert_into_table(self, table: sa.Table, data: Sequence[dict]) -> None: + connection_params = ydb.DriverConfig( + endpoint=self.engine.url.query["endpoint"], + database=self.engine.url.query["database"], + ) + driver = ydb.Driver(connection_params) + driver.wait(timeout=5) + session = driver.table_client.session().create() + + table_path = self._get_table_path(table) + + upsert_query_prefix = f""" + $date_parse = DateTime::Parse("%Y-%m-%d"); + $datetime_parse = DateTime::Parse("%Y-%m-%d %H:%M:%S"); + UPSERT INTO `{table_path}` ({", ".join([column.name for column in table.columns])}) VALUES + """ + upserts = ( + "({})".format( + ", ".join( + [ + ( + "NULL" + if data[column.name] is None + else SA_TYPE_TO_YDB_TYPE[type(column.type)].to_sql_str(data[column.name]) + ) + for column in table.columns + ] + ) + ) + for data in data + ) + session.transaction().execute(upsert_query_prefix + ",\n".join(upserts) + ";", commit_tx=True) + driver.stop(timeout=5) + + def drop_table(self, db_name: str, table: sa.Table) -> None: + connection_params = self._get_connection_params() + driver = ydb.Driver(connection_params) + driver.wait(timeout=5) + session = driver.table_client.session().create() + table_path = self._get_table_path(table) + + try: + session.drop_table(table_path) + except ydb.issues.SchemeError as err: + if "does not exist" in str(err): + pass # Table does not exist + else: + raise + + driver.stop(timeout=5) diff --git a/lib/dl_connector_ydb/dl_connector_ydb/formula/__init__.py b/lib/dl_connector_ydb/dl_connector_ydb/formula/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/lib/dl_connector_ydb/dl_connector_ydb/formula/connector.py b/lib/dl_connector_ydb/dl_connector_ydb/formula/connector.py new file mode 100644 index 000000000..7d87087c4 --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/formula/connector.py @@ -0,0 +1,13 @@ +from ydb.sqlalchemy import YqlDialect as SAYqlDialect + +from dl_formula.connectors.base.connector import FormulaConnector + +from dl_connector_ydb.formula.constants import YqlDialect as YqlDialectNS +from dl_connector_ydb.formula.definitions.all import DEFINITIONS + + +class YQLFormulaConnector(FormulaConnector): + dialect_ns_cls = YqlDialectNS + dialects = YqlDialectNS.YQL + op_definitions = DEFINITIONS + sa_dialect = SAYqlDialect() diff --git a/lib/dl_connector_ydb/dl_connector_ydb/formula/constants.py b/lib/dl_connector_ydb/dl_connector_ydb/formula/constants.py new file mode 100644 index 000000000..5cb951655 --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/formula/constants.py @@ -0,0 +1,15 @@ +from dl_formula.core.dialect import ( + DialectName, + DialectNamespace, + simple_combo, +) + + +DIALECT_NAME_YDB = DialectName.declare("YDB") # YDB ScanQuery connection (YQL dialect) +DIALECT_NAME_YQ = DialectName.declare("YQ") # YQ (Yandex Query) (YQL dialect) + + +class YqlDialect(DialectNamespace): + YDB = simple_combo(name=DIALECT_NAME_YDB) + YQ = simple_combo(name=DIALECT_NAME_YQ) + YQL = YDB | YQ diff --git a/lib/dl_connector_ydb/dl_connector_ydb/formula/definitions/__init__.py b/lib/dl_connector_ydb/dl_connector_ydb/formula/definitions/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/lib/dl_connector_ydb/dl_connector_ydb/formula/definitions/all.py b/lib/dl_connector_ydb/dl_connector_ydb/formula/definitions/all.py new file mode 100644 index 000000000..4ad0a9540 --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/formula/definitions/all.py @@ -0,0 +1,26 @@ +from dl_connector_ydb.formula.definitions.conditional_blocks import DEFINITIONS_COND_BLOCKS +from dl_connector_ydb.formula.definitions.functions_aggregation import DEFINITIONS_AGG +from dl_connector_ydb.formula.definitions.functions_datetime import DEFINITIONS_DATETIME +from dl_connector_ydb.formula.definitions.functions_logical import DEFINITIONS_LOGICAL +from dl_connector_ydb.formula.definitions.functions_markup import DEFINITIONS_MARKUP +from dl_connector_ydb.formula.definitions.functions_math import DEFINITIONS_MATH +from dl_connector_ydb.formula.definitions.functions_string import DEFINITIONS_STRING +from dl_connector_ydb.formula.definitions.functions_type import DEFINITIONS_TYPE +from dl_connector_ydb.formula.definitions.operators_binary import DEFINITIONS_BINARY +from dl_connector_ydb.formula.definitions.operators_ternary import DEFINITIONS_TERNARY +from dl_connector_ydb.formula.definitions.operators_unary import DEFINITIONS_UNARY + + +DEFINITIONS = [ + *DEFINITIONS_COND_BLOCKS, + *DEFINITIONS_AGG, + *DEFINITIONS_DATETIME, + *DEFINITIONS_LOGICAL, + *DEFINITIONS_MARKUP, + *DEFINITIONS_MATH, + *DEFINITIONS_STRING, + *DEFINITIONS_TYPE, + *DEFINITIONS_UNARY, + *DEFINITIONS_BINARY, + *DEFINITIONS_TERNARY, +] diff --git a/lib/dl_connector_ydb/dl_connector_ydb/formula/definitions/conditional_blocks.py b/lib/dl_connector_ydb/dl_connector_ydb/formula/definitions/conditional_blocks.py new file mode 100644 index 000000000..7944cb16b --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/formula/definitions/conditional_blocks.py @@ -0,0 +1,22 @@ +import sqlalchemy as sa + +from dl_formula.definitions.base import TranslationVariant +import dl_formula.definitions.conditional_blocks as base + +from dl_connector_ydb.formula.constants import YqlDialect as D + + +V = TranslationVariant.make + + +DEFINITIONS_COND_BLOCKS = [ + # _case_block_ + base.CaseBlock.for_dialect(D.YQL), + # _if_block_ + base.IfBlock3( + variants=[ + V(D.YQL, sa.func.IF), + ] + ), + base.IfBlockMulti.for_dialect(D.YQL), +] diff --git a/lib/dl_connector_ydb/dl_connector_ydb/formula/definitions/functions_aggregation.py b/lib/dl_connector_ydb/dl_connector_ydb/formula/definitions/functions_aggregation.py new file mode 100644 index 000000000..6abe584da --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/formula/definitions/functions_aggregation.py @@ -0,0 +1,173 @@ +import sqlalchemy as sa +import ydb.sqlalchemy as ydb_sa + +from dl_formula.definitions.base import ( + TranslationVariant, + TranslationVariantWrapped, +) +from dl_formula.definitions.common import quantile_value +import dl_formula.definitions.functions_aggregation as base +from dl_formula.definitions.literals import un_literal +from dl_formula.shortcuts import n + +from dl_connector_ydb.formula.constants import YqlDialect as D + + +V = TranslationVariant.make +VW = TranslationVariantWrapped.make + + +def _all_concat_yql(expr, sep=", "): # type: ignore + res = expr + res = sa.cast(res, sa.TEXT) + res = sa.func.AGGREGATE_LIST_DISTINCT(res) + res = sa.func.ListSortAsc(res) + # Would be nicer to cast List to List at this point. + res = sa.func.String.JoinFromList(res, sep) + return res + + +DEFINITIONS_AGG = [ + # all_concat + base.AggAllConcat1( + variants=[ + V(D.YQL, _all_concat_yql), + ] + ), + base.AggAllConcat2( + variants=[ + V(D.YQL, _all_concat_yql), + ] + ), + # any + base.AggAny( + variants=[ + V(D.YQL, sa.func.SOME), + ] + ), + # arg_max + base.AggArgMax( + variants=[ + V(D.YQL, sa.func.MAX_BY), + ] + ), + # arg_min + base.AggArgMin( + variants=[ + V(D.YQL, sa.func.MIN_BY), + ] + ), + # avg + base.AggAvgFromNumber.for_dialect(D.YQL), + base.AggAvgFromDate( + variants=[ + # in YQL AVG returns Double which can not be casted to Datetime so we have to convert it to INT explicitly + VW(D.YQL, lambda date_val: n.func.DATE(n.func.INT(n.func.AVG(n.func.INT(date_val))))) + ] + ), + base.AggAvgFromDatetime.for_dialect(D.YQL), + # avg_if + base.AggAvgIf( + variants=[ + V(D.YQL, sa.func.avg_if), + ] + ), + # count + base.AggCount0.for_dialect(D.YQL), + base.AggCount1.for_dialect(D.YQL), + # count_if + base.AggCountIf( + variants=[ + V(D.YQL, sa.func.COUNT_IF), + ] + ), + # countd + base.AggCountd.for_dialect(D.YQL), + # countd_approx + base.AggCountdApprox( + variants=[ + V(D.YQL, sa.func.CountDistinctEstimate), + ] + ), + # countd_if + base.AggCountdIf.for_dialect(D.YQL), + # max + base.AggMax.for_dialect(D.YQL), + # median + base.AggMedian( + variants=[ + V(D.YQL, sa.func.MEDIAN), + ] + ), + # min + base.AggMin.for_dialect(D.YQL), + # quantile + base.AggQuantile( + variants=[ + V( + D.YQL, + lambda expr, quant: sa.func.PERCENTILE( + expr, + quantile_value(un_literal(quant)), + ), + ), + ] + ), + # stdev + base.AggStdev( + variants=[ + V(D.YQL, sa.func.STDDEVSAMP), + ] + ), + # stdevp + base.AggStdevp( + variants=[ + V(D.YQL, sa.func.STDDEVPOP), + ] + ), + # sum + base.AggSum.for_dialect(D.YQL), + # sum_if + base.AggSumIf( + variants=[ + V(D.YQL, sa.func.SUM_IF), + ] + ), + # top_concat + base.AggTopConcat1( + variants=[ + # String::JoinFromList(ListMap(TOPFREQ(expr, amount), ($x) -> { RETURN cast($x.Value as Utf8); }), sep) + V( + D.YQL, + lambda expr, amount, sep=", ": sa.func.String.JoinFromList( + sa.func.ListMap(sa.func.TOPFREQ(expr, amount), ydb_sa.types.Lambda(lambda x: sa.cast(x, sa.Text))), + ", ", + ), + ), + ] + ), + base.AggTopConcat2( + variants=[ + # String::JoinFromList(ListMap(TOPFREQ(expr, amount), ($x) -> { RETURN cast($x.Value as Utf8); }), sep) + V( + D.YQL, + lambda expr, amount, sep=", ": sa.func.String.JoinFromList( + sa.func.ListMap(sa.func.TOPFREQ(expr, amount), ydb_sa.types.Lambda(lambda x: sa.cast(x, sa.Text))), + ", ", + ), + ), + ] + ), + # var + base.AggVar( + variants=[ + V(D.YQL, sa.func.VARSAMP), + ] + ), + # varp + base.AggVarp( + variants=[ + V(D.YQL, sa.func.VARPOP), + ] + ), +] diff --git a/lib/dl_connector_ydb/dl_connector_ydb/formula/definitions/functions_datetime.py b/lib/dl_connector_ydb/dl_connector_ydb/formula/definitions/functions_datetime.py new file mode 100644 index 000000000..816bf43e9 --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/formula/definitions/functions_datetime.py @@ -0,0 +1,188 @@ +from __future__ import annotations + +import sqlalchemy as sa + +from dl_formula.definitions.base import ( + TranslationVariant, + TranslationVariantWrapped, +) +from dl_formula.definitions.common_datetime import ( + YQL_INTERVAL_FUNCS, + date_add_yql, + datetime_add_yql, +) +import dl_formula.definitions.functions_datetime as base + +from dl_connector_ydb.formula.constants import YqlDialect as D + + +V = TranslationVariant.make +VW = TranslationVariantWrapped.make + + +YQL_DATE_DATETRUNC_FUNCS = { + "year": "StartOfYear", + "quarter": "StartOfQuarter", + "month": "StartOfMonth", + "week": "StartOfWeek", +} + + +def _datetrunc2_yql_impl(date_ctx, unit_ctx): # type: ignore + date_expr = date_ctx.expression + unit = base.norm_datetrunc_unit(unit_ctx.expression) + + func_name = YQL_DATE_DATETRUNC_FUNCS.get(unit) + if func_name is not None: + func = getattr(sa.func.DateTime, func_name) + return sa.func.DateTime.MakeDatetime(func(date_expr)) + + amount = 1 + func_name = YQL_INTERVAL_FUNCS.get(unit) + if func_name is not None: + func = getattr(sa.func.DateTime, func_name) + return sa.func.DateTime.MakeDatetime( + sa.func.DateTime.StartOf( + date_expr, + func(amount), + ) + ) + + # This normally should not happen + raise NotImplementedError(f"Unsupported unit {unit}") + + +DEFINITIONS_DATETIME = [ + # dateadd + base.FuncDateadd1.for_dialect(D.YQL), + base.FuncDateadd2Unit.for_dialect(D.YQL), + base.FuncDateadd2Number.for_dialect(D.YQL), + base.FuncDateadd3DateConstNum( + variants=[ + V(D.YQL, lambda date, what, num: date_add_yql(date, what, num, const_mult=True)), + ] + ), + base.FuncDateadd3DateNonConstNum( + variants=[ + V(D.YQL, lambda date, what, num: date_add_yql(date, what, num, const_mult=False)), + ] + ), + base.FuncDateadd3DatetimeConstNum( + variants=[ + V(D.YQL, lambda date, what, num: datetime_add_yql(date, what, num, const_mult=True)), + ] + ), + base.FuncDateadd3DatetimeNonConstNum( + variants=[ + V(D.YQL, lambda date, what, num: datetime_add_yql(date, what, num, const_mult=False)), + ] + ), + base.FuncDateadd3GenericDatetimeNonConstNum( + variants=[ + V(D.YQL, lambda date, what, num: datetime_add_yql(date, what, num, const_mult=False)), + ] + ), + # datepart + base.FuncDatepart2.for_dialect(D.YQL), + base.FuncDatepart3Const.for_dialect(D.YQL), + base.FuncDatepart3NonConst.for_dialect(D.YQL), + # datetrunc + base.FuncDatetrunc2Date( + variants=[ + V( + D.YQL, + lambda date, unit: sa.func.DateTime.MakeDate( + getattr(sa.func.DateTime, YQL_DATE_DATETRUNC_FUNCS[base.norm_datetrunc_unit(unit)])(date) + ) + if base.norm_datetrunc_unit(unit) in YQL_DATE_DATETRUNC_FUNCS + else date, + ), + ] + ), + base.FuncDatetrunc2Datetime( + variants=[ + VW(D.YQL, _datetrunc2_yql_impl), + ] + ), + # day + base.FuncDay( + variants=[ + V(D.YQL, sa.func.DateTime.GetDayOfMonth), + ] + ), + # dayofweek + base.FuncDayofweek1.for_dialect(D.YQL), + base.FuncDayofweek2( + variants=[ + V( + D.YQL, + lambda date_expr, firstday_expr: base.dow_firstday_shift( + sa.func.DateTime.GetDayOfWeek(date_expr), firstday_expr + ), + ), + ] + ), + # genericnow + base.FuncGenericNow( + variants=[ + V(D.YQL, sa.func.CurrentUtcDatetime), + ] + ), + # hour + base.FuncHourDate.for_dialect(D.YQL), + base.FuncHourDatetime( + variants=[ + V(D.YQL, sa.func.DateTime.GetHour), + ] + ), + # minute + base.FuncMinuteDate.for_dialect(D.YQL), + base.FuncMinuteDatetime( + variants=[ + V(D.YQL, sa.func.DateTime.GetMinute), + ] + ), + # month + base.FuncMonth( + variants=[ + V(D.YQL, sa.func.DateTime.GetMonth), + ] + ), + # now + base.FuncNow( + variants=[ + V(D.YQL, sa.func.CurrentUtcDatetime), + ] + ), + # quarter + base.FuncQuarter( + variants=[ + V(D.YQL, lambda date: sa.cast((sa.func.DateTime.GetMonth(date) + 2) / 3, sa.INTEGER)), + ] + ), + # second + base.FuncSecondDate.for_dialect(D.YQL), + base.FuncSecondDatetime( + variants=[ + V(D.YQL, sa.func.DateTime.GetSecond), + ] + ), + # today + base.FuncToday( + variants=[ + V(D.YQL, sa.func.CurrentUtcDate), # https://ydb.tech/en/docs/yql/reference/syntax/not_yet_supported#now + ] + ), + # week + base.FuncWeek( + variants=[ + V(D.YQL, sa.func.DateTime.GetWeekOfYearIso8601), + ] + ), + # year + base.FuncYear( + variants=[ + V(D.YQL, sa.func.DateTime.GetYear), + ] + ), +] diff --git a/lib/dl_connector_ydb/dl_connector_ydb/formula/definitions/functions_logical.py b/lib/dl_connector_ydb/dl_connector_ydb/formula/definitions/functions_logical.py new file mode 100644 index 000000000..fdf9fa4fa --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/formula/definitions/functions_logical.py @@ -0,0 +1,34 @@ +import sqlalchemy as sa + +from dl_formula.definitions.base import TranslationVariant +import dl_formula.definitions.functions_logical as base + +from dl_connector_ydb.formula.constants import YqlDialect as D + + +V = TranslationVariant.make + + +DEFINITIONS_LOGICAL = [ + # case + base.FuncCase.for_dialect(D.YQL), + # if + base.FuncIf.for_dialect(D.YQL), + # ifnull + base.FuncIfnull( + variants=[ + V(D.YQL, sa.func.coalesce), + ] + ), + # iif + base.FuncIif3Legacy.for_dialect(D.YQL), + # isnull + base.FuncIsnull.for_dialect(D.YQL), + # zn + base.FuncZn( + variants=[ + # See also: `NANVL()` to also replace `NaN`s. + V(D.YQL, lambda x: sa.func.coalesce(x, 0)), + ] + ), +] diff --git a/lib/dl_connector_ydb/dl_connector_ydb/formula/definitions/functions_markup.py b/lib/dl_connector_ydb/dl_connector_ydb/formula/definitions/functions_markup.py new file mode 100644 index 000000000..2cf9677ba --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/formula/definitions/functions_markup.py @@ -0,0 +1,20 @@ +import dl_formula.definitions.functions_markup as base + +from dl_connector_ydb.formula.constants import YqlDialect as D + + +DEFINITIONS_MARKUP = [ + # + + base.BinaryPlusMarkup.for_dialect(D.YQL), + # __str + base.FuncInternalStrConst.for_dialect(D.YQL), + base.FuncInternalStr.for_dialect(D.YQL), + # bold + base.FuncBold.for_dialect(D.YQL), + # italic + base.FuncItalics.for_dialect(D.YQL), + # markup + base.ConcatMultiMarkup.for_dialect(D.YQL), + # url + base.FuncUrl.for_dialect(D.YQL), +] diff --git a/lib/dl_connector_ydb/dl_connector_ydb/formula/definitions/functions_math.py b/lib/dl_connector_ydb/dl_connector_ydb/formula/definitions/functions_math.py new file mode 100644 index 000000000..751ccb306 --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/formula/definitions/functions_math.py @@ -0,0 +1,204 @@ +import sqlalchemy as sa + +from dl_formula.definitions.base import ( + TranslationVariant, + TranslationVariantWrapped, +) +import dl_formula.definitions.functions_math as base +from dl_formula.shortcuts import n + +from dl_connector_ydb.formula.constants import YqlDialect as D + + +V = TranslationVariant.make +VW = TranslationVariantWrapped.make + + +DEFINITIONS_MATH = [ + # abs + base.FuncAbs( + variants=[ + V(D.YQL, sa.func.Math.Abs), # `Math::Abs(…)` + ] + ), + # acos + base.FuncAcos( + variants=[ + V(D.YQL, sa.func.Math.Acos), + ] + ), + # asin + base.FuncAsin( + variants=[ + V(D.YQL, sa.func.Math.Asin), + ] + ), + # atan + base.FuncAtan( + variants=[ + V(D.YQL, sa.func.Math.Atan), + ] + ), + # atan2 + base.FuncAtan2( + variants=[ + V(D.YQL, sa.func.Math.Atan2), + ] + ), + # ceiling + base.FuncCeiling( + variants=[ + V(D.YQL, sa.func.Math.Ceil), + ] + ), + # cos + base.FuncCos( + variants=[ + V(D.YQL, sa.func.Math.Cos), + ] + ), + # cot + base.FuncCot( + variants=[ + V(D.YQL, lambda x: sa.func.Math.Cos(x) / sa.func.Math.Sin(x)), + ] + ), + # degrees + base.FuncDegrees( + variants=[ + V(D.YQL, lambda x: x / sa.func.Math.Pi() * 180.0), + ] + ), + # div + base.FuncDivBasic( + variants=[ + V(D.YQL, lambda x, y: sa.cast(x / y, sa.BIGINT)), + ] + ), + # div_safe + base.FuncDivSafe2( + variants=[ + V(D.YQL, lambda x, y: sa.cast(sa.func.IF(y != 0, x / y), sa.BIGINT)), + ] + ), + base.FuncDivSafe3( + variants=[ + V(D.YQL, lambda x, y, default: sa.cast(sa.func.IF(y != 0, x / y, default), sa.BIGINT)), + ] + ), + # exp + base.FuncExp( + variants=[ + V(D.YQL, sa.func.Math.Exp), + ] + ), + # fdiv_safe + base.FuncFDivSafe2( + variants=[ + V(D.YQL, lambda x, y: sa.func.IF(y != 0, x / y)), + ] + ), + base.FuncFDivSafe3( + variants=[ + V(D.YQL, lambda x, y, default: sa.func.IF(y != 0, x / y, default)), + ] + ), + # floor + base.FuncFloor( + variants=[ + V(D.YQL, sa.func.Math.Floor), + ] + ), + # greatest + base.FuncGreatest1.for_dialect(D.YQL), + base.FuncGreatestMain.for_dialect(D.YQL), + base.GreatestMulti.for_dialect(D.YQL), + # least + base.FuncLeast1.for_dialect(D.YQL), + base.FuncLeastMain.for_dialect(D.YQL), + base.LeastMulti.for_dialect(D.YQL), + # ln + base.FuncLn( + variants=[ + V(D.YQL, sa.func.Math.Log), + ] + ), + # log + base.FuncLog( + variants=[ + V(D.YQL, lambda x, y: sa.func.Math.Log(x) / sa.func.Math.Log(y)), + ] + ), + # log10 + base.FuncLog10( + variants=[ + V(D.YQL, sa.func.Math.Log10), + ] + ), + # pi + base.FuncPi( + variants=[ + V(D.YQL, sa.func.Math.Pi), + ] + ), + # power + base.FuncPower( + variants=[ + V(D.YQL, sa.func.Math.Pow), + ] + ), + # radians + base.FuncRadians( + variants=[ + V(D.YQL, lambda x: x * sa.func.Math.Pi() / 180.0), + ] + ), + # round + base.FuncRound1( + variants=[ + V(D.YQL, sa.func.Math.Round), + ] + ), + base.FuncRound2( + variants=[ + # in YQL Math::Round takes power of 10 instead of precision, so we have to invert the `num` value + V(D.YQL, lambda x, num: sa.func.Math.Round(x, -num)), + ] + ), + # sign + base.FuncSign( + variants=[ + V( + D.YQL, + lambda x: n.if_( + n.if_(x < 0).then(-1), # type: ignore # TODO: fix + n.if_(x > 0).then(1), # type: ignore # TODO: fix + ).else_(0), + ), + ] + ), + # sin + base.FuncSin( + variants=[ + V(D.YQL, sa.func.Math.Sin), + ] + ), + # sqrt + base.FuncSqrt( + variants=[ + V(D.YQL, sa.func.Math.Sqrt), + ] + ), + # square + base.FuncSquare( + variants=[ + V(D.YQL, lambda x: sa.func.Math.Pow(x, 2)), + ] + ), + # tan + base.FuncTan( + variants=[ + V(D.YQL, sa.func.Math.Tan), + ] + ), +] diff --git a/lib/dl_connector_ydb/dl_connector_ydb/formula/definitions/functions_string.py b/lib/dl_connector_ydb/dl_connector_ydb/formula/definitions/functions_string.py new file mode 100644 index 000000000..74310c529 --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/formula/definitions/functions_string.py @@ -0,0 +1,232 @@ +import sqlalchemy as sa +import ydb.sqlalchemy as ydb_sa + +from dl_formula.definitions.base import TranslationVariant +from dl_formula.definitions.common import ( + ifnotnull, + make_binary_chain, +) +import dl_formula.definitions.functions_string as base + +from dl_connector_ydb.formula.constants import YqlDialect as D + + +V = TranslationVariant.make + + +DEFINITIONS_STRING = [ + # ascii + base.FuncAscii( + variants=[ + # ListHead(Unicode::ToCodePointList(Unicode::Substring('ы', 0, 1))) = 1099 + V( + D.YQL, + lambda x: sa.func.ListHead( + sa.func.Unicode.ToCodePointList(sa.func.Unicode.Substring(sa.cast(x, sa.TEXT), 0, 1)) + ), + ), + ] + ), + # char + base.FuncChar( + variants=[ + # CASE WHEN value IS NULL THEN NULL ELSE + # Unicode::FromCodePointList(AsList(COALESCE(CAST(value AS UInt32), 0))) + # END + V( + D.YQL, + lambda value: ifnotnull( + value, + # int -> List -> utf8 + sa.func.Unicode.FromCodePointList( + sa.func.AsList( + # coalesce is needed to un-Nullable the type. + sa.func.COALESCE(sa.cast(value, ydb_sa.types.UInt32), 0), + ) + ), + ), + ), + ] + ), + # concat + base.Concat1.for_dialect((D.YQL)), + base.ConcatMultiStrConst.for_dialect(D.YQL), + base.ConcatMultiStr( + variants=[ + V( + D.YQL, + lambda *args: make_binary_chain( + (lambda x, y: x.concat(y)), # should result in `x || y` SQL. + *args, # should result in `x || y || z || ...` SQL + wrap_as_nodes=False, + ), + ), + ] + ), + base.ConcatMultiAny.for_dialect(D.YQL), + # contains + base.FuncContainsConst( + variants=[ + # V(D.YQL, + # # # “'%', '_' and '\' are currently not supported in ESCAPE clause,” + # lambda x, y: x.like('%{}%'.format(quote_like(y.value, escape='!')), escape='!')), + # # Allows UTF8; also, notably, does not allow a nullable second argument: + V(D.YQL, sa.func.String.Contains), + ] + ), + base.FuncContainsNonConst( + variants=[ + # `''` shouldn't be ever used due to `ifnotnull`. + V(D.YQL, lambda x, y: ifnotnull(y, sa.func.String.Contains(x, sa.func.COALESCE(y, "")))), + ] + ), + base.FuncContainsNonString.for_dialect(D.YQL), + # notcontains + base.FuncNotContainsConst.for_dialect(D.YQL), + base.FuncNotContainsNonConst.for_dialect(D.YQL), + base.FuncNotContainsNonString.for_dialect(D.YQL), + # endswith + base.FuncEndswithConst( + variants=[ + V(D.YQL, sa.func.String.EndsWith), + ] + ), + base.FuncEndswithNonConst( + variants=[ + # `''` shouldn't ever happen due to `ifnotnull`. + V(D.YQL, lambda x, y: ifnotnull(y, sa.func.String.EndsWith(x, sa.func.COALESCE(y, "")))), + ] + ), + base.FuncEndswithNonString.for_dialect(D.YQL), + # find + base.FuncFind2( + variants=[ + # In YQL indices start from 0, but we count them from 1, so have to do -1/+1 here + V(D.YQL, lambda text, piece: sa.func.COALESCE(sa.func.Unicode.Find(text, piece), -1) + 1), + ] + ), + base.FuncFind3( + variants=[ + # In YQL indices start from 0, but we count them from 1, so have to do -1/+1 here + V( + D.YQL, + lambda text, piece, startpos: sa.func.COALESCE(sa.func.Unicode.Find(text, piece, startpos), -1) + 1, + ), + ] + ), + # icontains + base.FuncIContainsNonConst.for_dialect(D.YQL), + base.FuncIContainsNonString.for_dialect(D.YQL), + # iendswith + base.FuncIEndswithNonConst.for_dialect(D.YQL), + base.FuncIEndswithNonString.for_dialect(D.YQL), + # istartswith + base.FuncIStartswithNonConst.for_dialect(D.YQL), + base.FuncIStartswithNonString.for_dialect(D.YQL), + # left + base.FuncLeft( + variants=[ + V(D.YQL, lambda x, y: sa.func.Unicode.Substring(sa.cast(x, sa.TEXT), 0, y)), + ] + ), + # len + base.FuncLenString( + variants=[ + V(D.YQL, lambda val: sa.func.Unicode.GetLength(sa.cast(val, sa.TEXT))), + ] + ), + # lower + base.FuncLowerConst.for_dialect(D.YQL), + base.FuncLowerNonConst( + variants=[ + V(D.YQL, lambda val: sa.func.Unicode.ToLower(sa.cast(val, sa.TEXT))), + ] + ), + # regexp_extract + # TODO: YQL + # https://ydb.tech/en/docs/yql/reference/udf/list/hyperscan + # Problem: "By default, all functions work in the single-byte mode. + # However, if the regular expression is a valid UTF-8 string but is not a valid ASCII string, + # the UTF-8 mode is enabled automatically." However, we can't use higher-order functions yet. + # replace + base.FuncReplace( + variants=[ + V( + D.YQL, + lambda val, repl_from, repl_with: sa.func.Unicode.ReplaceAll( + sa.cast(val, sa.TEXT), + sa.func.COALESCE(sa.cast(repl_from, sa.TEXT), ""), + sa.func.COALESCE(sa.cast(repl_with, sa.TEXT), ""), + ), + ), + ] + ), + # right + base.FuncRight( + variants=[ + V( + D.YQL, + lambda x, y: sa.func.Unicode.Substring( + sa.cast(x, sa.TEXT), + sa.func.Unicode.GetLength(sa.cast(x, sa.TEXT)) - y, + ), + ), + ] + ), + # space + base.FuncSpaceConst.for_dialect(D.YQL), + base.FuncSpaceNonConst( + variants=[ + # YQL string multiplication: also consider + # sa.func.ListConcat(sa.func.ListReplicate(sa.cast(' ', sa.TEXT), size)) + V(D.YQL, lambda size: sa.cast(sa.func.String.LeftPad("", size, " "), sa.TEXT)), + ] + ), + # split + base.FuncSplit3( + variants=[ + V( + D.YQL, + lambda text, delim, ind: sa.func.ListHead( + sa.func.ListSkip( + sa.func.Unicode.SplitToList(sa.cast(text, sa.TEXT), delim), # must be non-nullable + ind - 1, + ) + ), + ), + ] + ), + # startswith + base.FuncStartswithConst( + variants=[ + V(D.YQL, sa.func.String.StartsWith), + ] + ), + base.FuncStartswithNonConst( + variants=[ + # `''` shouldn't ever happen due to `ifnotnull`. + V(D.YQL, lambda x, y: ifnotnull(y, sa.func.String.StartsWith(x, sa.func.COALESCE(y, "")))), + ] + ), + base.FuncStartswithNonString.for_dialect(D.YQL), + # substr + base.FuncSubstr2( + variants=[ + # In YQL indices start from 0, but we count them from 1, so have to do -1 here + V(D.YQL, lambda val, start: sa.func.Unicode.Substring(sa.cast(val, sa.TEXT), start - 1)), + ] + ), + base.FuncSubstr3( + variants=[ + # In YQL indices start from 0, but we count them from 1, so have to do -1 here + V(D.YQL, lambda val, start, length: sa.func.Unicode.Substring(sa.cast(val, sa.TEXT), start - 1, length)), + ] + ), + # upper + base.FuncUpperConst.for_dialect(D.YQL), + base.FuncUpperNonConst( + variants=[ + V(D.YQL, lambda val: sa.func.Unicode.ToUpper(sa.cast(val, sa.TEXT))), + ] + ), +] diff --git a/lib/dl_connector_ydb/dl_connector_ydb/formula/definitions/functions_type.py b/lib/dl_connector_ydb/dl_connector_ydb/formula/definitions/functions_type.py new file mode 100644 index 000000000..7402a16b8 --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/formula/definitions/functions_type.py @@ -0,0 +1,185 @@ +import sqlalchemy as sa + +from dl_formula.definitions.base import TranslationVariant +import dl_formula.definitions.functions_type as base + +from dl_connector_ydb.formula.constants import YqlDialect as D + + +V = TranslationVariant.make + + +DEFINITIONS_TYPE = [ + # bool + base.FuncBoolFromNull.for_dialect(D.YQL), + base.FuncBoolFromNumber.for_dialect(D.YQL), + base.FuncBoolFromBool.for_dialect(D.YQL), + base.FuncBoolFromStrGeo.for_dialect(D.YQL), + base.FuncBoolFromDateDatetime.for_dialect(D.YQL), + # date + base.FuncDate1FromNull.for_dialect(D.YQL), + base.FuncDate1FromDatetime.for_dialect(D.YQL), + base.FuncDate1FromString.for_dialect(D.YQL), + base.FuncDate1FromNumber( + variants=[ + V( + D.YQL, lambda expr: sa.cast(sa.cast(sa.cast(expr, sa.BIGINT), sa.DATETIME), sa.DATE) + ), # number -> dt -> date + ] + ), + # datetime + base.FuncDatetime1FromNull.for_dialect(D.YQL), + base.FuncDatetime1FromDatetime.for_dialect(D.YQL), + base.FuncDatetime1FromDate.for_dialect(D.YQL), + base.FuncDatetime1FromNumber( + variants=[ + V(D.YQL, lambda expr: sa.cast(sa.cast(expr, sa.BIGINT), sa.DateTime())), + ] + ), + base.FuncDatetime1FromString( + variants=[ + # e.g. `DateTime::MakeDatetime(DateTime::ParseIso8601('2021-06-01 18:00:59')) as c` + V(D.YQL, lambda expr: sa.func.DateTime.MakeDatetime(sa.func.DateTime.ParseIso8601(expr))), + ] + ), + # datetimetz + base.FuncDatetimeTZConst.for_dialect(D.YQL), + # float + base.FuncFloatNumber( + variants=[ + V(D.YQL, lambda value: sa.cast(value, sa.FLOAT)), # TODO: need it to become SQL `CAST(… AS DOUBLE)`. + ] + ), + base.FuncFloatString( + variants=[ + V(D.YQL, lambda value: sa.cast(value, sa.FLOAT)), + ] + ), + base.FuncFloatFromBool( + variants=[ + V(D.YQL, lambda value: sa.cast(value, sa.FLOAT)), + ] + ), + base.FuncFloatFromDate( + variants=[ + V(D.YQL, lambda expr: sa.cast(sa.cast(expr, sa.DATETIME), sa.FLOAT)), # date -> dt -> number + ] + ), + base.FuncFloatFromDatetime( + variants=[ + V(D.YQL, lambda expr: sa.cast(expr, sa.FLOAT)), + ] + ), + base.FuncFloatFromGenericDatetime( + variants=[ + V(D.YQL, lambda expr: sa.cast(expr, sa.FLOAT)), + ] + ), + # genericdatetime + base.FuncGenericDatetime1FromNull.for_dialect(D.YQL), + base.FuncGenericDatetime1FromDatetime.for_dialect(D.YQL), + base.FuncGenericDatetime1FromDate.for_dialect(D.YQL), + base.FuncGenericDatetime1FromNumber( + variants=[ + V(D.YQL, lambda expr: sa.cast(sa.cast(expr, sa.BIGINT), sa.DateTime())), + ] + ), + base.FuncGenericDatetime1FromString( + variants=[ + # e.g. `DateTime::MakeDatetime(DateTime::ParseIso8601('2021-06-01 18:00:59')) as c` + V(D.YQL, lambda expr: sa.func.DateTime.MakeDatetime(sa.func.DateTime.ParseIso8601(expr))), + ] + ), + # geopoint + base.FuncGeopointFromStr.for_dialect(D.YQL), + base.FuncGeopointFromCoords.for_dialect(D.YQL), + # geopolygon + base.FuncGeopolygon.for_dialect(D.YQL), + # int + base.FuncIntFromNull( + variants=[ + V(D.YQL, lambda _: sa.cast(sa.null(), sa.BIGINT())), + ] + ), + base.FuncIntFromInt.for_dialect(D.YQL), + base.FuncIntFromFloat( + variants=[ + V(D.YQL, lambda value: sa.cast(value, sa.BIGINT())), + ] + ), + base.FuncIntFromBool( + variants=[ + V(D.YQL, lambda value: sa.cast(value, sa.BIGINT)), + ] + ), + base.FuncIntFromStr( + variants=[ + V(D.YQL, lambda expr: sa.func.cast(expr, sa.BIGINT)), + ] + ), + base.FuncIntFromDate( + variants=[ + V(D.YQL, lambda expr: sa.cast(sa.cast(expr, sa.DATETIME), sa.BIGINT)), + ] + ), + base.FuncIntFromDatetime( + variants=[ + V(D.YQL, lambda expr: sa.cast(expr, sa.BIGINT)), + ] + ), + base.FuncIntFromGenericDatetime( + variants=[ + V(D.YQL, lambda expr: sa.cast(expr, sa.BIGINT)), + ] + ), + # str + base.FuncStrFromNull( + variants=[ + V(D.YQL, lambda value: sa.cast(sa.null(), sa.TEXT)), + ] + ), + base.FuncStrFromUnsupported( + variants=[ + # YQL: uncertain. + # Does not work for e.g. arrays: + # V(D.YQL, lambda value: sa.cast(value, sa.TEXT)), + # Does not work for e.g. Decimal: + V( + D.YQL, + lambda value: sa.cast(sa.func.ToBytes(sa.func.Yson.SerializePretty(sa.func.Yson.From(value))), sa.TEXT), + ), + ] + ), + base.FuncStrFromInteger( + variants=[ + V(D.YQL, lambda value: sa.cast(value, sa.TEXT)), + ] + ), + base.FuncStrFromFloat( + variants=[ + V(D.YQL, lambda value: sa.cast(value, sa.TEXT)), + ] + ), + base.FuncStrFromBool( + variants=[ + V(D.YQL, lambda value: sa.case(whens=[(value.is_(None), sa.null()), (value, "True")], else_="False")), + ] + ), + base.FuncStrFromStrGeo.for_dialect(D.YQL), + base.FuncStrFromDate( + variants=[ + V(D.YQL, lambda value: sa.cast(value, sa.TEXT)), + ] + ), + base.FuncStrFromDatetime( + variants=[ + V(D.YQL, lambda value: sa.cast(value, sa.TEXT)), # results in e.g. "2021-06-01T15:20:24Z" + ] + ), + base.FuncStrFromString.for_dialect(D.YQL), + base.FuncStrFromUUID( + variants=[ + V(D.YQL, lambda value: sa.cast(value, sa.TEXT)), + ] + ), +] diff --git a/lib/dl_connector_ydb/dl_connector_ydb/formula/definitions/operators_binary.py b/lib/dl_connector_ydb/dl_connector_ydb/formula/definitions/operators_binary.py new file mode 100644 index 000000000..3a3ab1a96 --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/formula/definitions/operators_binary.py @@ -0,0 +1,147 @@ +import sqlalchemy as sa + +from dl_formula.definitions.base import TranslationVariant +from dl_formula.definitions.common_datetime import DAY_USEC +import dl_formula.definitions.operators_binary as base + +from dl_connector_ydb.formula.constants import YqlDialect as D + + +V = TranslationVariant.make + + +DEFINITIONS_BINARY = [ + # != + base.BinaryNotEqual.for_dialect(D.YQL), + # % + base.BinaryModInteger.for_dialect(D.YQL), + base.BinaryModFloat.for_dialect(D.YQL), + # * + base.BinaryMultNumbers.for_dialect(D.YQL), + base.BinaryMultStringConst.for_dialect(D.YQL), + # + + base.BinaryPlusNumbers.for_dialect(D.YQL), + base.BinaryPlusStrings.for_dialect(D.YQL), + base.BinaryPlusDateInt( + variants=[ + V(D.YQL, lambda date, days: date + sa.func.DateTime.IntervalFromDays(days)), + ] + ), + base.BinaryPlusDateFloat( + variants=[ + V(D.YQL, lambda date, days: date + sa.func.DateTime.IntervalFromDays(sa.cast(days, sa.INTEGER))), + ] + ), + base.BinaryPlusDatetimeNumber( + variants=[ + V( + D.YQL, + lambda date, days: (date + sa.func.DateTime.IntervalFromMicroseconds(base.as_bigint(days * DAY_USEC))), + ), + ] + ), + base.BinaryPlusGenericDatetimeNumber( + variants=[ + V( + D.YQL, + lambda dt, days: (dt + sa.func.DateTime.IntervalFromMicroseconds(base.as_bigint(days * DAY_USEC))), + ), + ] + ), + # - + base.BinaryMinusInts( + variants=[ + V(D.YQL, lambda num1, num2: (sa.cast(num1, sa.INTEGER) - sa.cast(num2, sa.INTEGER))), + ] + ), + base.BinaryMinusNumbers.for_dialect(D.YQL), + base.BinaryMinusDateInt( + variants=[ + V(D.YQL, lambda date, days: date - sa.func.DateTime.IntervalFromDays(days)), + ] + ), + base.BinaryMinusDateFloat( + variants=[ + V( + D.YQL, + lambda date, days: ( + date - sa.func.DateTime.IntervalFromDays(sa.cast(sa.func.Math.Ceil(days), sa.INTEGER)) + ), + ), + ] + ), + base.BinaryMinusDatetimeNumber( + variants=[ + V( + D.YQL, + lambda date, days: (date - sa.func.DateTime.IntervalFromMicroseconds(base.as_bigint(days * DAY_USEC))), + ), + ] + ), + base.BinaryMinusGenericDatetimeNumber( + variants=[ + V( + D.YQL, + lambda dt, days: (dt - sa.func.DateTime.IntervalFromMicroseconds(base.as_bigint(days * DAY_USEC))), + ), + ] + ), + base.BinaryMinusDates( + variants=[ + V(D.YQL, lambda left, right: sa.func.DateTime.ToDays(left - right)), + ] + ), + base.BinaryMinusDatetimes( + variants=[ + V(D.YQL, lambda left, right: sa.func.DateTime.ToMicroseconds(left - right) / float(DAY_USEC)), + ] + ), + base.BinaryMinusGenericDatetimes( + variants=[ + V(D.YQL, lambda left, right: sa.func.DateTime.ToMicroseconds(left - right) / float(DAY_USEC)), + ] + ), + # / + base.BinaryDivInt( + variants=[ + # See also: https://ydb.tech/en/docs/yql/reference/syntax/pragma#classicdivision + V(D.YQL, lambda x, y: sa.cast(x, sa.FLOAT) / y), + ] + ), + base.BinaryDivFloat.for_dialect(D.YQL), + # < + base.BinaryLessThan.for_dialect(D.YQL), + # <= + base.BinaryLessThanOrEqual.for_dialect(D.YQL), + # == + base.BinaryEqual.for_dialect(D.YQL), + # > + base.BinaryGreaterThan.for_dialect(D.YQL), + # >= + base.BinaryGreaterThanOrEqual.for_dialect(D.YQL), + # ^ + base.BinaryPower.for_dialect(D.YQL), + # _!= + base.BinaryNotEqualInternal.for_dialect(D.YQL), + # _== + base.BinaryEqualInternal.for_dialect(D.YQL), + # _dneq + base.BinaryEqualDenullified( + variants=[ + # YQL does not support ISNULL and other complex operations in JOIN conditions + V(D.YQL, lambda left, right: left == right), # type: ignore + ] + ), + # and + base.BinaryAnd.for_dialect(D.YQL), + # in + base.BinaryIn.for_dialect(D.YQL), + # like + base.BinaryLike.for_dialect(D.YQL), + # notin + base.BinaryNotIn.for_dialect(D.YQL), + # notlike + base.BinaryNotLike.for_dialect(D.YQL), + # or + base.BinaryOr.for_dialect(D.YQL), +] diff --git a/lib/dl_connector_ydb/dl_connector_ydb/formula/definitions/operators_ternary.py b/lib/dl_connector_ydb/dl_connector_ydb/formula/definitions/operators_ternary.py new file mode 100644 index 000000000..ac01ba047 --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/formula/definitions/operators_ternary.py @@ -0,0 +1,11 @@ +import dl_formula.definitions.operators_ternary as base + +from dl_connector_ydb.formula.constants import YqlDialect as D + + +DEFINITIONS_TERNARY = [ + # between + base.TernaryBetween.for_dialect(D.YQL), + # notbetween + base.TernaryNotBetween.for_dialect(D.YQL), +] diff --git a/lib/dl_connector_ydb/dl_connector_ydb/formula/definitions/operators_unary.py b/lib/dl_connector_ydb/dl_connector_ydb/formula/definitions/operators_unary.py new file mode 100644 index 000000000..ea2090e7b --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/formula/definitions/operators_unary.py @@ -0,0 +1,36 @@ +from dl_formula.definitions.base import TranslationVariant +import dl_formula.definitions.operators_unary as base + +from dl_connector_ydb.formula.constants import YqlDialect as D + + +V = TranslationVariant.make + + +DEFINITIONS_UNARY = [ + # isfalse + base.UnaryIsFalseStringGeo.for_dialect(D.YQL), + base.UnaryIsFalseNumbers.for_dialect(D.YQL), + base.UnaryIsFalseDateTime.for_dialect(D.YQL), + base.UnaryIsFalseBoolean( + variants=[ + V(D.YQL, lambda x: x == False), # noqa: E712 + ] + ), + # istrue + base.UnaryIsTrueStringGeo.for_dialect(D.YQL), + base.UnaryIsTrueNumbers.for_dialect(D.YQL), + base.UnaryIsTrueDateTime.for_dialect(D.YQL), + base.UnaryIsTrueBoolean( + variants=[ + V(D.YQL, lambda x: x == True), # noqa: E712 + ] + ), + # neg + base.UnaryNegate.for_dialect(D.YQL), + # not + base.UnaryNotBool.for_dialect(D.YQL), + base.UnaryNotNumbers.for_dialect(D.YQL), + base.UnaryNotStringGeo.for_dialect(D.YQL), + base.UnaryNotDateDatetime.for_dialect(D.YQL), +] diff --git a/lib/dl_connector_ydb/dl_connector_ydb/formula_ref/__init__.py b/lib/dl_connector_ydb/dl_connector_ydb/formula_ref/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/lib/dl_connector_ydb/dl_connector_ydb/formula_ref/human_dialects.py b/lib/dl_connector_ydb/dl_connector_ydb/formula_ref/human_dialects.py new file mode 100644 index 000000000..f3b08485a --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/formula_ref/human_dialects.py @@ -0,0 +1,22 @@ +from dl_formula_ref.texts import StyledDialect + +from dl_connector_ydb.formula.constants import YqlDialect + + +HUMAN_DIALECTS = { + YqlDialect.YDB: StyledDialect( + "`YDB`", + "`YDB`
(`YQL`)", + "`YDB` (`YQL`)", + ), + YqlDialect.YQ: StyledDialect( + "`YQ`", + "`YQ`", + "`YQ`", + ), + YqlDialect.YQL: StyledDialect( + "`YQL`", + "`YQL`", + "`YQL`", + ), +} diff --git a/lib/dl_connector_ydb/dl_connector_ydb/formula_ref/i18n.py b/lib/dl_connector_ydb/dl_connector_ydb/formula_ref/i18n.py new file mode 100644 index 000000000..b8a6d8028 --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/formula_ref/i18n.py @@ -0,0 +1,23 @@ +import os + +import attr + +from dl_i18n.localizer_base import Translatable as BaseTranslatable +from dl_i18n.localizer_base import TranslationConfig + +import dl_connector_ydb as package + + +DOMAIN = f"dl_formula_ref_{package.__name__}" + +_LOCALE_DIR = os.path.join(os.path.dirname(__file__), "..", "locales") + +CONFIGS = [ + TranslationConfig(path=_LOCALE_DIR, domain=DOMAIN, locale="en"), + TranslationConfig(path=_LOCALE_DIR, domain=DOMAIN, locale="ru"), +] + + +@attr.s +class Translatable(BaseTranslatable): + domain: str = attr.ib(default=DOMAIN) diff --git a/lib/dl_connector_ydb/dl_connector_ydb/formula_ref/plugin.py b/lib/dl_connector_ydb/dl_connector_ydb/formula_ref/plugin.py new file mode 100644 index 000000000..f857c19de --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/formula_ref/plugin.py @@ -0,0 +1,22 @@ +from dl_formula_ref.functions.date import FUNCTION_NOW +from dl_formula_ref.plugins.base.plugin import FormulaRefPlugin +from dl_formula_ref.registry.note import Note + +from dl_connector_ydb.formula.constants import YqlDialect +from dl_connector_ydb.formula_ref.human_dialects import HUMAN_DIALECTS +from dl_connector_ydb.formula_ref.i18n import ( + CONFIGS, + Translatable, +) + + +class YQLFormulaRefPlugin(FormulaRefPlugin): + any_dialects = frozenset((*YqlDialect.YDB.to_list(),)) + human_dialects = HUMAN_DIALECTS + translation_configs = frozenset(CONFIGS) + function_extensions = [ + FUNCTION_NOW.extend( + dialect=YqlDialect.YDB, + notes=(Note(Translatable("On {dialects:YQL}, the function always returns the UTC date and time.")),), + ), + ] diff --git a/lib/dl_connector_ydb/dl_connector_ydb/locales/en/LC_MESSAGES/dl_connector_ydb.mo b/lib/dl_connector_ydb/dl_connector_ydb/locales/en/LC_MESSAGES/dl_connector_ydb.mo new file mode 100644 index 000000000..5add0edcb Binary files /dev/null and b/lib/dl_connector_ydb/dl_connector_ydb/locales/en/LC_MESSAGES/dl_connector_ydb.mo differ diff --git a/lib/dl_connector_ydb/dl_connector_ydb/locales/en/LC_MESSAGES/dl_connector_ydb.po b/lib/dl_connector_ydb/dl_connector_ydb/locales/en/LC_MESSAGES/dl_connector_ydb.po new file mode 100644 index 000000000..d9968f043 --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/locales/en/LC_MESSAGES/dl_connector_ydb.po @@ -0,0 +1,19 @@ +# Copyright (c) 2023 YANDEX LLC +# This file is distributed under the same license as the DataLens package. +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"Report-Msgid-Bugs-To: datalens-opensource@yandex-team.ru\n" +"POT-Creation-Date: 2023-09-22 08:16+0000\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" + +msgid "label_connector-ydb" +msgstr "YDB" + +msgid "source_templates-tab_title-table" +msgstr "Table" + +msgid "source_templates-label-ydb_table" +msgstr "YDB table path" diff --git a/lib/dl_connector_ydb/dl_connector_ydb/locales/en/LC_MESSAGES/dl_formula_ref_dl_connector_ydb.mo b/lib/dl_connector_ydb/dl_connector_ydb/locales/en/LC_MESSAGES/dl_formula_ref_dl_connector_ydb.mo new file mode 100644 index 000000000..00261bf60 Binary files /dev/null and b/lib/dl_connector_ydb/dl_connector_ydb/locales/en/LC_MESSAGES/dl_formula_ref_dl_connector_ydb.mo differ diff --git a/lib/dl_connector_ydb/dl_connector_ydb/locales/en/LC_MESSAGES/dl_formula_ref_dl_connector_ydb.po b/lib/dl_connector_ydb/dl_connector_ydb/locales/en/LC_MESSAGES/dl_formula_ref_dl_connector_ydb.po new file mode 100644 index 000000000..90b0c3a48 --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/locales/en/LC_MESSAGES/dl_formula_ref_dl_connector_ydb.po @@ -0,0 +1,13 @@ +# Copyright (c) 2023 YANDEX LLC +# This file is distributed under the same license as the DataLens package. +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"Report-Msgid-Bugs-To: datalens-opensource@yandex-team.ru\n" +"POT-Creation-Date: 2023-09-22 08:16+0000\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" + +msgid "On {dialects:YQL}, the function always returns the UTC date and time." +msgstr "" diff --git a/lib/dl_connector_ydb/dl_connector_ydb/locales/ru/LC_MESSAGES/dl_connector_ydb.mo b/lib/dl_connector_ydb/dl_connector_ydb/locales/ru/LC_MESSAGES/dl_connector_ydb.mo new file mode 100644 index 000000000..2b3674e77 Binary files /dev/null and b/lib/dl_connector_ydb/dl_connector_ydb/locales/ru/LC_MESSAGES/dl_connector_ydb.mo differ diff --git a/lib/dl_connector_ydb/dl_connector_ydb/locales/ru/LC_MESSAGES/dl_connector_ydb.po b/lib/dl_connector_ydb/dl_connector_ydb/locales/ru/LC_MESSAGES/dl_connector_ydb.po new file mode 100644 index 000000000..79eb7d17f --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/locales/ru/LC_MESSAGES/dl_connector_ydb.po @@ -0,0 +1,19 @@ +# Copyright (c) 2023 YANDEX LLC +# This file is distributed under the same license as the DataLens package. +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"Report-Msgid-Bugs-To: datalens-opensource@yandex-team.ru\n" +"POT-Creation-Date: 2023-09-22 08:16+0000\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" + +msgid "label_connector-ydb" +msgstr "YDB" + +msgid "source_templates-tab_title-table" +msgstr "Таблица" + +msgid "source_templates-label-ydb_table" +msgstr "Путь к таблице в YDB" diff --git a/lib/dl_connector_ydb/dl_connector_ydb/locales/ru/LC_MESSAGES/dl_formula_ref_bi_connector_ydb.mo b/lib/dl_connector_ydb/dl_connector_ydb/locales/ru/LC_MESSAGES/dl_formula_ref_bi_connector_ydb.mo new file mode 100644 index 000000000..9f0eac8e1 Binary files /dev/null and b/lib/dl_connector_ydb/dl_connector_ydb/locales/ru/LC_MESSAGES/dl_formula_ref_bi_connector_ydb.mo differ diff --git a/lib/dl_connector_ydb/dl_connector_ydb/locales/ru/LC_MESSAGES/dl_formula_ref_bi_connector_ydb.po b/lib/dl_connector_ydb/dl_connector_ydb/locales/ru/LC_MESSAGES/dl_formula_ref_bi_connector_ydb.po new file mode 100644 index 000000000..2757f6bf2 --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb/locales/ru/LC_MESSAGES/dl_formula_ref_bi_connector_ydb.po @@ -0,0 +1,13 @@ +# Copyright (c) 2023 YANDEX LLC +# This file is distributed under the same license as the DataLens package. +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"Report-Msgid-Bugs-To: datalens-opensource@yandex-team.ru\n" +"POT-Creation-Date: 2023-09-22 08:16+0000\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" + +msgid "On {dialects:YQL}, the function always returns the UTC date and time." +msgstr "В {dialects:YQL} функция всегда возвращает дату и время в зоне UTC." diff --git a/lib/dl_connector_ydb/dl_connector_ydb/py.typed b/lib/dl_connector_ydb/dl_connector_ydb/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/lib/dl_connector_ydb/dl_connector_ydb_tests/__init__.py b/lib/dl_connector_ydb/dl_connector_ydb_tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/lib/dl_connector_ydb/dl_connector_ydb_tests/db/__init__.py b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/lib/dl_connector_ydb/dl_connector_ydb_tests/db/api/__init__.py b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/api/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/lib/dl_connector_ydb/dl_connector_ydb_tests/db/api/base.py b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/api/base.py new file mode 100644 index 000000000..0a5debfbc --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/api/base.py @@ -0,0 +1,90 @@ +import pytest + +from dl_api_lib_testing.configuration import ApiTestEnvironmentConfiguration +from dl_api_lib_testing.connection_base import ConnectionTestBase +from dl_api_lib_testing.data_api_base import ( + DataApiTestParams, + StandardizedDataApiTestBase, +) +from dl_api_lib_testing.dataset_base import DatasetTestBase +from dl_constants.enums import RawSQLLevel +from dl_core_testing.database import ( + C, + Db, + DbTable, + make_table, +) + +from dl_connector_ydb.core.ydb.constants import ( + CONNECTION_TYPE_YDB, + SOURCE_TYPE_YDB_TABLE, +) +from dl_connector_ydb_tests.db.config import ( + API_TEST_CONFIG, + CONNECTION_PARAMS, + DB_CORE_URL, + TABLE_DATA, + TABLE_NAME, + TABLE_SCHEMA, +) + + +class YDBConnectionTestBase(ConnectionTestBase): + bi_compeng_pg_on = False + conn_type = CONNECTION_TYPE_YDB + + @pytest.fixture(scope="class") + def db_url(self) -> str: + return DB_CORE_URL + + @pytest.fixture(scope="class") + def bi_test_config(self) -> ApiTestEnvironmentConfiguration: + return API_TEST_CONFIG + + @pytest.fixture(scope="class") + def connection_params(self) -> dict: + return CONNECTION_PARAMS + + @pytest.fixture(scope="class") + def sample_table(self, db: Db) -> DbTable: + db_table = make_table( + db=db, + name=TABLE_NAME, + columns=[C(name=name, user_type=user_type, sa_type=sa_type) for name, user_type, sa_type in TABLE_SCHEMA], + data=[], # to avoid producing a sample data + create_in_db=False, + ) + db.create_table(db_table.table) + db.insert_into_table(db_table.table, TABLE_DATA) + return db_table + + +class YDBDashSQLConnectionTest(YDBConnectionTestBase): + @pytest.fixture(scope="class") + def connection_params(self) -> dict: + return CONNECTION_PARAMS | dict(raw_sql_level=RawSQLLevel.dashsql.value) + + +class YDBDatasetTestBase(YDBConnectionTestBase, DatasetTestBase): + @pytest.fixture(scope="class") + def dataset_params(self, sample_table: DbTable) -> dict: + return dict( + source_type=SOURCE_TYPE_YDB_TABLE.name, + parameters=dict( + table_name=sample_table.name, + ), + ) + + +class YDBDataApiTestBase(YDBDatasetTestBase, StandardizedDataApiTestBase): + mutation_caches_on = False + + @pytest.fixture(scope="class") + def data_api_test_params(self) -> DataApiTestParams: + return DataApiTestParams( + two_dims=("some_string", "some_int32"), + summable_field="some_int32", + range_field="some_int64", + distinct_field="id", + date_field="some_date", + ) diff --git a/lib/dl_connector_ydb/dl_connector_ydb_tests/db/api/test_connection.py b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/api/test_connection.py new file mode 100644 index 000000000..89221454b --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/api/test_connection.py @@ -0,0 +1,7 @@ +from dl_api_lib_testing.connector.connection_suite import DefaultConnectorConnectionTestSuite + +from dl_connector_ydb_tests.db.api.base import YDBConnectionTestBase + + +class TestYDBConnection(YDBConnectionTestBase, DefaultConnectorConnectionTestSuite): + pass diff --git a/lib/dl_connector_ydb/dl_connector_ydb_tests/db/api/test_dashsql.py b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/api/test_dashsql.py new file mode 100644 index 000000000..5cf29077e --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/api/test_dashsql.py @@ -0,0 +1,98 @@ +from aiohttp.test_utils import TestClient +import pytest + +from dl_api_lib_testing.connector.dashsql_suite import DefaultDashSQLTestSuite +from dl_core_testing.database import DbTable + +from dl_connector_ydb_tests.db.api.base import YDBDashSQLConnectionTest +from dl_connector_ydb_tests.db.config import DASHSQL_QUERY + + +class TestYDBDashSQL(YDBDashSQLConnectionTest, DefaultDashSQLTestSuite): + @pytest.mark.asyncio + async def test_result( + self, + data_api_lowlevel_aiohttp_client: TestClient, + saved_connection_id: str, + sample_table: DbTable, + ): + resp = await self.get_dashsql_response( + data_api_aio=data_api_lowlevel_aiohttp_client, + conn_id=saved_connection_id, + query=DASHSQL_QUERY.format(table_name=sample_table.name), + ) + + resp_data = await resp.json() + assert resp_data[0]["event"] == "metadata", resp_data + assert resp_data[0]["data"]["names"][:12] == [ + "id", + "some_str", + "some_utf8", + "some_int", + "some_uint8", + "some_int64", + "some_uint64", + "some_double", + "some_bool", + "some_date", + "some_datetime", + "some_timestamp", + ] + assert resp_data[0]["data"]["driver_types"][:12] == [ + "int32?", + "string", + "utf8?", + "int32", + "uint8?", + "int64", + "uint64", + "double", + "bool", + "date", + "datetime", + "timestamp", + ] + assert resp_data[0]["data"]["db_types"][:12] == [ + "integer", + "text", + "text", + "integer", + "integer", + "integer", + "integer", + "float", + "boolean", + "date", + "datetime", + "datetime", + ] + assert resp_data[0]["data"]["bi_types"][:12] == [ + "integer", + "string", + "string", + "integer", + "integer", + "integer", + "integer", + "float", + "boolean", + "date", + "genericdatetime", + "genericdatetime", + ] + + assert resp_data[-1]["event"] == "footer", resp_data[-1] + + @pytest.mark.asyncio + async def test_result_with_error(self, data_api_lowlevel_aiohttp_client: TestClient, saved_connection_id: str): + resp = await self.get_dashsql_response( + data_api_aio=data_api_lowlevel_aiohttp_client, + conn_id=saved_connection_id, + query="select 1/", + fail_ok=True, + ) + + resp_data = await resp.json() + assert resp.status == 400, resp_data + assert resp_data["code"] == "ERR.DS_API.DB", resp_data + assert resp_data.get("details", {}).get("db_message"), resp_data diff --git a/lib/dl_connector_ydb/dl_connector_ydb_tests/db/api/test_dataset.py b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/api/test_dataset.py new file mode 100644 index 000000000..7756b270b --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/api/test_dataset.py @@ -0,0 +1,12 @@ +from dl_api_client.dsmaker.primitives import Dataset +from dl_api_lib_testing.connector.dataset_suite import DefaultConnectorDatasetTestSuite + +from dl_connector_ydb_tests.db.api.base import YDBDatasetTestBase +from dl_connector_ydb_tests.db.config import TABLE_SCHEMA + + +class TestYDBDataset(YDBDatasetTestBase, DefaultConnectorDatasetTestSuite): + def check_basic_dataset(self, ds: Dataset) -> None: + assert ds.id + field_names = {field.title for field in ds.result_schema} + assert field_names == {column[0] for column in TABLE_SCHEMA} diff --git a/lib/dl_connector_ydb/dl_connector_ydb_tests/db/config.py b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/config.py new file mode 100644 index 000000000..92825577a --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/config.py @@ -0,0 +1,340 @@ +import sqlalchemy as sa + +from dl_api_lib_testing.configuration import ApiTestEnvironmentConfiguration +from dl_constants.enums import UserDataType +from dl_core_testing.configuration import DefaultCoreTestConfiguration +from dl_testing.containers import get_test_container_hostport + +from dl_connector_ydb.formula.constants import YqlDialect as D + + +# Infra settings +CORE_TEST_CONFIG = DefaultCoreTestConfiguration( + host_us_http=get_test_container_hostport("us", fallback_port=51911).host, + port_us_http=get_test_container_hostport("us", fallback_port=51911).port, + host_us_pg=get_test_container_hostport("pg-us", fallback_port=51910).host, + port_us_pg_5432=get_test_container_hostport("pg-us", fallback_port=51910).port, + us_master_token="AC1ofiek8coB", + core_connector_ep_names=["ydb"], +) + +_DB_URL = f'yql:///?endpoint={get_test_container_hostport("db-ydb", fallback_port=51900).host}%3A{get_test_container_hostport("db-ydb", fallback_port=51900).port}&database=%2Flocal' +DB_CORE_URL = _DB_URL +DB_CONFIGURATIONS = { + D.YDB: _DB_URL, +} + +CONNECTION_PARAMS = dict( + host=get_test_container_hostport("db-ydb", fallback_port=51900).host, + port=get_test_container_hostport("db-ydb", fallback_port=51900).port, + db_name="/local", +) +TABLE_SCHEMA = ( + ("id", UserDataType.integer, sa.Integer), + ("some_int32", UserDataType.integer, sa.Integer), + ("some_int64", UserDataType.integer, sa.BigInteger), + ("some_uint8", UserDataType.integer, sa.SmallInteger), + ("some_bool", UserDataType.boolean, sa.Boolean), + ("some_double", UserDataType.float, sa.Float), + ("some_string", UserDataType.string, sa.String), + ("some_utf8", UserDataType.string, sa.Unicode), + ("some_date", UserDataType.date, sa.Date), + ("some_datetime", UserDataType.genericdatetime, sa.DateTime), + ("some_timestamp", UserDataType.unsupported, sa.TIMESTAMP), +) +TABLE_DATA = [ + { + "id": 1, + "some_int32": 1073741824, + "some_int64": 4611686018427387904, + "some_uint8": 254, + "some_bool": True, + "some_double": None, + "some_string": None, + "some_utf8": None, + "some_date": None, + "some_datetime": None, + "some_timestamp": None, + }, + { + "id": 2, + "some_int32": -1, + "some_int64": -2, + "some_uint8": 3, + "some_bool": False, + "some_double": None, + "some_string": None, + "some_utf8": None, + "some_date": None, + "some_datetime": None, + "some_timestamp": None, + }, + { + "id": 3, + "some_int32": None, + "some_int64": None, + "some_uint8": None, + "some_bool": None, + "some_double": 79079710.35104989, + "some_string": None, + "some_utf8": None, + "some_date": None, + "some_datetime": None, + "some_timestamp": None, + }, + { + "id": 4, + "some_int32": None, + "some_int64": None, + "some_uint8": None, + "some_bool": None, + "some_double": 7.8, + "some_string": None, + "some_utf8": None, + "some_date": None, + "some_datetime": None, + "some_timestamp": None, + }, + { + "id": 5, + "some_int32": None, + "some_int64": None, + "some_uint8": None, + "some_bool": None, + "some_double": None, + "some_string": "ff0aff", + "some_utf8": "… «C≝⋯≅M»!", + "some_date": None, + "some_datetime": None, + "some_timestamp": None, + }, + { + "id": 6, + "some_int32": None, + "some_int64": None, + "some_uint8": None, + "some_bool": None, + "some_double": None, + "some_string": None, + "some_utf8": None, + "some_date": "2021-06-07", + "some_datetime": "2021-06-07T18:19:20Z", + "some_timestamp": "2021-06-07T18:19:20Z", + }, + { + "id": 7, + "some_int32": None, + "some_int64": None, + "some_uint8": None, + "some_bool": None, + "some_double": None, + "some_string": None, + "some_utf8": None, + "some_date": "1970-12-31", + "some_datetime": "1970-12-31T23:58:57Z", + "some_timestamp": "1970-12-31T23:58:57Z", + }, +] +TABLE_NAME = "test_table_h" + +DASHSQL_QUERY = r""" +select + id, + MAX('⋯') as some_str, + MAX(CAST('⋯' AS UTF8)) as some_utf8, + MAX(111) as some_int, + MAX(CAST(111 AS UInt8)) as some_uint8, + MAX(4398046511104) as some_int64, + MAX(18446744073709551606) as some_uint64, + MAX(1.11e-11) as some_double, + MAX(true) as some_bool, + MAX(Date('2021-06-09')) as some_date, + MAX(Datetime('2021-06-09T20:50:47Z')) as some_datetime, + MAX(Timestamp('2021-07-10T21:51:48.841512Z')) as some_timestamp, + + MAX(ListHead(ListSkip(Unicode::SplitToList(CAST(some_string AS UTF8), ''), 3))) as str_split, + MAX(ListConcat(ListReplicate(CAST(' ' AS UTF8), 5))) as num_space_by_lst, + MAX(CAST(String::LeftPad('', 5, ' ') AS UTF8)) as num_space, + MAX(Unicode::ReplaceAll(CAST(some_string AS UTF8), COALESCE(CAST('f' AS UTF8), ''), COALESCE(CAST(some_string AS UTF8), ''))) as str_replace, + MAX(Unicode::Substring(CAST(some_utf8 AS UTF8), 3, 3)) as utf8_tst, + MAX(Unicode::Substring(CAST(some_string AS UTF8), Unicode::GetLength(CAST(some_string AS UTF8)) - 3)) as str_right, + MAX(Unicode::Substring(CAST(some_utf8 AS UTF8), Unicode::GetLength(CAST(some_utf8 AS UTF8)) - 3)) as utf8_right, + MAX(Unicode::Substring(CAST(some_string AS UTF8), 0, 3)) as str_left, + MAX(Unicode::Substring(CAST(some_utf8 AS UTF8), 0, 3)) as utf8_left, + MAX(Unicode::Substring(some_utf8, CAST(String::Find(some_utf8, '≝') AS UInt64))) as utf8_find_substring_wrong, + MAX(String::StartsWith(some_utf8, '…')) as utf8_startswith_const, + MAX(String::EndsWith(some_utf8, '!')) as utf8_endswith_const, + MAX(Unicode::ToLower(CAST(some_string AS UTF8))) as str_lower, + MAX(Unicode::ToUpper(CAST(some_utf8 AS UTF8))) as utf8_upper, + MAX(Unicode::ToLower(CAST(some_utf8 AS UTF8))) as utf8_lower, + MAX(CASE WHEN some_string IS NULL THEN NULL ELSE String::Contains(some_utf8, COALESCE(some_string, '')) END) as utf8_contains_nonconst, + MIN(String::Contains(some_string, 'a')) as str_contains_const, + MIN(String::Contains(some_utf8, '!')) as utf8_contains_const, + MIN(some_string LIKE '%a%' ESCAPE '!') as str_contains_like_const, + MIN(some_utf8 LIKE '%!!%' ESCAPE '!') as utf8_contains_like_const, + MIN(some_utf8 || some_utf8) as utf8_concat, + MIN(CASE WHEN some_uint8 IS NULL THEN NULL ELSE Unicode::FromCodePointList(AsList(COALESCE(CAST(some_uint8 AS SMALLINT), 0))) END) as num_char, + MIN(ListHead(Unicode::ToCodePointList(Unicode::Substring(cast(some_utf8 as utf8), 0, 1)))) as utf8_ascii, + + MIN(some_string) as str_straight, + MIN(some_utf8) as utf8_straight, + MIN(some_uint8) as uint8_straight, + MIN(Math::Tan(some_double)) as dbl_tan, + MIN(Math::Pow(some_double, 2)) as dbl_square, + MIN(Math::Sqrt(some_double)) as dbl_sqrt, + MIN(Math::Sin(some_double)) as dbl_sin, + MIN(CASE WHEN some_double < 0 THEN -1 WHEN some_double > 0 THEN 1 ELSE 0 END) as dbl_sign, + MIN(Math::Round(some_double, -2)) as dbl_round2n, + MIN(Math::Round(some_double, 2)) as dbl_round2, + MIN(Math::Round(some_double)) as dbl_round, + MIN(CAST(some_int64 / some_uint8 AS BIGINT)) as int_int_div, + MIN(LEAST(some_uint8, some_int64)) as int_least, + MIN(GREATEST(some_uint8, some_int64)) as int_greatest, + MIN(Math::Log10(some_uint8)) as int_log10, + MIN(Math::Log(some_uint8)) as int_log, + MIN(Math::Exp(some_uint8)) as int_exp, + MIN(some_uint8 / Math::Pi() * 180.0) as int_degrees, + MAX(Math::Cos(some_double)) as dbl_cos, + MAX(Math::Floor(some_double)) as dbl_floor, + MAX(Math::Ceil(some_double)) as dbl_ceil, + MAX(some_double) as dbl_straight, + MIN(Math::Atan2(some_uint8, some_int32)) as int_atan2, + MIN(Math::Atan(some_uint8)) as int_atan, + MIN(Math::Asin(some_uint8)) as int_asin, + MIN(Math::Acos(some_uint8)) as int_acos, + MIN(COALESCE(some_uint8, some_int64)) as int_coalesce, + MIN_BY(some_int64, Math::Abs(some_int64)) as int_min_by, + MAX_BY(some_int64, Math::Abs(some_int64)) as int_max_by, + MAX(some_int64 IS NULL) as int_isnull, + COUNT(DISTINCT IF(some_int64 > -9999, some_int64, NULL)) as some_countd_if, + MAX(IF(some_bool, 1, 0)) as some_if, + MAX(some_datetime not between Datetime('2011-06-07T18:19:20Z') and Datetime('2031-06-07T18:19:20Z')) as dt_notbetween, + MAX(some_datetime between Datetime('2022-06-07T18:19:20Z') and Datetime('2031-06-07T18:19:20Z')) as dt_between_f, + MAX(some_datetime between Datetime('2011-06-07T18:19:20Z') and Datetime('2031-06-07T18:19:20Z')) as dt_between, + MAX(some_double in (1.0, NULL)) as dbl_in_null, + MIN(some_double not in (1.0, 2.2, 3579079710.351049881)) as dbl_notin, + MAX(some_double in (1.0, 2.2, 3579079710.351049881)) as dbl_in_f, + -- IN may produce unexpected result when used with nullable arguments. Consider adding 'PRAGMA AnsiInForEmptyOrNullableItemsCollections;' + MAX(some_double in (1.0, 2.2, 1579079710.351049881)) as dbl_in, + MAX(some_utf8 in ('a', 'b', 'C')) as text_in_f, + MAX(some_utf8 in ('a', 'b', '… «C≝⋯≅M»!')) as text_in, + SOME(some_bool or some_int64 is not null) as some_or, + MAX(some_bool and some_int64 is null) as some_and, + MAX(some_utf8 > some_string) as str_gt, + MAX(some_utf8 <= some_string) as str_lte, + MAX(some_double >= some_double) as dbl_gte, + MAX(some_double <= some_double) as dbl_lte, + MAX(some_double < some_double) as dbl_lt, + MAX(some_double != some_double) as dbl_neq, + MAX(some_double = some_double) as dbl_eq, + MAX(some_utf8 = some_utf8) as text_eq, + MAX(some_utf8 not like 'ы%') as text_not_like, + MAX(some_utf8 like 'ы%') as text_like_false, + MAX(some_utf8 like '%') as text_like, + MAX(some_string like 'ы%') as bytes_like_false, + MAX(some_string like '%') as bytes_like, + MAX(DateTime::ToMicroseconds( + some_datetime + - (some_datetime + DateTime::IntervalFromSeconds(12345)) + ) / 86400000000.0) as datetime_datetime_sub, + MAX(DateTime::ToDays( + some_date + - (some_date + DateTime::IntervalFromSeconds(1234567)) + )) as date_date_sub, + MAX(some_datetime - DateTime::IntervalFromMicroseconds(CAST(Math::Ceil(4.4 * 86400 * 1000000) AS INTEGER))) as datetime_sub, + MAX(some_datetime + DateTime::IntervalFromMicroseconds(CAST(4.4 * 86400 * 1000000 AS INTEGER))) as datetime_add, + MAX(some_date - DateTime::IntervalFromDays(CAST(Math::Ceil(4.4) AS INTEGER))) as date_subtract, + MAX(some_date + DateTime::IntervalFromDays(CAST(4.4 AS INTEGER))) as date_add, + MAX(some_int64 - some_uint8) as int_subtract, + MAX(some_int64 + some_uint8) as int_add, + MAX(some_double % (some_double / 3.456)) as dbl_mod, + MAX(some_int64 % some_uint8) as int_mod, + MAX(CAST(some_int64 / 10000 AS DOUBLE) / some_uint8) as int_div, + MIN(some_int64 * some_double) as num_mult, + MIN(Math::Pow(some_int64, 2)) as num_pow, + MAX(some_bool = FALSE) as bool_isfalse, + MAX(some_bool = TRUE) as bool_istrue, + MIN(some_int64 != 0.0) as num_istrue, + MIN(-some_int64) as num_neg, + MIN(some_utf8 = '') as text_not, + MIN(some_string = '') as bytes_not, + MIN(some_uint8 = 0) as num_not, + MIN(NOT some_bool) as bool_not, + + MIN(DateTime::GetWeekOfYearIso8601(some_datetime)) as datetime_yearweek, + MIN(DateTime::GetDayOfWeek(some_datetime)) as datetime_weekday, -- 1 .. 7 + MIN(DateTime::GetYear(some_datetime)) as datetime_year, + MIN(CAST((DateTime::GetMonth(some_datetime) + 2) / 3 AS INTEGER)) as datetime_quarter, + MIN(DateTime::GetMonth(some_datetime)) as datetime_month, + MIN(DateTime::GetDayOfMonth(some_datetime)) as datetime_day, + MIN(DateTime::GetHour(some_datetime)) as datetime_hour, + MIN(DateTime::GetMinute(some_datetime)) as datetime_minute, + MIN(DateTime::GetSecond(some_datetime)) as datetime_second, + + MIN(DateTime::MakeDatetime(DateTime::StartOfWeek(some_datetime))) as datetime_startofweek, + MIN(DateTime::MakeDatetime(DateTime::StartOfMonth(some_datetime))) as datetime_startofmonth, + MIN(DateTime::MakeDatetime(DateTime::StartOfQuarter(some_datetime))) as datetime_startofquarter, + MIN(DateTime::MakeDatetime(DateTime::StartOfYear(some_datetime))) as datetime_startofyear, + + MIN(DateTime::MakeDate(DateTime::StartOfWeek(some_date))) as date_startofweek, + MIN(DateTime::MakeDate(DateTime::StartOfMonth(some_date))) as date_startofmonth, + MIN(DateTime::MakeDate(DateTime::StartOfQuarter(some_date))) as date_startofquarter, + MIN(DateTime::MakeDate(DateTime::StartOfYear(some_date))) as date_startofyear, + + MIN(DateTime::MakeDate(DateTime::ShiftYears(some_date, coalesce(some_uint8, 0)))) as date_shiftyears, + MIN('[' || CAST(some_double AS UTF8) || ',' || CAST(37.622504 AS UTF8) || ']') as tst_geopoint, + MIN(CAST(CAST('2008e1c9-44a6-4fac-a61d-e42675b77309' AS UUID) AS UTF8)) as text_from_uuid, + -- SOME(CAST('2008e1c9-44a6-4fac-a61d-e42675b77309' AS UUID)) as some_uuid, + MIN(DateTime::Format('%Y-%m-%d %H:%M:%S')(some_datetime)) as text_from_datetime_proper, + MIN(CAST(some_datetime AS UTF8)) as text_from_datetime, + MIN(CAST(some_date AS UTF8)) as text_from_date, + MIN(CASE WHEN true IS NULL THEN NULL WHEN true = true THEN 'True' ELSE 'False' END) as text_from_bool, + MIN(CAST(ToBytes(Yson::SerializePretty(Yson::From(some_string))) AS UTF8)) as text_from_stuff, + MIN(CASE WHEN some_datetime IS NULL THEN NULL ELSE true END) as bool_from_datetime, + MIN(CAST(true as BIGINT)) as int_from_bool, + MIN(CAST(some_double AS UTF8)) as text_from_double, -- XXXXXXXXXX: 1579079710.35105 -> "1579079710" + DateTime::MakeDatetime(DateTime::ParseIso8601('2021-06-01 18:00:59')) as datetime_from_str, + MIN(CAST(NULL AS BOOL)) as null_boolean, + MIN(CAST(NULL AS BIGINT)) as null_bigint, + MIN(CAST(NULL AS DATETIME)) as null_datetime, + MIN(CAST(NULL AS DATE)) as null_date, + MIN(Math::Abs(-1 * some_double)) as dbl_abs, + String::JoinFromList(ListMap(TOPFREQ(some_datetime, 5), ($x) -> {{ RETURN cast($x.Value as Utf8); }}), ', ') as top_concat, + String::JoinFromList(ListSortAsc(AGGREGATE_LIST_DISTINCT(cast(some_date as Utf8))), ', ') as date_all_concat, + SOME(some_string) as bytes_some, + MEDIAN(some_int64) as int_median, + PERCENTILE(some_int64, 0.8) as int_percentile, + VARPOP(some_int64) as int_varpop, + VARSAMP(some_int64) as int_varsamp, + STDDEVPOP(some_int64) as int_stddevpop, + STDDEVSAMP(some_int64) as int_stddevsamp, + CountDistinctEstimate(some_double) as dbl_count_distinct_approx, + COUNT(DISTINCT some_double) as dbl_count_distinct, + COUNT_IF(some_double < 0) as dbl_count_if_empty, + COUNT_IF(some_double > 0) as dbl_count_if, + COUNT(1) as cnt, + MIN(some_date) as date_min, + MAX(some_datetime) as datetime_max, + CAST(CAST(AVG(CAST(some_datetime as DOUBLE)) AS BIGINT) AS DATETIME) as datetime_avg, + -- date -> dt -> bigint -> avg (-> double) -> bigint -> datetime -> date + CAST(CAST(CAST( + AVG( + CAST(CAST(some_date AS DATETIME) AS BIGINT) + ) + AS BIGINT) AS DATETIME) AS DATE) as date_avg, + AVG_IF(some_int64, some_int64 > -1) as int_avg_if, + AVG(some_int64) as int_avg, + SUM_IF(some_int64, some_int64 > 10) as int_sum_if, + SUM(some_int64) as int_sum, + +from `{table_name}` +group by id +order by id +limit 1000 +""" + +API_TEST_CONFIG = ApiTestEnvironmentConfiguration( + api_connector_ep_names=["ydb"], + core_test_config=CORE_TEST_CONFIG, + ext_query_executer_secret_key="_some_test_secret_key_", +) diff --git a/lib/dl_connector_ydb/dl_connector_ydb_tests/db/conftest.py b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/conftest.py new file mode 100644 index 000000000..787557269 --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/conftest.py @@ -0,0 +1,17 @@ +from dl_api_lib_testing.initialization import initialize_api_lib_test +from dl_formula_testing.forced_literal import forced_literal_use + +from dl_connector_ydb_tests.db.config import API_TEST_CONFIG + + +pytest_plugins = ("aiohttp.pytest_plugin",) # and it, in turn, includes 'pytest_asyncio.plugin' + + +def pytest_configure(config): # noqa + initialize_api_lib_test(pytest_config=config, api_test_config=API_TEST_CONFIG) + + +__all__ = ( + # auto-use fixtures: + "forced_literal_use", +) diff --git a/lib/dl_connector_ydb/dl_connector_ydb_tests/db/formula/__init__.py b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/formula/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/lib/dl_connector_ydb/dl_connector_ydb_tests/db/formula/base.py b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/formula/base.py new file mode 100644 index 000000000..f96e81739 --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/formula/base.py @@ -0,0 +1,41 @@ +import pytest +import sqlalchemy as sa + +from dl_formula_testing.database import ( + Db, + FormulaDbDispenser, +) +from dl_formula_testing.testcases.base import FormulaConnectorTestBase + +from dl_connector_ydb.formula.constants import YqlDialect as D +from dl_connector_ydb_tests.db.config import DB_CONFIGURATIONS + + +class YqlDbDispenser(FormulaDbDispenser): + def ensure_db_is_up(self, db: Db) -> tuple[bool, str]: + # first, check that the db is up + status, msg = super().ensure_db_is_up(db) + if not status: + return status, msg + + test_table = db.table_from_columns([sa.Column(name="col1", type_=sa.Integer())]) + + # secondly, try to create a test table: for some reason + # it could be that YDB is up but you still can't do it + try: + db.create_table(test_table) + db.drop_table(test_table) + return True, "" + except Exception as exc: + return False, str(exc) + + +class YQLTestBase(FormulaConnectorTestBase): + dialect = D.YDB + supports_arrays = False + supports_uuid = True + db_dispenser = YqlDbDispenser() + + @pytest.fixture(scope="class") + def db_url(self) -> str: + return DB_CONFIGURATIONS[self.dialect] diff --git a/lib/dl_connector_ydb/dl_connector_ydb_tests/db/formula/test_conditional_blocks.py b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/formula/test_conditional_blocks.py new file mode 100644 index 000000000..226f11ef1 --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/formula/test_conditional_blocks.py @@ -0,0 +1,7 @@ +from dl_formula_testing.testcases.conditional_blocks import DefaultConditionalBlockFormulaConnectorTestSuite + +from dl_connector_ydb_tests.db.formula.base import YQLTestBase + + +class TestConditionalBlockYQL(YQLTestBase, DefaultConditionalBlockFormulaConnectorTestSuite): + pass diff --git a/lib/dl_connector_ydb/dl_connector_ydb_tests/db/formula/test_functions_aggregation.py b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/formula/test_functions_aggregation.py new file mode 100644 index 000000000..7a77489e2 --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/formula/test_functions_aggregation.py @@ -0,0 +1,7 @@ +from dl_formula_testing.testcases.functions_aggregation import DefaultMainAggFunctionFormulaConnectorTestSuite + +from dl_connector_ydb_tests.db.formula.base import YQLTestBase + + +class TestMainAggFunctionYQL(YQLTestBase, DefaultMainAggFunctionFormulaConnectorTestSuite): + pass diff --git a/lib/dl_connector_ydb/dl_connector_ydb_tests/db/formula/test_functions_datetime.py b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/formula/test_functions_datetime.py new file mode 100644 index 000000000..c820bc77f --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/formula/test_functions_datetime.py @@ -0,0 +1,7 @@ +from dl_formula_testing.testcases.functions_datetime import DefaultDateTimeFunctionFormulaConnectorTestSuite + +from dl_connector_ydb_tests.db.formula.base import YQLTestBase + + +class TestDateTimeFunctionYQL(YQLTestBase, DefaultDateTimeFunctionFormulaConnectorTestSuite): + supports_datepart_2_non_const = False diff --git a/lib/dl_connector_ydb/dl_connector_ydb_tests/db/formula/test_functions_logical.py b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/formula/test_functions_logical.py new file mode 100644 index 000000000..3d7c14727 --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/formula/test_functions_logical.py @@ -0,0 +1,7 @@ +from dl_formula_testing.testcases.functions_logical import DefaultLogicalFunctionFormulaConnectorTestSuite + +from dl_connector_ydb_tests.db.formula.base import YQLTestBase + + +class TestLogicalFunctionYQL(YQLTestBase, DefaultLogicalFunctionFormulaConnectorTestSuite): + supports_iif = True diff --git a/lib/dl_connector_ydb/dl_connector_ydb_tests/db/formula/test_functions_markup.py b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/formula/test_functions_markup.py new file mode 100644 index 000000000..9d7708a29 --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/formula/test_functions_markup.py @@ -0,0 +1,7 @@ +from dl_formula_testing.testcases.functions_markup import DefaultMarkupFunctionFormulaConnectorTestSuite + +from dl_connector_ydb_tests.db.formula.base import YQLTestBase + + +class TestMarkupFunctionYQL(YQLTestBase, DefaultMarkupFunctionFormulaConnectorTestSuite): + pass diff --git a/lib/dl_connector_ydb/dl_connector_ydb_tests/db/formula/test_functions_math.py b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/formula/test_functions_math.py new file mode 100644 index 000000000..799e57200 --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/formula/test_functions_math.py @@ -0,0 +1,7 @@ +from dl_formula_testing.testcases.functions_math import DefaultMathFunctionFormulaConnectorTestSuite + +from dl_connector_ydb_tests.db.formula.base import YQLTestBase + + +class TestMathFunctionYQL(YQLTestBase, DefaultMathFunctionFormulaConnectorTestSuite): + pass diff --git a/lib/dl_connector_ydb/dl_connector_ydb_tests/db/formula/test_functions_string.py b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/formula/test_functions_string.py new file mode 100644 index 000000000..ceee2622e --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/formula/test_functions_string.py @@ -0,0 +1,13 @@ +from dl_formula_testing.testcases.functions_string import DefaultStringFunctionFormulaConnectorTestSuite + +from dl_connector_ydb_tests.db.formula.base import YQLTestBase + + +class TestStringFunctionYQL(YQLTestBase, DefaultStringFunctionFormulaConnectorTestSuite): + datetime_str_separator = "T" + datetime_str_ending = "Z" + supports_trimming_funcs = False + supports_regex_extract = False + supports_regex_extract_nth = False + supports_regex_replace = False + supports_regex_match = False diff --git a/lib/dl_connector_ydb/dl_connector_ydb_tests/db/formula/test_functions_type_conversion.py b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/formula/test_functions_type_conversion.py new file mode 100644 index 000000000..7b7360f5e --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/formula/test_functions_type_conversion.py @@ -0,0 +1,77 @@ +from dl_formula_testing.evaluator import DbEvaluator +from dl_formula_testing.testcases.functions_type_conversion import ( + DefaultBoolTypeFunctionFormulaConnectorTestSuite, + DefaultDateTypeFunctionFormulaConnectorTestSuite, + DefaultFloatTypeFunctionFormulaConnectorTestSuite, + DefaultGenericDatetimeTypeFunctionFormulaConnectorTestSuite, + DefaultGeopointTypeFunctionFormulaConnectorTestSuite, + DefaultGeopolygonTypeFunctionFormulaConnectorTestSuite, + DefaultIntTypeFunctionFormulaConnectorTestSuite, + DefaultStrTypeFunctionFormulaConnectorTestSuite, +) +from dl_formula_testing.util import to_str + +from dl_connector_ydb_tests.db.formula.base import YQLTestBase + + +# STR + + +class TestStrTypeFunctionYQL(YQLTestBase, DefaultStrTypeFunctionFormulaConnectorTestSuite): + zero_float_to_str_value = "0" + skip_custom_tz = True + + def test_str_from_datetime(self, dbe: DbEvaluator) -> None: + assert to_str(dbe.eval("STR(#2019-01-02 03:04:05#)")) == "2019-01-02T03:04:05Z" + + +# FLOAT + + +class TestFloatTypeFunctionYQL(YQLTestBase, DefaultFloatTypeFunctionFormulaConnectorTestSuite): + pass + + +# BOOL + + +class TestBoolTypeFunctionYQL(YQLTestBase, DefaultBoolTypeFunctionFormulaConnectorTestSuite): + pass + + +# INT + + +class TestIntTypeFunctionYQL(YQLTestBase, DefaultIntTypeFunctionFormulaConnectorTestSuite): + pass + + +# DATE + + +class TestDateTypeFunctionYQL(YQLTestBase, DefaultDateTypeFunctionFormulaConnectorTestSuite): + pass + + +# GENERICDATETIME (& DATETIME) + + +class TestGenericDatetimeTypeFunctionYQL( + YQLTestBase, + DefaultGenericDatetimeTypeFunctionFormulaConnectorTestSuite, +): + pass + + +# GEOPOINT + + +class TestGeopointTypeFunctionYQL(YQLTestBase, DefaultGeopointTypeFunctionFormulaConnectorTestSuite): + pass + + +# GEOPOLYGON + + +class TestGeopolygonTypeFunctionYQL(YQLTestBase, DefaultGeopolygonTypeFunctionFormulaConnectorTestSuite): + pass diff --git a/lib/dl_connector_ydb/dl_connector_ydb_tests/db/formula/test_literals.py b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/formula/test_literals.py new file mode 100644 index 000000000..e0af98271 --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/formula/test_literals.py @@ -0,0 +1,9 @@ +from dl_formula_testing.testcases.literals import DefaultLiteralFormulaConnectorTestSuite + +from dl_connector_ydb_tests.db.formula.base import YQLTestBase + + +class TestConditionalBlockYQL(YQLTestBase, DefaultLiteralFormulaConnectorTestSuite): + supports_microseconds = False + supports_utc = False + supports_custom_tz = False diff --git a/lib/dl_connector_ydb/dl_connector_ydb_tests/db/formula/test_misc_funcs.py b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/formula/test_misc_funcs.py new file mode 100644 index 000000000..9ec771371 --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/formula/test_misc_funcs.py @@ -0,0 +1,7 @@ +from dl_formula_testing.testcases.misc_funcs import DefaultMiscFunctionalityConnectorTestSuite + +from dl_connector_ydb_tests.db.formula.base import YQLTestBase + + +class TestMiscFunctionalityYQL(YQLTestBase, DefaultMiscFunctionalityConnectorTestSuite): + pass diff --git a/lib/dl_connector_ydb/dl_connector_ydb_tests/db/formula/test_operators.py b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/formula/test_operators.py new file mode 100644 index 000000000..3410b710b --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/formula/test_operators.py @@ -0,0 +1,23 @@ +import sqlalchemy as sa + +from dl_formula_testing.evaluator import DbEvaluator +from dl_formula_testing.testcases.operators import DefaultOperatorFormulaConnectorTestSuite + +from dl_connector_ydb_tests.db.formula.base import YQLTestBase + + +class TestOperatorYQL(YQLTestBase, DefaultOperatorFormulaConnectorTestSuite): + subtraction_round_dt = False + supports_string_int_multiplication = False + + def test_subtraction_unsigned_ints(self, dbe: DbEvaluator) -> None: + assert dbe.eval("SECOND(#2019-01-23 15:07:47#) - SECOND(#2019-01-23 15:07:48#)") == -1 + + def test_in_date(self, dbe: DbEvaluator, data_table: sa.Table) -> None: + # YDB doesn't allow ordering by columns not from SELECT clause, so use WHERE instead + assert dbe.eval( + "[date_value] in (#2014-10-05#)", where="IF([date_value] = #2014-10-05#, TRUE, FALSE)", from_=data_table + ) + assert dbe.eval( + "[date_value] not in (#2014-10-05#)", where="IF([date_value] = #2014-10-06#, TRUE, FALSE)", from_=data_table + ) diff --git a/lib/dl_connector_ydb/dl_connector_ydb_tests/unit/__init__.py b/lib/dl_connector_ydb/dl_connector_ydb_tests/unit/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/lib/dl_connector_ydb/dl_connector_ydb_tests/unit/conftest.py b/lib/dl_connector_ydb/dl_connector_ydb_tests/unit/conftest.py new file mode 100644 index 000000000..e69de29bb diff --git a/lib/dl_connector_ydb/dl_connector_ydb_tests/unit/test_connection_form.py b/lib/dl_connector_ydb/dl_connector_ydb_tests/unit/test_connection_form.py new file mode 100644 index 000000000..df0055f19 --- /dev/null +++ b/lib/dl_connector_ydb/dl_connector_ydb_tests/unit/test_connection_form.py @@ -0,0 +1,20 @@ +from typing import Optional + +import pytest + +from dl_api_connector.i18n.localizer import CONFIGS as DL_API_CONNECTOR_CONFIGS +from dl_api_lib_testing.connection_form_base import ConnectionFormTestBase +from dl_configs.connectors_settings import ConnectorSettingsBase + +from dl_connector_ydb.api.ydb.connection_form.form_config import YDBConnectionFormFactory +from dl_connector_ydb.api.ydb.i18n.localizer import CONFIGS as DL_CONNECTOR_YDB_CONFIGS +from dl_connector_ydb.core.ydb.settings import YDBConnectorSettings + + +class TestYDBConnectionForm(ConnectionFormTestBase): + CONN_FORM_FACTORY_CLS = YDBConnectionFormFactory + TRANSLATION_CONFIGS = DL_API_CONNECTOR_CONFIGS + DL_CONNECTOR_YDB_CONFIGS + + @pytest.fixture + def connectors_settings(self) -> Optional[ConnectorSettingsBase]: + return YDBConnectorSettings() diff --git a/lib/dl_connector_ydb/docker-compose.yml b/lib/dl_connector_ydb/docker-compose.yml new file mode 100644 index 000000000..7535877f3 --- /dev/null +++ b/lib/dl_connector_ydb/docker-compose.yml @@ -0,0 +1,39 @@ +version: '3.7' + +x-constants: + US_MASTER_TOKEN: &c-us-master-token "AC1ofiek8coB" + +services: + db-ydb: + image: "cr.yandex/yc/yandex-docker-local-ydb:latest" + environment: + YDB_LOCAL_SURVIVE_RESTART: "true" + GRPC_PORT: "51900" + # hostname: "localhost" # you might want to uncomment this for local testing + ports: + - "51900:51900" + + # INFRA + pg-us: + build: + context: ../testenv-common/images + dockerfile: Dockerfile.pg-us + environment: + POSTGRES_DB: us-db-ci_purgeable + POSTGRES_USER: us + POSTGRES_PASSWORD: us + ports: + - "51910:5432" + + us: + build: + context: ../testenv-common/images + dockerfile: Dockerfile.us + depends_on: + - pg-us + environment: + POSTGRES_DSN_LIST: "postgres://us:us@pg-us:5432/us-db-ci_purgeable" + AUTH_POLICY: "required" + MASTER_TOKEN: *c-us-master-token + ports: + - "51911:80" diff --git a/lib/dl_connector_ydb/pyproject.toml b/lib/dl_connector_ydb/pyproject.toml new file mode 100644 index 000000000..b1bff4720 --- /dev/null +++ b/lib/dl_connector_ydb/pyproject.toml @@ -0,0 +1,86 @@ + +[tool.poetry] +name = "datalens-connector-ydb" +version = "0.0.1" +description = "" +authors = ["DataLens Team "] +packages = [{include = "dl_connector_ydb"}] +license = "Apache 2.0" +readme = "README.md" + + +[tool.poetry.dependencies] +attrs = ">=22.2.0" +grpcio = ">=1.45.0rc1" +marshmallow = ">=3.19.0" +python = ">=3.10, <3.12" +sqlalchemy = ">=1.4.46, <2.0" +ydb = ">=3.5.1" +datalens-api-commons = {path = "../dl_api_commons"} +datalens-api-connector = {path = "../dl_api_connector"} +datalens-configs = {path = "../dl_configs"} +datalens-constants = {path = "../dl_constants"} +datalens-core = {path = "../dl_core"} +datalens-core-testing = {path = "../dl_core_testing"} +datalens-db-testing = {path = "../dl_db_testing"} +datalens-formula = {path = "../dl_formula"} +datalens-formula-ref = {path = "../dl_formula_ref"} +datalens-i18n = {path = "../dl_i18n"} + +[tool.poetry.plugins] +[tool.poetry.plugins."dl_api_lib.connectors"] +ydb = "dl_connector_ydb.api.ydb.connector:YDBApiConnector" + +[tool.poetry.plugins."dl_core.connectors"] +ydb = "dl_connector_ydb.core.ydb.connector:YDBCoreConnector" + +[tool.poetry.plugins."dl_db_testing.connectors"] +yql = "dl_connector_ydb.db_testing.connector:YQLDbTestingConnector" + +[tool.poetry.plugins."dl_formula.connectors"] +yql = "dl_connector_ydb.formula.connector:YQLFormulaConnector" + +[tool.poetry.plugins."dl_formula_ref.plugins"] +yql = "dl_connector_ydb.formula_ref.plugin:YQLFormulaRefPlugin" + +[tool.poetry.group.tests.dependencies] +pytest = ">=7.2.2" +datalens-formula-testing = {path = "../dl_formula_testing"} + +[build-system] +build-backend = "poetry.core.masonry.api" +requires = [ + "poetry-core", +] + +[tool.pytest.ini_options] +minversion = "6.0" +addopts = "-ra" +testpaths = [] + + + +[datalens.pytest.db] +root_dir = "dl_connector_ydb_tests/" +target_path = "db" +compose_file_base = "docker-compose" + +[datalens.pytest.unit] +root_dir = "dl_connector_ydb_tests/" +target_path = "unit" +skip_compose = "true" + +[tool.mypy] +warn_unused_configs = true +disallow_untyped_defs = true +check_untyped_defs = true +strict_optional = true + +[datalens.i18n.domains] +dl_connector_ydb = [ + {path = "dl_connector_ydb/api"}, + {path = "dl_connector_ydb/core"}, +] +dl_formula_ref_dl_connector_ydb = [ + {path = "dl_connector_ydb/formula_ref"}, +] diff --git a/metapkg/poetry.lock b/metapkg/poetry.lock index b86b70784..3a7547e6b 100644 --- a/metapkg/poetry.lock +++ b/metapkg/poetry.lock @@ -1636,6 +1636,36 @@ sqlalchemy = ">=1.4.46, <2.0" type = "directory" url = "../lib/dl_connector_snowflake" +[[package]] +name = "datalens-connector-ydb" +version = "0.0.1" +description = "" +optional = false +python-versions = ">=3.10, <3.12" +files = [] +develop = false + +[package.dependencies] +attrs = ">=22.2.0" +datalens-api-commons = {path = "../dl_api_commons"} +datalens-api-connector = {path = "../dl_api_connector"} +datalens-configs = {path = "../dl_configs"} +datalens-constants = {path = "../dl_constants"} +datalens-core = {path = "../dl_core"} +datalens-core-testing = {path = "../dl_core_testing"} +datalens-db-testing = {path = "../dl_db_testing"} +datalens-formula = {path = "../dl_formula"} +datalens-formula-ref = {path = "../dl_formula_ref"} +datalens-i18n = {path = "../dl_i18n"} +grpcio = ">=1.45.0rc1" +marshmallow = ">=3.19.0" +sqlalchemy = ">=1.4.46, <2.0" +ydb = ">=3.5.1" + +[package.source] +type = "directory" +url = "../lib/dl_connector_ydb" + [[package]] name = "datalens-constants" version = "0.0.1" @@ -6344,6 +6374,26 @@ files = [ idna = ">=2.0" multidict = ">=4.0" +[[package]] +name = "ydb" +version = "3.5.2" +description = "YDB Python SDK" +optional = false +python-versions = "*" +files = [ + {file = "ydb-3.5.2-py2.py3-none-any.whl", hash = "sha256:0ef3ae929c8267e18ce56a6a8979b3712a953fe2254e12909aa422b89637902a"}, + {file = "ydb-3.5.2.tar.gz", hash = "sha256:7e698843468a81976e5dd8b3d4324e02bf41a763a18eab2648d4401c95481c67"}, +] + +[package.dependencies] +aiohttp = "<4" +grpcio = ">=1.42.0" +packaging = "*" +protobuf = ">=3.13.0,<5.0.0" + +[package.extras] +yc = ["yandexcloud"] + [[package]] name = "zipp" version = "3.17.0" @@ -6362,4 +6412,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = ">=3.10, <3.12" -content-hash = "3f5b4176287c0aa8efe9c53345031cba496ae1f93dd6d93c2ce5aef496b5240f" +content-hash = "2dc00c341f5ceda1eff986f55a7448dc6f236ff4ec02efc16a1be9991351fafd" diff --git a/metapkg/pyproject.toml b/metapkg/pyproject.toml index b3f92b982..cfd68cba8 100644 --- a/metapkg/pyproject.toml +++ b/metapkg/pyproject.toml @@ -148,6 +148,7 @@ datalens-maintenance = {path = "../lib/dl_maintenance"} datalens-connector-mssql = {path = "../lib/dl_connector_mssql"} datalens-attrs-model-mapper = {path = "../lib/dl_attrs_model_mapper"} datalens-attrs-model-mapper-doc-tools = {path = "../lib/dl_attrs_model_mapper_doc_tools"} +datalens-connector-ydb = {path = "../lib/dl_connector_ydb"} [tool.poetry.group.dev.dependencies] black = "==23.3.0"