From 841745f944fa54c06118c7895598237aebae2bc3 Mon Sep 17 00:00:00 2001
From: ylamgarchal
Date: Mon, 1 Jul 2024 10:42:56 +0200
Subject: [PATCH] add new dsl to query elasticsearch

Change-Id: I5a4a748c57a9fcc23d8e9681db0d7befd313bea7
---
 dci/analytics/query_es_dsl.py        | 236 ++++++++++++++++
 dci/api/v1/analytics.py              |  61 +++++
 tests/analytics/test_query_es_dsl.py | 384 +++++++++++++++++++++++++++
 3 files changed, 681 insertions(+)
 create mode 100644 dci/analytics/query_es_dsl.py
 create mode 100644 tests/analytics/test_query_es_dsl.py

diff --git a/dci/analytics/query_es_dsl.py b/dci/analytics/query_es_dsl.py
new file mode 100644
index 000000000..13d306f6e
--- /dev/null
+++ b/dci/analytics/query_es_dsl.py
@@ -0,0 +1,236 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2023 Red Hat, Inc
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+"""DSL used to query Elasticsearch.
+
+A query is a comparison (``f1=v1``), a membership test
+(``name not_in [a, b]``) or a combination of parenthesised queries joined
+with ``and`` / ``or``, for instance ``((f1=v1) and (f2=v2)) or (f3=v3)``.
+"""
+
+import pyparsing as pp
+
+_field = pp.Word(pp.alphanums + "_" + ".")
+_value = pp.Word(pp.alphanums + "_" + "-" + "%" + "." + ":")
+_word = pp.Word(pp.alphanums + "_" + "-" + "." + " ")
+_comma = pp.Suppress(pp.Literal(","))
+_lp = pp.Suppress(pp.Literal("("))
+_rp = pp.Suppress(pp.Literal(")"))
+
+_lb = pp.Suppress(pp.Literal("["))
+_rb = pp.Suppress(pp.Literal("]"))
+
+_comma_string = _comma + _word
+_list = _lb + _word + pp.ZeroOrMore(_comma_string) + _rb
+
+_comparison_operators = ["=", "!=", "<=", "<", ">=", ">"]
+_comparison_operators = pp.oneOf(" ".join(_comparison_operators))
+_comparison = _field + _comparison_operators + _value
+
+_membership_operators = {"not_in"}
+_membership_operators = pp.oneOf(" ".join(_membership_operators))
+_membership_operation = _field + _membership_operators + pp.Group(_list)
+
+_logical_operators = {"and", "or"}
+_logical_operators = pp.oneOf(" ".join(_logical_operators))
+_logical_operation = (
+    pp.Group(_lp + (_comparison | _membership_operation) + _rp)
+    + _logical_operators
+    + pp.Group(_lp + (_comparison | _membership_operation) + _rp)
+    | _lp + (_comparison | _membership_operation) + _rp
+    | (_comparison | _membership_operation)
+)
+
+query = pp.Forward()
+query << (
+    (_lp + pp.Group(query) + _rp + pp.ZeroOrMore(_logical_operators + query))
+    | _logical_operation
+)
+
+
+def parse(q):
+    """Parse the DSL string `q` into nested lists of tokens.
+
+    Raises pyparsing.ParseException when `q` does not match the grammar,
+    including when only a prefix of `q` is a valid query.
+    """
+    # parseAll=True: reject trailing input instead of silently ignoring it.
+    return query.parseString(q, parseAll=True).asList()
+
+
+def _generate_from_comparison_operators(parsed_query, handle_nested=False):
+    """Build the Elasticsearch clause of a `[field, operator, value]` triple.
+
+    When `handle_nested` is true and the field name contains a dot, the
+    clause is wrapped in a "nested" query whose path is the part of the
+    field name before the first dot.
+
+    Raises ValueError for operators that have no translation yet.
+    """
+    operand_1 = parsed_query[0]
+    operator = parsed_query[1]
+    operand_2 = parsed_query[2]
+
+    if operator == "=":
+        clause = {"term": {operand_1: operand_2}}
+    elif operator == "not_in":
+        # "must_not" is an occurrence type of the "bool" query, not a query
+        # of its own: it has to be wrapped in "bool".
+        clause = {"bool": {"must_not": {"terms": {operand_1: operand_2}}}}
+    else:
+        # Do not fall through and return None: it would end up as a null
+        # clause in the generated query.
+        raise ValueError("unsupported operator: %s" % operator)
+
+    if handle_nested and "." in operand_1:
+        return {"nested": {"path": operand_1.split(".")[0], "query": clause}}
+    return clause
+
+
+def _split_on_or(parsed_query):
+    """Split `parsed_query` around its first "or" token.
+
+    Returns a `(before, after)` tuple of lists; `after` is empty when there
+    is no "or" token.
+    """
+    if "or" not in parsed_query:
+        return list(parsed_query), []
+    index = parsed_query.index("or")
+    return parsed_query[:index], parsed_query[index + 1 :]
+
+
+def _get_logical_operands(parsed_query):
+    """Return the items of `parsed_query` that are not "and"/"or" tokens."""
+    return [q for q in parsed_query if q != "or" and q != "and"]
+
+
+def _is_nested_query(operands_1, operands_2=None):
+    """Return the nested path shared by the operands, or None.
+
+    Only the first item of `operands_1` is inspected: when it is a comparison
+    whose field name contains a dot, the part before the first dot is the
+    path.
+    """
+    # TODO: `operands_2` is not checked yet, operands that are not under the
+    # same path are therefore not detected.
+    path = None
+    if (
+        isinstance(operands_1, list)
+        and operands_1
+        and isinstance(operands_1[0], list)
+        and operands_1[0]
+        and isinstance(operands_1[0][0], str)
+        and "." in operands_1[0][0]
+    ):
+        path = operands_1[0][0].split(".")[0]
+    return path
+
+
+def _generate_es_query(parsed_query, handle_nested=True):
+    """Recursively translate a parsed query into an Elasticsearch query.
+
+    A single comparison is delegated to _generate_from_comparison_operators.
+    Operands around an "or" become a bool "should", otherwise the operands
+    become a bool "filter". When _is_nested_query returns a path, the bool
+    query is wrapped in a single "nested" query on that path.
+    """
+    if (
+        isinstance(parsed_query, list)
+        and len(parsed_query) <= 3
+        and isinstance(parsed_query[0], str)
+    ):
+        return _generate_from_comparison_operators(parsed_query, handle_nested)
+    elif (
+        isinstance(parsed_query[0], list)
+        and len(parsed_query) == 1
+        and isinstance(parsed_query[0][0], str)
+    ):
+        return _generate_from_comparison_operators(parsed_query[0], handle_nested)
+    else:
+        if "or" in parsed_query:
+            left_operands, right_operands = _split_on_or(parsed_query)
+            if (
+                isinstance(left_operands, list)
+                and isinstance(left_operands[0], list)
+                and len(left_operands) == 1
+            ):
+                left_operands = left_operands[0]
+            if (
+                isinstance(right_operands, list)
+                and isinstance(right_operands[0], list)
+                and len(right_operands) == 1
+            ):
+                right_operands = right_operands[0]
+            path = _is_nested_query(left_operands, right_operands)
+            if path:
+                return {
+                    "nested": {
+                        "path": path,
+                        "query": {
+                            "bool": {
+                                "should": [
+                                    _generate_es_query(
+                                        left_operands, handle_nested=False
+                                    ),
+                                    _generate_es_query(
+                                        right_operands, handle_nested=False
+                                    ),
+                                ]
+                            }
+                        },
+                    }
+                }
+            else:
+                return {
+                    "bool": {
+                        "should": [
+                            _generate_es_query(left_operands, handle_nested=False),
+                            _generate_es_query(right_operands, handle_nested=False),
+                        ]
+                    }
+                }
+        else:
+            operands = _get_logical_operands(parsed_query)
+            path = _is_nested_query(operands)
+            if path:
+                return {
+                    "nested": {
+                        "path": path,
+                        "query": {
+                            "bool": {
+                                "filter": [
+                                    _generate_es_query(o, handle_nested=False)
+                                    for o in operands
+                                ]
+                            }
+                        },
+                    }
+                }
+            else:
+                return {"bool": {"filter": [_generate_es_query(o) for o in operands]}}
+
+
+def build(query):
+    """Return the Elasticsearch request body for the DSL string `query`.
+
+    Raises ValueError when `query` cannot be parsed or uses an operator that
+    is not supported yet.
+    """
+    try:
+        parsed_query = parse(query)
+    except pp.ParseBaseException as e:
+        raise ValueError("invalid query: %s" % str(e))
+    return {"query": _generate_es_query(parsed_query)}
diff --git a/dci/api/v1/analytics.py b/dci/api/v1/analytics.py
index f8cd68dd7..eda8a7a6f 100644
--- a/dci/api/v1/analytics.py
+++ b/dci/api/v1/analytics.py
@@ -21,6 +21,7 @@
 from requests.exceptions import ConnectionError
 import uuid
 
+from dci.analytics import query_es_dsl as qed
 from dci.api.v1 import api
 from dci.api.v1 import base
 from dci.api.v1 import export_control
@@ -253,6 +254,66 @@ def tasks_jobs(user):
     if user.is_not_super_admin() and user.is_not_epm():
         raise dci_exc.Unauthorized()
 
+    payload = flask.request.json
+    # Answer malformed requests with a 400 instead of an unhandled error.
+    if not isinstance(payload, dict) or "query" not in payload:
+        return flask.Response(
+            json.dumps({"error": "missing 'query' in the json payload"}),
+            400,
+            content_type="application/json",
+        )
+
+    try:
+        es_query = qed.build(payload["query"])
+    except ValueError as e:
+        return flask.Response(
+            json.dumps({"error": str(e)}),
+            400,
+            content_type="application/json",
+        )
+    es_query["sort"] = [
+        {"created_at": {"order": "desc", "format": "strict_date_optional_time"}}
+    ]
+
+    try:
+        res = requests.get(
+            "%s/analytics/jobs" % (CONFIG["ANALYTICS_URL"]),
+            headers={"Content-Type": "application/json"},
+            json=es_query,
+        )
+
+        if res.status_code == 200:
+            # Only decode the body on success, an error body may not be JSON.
+            res_json = res.json()
+            res_json["generated_query"] = es_query
+            return flask.jsonify(res_json)
+        else:
+            logger.error("analytics error: %s" % str(res.text))
+            return flask.Response(
+                json.dumps(
+                    {
+                        "error": "error with backend service: %s" % str(res.text),
+                        "generated_query": es_query,
+                    }
+                ),
+                res.status_code,
+                content_type="application/json",
+            )
+    except ConnectionError as e:
+        logger.error("analytics connection error: %s" % str(e))
+        return flask.Response(
+            json.dumps({"error": "connection error with backend service: %s" % str(e)}),
+            503,
+            content_type="application/json",
+        )
+
+
+@api.route("/analytics/jobs2", methods=["GET", "POST"])
+@decorators.login_required
+def tasks_jobs2(user):
+    if user.is_not_super_admin() and user.is_not_epm():
+        raise dci_exc.Unauthorized()
+
     payload = flask.request.json
 
     try:
diff --git a/tests/analytics/test_query_es_dsl.py b/tests/analytics/test_query_es_dsl.py
new file mode 100644
index 000000000..37c4e85e3
--- /dev/null
+++ b/tests/analytics/test_query_es_dsl.py
@@ -0,0 +1,384 @@
+# -*- encoding: utf-8 -*-
+#
+# Copyright (C) 2024 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+from dci.analytics import query_es_dsl as qed
+
+import pyparsing as pp
+import pytest
+
+
+def test_parse_query_invalid():
+    with pytest.raises(pp.ParseException):
+        qed.parse("toto")
+
+    with pytest.raises(pp.ParseException):
+        qed.parse("(f1=v1) garbage")
+
+
+def test_parse_query_valid():
+    ret = qed.parse("f1=v1")
+    assert ret == ["f1", "=", "v1"]
+
+    ret = qed.parse("f1<v1")
+    assert ret == ["f1", "<", "v1"]
+
+    ret = qed.parse("f1<=v1")
+    assert ret == ["f1", "<=", "v1"]
+
+    ret = qed.parse("(f1=v1)")
+    assert ret == [["f1", "=", "v1"]]
+
+    ret = qed.parse("(f1=v1) and (f2=v2)")
+    assert ret == [["f1", "=", "v1"], "and", ["f2", "=", "v2"]]
+
+    ret = qed.parse("((f1=v1) and (f2=v2)) or (f3=v3)")
+    assert ret == [
+        [["f1", "=", "v1"], "and", ["f2", "=", "v2"]],
+        "or",
+        ["f3", "=", "v3"],
+    ]
+
+    ret = qed.parse("((f1=v1) and (f2=v2)) or ((f3=v3) and (f4=v4))")
+    assert ret == [
+        [["f1", "=", "v1"], "and", ["f2", "=", "v2"]],
+        "or",
+        [["f3", "=", "v3"], "and", ["f4", "=", "v4"]],
+    ]
+
+    ret = qed.parse("((f1=v1) and ((f2=v2) or (f2=v22))) or ((f3=v3) and (f4=v4))")
+    assert ret == [
+        [["f1", "=", "v1"], "and", [["f2", "=", "v2"], "or", ["f2", "=", "v22"]]],
+        "or",
+        [["f3", "=", "v3"], "and", ["f4", "=", "v4"]],
+    ]
+
+    ret = qed.parse(
+        "((f1=v1) and ((f2=v2) or (f2=v22))) or ((f3=v3) and ((f4=v4) or (f4=v44)))"
+    )
+    assert ret == [
+        [["f1", "=", "v1"], "and", [["f2", "=", "v2"], "or", ["f2", "=", "v22"]]],
+        "or",
+        [["f3", "=", "v3"], "and", [["f4", "=", "v4"], "or", ["f4", "=", "v44"]]],
+    ]
+
+    ret = qed.parse("(f1=v1) and (name not_in [lol, kikoolol, lolipop])")
+    assert ret == [
+        ["f1", "=", "v1"],
+        "and",
+        ["name", "not_in", ["lol", "kikoolol", "lolipop"]],
+    ]
+
+    """
+    ret = qed.parse("(f1=v1) and (f2=v2) and (f3=v3) and (f4=v4)")
+    assert ret == [
+        ["f1", "=", "v1"],
+        "and",
+        ["f2", "=", "v2"],
+        "and",
+        ["f3", "=", "v3"],
+        "and",
+        ["f4", "=", "v4"],
+    ]"""
+
+
+def test_build_invalid():
+    with pytest.raises(ValueError):
+        qed.build("toto")
+
+    with pytest.raises(ValueError):
+        qed.build("f1!=v1")
+
+
+def test_build():
+    ret = qed.build("f1=v1")
+    assert ret == {"query": {"term": {"f1": "v1"}}}
+
+    ret = qed.build("(f1=v1)")
+    assert ret == {"query": {"term": {"f1": "v1"}}}
+
+    ret = qed.build("(f1=v1) and (f2=v2)")
+    assert ret == {
+        "query": {"bool": {"filter": [{"term": {"f1": "v1"}}, {"term": {"f2": "v2"}}]}}
+    }
+
+    ret = qed.build("((f1=v1) and (f2=v2)) or (f3=v3)")
+    assert ret == {
+        "query": {
+            "bool": {
+                "should": [
+                    {
+                        "bool": {
+                            "filter": [{"term": {"f1": "v1"}}, {"term": {"f2": "v2"}}]
+                        }
+                    },
+                    {"term": {"f3": "v3"}},
+                ]
+            }
+        }
+    }
+
+    ret = qed.build("((f1=v1) and (f2=v2)) or ((f3=v3) and (f4=v4))")
+    assert ret == {
+        "query": {
+            "bool": {
+                "should": [
+                    {
+                        "bool": {
+                            "filter": [{"term": {"f1": "v1"}}, {"term": {"f2": "v2"}}]
+                        }
+                    },
+                    {
+                        "bool": {
+                            "filter": [{"term": {"f3": "v3"}}, {"term": {"f4": "v4"}}]
+                        }
+                    },
+                ]
+            }
+        }
+    }
+
+    ret = qed.build("((f1=v1) and ((f2=v2) or (f2=v22))) or ((f3=v3) and (f4=v4))")
+    assert ret == {
+        "query": {
+            "bool": {
+                "should": [
+                    {
+                        "bool": {
+                            "filter": [
+                                {"term": {"f1": "v1"}},
+                                {
+                                    "bool": {
+                                        "should": [
+                                            {"term": {"f2": "v2"}},
+                                            {"term": {"f2": "v22"}},
+                                        ]
+                                    }
+                                },
+                            ]
+                        }
+                    },
+                    {
+                        "bool": {
+                            "filter": [{"term": {"f3": "v3"}}, {"term": {"f4": "v4"}}]
+                        }
+                    },
+                ]
+            }
+        }
+    }
+
+    ret = qed.build(
+        "((f1=v1) and ((f2=v2) or (f2=v22))) or ((f3=v3) and ((f4=v4) or (f4=v44)))"
+    )
+    assert ret == {
+        "query": {
+            "bool": {
+                "should": [
+                    {
+                        "bool": {
+                            "filter": [
+                                {"term": {"f1": "v1"}},
+                                {
+                                    "bool": {
+                                        "should": [
+                                            {"term": {"f2": "v2"}},
+                                            {"term": {"f2": "v22"}},
+                                        ]
+                                    }
+                                },
+                            ]
+                        }
+                    },
+                    {
+                        "bool": {
+                            "filter": [
+                                {"term": {"f3": "v3"}},
+                                {
+                                    "bool": {
+                                        "should": [
+                                            {"term": {"f4": "v4"}},
+                                            {"term": {"f4": "v44"}},
+                                        ]
+                                    }
+                                },
+                            ]
+                        }
+                    },
+                ]
+            }
+        }
+    }
+
+    ret = qed.build(
+        "(name=vcp) and (((components.type=ocp) and (components.version=4.14.27)) and ((components.type=aspenmesh) and (components.version=1.18.7-am1)))"
+    )
+    assert ret == {
+        "query": {
+            "bool": {
+                "filter": [
+                    {"term": {"name": "vcp"}},
+                    {
+                        "bool": {
+                            "filter": [
+                                {
+                                    "nested": {
+                                        "path": "components",
+                                        "query": {
+                                            "bool": {
+                                                "filter": [
+                                                    {
+                                                        "term": {
+                                                            "components.type": "ocp"
+                                                        }
+                                                    },
+                                                    {
+                                                        "term": {
+                                                            "components.version": "4.14.27"
+                                                        }
+                                                    },
+                                                ]
+                                            }
+                                        },
+                                    }
+                                },
+                                {
+                                    "nested": {
+                                        "path": "components",
+                                        "query": {
+                                            "bool": {
+                                                "filter": [
+                                                    {
+                                                        "term": {
+                                                            "components.type": "aspenmesh"
+                                                        }
+                                                    },
+                                                    {
+                                                        "term": {
+                                                            "components.version": "1.18.7-am1"
+                                                        }
+                                                    },
+                                                ]
+                                            }
+                                        },
+                                    }
+                                },
+                            ]
+                        }
+                    },
+                ]
+            }
+        }
+    }
+
+    """ ret = qed.build(
+        "(components.type=cnf-certification-test) and (team.name not_in [telcoci, RedHat])"
+    )
+    assert ret == {
+        "query": {
+            "bool": {
+                "filter": [
+                    {
+                        "nested": {
+                            "path": "components",
+                            "query": {
+                                "term": {"components.type": "cnf-certification-test"}
+                            },
+                        }
+                    },
+                    {
+                        "nested": {
+                            "path": "team",
+                            "query": {
+                                "bool": {
+                                    "must_not": {
+                                        "terms": {"team.name": ["telcoci", "RedHat"]}
+                                    }
+                                }
+                            },
+                        }
+                    },
+                ]
+            }
+        }
+    }
+    """
+
+
+def test_query_1():
+    ret = qed.build(
+        "(components.type=cnf-certification-test) and (components.name not_in [telcoci, Red Hat])"
+    )
+    assert ret == {
+        "query": {
+            "nested": {
+                "path": "components",
+                "query": {
+                    "bool": {
+                        "filter": [
+                            {"term": {"components.type": "cnf-certification-test"}},
+                            {
+                                "bool": {
+                                    "must_not": {
+                                        "terms": {
+                                            "components.name": ["telcoci", "Red Hat"]
+                                        }
+                                    }
+                                }
+                            },
+                        ]
+                    }
+                },
+            }
+        }
+    }
+
+
+def test_query_2():
+
+    ret = qed.build("components.type=cpt_type")
+    assert ret == {
+        "query": {
+            "nested": {
+                "path": "components",
+                "query": {"term": {"components.type": "cpt_type"}},
+            }
+        }
+    }
+
+
+def not_yet_test_query_3():
+    ret = qed.build("created_at>2024-06-01 and created_at<2024-06-30")
+    assert ret == {
+        "query": {
+            "range": {
+                "created_at": {
+                    "gte": "2024-06-01T00:00:00",
+                    "lte": "2024-06-30T23:59:59",
+                    "format": "strict_date_optional_time",
+                }
+            }
+        }
+    }
+
+
+def not_yet_test_query_4():
+    """
+    {
+        "size": 50,
+        "_source": ["created_at","team.name","remoteci.name","pipeline.name","name"],
+        "query": {}
+    }
+    """