Skip to content

Commit

Permalink
add new dsl to query elasticsearch
Browse files Browse the repository at this point in the history
Change-Id: I5a4a748c57a9fcc23d8e9681db0d7befd313bea7
  • Loading branch information
ylamgarchal committed Sep 18, 2024
1 parent 0f16dbc commit 841745f
Show file tree
Hide file tree
Showing 3 changed files with 613 additions and 0 deletions.
204 changes: 204 additions & 0 deletions dci/analytics/query_es_dsl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2023 Red Hat, Inc
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

import pyparsing as pp

_field = pp.Word(pp.alphanums + "_" + ".")
_value = pp.Word(pp.alphanums + "_" + "-" + "%" + "." + ":")
_word = pp.Word(pp.alphanums + "_" + "-" + "." + " ")
_comma = pp.Suppress(pp.Literal(","))
_lp = pp.Suppress(pp.Literal("("))
_rp = pp.Suppress(pp.Literal(")"))

_lb = pp.Suppress(pp.Literal("["))
_rb = pp.Suppress(pp.Literal("]"))

_comma_string = _comma + _word
_list = _lb + _word + pp.ZeroOrMore(_comma_string) + _rb

_comparison_operators = {"=", "!=", "<=" "<", ">=", ">"}
_comparison_operators = pp.oneOf(" ".join(_comparison_operators))
_comparison = _field + _comparison_operators + _value

_membership_operators = {"not_in"}
_membership_operators = pp.oneOf(" ".join(_membership_operators))
_membership_operation = _field + _membership_operators + pp.Group(_list)

_logical_operators = {"and", "or"}
_logical_operators = pp.oneOf(" ".join(_logical_operators))
_logical_operation = (
pp.Group(_lp + (_comparison | _membership_operation) + _rp)
+ _logical_operators
+ pp.Group(_lp + (_comparison | _membership_operation) + _rp)
| _lp + (_comparison | _membership_operation) + _rp
| (_comparison | _membership_operation)
)

query = pp.Forward()
query << (
(_lp + pp.Group(query) + _rp + pp.ZeroOrMore(_logical_operators + query))
| _logical_operation
)


def parse(q):
return query.parseString(q).asList()


def _generate_from_comparison_operators(parsed_query, handle_nested=False):
operand_1 = parsed_query[0]
operator = parsed_query[1]
operand_2 = parsed_query[2]

if operator == "=":
if handle_nested and "." in operand_1:
return {
"nested": {
"path": operand_1.split(".")[0],
"query": {"term": {operand_1: operand_2}},
}
}
return {"term": {operand_1: operand_2}}
elif operator == "not_in":
if handle_nested and "." in operand_1:
return {
"nested": {
"path": operand_1.split(".")[0],
"query": {"must_not": {"terms": {operand_1: operand_2}}},
}
}
return {"must_not": {"terms": {operand_1: operand_2}}}


def _split_on_or(parsed_query):
before_or = []
after_or = []
for i in range(len(parsed_query)):
if parsed_query[i] != "or":
before_or.append(parsed_query[i])
elif parsed_query[i] == "or":
after_or = parsed_query[i + 1 :]
break
return before_or, after_or


def _get_logical_operands(parsed_query):
operands = []
for q in parsed_query:
if q != "or" and q != "and":
operands.append(q)
return operands


def _is_nested_query(operands_1, operands_2=None):
path = None
if (
isinstance(operands_1, list)
and isinstance(operands_1[0], list)
and "." in operands_1[0][0]
):
path = operands_1[0][0].split(".")[0]
"""if path:
for o in operands_1:
if o[0].split(".")[0] != path:
return None
if operands_2:
for o in operands_2:
if o[0].split(".")[0] != path:
return None """
return path


def _generate_es_query(parsed_query, handle_nested=True):
if (
len(parsed_query) <= 3
and isinstance(parsed_query, list)
and isinstance(parsed_query[0], str)
):
return _generate_from_comparison_operators(parsed_query, handle_nested)
elif (
isinstance(parsed_query[0], list)
and len(parsed_query) == 1
and isinstance(parsed_query[0][0], str)
):
return _generate_from_comparison_operators(parsed_query[0], handle_nested)
else:
if "or" in parsed_query:
left_operands, right_operands = _split_on_or(parsed_query)
if (
isinstance(left_operands, list)
and isinstance(left_operands[0], list)
and len(left_operands) == 1
):
left_operands = left_operands[0]
if (
isinstance(right_operands, list)
and isinstance(right_operands[0], list)
and len(right_operands) == 1
):
right_operands = right_operands[0]
path = _is_nested_query(left_operands, right_operands)
if path:
return {
"nested": {
"path": path,
"query": {
"bool": {
"should": [
_generate_es_query(
left_operands, handle_nested=False
),
_generate_es_query(
right_operands, handle_nested=False
),
]
}
},
}
}
else:
return {
"bool": {
"should": [
_generate_es_query(left_operands, handle_nested=False),
_generate_es_query(right_operands, handle_nested=False),
]
}
}
else:
operands = _get_logical_operands(parsed_query)
path = _is_nested_query(operands)
if path:
return {
"nested": {
"path": path,
"query": {
"bool": {
"filter": [
_generate_es_query(o, handle_nested=False)
for o in operands
]
}
},
}
}
else:
return {"bool": {"filter": [_generate_es_query(o) for o in operands]}}


def build(query):
parsed_query = parse(query)
return {"query": _generate_es_query(parsed_query)}
46 changes: 46 additions & 0 deletions dci/api/v1/analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from requests.exceptions import ConnectionError
import uuid

from dci.analytics import query_es_dsl as qed
from dci.api.v1 import api
from dci.api.v1 import base
from dci.api.v1 import export_control
Expand Down Expand Up @@ -253,6 +254,51 @@ def tasks_jobs(user):
if user.is_not_super_admin() and user.is_not_epm():
raise dci_exc.Unauthorized()

payload = flask.request.json
query_string = payload["query"]
es_query = qed.build(query_string)
es_query["sort"] = [
{"created_at": {"order": "desc", "format": "strict_date_optional_time"}}
]

try:
res = requests.get(
"%s/analytics/jobs" % (CONFIG["ANALYTICS_URL"]),
headers={"Content-Type": "application/json"},
json=es_query,
)
res_json = res.json()

if res.status_code == 200:
res_json["generated_query"] = es_query
return flask.jsonify(res_json)
else:
logger.error("analytics error: %s" % str(res.text))
return flask.Response(
json.dumps(
{
"error": "error with backend service: %s" % str(res.text),
"generated_query": es_query,
}
),
res.status_code,
content_type="application/json",
)
except ConnectionError as e:
logger.error("analytics connection error: %s" % str(e))
return flask.Response(
json.dumps({"error": "connection error with backend service: %s" % str(e)}),
503,
content_type="application/json",
)


@api.route("/analytics/jobs2", methods=["GET", "POST"])
@decorators.login_required
def tasks_jobs2(user):
if user.is_not_super_admin() and user.is_not_epm():
raise dci_exc.Unauthorized()

payload = flask.request.json

try:
Expand Down
Loading

0 comments on commit 841745f

Please sign in to comment.