Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Different indexing for suggestion and query #53

Merged
merged 2 commits into from
Feb 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 107 additions & 2 deletions api/cueSearch/elasticSearch/elastic_search_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ class ESIndexingUtils:
)
GLOBAL_DIMENSIONS_INDEX_NAME = "global_dimensions_name_index_cuesearch"
GLOBAL_DIMENSIONS_INDEX_DATA = "cuesearch_global_dimensions_data_index"
AUTO_GLOBAL_DIMENSIONS_INDEX_DATA = (
AUTO_GLOBAL_DIMENSIONS_INDEX_DATA = "cuesearch_auto_global_dimensions_data_index"
AUTO_GLOBAL_DIMENSIONS_INDEX_DATA_SEARCH_SUGGESTION = (
"cuesearch_auto_global_dimensions_search_suggestion_data_index"
)
GLOBAL_DIMENSIONS_INDEX_SEARCH_SUGGESTION_DATA = (
Expand Down Expand Up @@ -168,6 +169,10 @@ def runAllIndexDimension():
target=ESIndexingUtils.indexNonGlobalDimensionsDataForSearchSuggestion
)
cardIndexer3.start()
cardIndexer4 = threading.Thread(
target=ESIndexingUtils.indexNonGlobalDimensionsData()
)
cardIndexer4.start()
logging.info("Indexing completed !! ")

@staticmethod
Expand Down Expand Up @@ -521,7 +526,9 @@ def indexNonGlobalDimensionsDataForSearchSuggestion(joblogger=None):
},
}

indexName = ESIndexingUtils.AUTO_GLOBAL_DIMENSIONS_INDEX_DATA
indexName = (
ESIndexingUtils.AUTO_GLOBAL_DIMENSIONS_INDEX_DATA_SEARCH_SUGGESTION
)

aliasIndex = ESIndexingUtils.initializeIndex(indexName, indexDefinition)
logging.info("IndexName %s", indexName)
Expand Down Expand Up @@ -605,3 +612,101 @@ def fetchNonGlobalDimensionsValueForIndexing(datasetDimensions: list):
len(indexingDocuments),
)
return indexingDocuments

@staticmethod
def indexNonGlobalDimensionsData(joblogger=None):
    """
    Rebuild the Elasticsearch index holding non global dimension data.

    Fetches the dataset dimensions from
    ``GlobalDimensionServices.nonGlobalDimensionForIndexing()``, initializes
    the index named by ``ESIndexingUtils.AUTO_GLOBAL_DIMENSIONS_INDEX_DATA``,
    ingests the documents produced by
    ``fetchNonGlobalDimensionsValueForIndexing`` and finally calls
    ``deleteOldIndex`` for that index name.

    :param joblogger: Not used by this method; kept so the signature matches
        ``indexNonGlobalDimensionsDataForSearchSuggestion(joblogger=None)``.
    :raises RuntimeError: When the service response does not report success.
    """
    # Imported inside the method exactly as in the original code
    # (presumably to avoid a circular import - TODO confirm).
    from cueSearch.services import GlobalDimensionServices

    logging.info(
        "*************************** Indexing Starts of Non Global Dimension Data **************************"
    )

    response = GlobalDimensionServices.nonGlobalDimensionForIndexing()
    if not response["success"]:
        # This method fetches *non global* dimensions, so say so in the error.
        logging.error("Error in fetching non global dimensions.")
        raise RuntimeError("Error in fetching non global dimensions")

    datasetDimensions = response.get("data", [])
    logging.debug("Dataset dimensions: %s", datasetDimensions)

    indexDefinition = {
        "settings": {
            "analysis": {
                "analyzer": {
                    "my_analyzer": {
                        "tokenizer": "my_tokenizer",
                        "filter": ["lowercase"],
                    }
                },
                # NOTE(review): "default_search" is placed directly under
                # "analysis" here; Elasticsearch documents the default search
                # analyzer as "analysis.analyzer.default_search". Left
                # unchanged to preserve current behaviour - confirm intent.
                "default_search": {"type": "my_analyzer"},
                "tokenizer": {
                    "my_tokenizer": {
                        "type": "edge_ngram",
                        "min_gram": 1,
                        "max_gram": 10,
                        "token_chars": ["letter", "digit"],
                    }
                },
            }
        },
        "mappings": {
            "properties": {
                "globalDimensionId": {"type": "text"},
                "globalDimensionDisplayValue": {"type": "keyword"},
                "globalDimensionValue": {
                    "type": "text",
                    "search_analyzer": "my_analyzer",
                    "analyzer": "my_analyzer",
                    "fields": {
                        "ngram": {"type": "text", "analyzer": "my_analyzer"}
                    },
                },
                "globalDimensionName": {
                    "type": "text",
                    "search_analyzer": "my_analyzer",
                    "analyzer": "my_analyzer",
                    "fields": {
                        "ngram": {"type": "text", "analyzer": "my_analyzer"}
                    },
                },
                "dimension": {
                    "type": "text",
                    "search_analyzer": "my_analyzer",
                    "analyzer": "my_analyzer",
                    "fields": {
                        "ngram": {"type": "text", "analyzer": "my_analyzer"}
                    },
                },
                "dataset": {"type": "text"},
                "datasetId": {"type": "integer"},
            }
        },
    }

    indexName = ESIndexingUtils.AUTO_GLOBAL_DIMENSIONS_INDEX_DATA

    aliasIndex = ESIndexingUtils.initializeIndex(indexName, indexDefinition)
    logging.info("IndexName %s", indexName)
    logging.info("aliasIndex %s", aliasIndex)

    # datasetDimensions is a list of dataset/dimension entries.
    try:
        documentsToIndex = ESIndexingUtils.fetchNonGlobalDimensionsValueForIndexing(
            datasetDimensions
        )
        ESIndexingUtils.ingestIndex(documentsToIndex, aliasIndex)
    except Exception:
        # Best effort, as before: a failed fetch/ingest must not abort the
        # job, but keep the traceback in the log so the failure is debuggable.
        logging.exception("Error while indexing non global dimensions data")

    # NOTE(review): this still runs after a failed ingest, exactly like the
    # original code - confirm that is the desired behaviour.
    ESIndexingUtils.deleteOldIndex(indexName, aliasIndex)
    logging.info(
        "*************************** Indexing Completed of Non Dimensional Data **************************"
    )
9 changes: 5 additions & 4 deletions api/cueSearch/elasticSearch/elastic_search_querying.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,10 @@ class ESQueryingUtils:
"cuesearch_global_dimensions_names_for_search_index"
)
GLOBAL_DIMENSIONS_INDEX_DATA = "cuesearch_global_dimensions_data_index"
AUTO_GLOBAL_DIMENSIONS_INDEX_DATA = (
AUTO_GLOBAL_DIMENSIONS_INDEX_DATA_SEARCH_SUGGESTION = (
"cuesearch_auto_global_dimensions_search_suggestion_data_index"
)
AUTO_GLOBAL_DIMENSIONS_INDEX_DATA = "cuesearch_auto_global_dimensions_data_index"
DATASET_MEASURES_INDEX_NAME = "dataset_measures_index_cuesearch"

@staticmethod
Expand Down Expand Up @@ -123,7 +124,7 @@ def findNonGlobalDimensionResults(

logging.info("Querying global dimensions for: %s", query)

query = "" if query is None else query.lower()
query = "" if query is None else query
client = ESQueryingUtils._getESClient()

searchQuery = Search(
Expand All @@ -138,7 +139,7 @@ def findNonGlobalDimensionResults(
)

if query:
searchQuery = searchQuery.query("match", globalDimensionDisplayValue=query)
searchQuery = searchQuery.query("term", globalDimensionDisplayValue=query)
else:
searchQuery = searchQuery.query("match_all")

Expand Down Expand Up @@ -257,7 +258,7 @@ def findNonGlobalDimensionResultsForSearchSuggestion(
client = ESQueryingUtils._getESClient()

searchQuery = Search(
index=ESQueryingUtils.AUTO_GLOBAL_DIMENSIONS_INDEX_DATA
index=ESQueryingUtils.AUTO_GLOBAL_DIMENSIONS_INDEX_DATA_SEARCH_SUGGESTION
).using(client)

if globalDimension:
Expand Down
23 changes: 18 additions & 5 deletions api/cueSearch/services/searchCardTemplate.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,10 @@ def ElasticSearchQueryResultsForOnSearchQuery(
searchPayload: dict,
) -> List[SearchResults]:
"""
Praveen write it's documentation
Queries Elasticsearch based on searchPayload.
:param searchPayload: [{"searchType": "GLOBALDIMENSION", "label": str, "value": str}, {"searchType": "DATASETDIMENSION", "label": str, "value": str, "datasetId": int}]
:returns: [{"value": str, "dimension": str, "globalDimensionName": str, "user_entity_identifier": str, "id": str, "dataset": str, "datasetId": int, "type": "DATASETDIMENSION/GLOBALDIMENSION"}]

"""
searchResults = []
for payload in searchPayload:
Expand Down Expand Up @@ -257,12 +260,22 @@ def getSearchCardData(params: dict):
res = ApiResponse("Error in fetching data")
try:
finalData = {"data": None, "chartMetaData": None}
data = Datasets.getDatasetData(params).data
chartMetaData = getChartMetaData(params, data)
finaldata = {"data": data, "chartMetaData": chartMetaData}
res.update(True, "Successfully fetched data", finaldata)
data = []
# data = Datasets.getDatasetData(params).data
res = Datasets.getDatasetData(params)
if res.success:
data = res.data
chartMetaData = getChartMetaData(params, data)
finaldata = {"data": data, "chartMetaData": chartMetaData}
res.update(True, "Successfully fetched data", finaldata)

else:
data = res.data
finaldata = {"data": data, "chartMetaData": None}
res.update(False, "Error occured while fetching data", data)
except Exception as ex:
logging.error("Error in fetching data :%s", str(ex))
res.update(False, "Error occured in get chart meta data", data)
return res

@staticmethod
Expand Down
2 changes: 2 additions & 0 deletions api/cueSearch/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ def indexingJob():
ESIndexingUtils.indexGlobalDimensionsDataForSearchSuggestion() # Used for search suggestion
ESIndexingUtils.indexNonGlobalDimensionsDataForSearchSuggestion() # Used for index auto global dimension
ESIndexingUtils.indexGlobalDimensionsData()
ESIndexingUtils.indexNonGlobalDimensionsData()

logging.info(
"*********************** Indexing Job completed ! ********************** "
)
Expand Down
122 changes: 119 additions & 3 deletions api/cueSearch/tests/test_elasticSearch_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,7 @@ def testNonGlobalDimensionDataIndex(client, mocker):


@pytest.mark.django_db(transaction=True)
def testFindNonGlobalDimensionResult(client, mocker):
def testFindNonGlobalDimensionSuggestionResult(client, mocker):
"""Method to test non global dimension index"""
ESIndexingUtils.deleteAllIndex()

Expand Down Expand Up @@ -436,11 +436,16 @@ def testFindNonGlobalDimensionResult(client, mocker):
},
]
query = "TestData"
result = ESQueryingUtils.findNonGlobalDimensionResults(query=query)
result = ESQueryingUtils.findNonGlobalDimensionResultsForSearchSuggestion(
query=query
)
# breakpoint()
count = 0
while not result:
count += 1
result = ESQueryingUtils.findNonGlobalDimensionResults(query=query)
result = ESQueryingUtils.findNonGlobalDimensionResultsForSearchSuggestion(
query=query
)

if count == 10:
result = []
Expand All @@ -450,6 +455,117 @@ def testFindNonGlobalDimensionResult(client, mocker):
################################ Delete all indexes ##############


# @pytest.mark.django_db(transaction=True)
# def testFindNonGlobalDimensionResultData(client, mocker):
# """Method to test non global dimension index"""
# ESIndexingUtils.deleteAllIndex()

# connection = mixer.blend("dataset.connection")
# testDataset = mixer.blend(
# "dataset.dataset",
# name="orders",
# id=1,
# dimensions='["Brand", "Color", "State"]',
# metrics='["Orders", "OrderAmount", "OrderQuantity"]',
# granularity="day",
# timestampColumn="TestDate",
# sql="Select * from testTable",
# )
# mockResponse = mocker.patch(
# "cueSearch.elasticSearch.elastic_search_indexing.ESIndexingUtils.runAllIndexDimension",
# new=mock.MagicMock(autospec=True, return_value=True),
# )
# mockResponse.start()
# path = reverse("createDataset")
# data = {
# "name": "demo_dataset",
# "sql": "SELECT * from TEST_TABLE",
# "connectionId": connection.id,
# "metrics": ["Amount", "Quantity"],
# "dimensions": ["Category", "Region"],
# "timestamp": "CreatedAt",
# "granularity": "day",
# "isNonRollup": False,
# }
# response = client.post(path, data=data, content_type="application/json")

# # create dimension for testing
# dataset = Dataset.objects.get(id=1)
# mockResponse.start()
# path = reverse("globalDimensionCreate")
# gd_data = {
# "name": "test",
# "dimensionalValues": [
# {
# "datasetId": dataset.id,
# "dataset": "Returns",
# "dimension": "WarehouseCode",
# }
# ],
# }
# response = client.post(path, gd_data, content_type="application/json")
# assert response.data["success"] == True
# assert response.status_code == 200

# globalDimsId = GlobalDimensionServices.getGlobalDimensions()
# globalDimensionId = globalDimsId.data[0]["values"][0]["id"]

# # Publishing global dimension by id
# path = reverse("pubGlobalDimension")
# payload = {"id": globalDimensionId, "published": True}
# response = client.post(path, payload)
# mockResponse.stop()
# listToIndex = [
# {"dataset": "Test data", "datasetId": 1, "dimension": "Brand"},
# {"dataset": "Test data", "datasetId": 1, "dimension": "WarehouseCode"},
# ]
# res = {"success": True, "data": listToIndex}

# mockResponse = mocker.patch(
# "cueSearch.services.globalDimension.GlobalDimensionServices.nonGlobalDimensionForIndexing",
# new=mock.MagicMock(autospec=True, return_value=res),
# )
# mockResponse.start()
# ESIndexingUtils.indexNonGlobalDimensionsData()
# mockResponse.stop()

# expectedResults = [
# {
# "value": "TestData",
# "dimension": "Brand",
# "globalDimensionName": "Test data_Brand",
# "user_entity_identifier": "Test data_Brand",
# "id": "Brand_TestData_1",
# "dataset": "Test data",
# "datasetId": testDataset.id,
# "type": "DATASETDIMENSION",
# },
# {
# "value": "TestData",
# "dimension": "WarehouseCode",
# "globalDimensionName": "Test data_WarehouseCode",
# "user_entity_identifier": "Test data_WarehouseCode",
# "id": "WarehouseCode_TestData_1",
# "dataset": "Test data",
# "datasetId": testDataset.id,
# "type": "DATASETDIMENSION",
# },
# ]
# query = "TestData"
# result = ESQueryingUtils.findNonGlobalDimensionResults(query=query)
# count = 0
# while not result:
# count += 1
# result = ESQueryingUtils.findNonGlobalDimensionResults(query=query)

# if count == 10:
# result = []
# return result

# assert result == expectedResults
# ################################ Delete all indexes ##############


def testRunAllIndexing(client, mocker):

mockResponse = mocker.patch(
Expand Down
1 change: 1 addition & 0 deletions api/cueSearch/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ def elasticSearchIndexingView(request: HttpRequest) -> Response:
ESIndexingUtils.indexGlobalDimensionsDataForSearchSuggestion() # Used for search suggestion
ESIndexingUtils.indexNonGlobalDimensionsDataForSearchSuggestion() # Used for index auto global dimension
ESIndexingUtils.indexGlobalDimensionsData()
ESIndexingUtils.indexNonGlobalDimensionsData()
logging.info("************** Indexing Completed !****************")
res.update(True, "Indexing completed !", [])
return Response(res.json())
Expand Down
16 changes: 12 additions & 4 deletions api/dataset/services/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,17 @@ def getDatasetData(params: dict):
:param params: Dict containing dataset name, and dataset dimension
"""
res = ApiResponse("Error in fetching data")
dataset = Dataset.objects.get(id=params["datasetId"])
dataDf = Data.fetchDatasetDataframe(dataset, params["sql"])
data = dataDf.to_dict("records")
try:
dataset = Dataset.objects.get(id=params["datasetId"])
# params["sql"] = params["sql"] + "some went wrong"
dataDf = Data.fetchDatasetDataframe(dataset, params["sql"])
if isinstance(dataDf, str):
res.update(False, "Error occured while fetching data", dataDf)
else:
data = dataDf.to_dict("records")
res.update(True, "Successfully fetched data", data)

res.update(True, "Successfully fetched data", data)
except Exception as ex:
logging.error("Error occured due to %s", str(ex))
# data = str(ex)
return res
Loading