Skip to content

feat: Basic substring search within settings API based on a cache #3909

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 10 commits into
base: main
Choose a base branch
from
120 changes: 120 additions & 0 deletions devel/alt_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
"""Alternative implementation of the search function."""

from collections import deque
import gzip
import pickle
from pprint import pprint
import re
import time

import psutil

from ansys.fluent.core.generated.solver.settings_252 import root
from ansys.fluent.core.solver.flobject import NamedObject


def get_name_components(name: str):
    """
    Split a settings name into its underscore-separated components.

    For example, ``'abc_def'`` gives ``['abc', 'def']``. A name without any
    underscore (including the empty string) gives a one-element list holding
    the name itself.
    """
    separator = "_"
    components = name.split(separator)
    return components


# PathCache: settings name -> list of (API path, queue rank) tuples.
PathCache: dict[str, list[tuple[str, int]]] = {}
# NameCache: full name or underscore-separated name component -> set of full names.
NameCache: dict[str, set[str]] = {}


def build_cache(root_cls):
    """
    Populate ``PathCache`` and ``NameCache`` from the settings class tree.

    The tree is walked breadth-first from ``root_cls`` by following each
    class's ``_child_classes`` mapping. When a child class is a
    ``NamedObject`` subclass, the walk continues into its
    ``child_object_type`` and the path gets a ``["_name_"]`` placeholder
    subscript appended.

    Every visited node is recorded as follows:

    * ``PathCache[name]`` gets a ``(path, rank)`` tuple, where ``rank`` is the
      order in which the node was queued.
    * ``NameCache[key]`` gets ``name`` added, for ``key`` equal to the full
      name and to each of its underscore-separated components.

    Both caches are cleared first, so calling this function again rebuilds
    them instead of appending duplicate paths.

    Memory usage (before and after) and the elapsed time are printed.

    Parameters
    ----------
    root_cls
        Root settings class. It is recorded under the empty name ``""`` with
        the path ``<solver_session>.settings``.
    """
    print(f"Memory usage before building cache: {get_memory_usage():.2f} MB")
    # perf_counter is intended for measuring elapsed intervals; time.time can
    # jump if the system clock is adjusted.
    start_time = time.perf_counter()

    # Start from a clean slate so that a rebuild does not duplicate entries.
    PathCache.clear()
    NameCache.clear()

    # The deque is consumed from the left (FIFO), which makes this a
    # breadth-first traversal: ranks grow with the depth of the settings API,
    # so sorting by rank lists shallower paths first.
    queue_order = 0
    queue = deque([("", root_cls, "<solver_session>.settings", queue_order)])

    while queue:
        current_name, current_cls, current_path, rank = queue.popleft()
        PathCache.setdefault(current_name, []).append((current_path, rank))
        NameCache.setdefault(current_name, set()).add(current_name)
        for name_component in get_name_components(current_name):
            NameCache.setdefault(name_component, set()).add(current_name)

        # Classes without children are leaves of the walk.
        if not hasattr(current_cls, "_child_classes"):
            continue

        for k, v in current_cls._child_classes.items():
            if issubclass(v, NamedObject):
                # Named containers are searched through their element type.
                next_cls = v.child_object_type
                next_path = f'{current_path}.{k}["_name_"]'
            else:
                next_cls = v
                next_path = f"{current_path}.{k}"
            queue_order += 1
            queue.append((k, next_cls, next_path, queue_order))

    print(f"Cache built in {time.perf_counter() - start_time:.2f} seconds")
    print(f"Memory usage after building cache: {get_memory_usage():.2f} MB")


def search(
    search_string: str,
    wildcard: bool | None = False,
    match_whole_word: bool = False,
):
    """
    Search the cached settings names and return the matching API paths.

    The cache is built from ``root`` on first use (when ``PathCache`` is
    empty). ``match_whole_word`` takes precedence over ``wildcard`` when both
    are set.

    Parameters
    ----------
    search_string : str
        Text or pattern to look for in settings names.
    wildcard : bool or None, default False
        If truthy, ``search_string`` is a shell-style pattern (``*``, ``?``,
        ``[seq]``) that must match a whole settings name, e.g. ``"viscous*"``
        or ``"*viscous"``.
    match_whole_word : bool, default False
        If true, return the paths of every name that equals
        ``search_string`` or has it as one of its underscore-separated
        components.

    Returns
    -------
    list of str
        Matching paths, ordered by the rank stored next to each path in
        ``PathCache``. With both flags off, a name matches when it contains
        ``search_string`` as a substring.
    """
    # Imported locally so this function does not depend on a new
    # module-level import.
    import fnmatch

    if not PathCache:
        build_cache(root)
    if match_whole_word:
        names = NameCache.get(search_string, set())
        results = [item for name in names for item in PathCache[name]]
    elif wildcard:
        # Translate the glob into a regex. Compiling the raw string as a regex
        # would read "viscous*" as "viscou" + zero or more "s", and would raise
        # re.error on a leading "*".
        pattern = re.compile(fnmatch.translate(search_string))
        results = [
            item for k, v in PathCache.items() if pattern.match(k) for item in v
        ]
    else:
        results = [
            item for k, v in PathCache.items() if search_string in k for item in v
        ]
    # Each item is (path, rank); order by rank, then drop it.
    results.sort(key=lambda item: item[1])
    return [path for path, _rank in results]


def get_memory_usage():
    """
    Return the resident set size (RSS) of the current process in megabytes.
    """
    bytes_per_mb = 1024 * 1024
    rss_bytes = psutil.Process().memory_info().rss
    return rss_bytes / bytes_per_mb


def save_compressed_cache():
    """
    Pickle both caches into a single gzip-compressed file.

    The tuple ``(PathCache, NameCache)`` is written to ``search_cache.pkl.gz``,
    resolved against the current working directory.
    """
    payload = (PathCache, NameCache)
    with gzip.open("search_cache.pkl.gz", "wb") as archive:
        pickle.dump(payload, archive)


if __name__ == "__main__":
    # Example usage: print the hits and then the hit count for each sample
    # query. Each query is run once and its result reused for the count,
    # instead of searching twice.
    example_queries = [
        ("viscous", {"match_whole_word": True}),
        ("read_case", {"match_whole_word": True}),
        ("viscous", {}),
        ("viscous*", {"wildcard": True}),
    ]
    for query, options in example_queries:
        hits = search(query, **options)
        pprint(hits)
        pprint(len(hits))
    save_compressed_cache()
    # Dump both caches in readable form for manual inspection.
    with open("alt_search.log", "w") as f:
        pprint(PathCache, stream=f)
        pprint(NameCache, stream=f)
139 changes: 139 additions & 0 deletions devel/alt_search_trie.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
"""Alternative implementation of the search function."""

from collections import deque
from pprint import pprint

import psutil

from ansys.fluent.core.generated.solver.settings_252 import root
from ansys.fluent.core.solver.flobject import NamedObject


class TrieNode:
    """
    A node in the Trie data structure.

    ``__slots__`` is declared so that instances carry no per-instance
    ``__dict__``; a trie allocates one node per stored character, so the
    saving adds up.
    """

    __slots__ = ("children", "results")

    def __init__(self):
        # Maps the next character to the child node reached through it.
        self.children = {}
        # Results attached to the word that ends exactly at this node.
        self.results = []


class Trie:
    """
    A Trie (prefix tree) that stores results under words and retrieves them
    by prefix.
    """

    def __init__(self):
        self._root = TrieNode()

    def insert(self, word, result):
        """
        Append ``result`` to the node reached by following ``word`` character
        by character, creating any missing nodes on the way.
        """
        current = self._root
        for letter in word:
            child = current.children.get(letter)
            if child is None:
                child = TrieNode()
                current.children[letter] = child
            current = child
        current.results.append(result)

    def search(self, prefix):
        """
        Return the results of every stored word that starts with ``prefix``.

        Results are gathered in pre-order: a node's own results first, then
        those of its children in the order the children were created. An
        unknown prefix gives an empty list.
        """
        start = self._root
        for letter in prefix:
            start = start.children.get(letter)
            if start is None:
                return []

        collected = []
        pending = [start]
        while pending:
            current = pending.pop()
            collected.extend(current.results)
            # Push children reversed so the first-created child is popped
            # first, which keeps the pre-order of a recursive walk.
            pending.extend(reversed(current.children.values()))
        return collected


def get_name_components(name: str):
    """
    Break a settings name apart at each underscore.

    ``'abc_def'`` becomes ``['abc', 'def']``; a name with no underscore
    (the empty string included) comes back as a single-element list.
    """
    parts = name.split("_")
    return parts


def get_all_ending_substrings(name_component: str):
    """
    Return every suffix of ``name_component`` that is at least two characters
    long, ordered from longest to shortest.

    For example, ``'abc'`` gives ``['abc', 'bc']``. Inputs shorter than two
    characters give an empty list.
    """
    # Start indices 0 .. len - 2 are exactly those leaving a tail of two or
    # more characters.
    stop = len(name_component) - 1
    return [name_component[start:] for start in range(stop)]


def build_trie(root_cls):
    """
    Fill ``SettingsTrie`` with the searchable substrings of every settings name.

    The settings class tree is walked breadth-first from ``root_cls`` by
    following each class's ``_child_classes`` mapping. When a child class is a
    ``NamedObject`` subclass, the walk continues into its
    ``child_object_type`` and the path gets a ``["_name_"]`` placeholder
    subscript appended.

    For every visited node, each suffix (two or more characters) of each
    underscore-separated component of its name is inserted into the trie with
    the node's path as the result. A suffix shared by several components of
    the same name (e.g. ``'ab'`` in ``'ab_ab'``) is inserted only once, so the
    path is not stored twice in the same trie node.

    Memory usage before and after is printed.

    Parameters
    ----------
    root_cls
        Root settings class. Its name is the empty string, so nothing is
        inserted for the root itself.
    """
    print(f"Memory usage before building trie: {get_memory_usage():.2f} MB")

    # The deque is consumed from the left (FIFO), which makes this a
    # breadth-first traversal: shallower settings are inserted first.
    queue = deque([("", root_cls, "<solver_session>.settings")])

    while queue:
        current_name, current_cls, current_path = queue.popleft()
        # dict.fromkeys drops repeated substrings while keeping first-seen
        # order, so the trie is built in a deterministic order.
        substrings = dict.fromkeys(
            substring
            for component in get_name_components(current_name)
            for substring in get_all_ending_substrings(component)
        )
        for substring in substrings:
            SettingsTrie.insert(substring, current_path)

        # Classes without children are leaves of the walk.
        if not hasattr(current_cls, "_child_classes"):
            continue

        for k, v in current_cls._child_classes.items():
            if issubclass(v, NamedObject):
                # Named containers are searched through their element type.
                next_cls = v.child_object_type
                next_path = f'{current_path}.{k}["_name_"]'
            else:
                next_cls = v
                next_path = f"{current_path}.{k}"
            queue.append((k, next_cls, next_path))

    print(f"Memory usage after building trie: {get_memory_usage():.2f} MB")


# Module-level trie: populated by build_trie() and queried by search().
SettingsTrie: Trie = Trie()


def search(search_term):
    """
    Return the paths stored in ``SettingsTrie`` under ``search_term``.

    The trie lookup is a prefix match against the inserted words. Because
    ``build_trie`` inserts the suffixes of each underscore-separated name
    component, this acts as a substring search within a component.

    A term that occurs more than once in a name (e.g. ``'an'`` in
    ``'banana'``) reaches the same path through several trie nodes, so
    duplicates are removed; the first occurrence of each path keeps its
    position.

    Parameters
    ----------
    search_term : str
        Text to look up.

    Returns
    -------
    list of str
        Unique matching paths, in trie traversal order.
    """
    results = SettingsTrie.search(search_term)
    # dict.fromkeys preserves insertion order while dropping repeats.
    return list(dict.fromkeys(results))


def get_memory_usage():
    """
    Return the resident set size (RSS) of the current process in megabytes.
    """
    rss_bytes = psutil.Process().memory_info().rss
    megabyte = 1024 * 1024
    return rss_bytes / megabyte


if __name__ == "__main__":
    build_trie(root)
    # Example usage: print the hits for each sample term, then the hit counts.
    # Each term is searched once and the result reused, instead of searching
    # twice.
    example_terms = ("viscous", "isco")
    hits_by_term = {term: search(term) for term in example_terms}
    for term in example_terms:
        pprint(hits_by_term[term])
    for term in example_terms:
        pprint(len(hits_by_term[term]))
1 change: 1 addition & 0 deletions doc/changelog.d/3909.added.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Basic substring search within settings API based on a cache