-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsearch.py
65 lines (50 loc) · 2.31 KB
/
search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
"""
This script uses the Elasticsearch library to search the 'users' index for the provided
search input.

Requirements:
    - Elasticsearch connection details must be properly configured in the 'scripts.config' module.
    - The Elasticsearch library must be installed (`pip install elasticsearch`).

Usage:
    1. Ensure that the Elasticsearch connection details are correctly set in
       'scripts.config.ES_HOST'.
    2. Modify the 'SEARCH_INPUT' variable to specify the search term.
    3. Run the script. The search results will be printed to the console.
"""
from elasticsearch import Elasticsearch
from scripts.config import ES_HOST
# Search term passed to search_users_index() when this file is run as a script.
SEARCH_INPUT = "german"
def search_users_index(search_input: str) -> None:
    """Search the 'users' index for ``search_input`` and print the matching documents.

    A fuzzy ``multi_match`` query is run against the 'name', 'city', and 'country' fields.
    For every hit in the response, the document ID and its source are printed, followed by
    a separator line.

    Args:
        search_input: Text to look for in the 'name', 'city', and 'country' fields.

    Returns:
        None. The results are written to standard output.

    Note:
        - The client connects to the host configured in ``ES_HOST``.
        - ``"fuzziness": "AUTO"`` lets slightly misspelled input still match.
        - The client is opened for this call only and is closed before the results are
          printed, so repeated calls do not accumulate open connections.
    """
    # The same three fields are both searched and returned in each hit's source.
    user_fields = ["name", "city", "country"]

    # Using the client as a context manager guarantees it is closed even if the search raises.
    with Elasticsearch(ES_HOST) as es:
        search_result = es.search(
            index="users",
            query={
                "multi_match": {
                    # string that will be searched for
                    "query": search_input,
                    # fields that will be searched in
                    "fields": user_fields,
                    # tolerate small typos in the query string
                    "fuzziness": "AUTO",
                }
            },
            # limit the returned source to the fields that were searched
            source={"includes": user_fields},
        )

    # The response is already fully loaded, so printing can happen after the client is closed.
    for hit in search_result["hits"]["hits"]:
        print("Document ID:", hit["_id"])
        print("Document Source:", hit["_source"])
        print("=" * 50)
if __name__ == "__main__":
    # Executed directly: run one search using the module-level default term.
    search_users_index(SEARCH_INPUT)