moved functions to somewhere else (#78)
* moved functions to somewhere else

* oops

* black and lint

---------

Co-authored-by: Lars van de Kerkhof <[email protected]>
viggo-devries and specialunderwear authored Dec 4, 2024
1 parent 4b46d83 commit 44a99c2
Showing 3 changed files with 122 additions and 113 deletions.
8 changes: 6 additions & 2 deletions oscar_elasticsearch/search/indexing/indexer.py
@@ -52,7 +52,9 @@ def bulk_index(self, documents, current_alias=None):
         bulk(es, docs, ignore=[400])
 
     def get_current_alias(self):
-        aliasses = list(es.indices.get_alias(name=self.name).keys())
+        aliasses = list(
+            es.indices.get_alias(name=self.name, ignore_unavailable=True).keys()
+        )
         if aliasses:
             return aliasses[0]
 
@@ -64,7 +66,9 @@ def finish(self):
         # Check if an alias exists for the index
         if es.indices.exists_alias(name=self.name):
             # Get aliases
-            aliased_indices = es.indices.get_alias(name=self.name).keys()
+            aliased_indices = es.indices.get_alias(
+                name=self.name, ignore_unavailable=True
+            ).keys()
 
             # Link the new alias to the old index
             es.indices.put_alias(name=self.name, index=self.alias_name)
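The new `ignore_unavailable=True` flag makes the alias lookup skip missing or closed backing indices instead of raising an error, which matters mid-reindex when an old index may already have been deleted. Below is a minimal sketch of the patched lookup in isolation, assuming an elasticsearch-py 8.x client against a local cluster; the standalone function name and the try/except around a completely missing alias are illustrative, not part of this commit:

from elasticsearch import Elasticsearch, NotFoundError

es = Elasticsearch("http://localhost:9200")  # assumed local cluster

def current_alias(name):
    try:
        # ignore_unavailable=True skips missing/closed backing indices
        # rather than erroring out, mirroring the change above
        aliasses = list(es.indices.get_alias(name=name, ignore_unavailable=True).keys())
    except NotFoundError:
        return None  # no index carries this alias at all
    return aliasses[0] if aliasses else None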
113 changes: 2 additions & 111 deletions oscar_elasticsearch/search/indexing/settings.py
@@ -1,3 +1,4 @@
+from oscar_elasticsearch.search.utils import get_index_settings
 from oscar_elasticsearch.search.settings import (
     INDEX_PREFIX,
     FACETS,
@@ -8,117 +9,7 @@
 
 
 def get_oscar_index_settings():
-    return {
-        "analysis": {
-            "analyzer": {
-                # the simplest analyzer, most useful for normalizing and splitting a sentence into words;
-                # this is most likely only used as a search analyzer
-                "lowercasewhitespace": {
-                    "tokenizer": "whitespace",
-                    "filter": ["lowercase", "asciifolding"],
-                    "char_filter": ["non_ascii_character_filter_mapping"],
-                },
-                # this analyzer will keep all punctuation and numbers and make ngrams
-                # as small as a single character. Only useful for UPCs and technical terms
-                "technical_analyzer": {
-                    "tokenizer": "whitespace",
-                    "filter": [
-                        "shallow_edgengram",
-                        "lowercase",
-                        "asciifolding",
-                        "max_gram_truncate",
-                    ],
-                    "char_filter": ["non_ascii_character_filter_mapping"],
-                },
-                # should be used as the search analyzer for terms analyzed with the
-                # technical_analyzer. Will just split the input into words and normalize,
-                # keeping in mind the max ngram size.
-                "technical_search_analyzer": {
-                    "tokenizer": "whitespace",
-                    "filter": [
-                        "lowercase",
-                        "asciifolding",
-                        "max_gram_truncate",
-                    ],
-                    "char_filter": ["non_ascii_character_filter_mapping"],
-                },
-                # this analyzer is useful for important textual data, like titles,
-                # that contain a lot of search terms.
-                "title_analyzer": {
-                    "tokenizer": "standard",
-                    "filter": [
-                        "edgengram",
-                        "lowercase",
-                        "asciifolding",
-                        "max_gram_truncate",
-                    ],
-                },
-                # should be used as the search analyzer for terms analyzed with title_analyzer
-                "reversed_title_analyzer": {
-                    "tokenizer": "standard",
-                    "filter": [
-                        "lowercase",
-                        "asciifolding",
-                        "reversed_edgengram",
-                        "max_gram_truncate",
-                    ],
-                },
-                # this analyzer is most useful for long textual data. Punctuation and numbers
-                # WILL BE STRIPPED
-                "standard": {
-                    "tokenizer": "standard",
-                    "filter": ["lowercase", "asciifolding"],
-                },
-                # This analyzer is useful when you need to find very specific data inside some text;
-                # for example, a 'Volvo Penta TAD163532E' code inside your model type that should be found with 'Penta D16'.
-                # Also use the 'technical_search_analyzer' as the search analyzer for this one.
-                "technical_title_analyzer": {
-                    "tokenizer": "whitespace",
-                    "filter": [
-                        "ngram",
-                        "lowercase",
-                        "asciifolding",
-                        "max_gram_truncate",
-                    ],
-                },
-            },
-            "tokenizer": {
-                "ngram_tokenizer": {"type": "ngram", "min_gram": 3, "max_gram": 15},
-                "edgengram_tokenizer": {
-                    "type": "edge_ngram",
-                    "min_gram": 2,
-                    "max_gram": MAX_GRAM,
-                },
-            },
-            "filter": {
-                "ngram": {"type": "ngram", "min_gram": 3, "max_gram": MAX_GRAM},
-                "edgengram": {
-                    "type": "edge_ngram",
-                    "min_gram": 2,
-                    "max_gram": MAX_GRAM,
-                },
-                "shallow_edgengram": {
-                    "type": "edge_ngram",
-                    "min_gram": 1,
-                    "max_gram": MAX_GRAM,
-                },
-                "reversed_edgengram": {
-                    "type": "edge_ngram",
-                    "min_gram": 3,
-                    "max_gram": MAX_GRAM,
-                    "side": "back",
-                },
-                "max_gram_truncate": {"type": "truncate", "length": MAX_GRAM},
-            },
-            "char_filter": {
-                "non_ascii_character_filter_mapping": {
-                    "type": "mapping",
-                    "mappings": ["’ => '"],
-                }
-            },
-        },
-        "index": {"number_of_shards": 1, "max_ngram_diff": MAX_GRAM},
-    }
+    return get_index_settings(MAX_GRAM)
 
 
 OSCAR_INDEX_MAPPING = {
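With the body moved out, `get_oscar_index_settings` is now a thin wrapper, so existing callers are unaffected. A quick sanity check of the delegation, assuming `MAX_GRAM` is among the names imported from `oscar_elasticsearch.search.settings` (the truncated import block above suggests it is):

from oscar_elasticsearch.search.indexing.settings import get_oscar_index_settings
from oscar_elasticsearch.search.settings import MAX_GRAM
from oscar_elasticsearch.search.utils import get_index_settings

# the wrapper and the moved function should return identical dicts,
# with the same two top-level keys as before the move
assert get_oscar_index_settings() == get_index_settings(MAX_GRAM)
assert set(get_oscar_index_settings()) == {"analysis", "index"}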
114 changes: 114 additions & 0 deletions oscar_elasticsearch/search/utils.py
@@ -25,3 +25,117 @@ def search_result_to_queryset(search_results, Model):
 
     preserved = Case(*[When(pk=pk, then=pos) for pos, pk in enumerate(instance_ids)])
     return Model.objects.filter(pk__in=instance_ids).order_by(preserved)
+
+
+def get_index_settings(MAX_GRAM):
+    return {
+        "analysis": {
+            "analyzer": {
+                # the simplest analyzer, most useful for normalizing and splitting a sentence into words;
+                # this is most likely only used as a search analyzer
+                "lowercasewhitespace": {
+                    "tokenizer": "whitespace",
+                    "filter": ["lowercase", "asciifolding"],
+                    "char_filter": ["non_ascii_character_filter_mapping"],
+                },
+                # this analyzer will keep all punctuation and numbers and make ngrams
+                # as small as a single character. Only useful for UPCs and technical terms
+                "technical_analyzer": {
+                    "tokenizer": "whitespace",
+                    "filter": [
+                        "shallow_edgengram",
+                        "lowercase",
+                        "asciifolding",
+                        "max_gram_truncate",
+                    ],
+                    "char_filter": ["non_ascii_character_filter_mapping"],
+                },
+                # should be used as the search analyzer for terms analyzed with the
+                # technical_analyzer. Will just split the input into words and normalize,
+                # keeping in mind the max ngram size.
+                "technical_search_analyzer": {
+                    "tokenizer": "whitespace",
+                    "filter": [
+                        "lowercase",
+                        "asciifolding",
+                        "max_gram_truncate",
+                    ],
+                    "char_filter": ["non_ascii_character_filter_mapping"],
+                },
+                # this analyzer is useful for important textual data, like titles,
+                # that contain a lot of search terms.
+                "title_analyzer": {
+                    "tokenizer": "standard",
+                    "filter": [
+                        "edgengram",
+                        "lowercase",
+                        "asciifolding",
+                        "max_gram_truncate",
+                    ],
+                },
+                # should be used as the search analyzer for terms analyzed with title_analyzer
+                "reversed_title_analyzer": {
+                    "tokenizer": "standard",
+                    "filter": [
+                        "lowercase",
+                        "asciifolding",
+                        "reversed_edgengram",
+                        "max_gram_truncate",
+                    ],
+                },
+                # this analyzer is most useful for long textual data. Punctuation and numbers
+                # WILL BE STRIPPED
+                "standard": {
+                    "tokenizer": "standard",
+                    "filter": ["lowercase", "asciifolding"],
+                },
+                # This analyzer is useful when you need to find very specific data inside some text;
+                # for example, a 'Volvo Penta TAD163532E' code inside your model type that should be found with 'Penta D16'.
+                # Also use the 'technical_search_analyzer' as the search analyzer for this one.
+                "technical_title_analyzer": {
+                    "tokenizer": "whitespace",
+                    "filter": [
+                        "ngram",
+                        "lowercase",
+                        "asciifolding",
+                        "max_gram_truncate",
+                    ],
+                },
+            },
+            "tokenizer": {
+                "ngram_tokenizer": {"type": "ngram", "min_gram": 3, "max_gram": 15},
+                "edgengram_tokenizer": {
+                    "type": "edge_ngram",
+                    "min_gram": 2,
+                    "max_gram": MAX_GRAM,
+                },
+            },
+            "filter": {
+                "ngram": {"type": "ngram", "min_gram": 3, "max_gram": MAX_GRAM},
+                "edgengram": {
+                    "type": "edge_ngram",
+                    "min_gram": 2,
+                    "max_gram": MAX_GRAM,
+                },
+                "shallow_edgengram": {
+                    "type": "edge_ngram",
+                    "min_gram": 1,
+                    "max_gram": MAX_GRAM,
+                },
+                "reversed_edgengram": {
+                    "type": "edge_ngram",
+                    "min_gram": 3,
+                    "max_gram": MAX_GRAM,
+                    "side": "back",
+                },
+                "max_gram_truncate": {"type": "truncate", "length": MAX_GRAM},
+            },
+            "char_filter": {
+                "non_ascii_character_filter_mapping": {
+                    "type": "mapping",
+                    "mappings": ["’ => '"],
+                }
+            },
+        },
+        "index": {"number_of_shards": 1, "max_ngram_diff": MAX_GRAM},
+    }
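
The returned dict plugs straight into index creation. A hedged sketch of wiring it up, assuming elasticsearch-py 8.x; the index name, the MAX_GRAM value of 15, and the single-field mapping are illustrative only:

from elasticsearch import Elasticsearch

from oscar_elasticsearch.search.utils import get_index_settings

es = Elasticsearch("http://localhost:9200")  # assumed local cluster

es.indices.create(
    index="catalogue-products-new",  # hypothetical index name
    settings=get_index_settings(15),  # 15 stands in for MAX_GRAM
    mappings={
        "properties": {
            # title_analyzer is defined by the "analysis" block above
            "title": {"type": "text", "analyzer": "title_analyzer"},
        }
    },
)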
