From 3337a8432e8b26d581f76bfd71241670f54db50f Mon Sep 17 00:00:00 2001 From: Nick Smallbone Date: Wed, 14 Feb 2024 00:04:03 +0100 Subject: [PATCH] Upgrade from ES6 to ES8. * ES URL needs to include http:// * No more doc_type (mapping types) * response.hits is now an object rather than a number --- README.md | 2 +- .../elasticsearch6/es_mapping_repo.py | 40 ++++++------------- .../queries/es6_search_service.py | 9 +++-- .../repositories/es6_indicies.py | 4 +- pyproject.toml | 4 +- tests/conftest.py | 2 +- tests/e2e/conftest.py | 2 +- 7 files changed, 23 insertions(+), 40 deletions(-) diff --git a/README.md b/README.md index bbdcbd0c..3bef5609 100644 --- a/README.md +++ b/README.md @@ -51,7 +51,7 @@ A Makefile is provided to simplify tasks. ``` export ES_ENABLED=true -export ELASTICSEARCH_HOST=localhost:9200 +export ELASTICSEARCH_HOST=http://localhost:9200 ``` ## Create test resources diff --git a/karp-backend/src/karp/search_infrastructure/elasticsearch6/es_mapping_repo.py b/karp-backend/src/karp/search_infrastructure/elasticsearch6/es_mapping_repo.py index 3f82c862..4bc6f42e 100644 --- a/karp-backend/src/karp/search_infrastructure/elasticsearch6/es_mapping_repo.py +++ b/karp-backend/src/karp/search_infrastructure/elasticsearch6/es_mapping_repo.py @@ -11,7 +11,6 @@ logger = logging.getLogger("karp") KARP_CONFIGINDEX = "karp_config" -KARP_CONFIGINDEX_TYPE = "configs" class Es6MappingRepository: # noqa: D101 @@ -39,12 +38,10 @@ def ensure_config_index_exist(self) -> None: # noqa: D102 "refresh_interval": -1, }, "mappings": { - KARP_CONFIGINDEX_TYPE: { - "dynamic": False, - "properties": { - "index_name": {"type": "text"}, - "alias_name": {"type": "text"}, - }, + "dynamic": False, + "properties": { + "index_name": {"type": "text"}, + "alias_name": {"type": "text"}, } }, }, @@ -72,7 +69,6 @@ def _update_config(self, resource_id: str) -> dict[str, str]: self.es.index( index=self._config_index, id=resource_id, - doc_type=KARP_CONFIGINDEX_TYPE, body=names, ) return names @@ -96,9 +92,7 @@ def delete_from_config(self, resource_id: str) -> None: def get_index_name(self, resource_id: str) -> str: # noqa: D102 try: - res = self.es.get( - index=self._config_index, id=resource_id, doc_type=KARP_CONFIGINDEX_TYPE - ) + res = self.es.get(index=self._config_index, id=resource_id) except es_exceptions.NotFoundError as err: logger.info("didn't find index_name for resource '%s' details: %s", resource_id, err) return self._update_config(resource_id)["index_name"] @@ -106,9 +100,7 @@ def get_index_name(self, resource_id: str) -> str: # noqa: D102 def get_alias_name(self, resource_id: str) -> str: # noqa: D102 try: - res = self.es.get( - index=self._config_index, id=resource_id, doc_type=KARP_CONFIGINDEX_TYPE - ) + res = self.es.get(index=self._config_index, id=resource_id) except es_exceptions.NotFoundError as err: logger.info("didn't find alias_name for resource '%s' details: %s", resource_id, err) return self._update_config(resource_id)["alias_name"] @@ -144,16 +136,12 @@ def _init_field_mapping( aliases = self._get_all_aliases() mapping: Dict[str, Dict[str, Dict[str, Dict[str, Dict]]]] = self.es.indices.get_mapping() for alias, index in aliases: - if ( - "mappings" in mapping[index] - and "entry" in mapping[index]["mappings"] - and "properties" in mapping[index]["mappings"]["entry"] - ): + if "mappings" in mapping[index] and "properties" in mapping[index]["mappings"]: field_mapping[alias] = Es6MappingRepository.get_analyzed_fields_from_mapping( - mapping[index]["mappings"]["entry"]["properties"] + mapping[index]["mappings"]["properties"] ) sortable_fields[alias] = Es6MappingRepository.create_sortable_map_from_mapping( - mapping[index]["mappings"]["entry"]["properties"] + mapping[index]["mappings"]["properties"] ) return field_mapping, sortable_fields @@ -226,20 +214,16 @@ def on_publish_resource( # noqa: ANN201, D102 self, alias_name: str, index_name: str ): mapping = self._get_index_mappings(index=index_name) - if ( - "mappings" in mapping[index_name] - and "entry" in mapping[index_name]["mappings"] - and "properties" in mapping[index_name]["mappings"]["entry"] - ): + if "mappings" in mapping[index_name] and "properties" in mapping[index_name]["mappings"]: self.analyzed_fields[ alias_name ] = Es6MappingRepository.get_analyzed_fields_from_mapping( - mapping[index_name]["mappings"]["entry"]["properties"] + mapping[index_name]["mappings"]["properties"] ) self.sortable_fields[ alias_name ] = Es6MappingRepository.create_sortable_map_from_mapping( - mapping[index_name]["mappings"]["entry"]["properties"] + mapping[index_name]["mappings"]["properties"] ) @staticmethod diff --git a/karp-backend/src/karp/search_infrastructure/queries/es6_search_service.py b/karp-backend/src/karp/search_infrastructure/queries/es6_search_service.py index d3e6888b..3257122e 100644 --- a/karp-backend/src/karp/search_infrastructure/queries/es6_search_service.py +++ b/karp-backend/src/karp/search_infrastructure/queries/es6_search_service.py @@ -149,7 +149,7 @@ def format_entry(entry): # noqa: ANN202 } result = { - "total": response.hits.total, + "total": response.hits.total.value, "hits": [format_entry(entry) for entry in response], } return result @@ -186,6 +186,7 @@ def search_with_query(self, query: EsQuery): # noqa: ANN201, D102, C901 alias_name = self.mapping_repo.get_alias_name(resource) s = es_dsl.Search(index=alias_name) s = self.add_runtime_mappings(s, field_names) + s = s.extra(track_total_hits=True) # get accurate hits numbers if es_query is not None: s = s.query(es_query) @@ -204,11 +205,11 @@ def search_with_query(self, query: EsQuery): # noqa: ANN201, D102, C901 result["hits"][query.resources[i]] = self._format_result( query.resources, response ).get("hits", []) - result["total"] += response.hits.total + result["total"] += response.hits.total.value if query.lexicon_stats: if "distribution" not in result: result["distribution"] = {} - result["distribution"][query.resources[i]] = response.hits.total + result["distribution"][query.resources[i]] = response.hits.total.value else: result = self._extracted_from_search_with_query_47(query, es_query, field_names) @@ -219,7 +220,7 @@ def _extracted_from_search_with_query_47(self, query, es_query, field_names): # alias_names = [ self.mapping_repo.get_alias_name(resource) for resource in query.resources ] - s = es_dsl.Search(using=self.es, index=alias_names, doc_type="entry") + s = es_dsl.Search(using=self.es, index=alias_names) s = self.add_runtime_mappings(s, field_names) if es_query is not None: s = s.query(es_query) diff --git a/karp-backend/src/karp/search_infrastructure/repositories/es6_indicies.py b/karp-backend/src/karp/search_infrastructure/repositories/es6_indicies.py index 14763648..a5084be2 100644 --- a/karp-backend/src/karp/search_infrastructure/repositories/es6_indicies.py +++ b/karp-backend/src/karp/search_infrastructure/repositories/es6_indicies.py @@ -44,7 +44,7 @@ def create_index(self, resource_id: str, config): # noqa: ANN201, D102 body = { "settings": settings, - "mappings": {"entry": mapping}, + "mappings": mapping, } index_alias_name = self.mapping_repo.create_index_and_alias_name(resource_id) @@ -94,7 +94,6 @@ def add_entries( # noqa: D102, ANN201 { "_index": index_name, "_id": entry.id, - "_type": "entry", "_source": entry.entry, } ) @@ -117,7 +116,6 @@ def delete_entry( # noqa: ANN201, D102 try: self.es.delete( index=index_name, - doc_type="entry", id=entry_id, refresh=True, ) diff --git a/pyproject.toml b/pyproject.toml index c8796748..3fe4f47a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,8 +46,8 @@ aiomysql = { version = "^0.1.1", optional = true } aiosqlite = { version = "^0.17.0", optional = true } alembic = "^1.8.1" asgi-correlation-id = "^3.0.1" -elasticsearch = "^6" -elasticsearch-dsl = "^6" +elasticsearch = "^8" +elasticsearch-dsl = "^8" environs = "^9.3.4" fastapi = "^0.89.0" injector = "^0.20.1" diff --git a/tests/conftest.py b/tests/conftest.py index d96be33e..f6ce12fb 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,7 +4,7 @@ from starlette.config import environ environ["TESTING"] = "True" -environ["ELASTICSEARCH_HOST"] = "localhost:9202" +environ["ELASTICSEARCH_HOST"] = "http://localhost:9202" environ["CONSOLE_LOG_LEVEL"] = "DEBUG" from . import common_data, utils # nopep8 # noqa: E402, F401 diff --git a/tests/e2e/conftest.py b/tests/e2e/conftest.py index 6dc20a65..d69c44e6 100644 --- a/tests/e2e/conftest.py +++ b/tests/e2e/conftest.py @@ -43,7 +43,7 @@ def sqlite_session_factory(in_memory_sqlite_db): # noqa: ANN201 def setup_environment() -> None: os.environ["TESTING"] = "1" os.environ["AUTH_JWT_PUBKEY_PATH"] = "assets/testing/pubkey.pem" - os.environ["ELASTICSEARCH_HOST"] = "localhost:9202" + os.environ["ELASTICSEARCH_HOST"] = "http://localhost:9202" @pytest.fixture(scope="session")