diff --git a/indexer/src/main/java/au/org/aodn/esindexer/service/IndexerServiceImpl.java b/indexer/src/main/java/au/org/aodn/esindexer/service/IndexerServiceImpl.java index 9272c6f7..eb522193 100644 --- a/indexer/src/main/java/au/org/aodn/esindexer/service/IndexerServiceImpl.java +++ b/indexer/src/main/java/au/org/aodn/esindexer/service/IndexerServiceImpl.java @@ -151,9 +151,7 @@ protected StacCollectionModel getMappedMetadataValues(String metadataValues) thr stacCollectionModel.getSummaries().setScore(score); - // set title suggest for each metadata record, this will be used by the autocomplete search - List filteredWords = StringUtil.generateTitleSuggest(stacCollectionModel.getTitle()); - stacCollectionModel.setTitleSuggest(filteredWords); + stacCollectionModel.setTitleSuggest(stacCollectionModel.getTitle()); return stacCollectionModel; } diff --git a/indexer/src/main/java/au/org/aodn/esindexer/service/StacCollectionMapperService.java b/indexer/src/main/java/au/org/aodn/esindexer/service/StacCollectionMapperService.java index b03058bc..76f2e584 100644 --- a/indexer/src/main/java/au/org/aodn/esindexer/service/StacCollectionMapperService.java +++ b/indexer/src/main/java/au/org/aodn/esindexer/service/StacCollectionMapperService.java @@ -431,11 +431,12 @@ List mapProviders(MDMetadataType source) { if (item.getAbstractResponsibility() != null) { if(item.getAbstractResponsibility().getValue() instanceof CIResponsibilityType2 ciResponsibility) { ciResponsibility.getParty().forEach(party -> { - try { + try + { ProviderModel providerModel = ProviderModel.builder().build(); providerModel.setRoles(Collections.singletonList(ciResponsibility.getRole().getCIRoleCode().getCodeListValue())); CIOrganisationType2 organisationType2 = (CIOrganisationType2) party.getAbstractCIParty().getValue(); - providerModel.setName(organisationType2.getName().getCharacterString().getValue().toString()); + providerModel.setName(organisationType2.getName() != null ? organisationType2.getName().getCharacterString().getValue().toString() : ""); organisationType2.getIndividual().forEach(individual -> individual.getCIIndividual().getContactInfo().forEach(contactInfo -> { contactInfo.getCIContact().getOnlineResource().forEach(onlineResource -> { providerModel.setUrl(onlineResource.getCIOnlineResource().getLinkage().getCharacterString().getValue().toString()); @@ -592,10 +593,11 @@ protected String mapContactsRole(CIResponsibilityType2 ciResponsibility) { } protected String mapContactsOrganization(AbstractCIPartyPropertyType2 party) { - String organisationString = party.getAbstractCIParty().getValue().getName().getCharacterString().getValue().toString(); - return organisationString != null ? - organisationString : ""; - + String organisationString = ""; + if (party.getAbstractCIParty() != null) { + organisationString = party.getAbstractCIParty().getValue().getName().getCharacterString().getValue().toString(); + } + return organisationString; } protected String mapContactsName(CIIndividualPropertyType2 individual) { diff --git a/indexer/src/main/java/au/org/aodn/esindexer/utils/StringUtil.java b/indexer/src/main/java/au/org/aodn/esindexer/utils/StringUtil.java index 0f9258b2..5d1899fe 100644 --- a/indexer/src/main/java/au/org/aodn/esindexer/utils/StringUtil.java +++ b/indexer/src/main/java/au/org/aodn/esindexer/utils/StringUtil.java @@ -14,15 +14,4 @@ public class StringUtil { public static String encodeUTF8(String input) { return new String(input.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8); } - - public static List generateTitleSuggest(String input) { - Set uniqueWords = new HashSet<>(); - // Filter out stop words - return Arrays.stream(input.replaceAll("[^a-zA-Z0-9]", " ").split("\\s+")) - .map(String::toLowerCase) - .filter(word -> !AppConstants.STOP_WORDS.contains(word)) - .filter(uniqueWords::add) - .collect(Collectors.toList()); - - } } diff --git a/indexer/src/main/resources/config_files/portal_records_index_schema.json b/indexer/src/main/resources/config_files/portal_records_index_schema.json index 2d3c3c97..3968e4aa 100644 --- a/indexer/src/main/resources/config_files/portal_records_index_schema.json +++ b/indexer/src/main/resources/config_files/portal_records_index_schema.json @@ -1,4 +1,24 @@ { + "settings":{ + "analysis":{ + "analyzer":{ + "my_stop_analyzer":{ + "type":"custom", + "tokenizer":"standard", + "filter":[ + "lowercase", + "english_stop" + ] + } + }, + "filter":{ + "english_stop":{ + "type":"stop", + "stopwords":"_english_" + } + } + } + }, "mappings": { "dynamic": true, "properties": { @@ -11,10 +31,10 @@ "title": { "type": "text" }, "title_suggest": { "type": "completion", - "analyzer": "simple", + "analyzer": "my_stop_analyzer", "preserve_separators": true, "preserve_position_increments": true, - "max_input_length": 50 + "max_input_length": 1000 }, "keywords": { "type": "nested", diff --git a/indexer/src/test/java/au/org/aodn/esindexer/utils/StringUtilTest.java b/indexer/src/test/java/au/org/aodn/esindexer/utils/StringUtilTest.java index 4d63d8e2..bf0615dd 100644 --- a/indexer/src/test/java/au/org/aodn/esindexer/utils/StringUtilTest.java +++ b/indexer/src/test/java/au/org/aodn/esindexer/utils/StringUtilTest.java @@ -24,13 +24,6 @@ public void testToUTF8String_withFrenchCharacters() { assertEquals(frenchString, result); } - @Test - void generateTitleSuggest() { - String input = "IMOS - Animal Tracking Facility - Satellite Relay Tagging Program - Delayed mode data °"; - List result = StringUtil.generateTitleSuggest(input); - assertEquals(List.of("imos", "animal", "tracking", "facility", "satellite", "relay", "tagging", "program", "delayed", "mode", "data"), result); - } - @Test public void testToUTF8String_withDegreeSign() { // Example string containing the degree symbol diff --git a/indexer/src/test/resources/canned/sample3_stac.json b/indexer/src/test/resources/canned/sample3_stac.json index 73e40397..2a8fbef2 100644 --- a/indexer/src/test/resources/canned/sample3_stac.json +++ b/indexer/src/test/resources/canned/sample3_stac.json @@ -329,19 +329,7 @@ } ], "id": "06b09398-d3d0-47dc-a54a-a745319fbece", - "title_suggest": [ - "imos", - "animal", - "tracking", - "facility", - "satellite", - "relay", - "tagging", - "program", - "delayed", - "mode", - "data" - ], + "title_suggest": "IMOS - Animal Tracking Facility - Satellite Relay Tagging Program - Delayed mode data °", "type": "Collection", "stac_version": "1.0.0", "stac_extensions": [ diff --git a/stacmodel/src/main/java/au/org/aodn/stac/model/StacCollectionModel.java b/stacmodel/src/main/java/au/org/aodn/stac/model/StacCollectionModel.java index de78d463..0ac9ecb9 100644 --- a/stacmodel/src/main/java/au/org/aodn/stac/model/StacCollectionModel.java +++ b/stacmodel/src/main/java/au/org/aodn/stac/model/StacCollectionModel.java @@ -15,7 +15,7 @@ public class StacCollectionModel { protected String uuid; protected String title; @JsonProperty("title_suggest") - protected List titleSuggest; + protected String titleSuggest; protected String description; protected ExtentModel extent; protected SummariesModel summaries;