Skip to content

Commit

Permalink
Merge pull request #59 from aodn/enhance-suggester
Browse files Browse the repository at this point in the history
Enhance suggester
  • Loading branch information
utas-raymondng authored Mar 22, 2024
2 parents b267543 + fb2c3fe commit fab69f9
Show file tree
Hide file tree
Showing 7 changed files with 33 additions and 43 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,7 @@ protected StacCollectionModel getMappedMetadataValues(String metadataValues) thr

stacCollectionModel.getSummaries().setScore(score);

// set title suggest for each metadata record, this will be used by the autocomplete search
List<String> filteredWords = StringUtil.generateTitleSuggest(stacCollectionModel.getTitle());
stacCollectionModel.setTitleSuggest(filteredWords);
stacCollectionModel.setTitleSuggest(stacCollectionModel.getTitle());

return stacCollectionModel;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -431,11 +431,12 @@ List<ProviderModel> mapProviders(MDMetadataType source) {
if (item.getAbstractResponsibility() != null) {
if(item.getAbstractResponsibility().getValue() instanceof CIResponsibilityType2 ciResponsibility) {
ciResponsibility.getParty().forEach(party -> {
try {
try
{
ProviderModel providerModel = ProviderModel.builder().build();
providerModel.setRoles(Collections.singletonList(ciResponsibility.getRole().getCIRoleCode().getCodeListValue()));
CIOrganisationType2 organisationType2 = (CIOrganisationType2) party.getAbstractCIParty().getValue();
providerModel.setName(organisationType2.getName().getCharacterString().getValue().toString());
providerModel.setName(organisationType2.getName() != null ? organisationType2.getName().getCharacterString().getValue().toString() : "");
organisationType2.getIndividual().forEach(individual -> individual.getCIIndividual().getContactInfo().forEach(contactInfo -> {
contactInfo.getCIContact().getOnlineResource().forEach(onlineResource -> {
providerModel.setUrl(onlineResource.getCIOnlineResource().getLinkage().getCharacterString().getValue().toString());
Expand Down Expand Up @@ -592,10 +593,11 @@ protected String mapContactsRole(CIResponsibilityType2 ciResponsibility) {
}

/**
 * Extracts the organisation name from a contact party.
 *
 * @param party the party property whose name is wanted; may be {@code null}
 * @return the {@code toString()} of the name's character-string value, or an empty string
 *         when the party, its wrapped value, its name, or the name's character string is
 *         absent
 */
protected String mapContactsOrganization(AbstractCIPartyPropertyType2 party) {
    // Guard each link of the accessor chain so that a missing element yields ""
    // rather than a NullPointerException.
    if (party == null || party.getAbstractCIParty() == null) {
        return "";
    }

    var abstractParty = party.getAbstractCIParty().getValue();
    if (abstractParty == null || abstractParty.getName() == null) {
        return "";
    }

    var characterString = abstractParty.getName().getCharacterString();
    if (characterString == null || characterString.getValue() == null) {
        return "";
    }

    return characterString.getValue().toString();
}

protected String mapContactsName(CIIndividualPropertyType2 individual) {
Expand Down
11 changes: 0 additions & 11 deletions indexer/src/main/java/au/org/aodn/esindexer/utils/StringUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,4 @@ public class StringUtil {
/**
 * Re-interprets a string's ISO-8859-1 byte representation as UTF-8 text.
 *
 * @param input the text to convert
 * @return a new string obtained by encoding {@code input} as ISO-8859-1 bytes and decoding
 *         those bytes as UTF-8
 */
public static String encodeUTF8(String input) {
    final byte[] latin1Bytes = input.getBytes(StandardCharsets.ISO_8859_1);
    return new String(latin1Bytes, StandardCharsets.UTF_8);
}

/**
 * Breaks a title into the distinct, lower-cased words used as autocomplete suggestions.
 *
 * <p>Every character outside {@code [a-zA-Z0-9]} is treated as a separator. Words found in
 * {@code AppConstants.STOP_WORDS} are dropped, and each remaining word is kept only the
 * first time it occurs, in title order.
 *
 * @param input the title to tokenise; {@code null} is treated as an empty title
 * @return the suggestion words in order of first appearance; empty when the title holds no
 *         usable word
 */
public static List<String> generateTitleSuggest(String input) {
    String title = input == null ? "" : input;
    return Arrays.stream(title.replaceAll("[^a-zA-Z0-9]", " ").split("\\s+"))
            // split() keeps a leading empty token when the text is empty or starts with a
            // separator; an empty string is never a useful suggestion.
            .filter(word -> !word.isEmpty())
            // Locale.ROOT keeps lower-casing stable regardless of the JVM default locale
            // (e.g. a Turkish locale would map "I" to dotless "ı").
            .map(word -> word.toLowerCase(java.util.Locale.ROOT))
            // Filter out stop words
            .filter(word -> !AppConstants.STOP_WORDS.contains(word))
            // distinct() preserves encounter order on this ordered stream.
            .distinct()
            .collect(Collectors.toList());
}
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,24 @@
{
"settings":{
"analysis":{
"analyzer":{
"my_stop_analyzer":{
"type":"custom",
"tokenizer":"standard",
"filter":[
"lowercase",
"english_stop"
]
}
},
"filter":{
"english_stop":{
"type":"stop",
"stopwords":"_english_"
}
}
}
},
"mappings": {
"dynamic": true,
"properties": {
Expand All @@ -11,10 +31,10 @@
"title": { "type": "text" },
"title_suggest": {
"type": "completion",
"analyzer": "simple",
"analyzer": "my_stop_analyzer",
"preserve_separators": true,
"preserve_position_increments": true,
"max_input_length": 50
"max_input_length": 1000
},
"keywords": {
"type": "nested",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,6 @@ public void testToUTF8String_withFrenchCharacters() {
assertEquals(frenchString, result);
}

/**
 * Checks that a title is reduced to its lower-cased alphanumeric words, in order, with
 * dashes and the trailing degree sign discarded.
 */
@Test
void generateTitleSuggest() {
    List<String> expectedWords = List.of(
            "imos", "animal", "tracking", "facility", "satellite", "relay",
            "tagging", "program", "delayed", "mode", "data");

    List<String> actualWords = StringUtil.generateTitleSuggest(
            "IMOS - Animal Tracking Facility - Satellite Relay Tagging Program - Delayed mode data °");

    assertEquals(expectedWords, actualWords);
}

@Test
public void testToUTF8String_withDegreeSign() {
// Example string containing the degree symbol
Expand Down
14 changes: 1 addition & 13 deletions indexer/src/test/resources/canned/sample3_stac.json
Original file line number Diff line number Diff line change
Expand Up @@ -329,19 +329,7 @@
}
],
"id": "06b09398-d3d0-47dc-a54a-a745319fbece",
"title_suggest": [
"imos",
"animal",
"tracking",
"facility",
"satellite",
"relay",
"tagging",
"program",
"delayed",
"mode",
"data"
],
"title_suggest": "IMOS - Animal Tracking Facility - Satellite Relay Tagging Program - Delayed mode data °",
"type": "Collection",
"stac_version": "1.0.0",
"stac_extensions": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public class StacCollectionModel {
protected String uuid;
protected String title;
@JsonProperty("title_suggest")
protected List<String> titleSuggest;
protected String titleSuggest;
protected String description;
protected ExtentModel extent;
protected SummariesModel summaries;
Expand Down

0 comments on commit fab69f9

Please sign in to comment.