Skip to content

Commit

Permalink
Add API for getting supported languages.
Browse files Browse the repository at this point in the history
  • Loading branch information
ledsoft committed Nov 18, 2024
1 parent b8bdbe7 commit f2a7796
Show file tree
Hide file tree
Showing 7 changed files with 59 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@

import cz.cvut.kbss.textanalysis.lemmatizer.model.LemmatizerResult;

import java.util.List;

public interface LemmatizerApi {

/**
Expand All @@ -30,4 +32,11 @@ public interface LemmatizerApi {
* @throws cz.cvut.kbss.textanalysis.exception.UnsupportedLanguageException If the given language is not supported
*/
LemmatizerResult process(String text, String lang);

/**
* Returns the list of languages for which annotation is supported.
*
* @return List of supported languages; may be empty
*/
List<String> getSupportedLanguages();
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,15 @@
import cz.cvut.kbss.textanalysis.service.HtmlAnnotationService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.MediaType;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;

import java.util.List;

@RestController
public class AnnotateController {

Expand All @@ -38,11 +41,16 @@ public AnnotateController(HtmlAnnotationService service) {
}

/**
* Handles {@code POST /annotate} requests: consumes a JSON request body, produces XML and delegates the work
* to {@code service.annotate}.
*
* @param enableKeywordExtraction value of the {@code enableKeywordExtraction} request parameter; {@code false}
*                                when the parameter is not supplied
* @param input                   request body bound to a {@code TextAnalysisInput}
* @return the value returned by {@code service.annotate(enableKeywordExtraction, input)}
*/
@RequestMapping(value = "/annotate", method = RequestMethod.POST,
produces = MediaType.APPLICATION_XML_VALUE,
consumes = MediaType.APPLICATION_JSON_VALUE)
produces = MediaType.APPLICATION_XML_VALUE,
consumes = MediaType.APPLICATION_JSON_VALUE)
public String annotate(@RequestParam(value = "enableKeywordExtraction", defaultValue = "false")
Boolean enableKeywordExtraction,
Boolean enableKeywordExtraction,
@RequestBody TextAnalysisInput input) {
return service.annotate(enableKeywordExtraction, input);
}

/**
 * Serves {@code GET /languages} as JSON by forwarding the request to the annotation service.
 *
 * @return the list obtained from {@code service.getSupportedLanguages()}
 */
@GetMapping(value = "/languages", produces = MediaType.APPLICATION_JSON_VALUE)
public List<String> getSupportedLanguages() {
    final List<String> languages = service.getSupportedLanguages();
    return languages;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -117,4 +117,13 @@ private List<Word> annotateOntologyLabels(LemmatizerResult lemmatizerResult, Lis

return annotationsResults;
}

/**
 * Asks the underlying lemmatizer which languages it supports.
 *
 * @return the list returned by {@code lemmatizer.getSupportedLanguages()}
 */
public List<String> getSupportedLanguages() {
    final List<String> supported = lemmatizer.getSupportedLanguages();
    return supported;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -193,4 +193,13 @@ private boolean isTermOccurrence(Node node) {
final String typeOf = node.attr("typeof");
return (typeOf.equals(Constants.TERM_OCCURRENCE) || typeOf.equals(Constants.TERM_OCCURRENCE_PREFIXED));
}

/**
 * Forwards the supported-languages query to the wrapped annotation service.
 *
 * @return the list returned by {@code annotationService.getSupportedLanguages()}
 */
public List<String> getSupportedLanguages() {
    final List<String> supported = annotationService.getSupportedLanguages();
    return supported;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,12 @@ public MorphoDitaServiceJNI(MorphoditaConf conf) {
}
log.info("Found at {}", taggerPath);
log.info("Loading tagger ... (looks up MorphoDita native library at {})",
System.getProperty("java.library.path"));
System.getProperty("java.library.path"));
Tagger tagger = Tagger.load(taggerPath);
if (tagger == null) {
log.warn("Creating tagger failed.");
} else {
taggers.put(lang,tagger);
taggers.put(lang, tagger);
log.info("Tagger {} for lang {} successfully created.", tagger, lang);
}
} catch (Exception e) {
Expand All @@ -73,7 +73,7 @@ public LemmatizerResult process(String s, String lang) {
final List<TaggedLemmas> tTl = new ArrayList<>();

final Tokenizer tk =
lang.equals("en") ? Tokenizer.newEnglishTokenizer() : Tokenizer.newCzechTokenizer();
lang.equals("en") ? Tokenizer.newEnglishTokenizer() : Tokenizer.newCzechTokenizer();
tk.setText(s);

Tagger tagger = taggers.get(lang);
Expand Down Expand Up @@ -133,7 +133,7 @@ private List<List<SingleLemmaResult>> transform(final String s,

final long end = tokenRange.getStart() + tokenRange.getLength();
final long startNext =
(j == tokenLemmas.size() - 1) ? end : tokenRanges.get(j + 1).getStart();
(j == tokenLemmas.size() - 1) ? end : tokenRanges.get(j + 1).getStart();

String spaces = " ".repeat((int) (startNext - end));
if (spaces.isEmpty() && (end < s.length() && Character.isSpaceChar(s.charAt((int) end)))) {
Expand All @@ -146,4 +146,9 @@ private List<List<SingleLemmaResult>> transform(final String s,
}
return result;
}

/**
 * Lists the languages that have an entry in {@code taggers}.
 *
 * @return a newly created list holding the keys of {@code taggers}
 */
@Override
public List<String> getSupportedLanguages() {
    final List<String> languages = new ArrayList<>();
    languages.addAll(taggers.keySet());
    return languages;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
import cz.cvut.kbss.textanalysis.lemmatizer.model.LemmatizerResult;
import org.springframework.boot.web.client.RestTemplateBuilder;

import java.util.List;

public class MorphoDitaServiceOnline implements LemmatizerApi {

private final RestTemplateBuilder restTemplateBuilder;
Expand All @@ -42,4 +44,9 @@ public LemmatizerResult process(String s, String lang) {
morphoDitaResult.setLemmatizer(this.getClass().getName());
return morphoDitaResult;
}

/**
* Reports the languages supported by this lemmatizer.
* <p>
* NOTE(review): this implementation always returns an empty list — presumably the languages handled by the
* online service are not known locally; confirm how callers should interpret the empty result.
*
* @return an empty, unmodifiable list
*/
@Override
public List<String> getSupportedLanguages() {
return List.of();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -157,4 +157,9 @@ private LightPipeline getPipeline(String lang) {
}
return pipelines.get(lang);
}

/**
 * Lists the languages that have an entry in {@code pipelines}.
 *
 * @return a newly created list holding the keys of {@code pipelines}
 */
@Override
public List<String> getSupportedLanguages() {
    final List<String> languages = new ArrayList<>(pipelines.keySet());
    return languages;
}
}

0 comments on commit f2a7796

Please sign in to comment.