Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#66 Add inflection group to word form #67

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,16 @@ public interface IWiktionaryWordForm {
/** Returns the {@link GrammaticalGender} of this word form or
* <code>null</code> if no gender is specified or applicable.
* @return the grammatical gender, or <code>null</code> if there is none. */
GrammaticalGender getGender();

/**
* Returns the index of the inflection group this word form belongs to.
* The semantics of this property may differ from language to language.
* Roughly speaking, an inflection group corresponds to a column in
* the inflection table.
* @return Index of the inflection group, or <code>-1</code> if this word form
* does not belong to an inflection group.
*/
int getInflectionGroup();

/** Returns the {@link GrammaticalPerson} of this word form or
* <code>null</code> if no person is specified or applicable. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ public class WiktionaryWordForm implements IWiktionaryWordForm {
protected GrammaticalNumber grammaticalNumber;
protected GrammaticalCase grammaticalCase;
protected GrammaticalGender grammaticalGender;
/** Index of the inflection group; starts at <code>-1</code>, which means
* that this word form does not belong to an inflection group. */
protected int inflectionGroup = -1;
protected GrammaticalPerson grammaticalPerson;
protected GrammaticalTense grammaticalTense;
protected GrammaticalMood grammaticalMood;
Expand Down Expand Up @@ -93,6 +94,17 @@ public void setGender(final GrammaticalGender grammaticalGender) {
this.grammaticalGender = grammaticalGender;
}

/** Returns the index of the inflection group currently stored for this
 * word form; the field starts out as <code>-1</code>. */
public int getInflectionGroup() {
    return inflectionGroup;
}

/** Assigns the given inflection group to this word form.
 * @param inflectionGroup index of the inflection group; <code>-1</code> is
 *   the initial value of the field and stands for "no inflection group".
 */
public void setInflectionGroup(final int inflectionGroup) {
    this.inflectionGroup = inflectionGroup;
}

/** Returns the grammatical person currently stored for this word form. */
public GrammaticalPerson getPerson() {
    return this.grammaticalPerson;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,21 @@
package de.tudarmstadt.ukp.jwktl.parser.de.components.nountable;

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import de.tudarmstadt.ukp.jwktl.api.entry.WiktionaryWordForm;
import de.tudarmstadt.ukp.jwktl.parser.de.components.DEGenderText;
import de.tudarmstadt.ukp.jwktl.parser.util.IWiktionaryWordFormTemplateParameterHandler;
import de.tudarmstadt.ukp.jwktl.parser.util.ParsingContext;

public class DEWordFormNounTableHandler implements IWiktionaryWordFormTemplateParameterHandler {

/** Number of inflection groups reserved per block of indices. It is used as
 * the initial capacity of the genus map and as the step by which
 * inflection group indices are offset. */
public static final int MAX_INFLECTION_GROUP_COUNT = 4;

public void reset() {
this.genera = new DEGenderText[4];
this.genera = new HashMap<>(DEWordFormNounTableHandler.MAX_INFLECTION_GROUP_COUNT);
}

private List<? extends IWiktionaryWordFormTemplateParameterHandler> handlers = Arrays.asList(
Expand All @@ -51,32 +55,24 @@ public void reset() {
// Accusative
new AccusativeHandler());

protected DEGenderText[] genera = new DEGenderText[4];
protected Map<Integer, DEGenderText> genera = new HashMap<>(DEWordFormNounTableHandler.MAX_INFLECTION_GROUP_COUNT);

/**
* Returns genus by index.
* @param index index of the genus, must be between 1 and 4.
* @param index index of the genus.
* @return Genus by index or <code>null</code> if genus by this index was not set yet.
* @throws IllegalArgumentException If index is not between 1 and 4.
*/
DEGenderText getGenusByIndex(int index) {
if (index < 1 || index > 4) {
throw new IllegalArgumentException("Genus index must be 1, 2, 3 or 4.");
}
return genera[index - 1];
return genera.get(index - 1);
}

/**
* Sets genus by index
* @param genderText genus.
* @param index index of the genus, must be between 1 and 4.
* @throws IllegalArgumentException If index is not between 1 and 4.
* @param index index of the genus.
*/
void setGenusByIndex(DEGenderText genderText, Integer index) {
if (index < 1 || index > 4) {
throw new IllegalArgumentException("Genus index must be 1, 2, 3 or 4.");
}
this.genera[index - 1] = genderText;
void setGenusByIndex(DEGenderText genderText, int index) {
this.genera.put(index - 1, genderText);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ public EinzahlHandler(DEWordFormNounTableHandler nounTableHandler) {
public void handleIfFound(WiktionaryWordForm wordForm, String label, int index, String value, Matcher matcher,
ParsingContext context) {
wordForm.setNumber(GrammaticalNumber.SINGULAR);
wordForm.setInflectionGroup(index);
final DEGenderText genderText = this.nounTableHandler.getGenusByIndex(index);
if (genderText != null) {
wordForm.setGender(genderText.asGrammaticalGender());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,6 @@ public MehrzahlHandler(DEWordFormNounTableHandler nounTableHandler) {
public void handleIfFound(WiktionaryWordForm wordForm, String label, int index, String value,
        Matcher matcher, ParsingContext context) {
    // Every parameter matched by this handler denotes a plural form.
    wordForm.setNumber(GrammaticalNumber.PLURAL);
    // The index handed in by the caller is stored as the inflection group.
    wordForm.setInflectionGroup(index);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,12 @@
******************************************************************************/
package de.tudarmstadt.ukp.jwktl.parser.de.components.nountable;

import java.util.List;
import java.util.Objects;
import java.util.regex.Matcher;

import de.tudarmstadt.ukp.jwktl.api.IWiktionaryWordForm;
import de.tudarmstadt.ukp.jwktl.api.entry.WiktionaryEntry;
import de.tudarmstadt.ukp.jwktl.api.entry.WiktionaryWordForm;
import de.tudarmstadt.ukp.jwktl.parser.util.ParsingContext;
import de.tudarmstadt.ukp.jwktl.parser.util.PatternUtils;
Expand All @@ -36,14 +39,28 @@ public PatternBasedIndexedParameterHandler(DEWordFormNounTableHandler nounTableH

/**
 * Matches the label against this handler's pattern and, if it matches,
 * passes the parameter on to
 * {@link #handleIfFound(WiktionaryWordForm, String, int, String, Matcher, ParsingContext)}.
 * <p>
 * The index extracted from the label (<code>1</code> if the label carries
 * none) is shifted by an offset derived from the word forms the current
 * entry already has: the offset is the smallest multiple of
 * {@link DEWordFormNounTableHandler#MAX_INFLECTION_GROUP_COUNT} that is not
 * below the highest inflection group found there, and <code>0</code> if no
 * word form has an inflection group yet.
 *
 * @param label parameter label.
 * @param value parameter value.
 * @param wordForm word form to fill.
 * @param context parsing context; its current entry supplies the word
 *   forms used to compute the offset.
 */
public void handle(String label, String value, WiktionaryWordForm wordForm, ParsingContext context) {
    final Matcher matcher = pattern.matcher(label);

    final WiktionaryEntry wiktionaryEntry = context.findEntry();
    final List<IWiktionaryWordForm> wordForms = wiktionaryEntry.getWordForms();

    final int groupCount = DEWordFormNounTableHandler.MAX_INFLECTION_GROUP_COUNT;
    final int indexOffset;
    if (wordForms == null) {
        indexOffset = 0;
    } else {
        final int maxInflectionGroup = wordForms.stream()
                .mapToInt(IWiktionaryWordForm::getInflectionGroup)
                .max()
                .orElse(0);
        // Java's integer division truncates toward zero, so the rounding
        // formula would yield an offset of groupCount for an empty list (0)
        // or for forms without a group (-1). Treat both like "no forms".
        indexOffset = maxInflectionGroup < 1
                ? 0
                : ((maxInflectionGroup - 1) / groupCount + 1) * groupCount;
    }

    if (matcher.find()) {
        final Integer index = PatternUtils.extractIndex(matcher);
        // A label without an explicit index refers to the first group.
        final int i = index == null ? 1 : index.intValue();
        handleIfFound(wordForm, label, i + indexOffset, value, matcher, context);
    }
}

/**
 * Processes a parameter whose label matched this handler's pattern.
 *
 * @param wordForm word form to fill.
 * @param label parameter label.
 * @param index index passed in by
 *   {@link #handle(String, String, WiktionaryWordForm, ParsingContext)}.
 * @param value parameter value.
 * @param matcher matcher on which <code>find()</code> already succeeded
 *   for the label.
 * @param context parsing context.
 */
public abstract void handleIfFound(WiktionaryWordForm wordForm, String label, int index, String value,
        Matcher matcher, ParsingContext context);

}
Original file line number Diff line number Diff line change
Expand Up @@ -61,5 +61,6 @@ public PluralHandler(DEWordFormNounTableHandler nounTableHandler) {
public void handleIfFound(WiktionaryWordForm wordForm, String label, int index, String value,
        Matcher matcher, ParsingContext context) {
    // Every parameter matched by this handler denotes a plural form.
    wordForm.setNumber(GrammaticalNumber.PLURAL);
    // The index handed in by the caller is stored as the inflection group.
    wordForm.setInflectionGroup(index);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ public SingularHandler(DEWordFormNounTableHandler nounTableHandler) {
public void handleIfFound(WiktionaryWordForm wordForm, String label, int index, String value, Matcher matcher,
ParsingContext context) {
wordForm.setNumber(GrammaticalNumber.SINGULAR);
wordForm.setInflectionGroup(index);
final DEGenderText genderText = this.nounTableHandler.getGenusByIndex(index);
if (genderText != null) {
wordForm.setGender(genderText.asGrammaticalGender());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,15 +43,15 @@ public interface IWiktionaryWordFormTemplateParameterHandler {
* word form.
* @param context
* parsing context.
* @return <code>true<code> if this handler can handle the given parameter,
* <code>false<code> otherwise.
* @return <code>true</code> if this handler can handle the given parameter,
* <code>false</code> otherwise.
*/
public boolean canHandle(String label, String value, WiktionaryWordForm wordForm, ParsingContext context);

/**
* Handles the given parameter. This should only be called if
* {@see #canHandle(String, String, WiktionaryWordForm, ParsingContext)}
* returned true.
* {@link #canHandle(String, String, WiktionaryWordForm, ParsingContext)}
* returned <code>true</code>.
*
* @param label
* parameter label.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*******************************************************************************
* Copyright 2013
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package de.tudarmstadt.ukp.jwktl.api.entry;

import de.tudarmstadt.ukp.jwktl.api.IWiktionaryWordForm;
import junit.framework.TestCase;

/** Tests for the inflection group property of {@link WiktionaryWordForm}. */
public class WiktionaryWordFormTest extends TestCase {

    /** A word form that never had a group assigned must report <code>-1</code>. */
    public void testDefaultInflectionGroupValue() {
        final IWiktionaryWordForm freshForm = new WiktionaryWordForm("test");
        final int actualGroup = freshForm.getInflectionGroup();
        assertEquals(-1, actualGroup);
    }
}
Loading