Skip to content

Commit

Permalink
No issue - added required preprocessing for NER
Browse files: browse the repository at this point in the history.
  • Loading branch information
maxxkia committed Dec 5, 2017
1 parent ad69e53 commit 29986c0
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 6 deletions.
2 changes: 2 additions & 0 deletions ss-module-ner/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>

<!-- DKPRO -->
<dependency>
<groupId>de.tudarmstadt.ukp.dkpro.core</groupId>
<artifactId>de.tudarmstadt.ukp.dkpro.core.testing-asl</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,21 @@

import java.io.IOException;

import de.tudarmstadt.ukp.dkpro.core.io.xmi.XmiReader;
import de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordNamedEntityRecognizer;
import eu.openminted.uc.socialsciences.common.CommandLineArgumentHandler;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.uima.UIMAException;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.collection.CollectionReaderDescription;

import de.tudarmstadt.ukp.dkpro.core.io.xmi.XmiWriter;
import org.kohsuke.args4j.Option;
import org.kohsuke.args4j.spi.BooleanOptionHandler;

import de.tudarmstadt.ukp.dkpro.core.io.xmi.XmiReader;
import de.tudarmstadt.ukp.dkpro.core.io.xmi.XmiWriter;
import de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpSegmenter;
import de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordNamedEntityRecognizer;
import de.tudarmstadt.ukp.dkpro.core.testing.validation.CasValidatorComponent;
import eu.openminted.uc.socialsciences.common.CommandLineArgumentHandler;

public class Pipeline
{

Expand Down Expand Up @@ -66,6 +68,14 @@ private void runInternal() {
reader = createReaderDescription(XmiReader.class,
XmiReader.PARAM_SOURCE_LOCATION, input,
XmiReader.PARAM_LENIENT, true);

AnalysisEngineDescription preprocessing = createEngineDescription(
createEngineDescription(OpenNlpSegmenter.class,
OpenNlpSegmenter.PARAM_WRITE_SENTENCE, true,
OpenNlpSegmenter.PARAM_WRITE_TOKEN, true,
OpenNlpSegmenter.PARAM_STRICT_ZONING, true),
createEngineDescription(CasValidatorComponent.class,
CasValidatorComponent.PARAM_STRICT_CHECK, true));

AnalysisEngineDescription ner = useStanfordModels ?
createEngineDescription(StanfordNamedEntityRecognizer.class)
Expand All @@ -81,7 +91,7 @@ private void runInternal() {
XmiWriter.PARAM_OVERWRITE, true,
XmiWriter.PARAM_STRIP_EXTENSION, true,
XmiWriter.PARAM_USE_DOCUMENT_ID, true);
runPipeline(reader, ner, xmiWriter);
runPipeline(reader, preprocessing, ner, xmiWriter);
} catch (UIMAException | IOException e) {
logger.error("An error has occurred.", e);
throw new IllegalStateException(e);
Expand Down

0 comments on commit 29986c0

Please sign in to comment.