From b9f07123ec20907d4252fd9661582e9d0bd6647c Mon Sep 17 00:00:00 2001
From: Martin Wiesner
Date: Fri, 27 Dec 2024 10:04:37 +0100
Subject: [PATCH] OPENNLP-1684: Reduce creation of String instances in BrownBigramFeatureGenerator (#731)

---
 .../BrownBigramFeatureGenerator.java | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownBigramFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownBigramFeatureGenerator.java
index f8bf0c8b7..b16606ce9 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownBigramFeatureGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownBigramFeatureGenerator.java
@@ -24,6 +24,10 @@
  */
 public class BrownBigramFeatureGenerator implements AdaptiveFeatureGenerator {
 
+  private static final String BROWNCLUSTER = "browncluster";
+  private static final String FEATURE_NEXT_BROWNCLUSTER_BASE = BROWNCLUSTER + ",n" + BROWNCLUSTER + "=";
+  private static final String FEATURE_PREV_BROWNCLUSTER_BASE = "p" + BROWNCLUSTER + "," + BROWNCLUSTER + "=";
+
   private final BrownCluster brownCluster;
 
   /**
@@ -38,20 +42,18 @@ public BrownBigramFeatureGenerator(BrownCluster brownCluster) {
   public void createFeatures(List<String> features, String[] tokens, int index,
       String[] previousOutcomes) {
 
-    List<String> wordClasses = BrownTokenClasses.getWordClasses(tokens[index], brownCluster);
+    List<String> wc = BrownTokenClasses.getWordClasses(tokens[index], brownCluster);
     if (index > 0) {
-      List<String> prevWordClasses = BrownTokenClasses.getWordClasses(tokens[index - 1], brownCluster);
-      for (int i = 0; i < wordClasses.size() && i < prevWordClasses.size(); i++) {
-        features.add("p" + "browncluster" + "," + "browncluster" + "="
-            + prevWordClasses.get(i) + "," + wordClasses.get(i));
+      List<String> prevWC = BrownTokenClasses.getWordClasses(tokens[index - 1], brownCluster);
+      for (int i = 0; i < wc.size() && i < prevWC.size(); i++) {
+        features.add(FEATURE_PREV_BROWNCLUSTER_BASE + prevWC.get(i) + "," + wc.get(i));
       }
     }
 
     if (index + 1 < tokens.length) {
       List<String> nextWordClasses = BrownTokenClasses.getWordClasses(tokens[index + 1], brownCluster);
-      for (int i = 0; i < wordClasses.size() && i < nextWordClasses.size(); i++) {
-        features.add("browncluster" + "," + "n" + "browncluster" + "="
-            + wordClasses.get(i) + "," + nextWordClasses.get(i));
+      for (int i = 0; i < wc.size() && i < nextWordClasses.size(); i++) {
+        features.add(FEATURE_NEXT_BROWNCLUSTER_BASE + wc.get(i) + "," + nextWordClasses.get(i));
       }
     }
   }