From 125a096aaa58202e17f0f53ab0cca86d51c9304c Mon Sep 17 00:00:00 2001 From: Joey Novak Date: Thu, 26 Oct 2017 17:21:53 -0700 Subject: [PATCH 1/3] Made _train and _untrain public so that tokens can be bulk trained and untrained and move tokenization outside of the library. --- BayesSharp/BayesClassifier.cs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/BayesSharp/BayesClassifier.cs b/BayesSharp/BayesClassifier.cs index cba4bfe..280220b 100644 --- a/BayesSharp/BayesClassifier.cs +++ b/BayesSharp/BayesClassifier.cs @@ -170,7 +170,7 @@ public void Train(TTagType tagId, string input) { var tokens = _tokenizer.Tokenize(input); var tag = GetAndAddIfNotFound(_tags.Items, tagId); - _train(tag, tokens); + Train(tag, tokens); _tags.SystemTag.TrainCount += 1; tag.TrainCount += 1; _mustRecache = true; @@ -189,7 +189,7 @@ public void Untrain(TTagType tagId, string input) { return; } - _untrain(tag, tokens); + Untrain(tag, tokens); _tags.SystemTag.TrainCount += 1; tag.TrainCount += 1; _mustRecache = true; @@ -219,7 +219,7 @@ public Dictionary Classify(string input) #region Private Methods - private void _train(TagData tag, IEnumerable tokens) + private void Train(TagData tag, IEnumerable tokens) { var tokenCount = 0; foreach (var token in tokens) @@ -234,7 +234,7 @@ private void _train(TagData tag, IEnumerable tokens) _tags.SystemTag.TokenCount += tokenCount; } - private void _untrain(TagData tag, IEnumerable tokens) + public void Untrain(TagData tag, IEnumerable tokens) { foreach (var token in tokens) { From f9efc4db6d8bf7d83e4d475241a8b70987238472 Mon Sep 17 00:00:00 2001 From: Joey Novak Date: Thu, 26 Oct 2017 17:36:15 -0700 Subject: [PATCH 2/3] Whoops, forgot to make Train() a public method. 
--- BayesSharp/BayesClassifier.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/BayesSharp/BayesClassifier.cs b/BayesSharp/BayesClassifier.cs index 280220b..5a08746 100644 --- a/BayesSharp/BayesClassifier.cs +++ b/BayesSharp/BayesClassifier.cs @@ -219,7 +219,7 @@ public Dictionary Classify(string input) #region Private Methods - private void Train(TagData tag, IEnumerable tokens) + public void Train(TagData tag, IEnumerable tokens) { var tokenCount = 0; foreach (var token in tokens) From 6838ec4b4cd2f8cdc283dd28f480c2bd2dd7d189 Mon Sep 17 00:00:00 2001 From: Joey Novak Date: Thu, 26 Oct 2017 17:40:30 -0700 Subject: [PATCH 3/3] Refactored Classify into a separate method for tokenized vs not tokenized. --- BayesSharp/BayesClassifier.cs | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/BayesSharp/BayesClassifier.cs b/BayesSharp/BayesClassifier.cs index 5a08746..aa41f53 100644 --- a/BayesSharp/BayesClassifier.cs +++ b/BayesSharp/BayesClassifier.cs @@ -201,23 +201,28 @@ public void Untrain(TTagType tagId, string input) /// Input to be classified public Dictionary Classify(string input) { - var tokens = _tokenizer.Tokenize(input).ToList(); - var tags = CreateCacheAnsGetTags(); + var tokens = _tokenizer.Tokenize(input).ToList(); + return Classify(tokens); + } - var stats = new Dictionary(); + private Dictionary Classify(List tokens) + { + var tags = CreateCacheAnsGetTags(); - foreach (var tag in tags.Items) - { - var probs = GetProbabilities(tag.Value, tokens).ToList(); - if (probs.Count() != 0) - { - stats[tag.Key] = _combiner.Combine(probs); - } - } - return stats.OrderByDescending(s => s.Value).ToDictionary(s => s.Key, pair => pair.Value); - } + var stats = new Dictionary(); + + foreach (var tag in tags.Items) + { + var probs = GetProbabilities(tag.Value, tokens).ToList(); + if (probs.Count() != 0) + { + stats[tag.Key] = _combiner.Combine(probs); + } + } + return stats.OrderByDescending(s => 
s.Value).ToDictionary(s => s.Key, pair => pair.Value); + } - #region Private Methods + #region Private Methods public void Train(TagData tag, IEnumerable tokens) {