From 3194242bc56edd0083401028227033b64d6193f7 Mon Sep 17 00:00:00 2001 From: antonocube Date: Mon, 19 Aug 2019 17:25:44 -0400 Subject: [PATCH] Updated / refreshed with the latest monad code. --- MonadicProgramming/MonadicPhraseCompletion.m | 37 ++++++++++--- MonadicProgramming/MonadicTextAnalyzer.m | 57 +++++++++++--------- 2 files changed, 62 insertions(+), 32 deletions(-) diff --git a/MonadicProgramming/MonadicPhraseCompletion.m b/MonadicProgramming/MonadicPhraseCompletion.m index a2ae507d..48d71e35 100644 --- a/MonadicProgramming/MonadicPhraseCompletion.m +++ b/MonadicProgramming/MonadicPhraseCompletion.m @@ -94,6 +94,15 @@ Mathematica is (C) Copyright 1988-2017 Wolfram Research, Inc. https://github.com/antononcube/MathematicaForPrediction/blob/master/JavaTriesWithFrequencies.m . *) +(**************************************************************) +(* Importing packages (if needed) *) +(**************************************************************) + +If[Length[DownValues[StateMonadCodeGenerator`GenerateStateMonadCode]] == 0, + Import["https://raw.githubusercontent.com/antononcube/MathematicaForPrediction/master/MonadicProgramming/StateMonadCodeGenerator.m"] +]; + + (*BeginPackage["MonadicPhraseCompletion`"]*) (** Exported symbols added here with SymbolName::usage *) @@ -101,9 +110,19 @@ Mathematica is (C) Copyright 1988-2017 Wolfram Research, Inc. (*Phrase Fill-in Monad (PhFillMon)*) -GenerateStateMonadCode["PhFillMon"] +(**************************************************************) +(* Generation *) +(**************************************************************) + +(* Generate base functions of PhFillMon monad (through StMon.) 
*) -ClearAll[PhFillMonMakeNGramTrie] +GenerateStateMonadCode[ "PhFillMon", "FailureSymbol" -> $PhFillMonFailure, "StringContextNames" -> False ]; + +(**************************************************************) +(* Generation *) +(**************************************************************) + +ClearAll[PhFillMonMakeNGramTrie]; PhFillMonMakeNGramTrie[___][None] := None; PhFillMonMakeNGramTrie[indexPermutation_: {_Integer ..}][xs_, context_] := Block[{jTr, p, ip}, @@ -126,7 +145,7 @@ Mathematica is (C) Copyright 1988-2017 Wolfram Research, Inc. ]; -ClearAll[PhFillMonPhraseSuggestionPaths] +ClearAll[PhFillMonPhraseSuggestionPaths]; Options[PhFillMonPhraseSuggestionPaths] = {Prepend -> True}; PhFillMonPhraseSuggestionPaths[phrase : {_String ..}, maxLength : (Automatic | _Integer) : Automatic][xs_, context_] := Block[{prependQ = TrueQ[OptionValue[PhFillMonPhraseSuggestionPaths, Prepend]], res, phrasePairs}, @@ -160,15 +179,17 @@ Mathematica is (C) Copyright 1988-2017 Wolfram Research, Inc. ]; -ClearAll[PhFillMonPhraseSuggestions] +ClearAll[PhFillMonPhraseSuggestions]; PhFillMonPhraseSuggestions[phrase : {_String ...}][xs_, context_] := Block[{res}, - If[Length[phrase] == 0, PhFillMon[{}, context], + If[ Length[phrase] == 0, + + PhFillMonUnit[{}, context], (*ELSE*) - res = PhFillMonBind[PhFillMon[xs, context], PhFillMonPhraseSuggestionPaths[phrase, Automatic]]; + res = Fold[ PhFillMonBind, PhFillMonUnit[xs, context], { PhFillMonPhraseSuggestionPaths[phrase, Automatic], PhFillMonTakeValue } ]; - res = Select[First[res], Length[#] == Length[context["indexPermutation"]] &]; + res = Select[ res, Length[#] == Length[context["indexPermutation"]] &]; res = Map[ @@ -180,7 +201,7 @@ Mathematica is (C) Copyright 1988-2017 Wolfram Research, Inc. ]; -ClearAll[PhFillMonPredictedIndex] +ClearAll[PhFillMonPredictedIndex]; PhFillMonPredictedIndex[][xs_,context_] := Block[{}, If[! 
KeyExistsQ[context["indexPermutation"]], diff --git a/MonadicProgramming/MonadicTextAnalyzer.m b/MonadicProgramming/MonadicTextAnalyzer.m index 5a1f715b..b39046dd 100644 --- a/MonadicProgramming/MonadicTextAnalyzer.m +++ b/MonadicProgramming/MonadicTextAnalyzer.m @@ -92,17 +92,17 @@ The Java ARchive (JAR) files of [1,2] are used through JLink. Since full paths a If[Length[DownValues[StateMonadCodeGenerator`GenerateStateMonadCode]] == 0, - Echo["StateMonadCodeGenerator.m", "Importing from GitHub:"] + Echo["StateMonadCodeGenerator.m", "Importing from GitHub:"]; Import["https://raw.githubusercontent.com/antononcube/MathematicaForPrediction/master/MonadicProgramming/StateMonadCodeGenerator.m"] ]; If[Length[DownValues[CrossTabulate`CrossTabulate]] == 0, - Echo["CrossTabulate.m", "Importing from GitHub:"] + Echo["CrossTabulate.m", "Importing from GitHub:"]; Import["https://raw.githubusercontent.com/antononcube/MathematicaForPrediction/master/CrossTabulate.m"] ]; If[Length[DownValues[OutlierIdentifiers`OutlierPosition]] == 0, - Echo["OutlierIdentifiers.m", "Importing from GitHub:"] + Echo["OutlierIdentifiers.m", "Importing from GitHub:"]; Import["https://raw.githubusercontent.com/antononcube/MathematicaForPrediction/master/OutlierIdentifiers.m"] ]; @@ -113,7 +113,7 @@ The Java ARchive (JAR) files of [1,2] are used through JLink. Since full paths a ]; If[Length[DownValues[Soundex`Soundex]] == 0, - Echo["Soundex.m", "Importing from GitHub:"] + Echo["Soundex.m", "Importing from GitHub:"]; Import["https://raw.githubusercontent.com/antononcube/MathematicaForPrediction/master/Misc/Soundex.m"] ]; @@ -135,7 +135,7 @@ The Java ARchive (JAR) files of [1,2] are used through JLink. Since full paths a (* Generate base functions of TextAMon monad (through StMon.) 
*) -GenerateStateMonadCode[ "TextAMon", "StringContextNames" -> True ] +GenerateStateMonadCode[ "TextAMon", "FailureSymbol" -> $TextAMonFailure, "StringContextNames" -> True ]; (**************************************************************) @@ -148,7 +148,8 @@ The Java ARchive (JAR) files of [1,2] are used through JLink. Since full paths a ]; If[ !StringQ[$POSTaggerPath], - $POSTaggerPath = "/Users/antonov/Java/StanfordPosTagger/stanford-postagger-2015-12-09"; +(* $POSTaggerPath = "/Users/antonov/Java/StanfordPosTagger/stanford-postagger-full-2015-12-09";*) + $POSTaggerPath = "/Users/antonov/Java/StanfordPosTagger/stanford-postagger-full-2018-10-16"; ]; If[ ( BooleanQ[$LoadJava] && $LoadJava ) || ! BooleanQ[$LoadJava], @@ -156,7 +157,7 @@ The Java ARchive (JAR) files of [1,2] are used through JLink. Since full paths a Needs["JLink`"]; AddToClassPath[$JavaTriesWithFrequenciesPath]; AddToClassPath[$POSTaggerPath]; - ReinstallJava[JVMArguments -> "-Xmx8g -Xms1g"]; + ReinstallJava[JVMArguments -> "-Xmx12g -Xms2g"]; LoadJavaClass["java.util.Collections"]; LoadJavaClass["java.util.Arrays"]; @@ -176,13 +177,13 @@ The Java ARchive (JAR) files of [1,2] are used through JLink. Since full paths a (**************************************************************) -ClearAll[JavaStanfordTagString] +ClearAll[JavaStanfordTagString]; JavaStanfordTagString[str_String] := JavaBlock[ JAVASTABFORDPOSTAGGER@tagString[str] ]; -Clear[SeparatePOSTags] +Clear[SeparatePOSTags]; SeparatePOSTags[tagged_String] := Block[{}, StringCases[tagged, @@ -218,24 +219,31 @@ The Java ARchive (JAR) files of [1,2] are used through JLink. 
Since full paths a (* Text analysis functions *) (**************************************************************) -ClearAll[TextAMonSentences] +ClearAll[TextAMonSentences]; + +Options[TextAMonSentences] = + { + "Splitter" -> Function[{text}, Select[StringSplit[text, {".", "!", "?", "...", ";"}], StringLength[#] >= 1 &] ] + }; TextAMonSentences[___][None] := None; -TextAMonSentences[][xs_, context_] := - Block[{text, sentences}, + +TextAMonSentences[ opts:OptionsPattern[] ][xs_, context_] := + Block[{text, sentences, splitFunc = OptionValue[TextAMonSentences, "Splitter"]}, + Which[ StringQ[xs], - sentences = TextSentences[ xs ]; + sentences = splitFunc[ xs ]; TextAMon[ sentences, Join[ context, <|"text"->xs, "sentences"->sentences|> ] ], VectorQ[xs,StringQ], text = StringJoin[Riffle[xs," "]]; - sentences = TextSentences[ text ]; + sentences = splitFunc[ text ]; TextAMon[ sentences, Join[ context, <|"text"->text, "sentences"->sentences|> ] ], KeyExistsQ[context, "text"], - sentences = TextSentences[ context["text"] ]; + sentences = splitFunc[ context["text"] ]; TextAMon[ sentences, Join[ context, <|"sentences"->sentences|> ] ], True, @@ -245,7 +253,7 @@ The Java ARchive (JAR) files of [1,2] are used through JLink. Since full paths a ]; -ClearAll[TextAMonWords] +ClearAll[TextAMonWords]; Options[TextAMonWords] = { Method-> TextWords }; @@ -264,7 +272,7 @@ The Java ARchive (JAR) files of [1,2] are used through JLink. Since full paths a ]; -ClearAll[TextAMonComputePOSTags] +ClearAll[TextAMonComputePOSTags]; TextAMonComputePOSTags[___][None] := None; TextAMonComputePOSTags[args___][xs_, context_] := @@ -274,20 +282,21 @@ The Java ARchive (JAR) files of [1,2] are used through JLink. 
Since full paths a ]; -ClearAll[TextAMonPOSWordsTrie] +ClearAll[TextAMonPOSWordsTrie]; TextAMonPOSWordsTrie[___][None] := None; TextAMonPOSWordsTrie[separator_String:"®"][xs_,context_] := TextAMonBind[ TextAMon[xs,context], TextAMonTagWordsTrie[separator] ]; -ClearAll[TextAMonComputeTagWordPairs] +ClearAll[TextAMonComputeTagWordPairs]; -Options[TextAMonComputeTagWordPairs] = {"SentenceToTagWordPairsFunction" -> "StandfordTagger"}; +Options[TextAMonComputeTagWordPairs] = { "SentenceToTagWordPairsFunction" -> "StandfordTagger" }; TextAMonComputeTagWordPairs[___][None] := None; + TextAMonComputeTagWordPairs[opts : OptionsPattern[]][xs_, context_] := - Block[{sentences, tagWordPairs, res, taggerFunc}, + Block[{sentences, tagWordPairs, taggerFunc}, taggerFunc = OptionValue[TextAMonComputeTagWordPairs, "SentenceToTagWordPairsFunction"]; @@ -318,7 +327,7 @@ The Java ARchive (JAR) files of [1,2] are used through JLink. Since full paths a ]; -ClearAll[TextAMonTagWordsTrie] +ClearAll[TextAMonTagWordsTrie]; TextAMonTagWordsTrie[___][None] := None; TextAMonTagWordsTrie[separator_String: "®"][xs_, context_] := @@ -424,7 +433,7 @@ The Java ARchive (JAR) files of [1,2] are used through JLink. Since full paths a ]; -ClearAll[TextAMonMakeWordTrie] +ClearAll[TextAMonMakeWordTrie]; TextAMonMakeWordTrie[___][None] := None; TextAMonMakeWordTrie[ separator_String:"®" ][xs_, context_] := @@ -449,7 +458,7 @@ The Java ARchive (JAR) files of [1,2] are used through JLink. Since full paths a ]; -ClearAll[TextAMonMakeNGramTrie] +ClearAll[TextAMonMakeNGramTrie]; TextAMonMakeNGramTrie[___][None] := None; TextAMonMakeNGramTrie[___][xs_, context_] :=