Skip to content

Commit

Permalink
Updated / refreshed with the latest monad code.
Browse files Browse the repository at this point in the history
  • Loading branch information
antononcube committed Aug 19, 2019
1 parent 0f7fee7 commit 3194242
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 32 deletions.
37 changes: 29 additions & 8 deletions MonadicProgramming/MonadicPhraseCompletion.m
Original file line number Diff line number Diff line change
Expand Up @@ -94,16 +94,35 @@ Mathematica is (C) Copyright 1988-2017 Wolfram Research, Inc.
https://github.com/antononcube/MathematicaForPrediction/blob/master/JavaTriesWithFrequencies.m .
*)

(**************************************************************)
(* Importing packages (if needed) *)
(**************************************************************)

If[Length[DownValues[StateMonadCodeGenerator`GenerateStateMonadCode]] == 0,
Import["https://raw.githubusercontent.com/antononcube/MathematicaForPrediction/master/MonadicProgramming/StateMonadCodeGenerator.m"]
];


(*BeginPackage["MonadicPhraseCompletion`"]*)
(** Exported symbols added here with SymbolName::usage *)

(*Begin["`Private`"]*)

(*Phrase Fill-in Monad (PhFillMon)*)

GenerateStateMonadCode["PhFillMon"]
(**************************************************************)
(* Generation *)
(**************************************************************)

(* Generate base functions of PhFillMon monad (through StMon.) *)

ClearAll[PhFillMonMakeNGramTrie]
GenerateStateMonadCode[ "PhFillMon", "FailureSymbol" -> $PhFillMonFailure, "StringContextNames" -> False ];

(**************************************************************)
(* Monad functions *)
(**************************************************************)

ClearAll[PhFillMonMakeNGramTrie];
PhFillMonMakeNGramTrie[___][None] := None;
PhFillMonMakeNGramTrie[indexPermutation_: {_Integer ..}][xs_, context_] :=
Block[{jTr, p, ip},
Expand All @@ -126,7 +145,7 @@ Mathematica is (C) Copyright 1988-2017 Wolfram Research, Inc.
];


ClearAll[PhFillMonPhraseSuggestionPaths]
ClearAll[PhFillMonPhraseSuggestionPaths];
Options[PhFillMonPhraseSuggestionPaths] = {Prepend -> True};
PhFillMonPhraseSuggestionPaths[phrase : {_String ..}, maxLength : (Automatic | _Integer) : Automatic][xs_, context_] :=
Block[{prependQ = TrueQ[OptionValue[PhFillMonPhraseSuggestionPaths, Prepend]], res, phrasePairs},
Expand Down Expand Up @@ -160,15 +179,17 @@ Mathematica is (C) Copyright 1988-2017 Wolfram Research, Inc.
];


ClearAll[PhFillMonPhraseSuggestions]
ClearAll[PhFillMonPhraseSuggestions];
PhFillMonPhraseSuggestions[phrase : {_String ...}][xs_, context_] :=
Block[{res},
If[Length[phrase] == 0, PhFillMon[{}, context],
If[ Length[phrase] == 0,

PhFillMon[Unit{}, context],
(*ELSE*)

res = PhFillMonBind[PhFillMon[xs, context], PhFillMonPhraseSuggestionPaths[phrase, Automatic]];
res = Fold[ PhFillMonBind, PhFillMonUnit[xs, context], { PhFillMonPhraseSuggestionPaths[phrase, Automatic], PhFillMonTakeValue } ];

res = Select[First[res], Length[#] == Length[context["indexPermutation"]] &];
res = Select[ res, Length[#] == Length[context["indexPermutation"]] &];

res =
Map[
Expand All @@ -180,7 +201,7 @@ Mathematica is (C) Copyright 1988-2017 Wolfram Research, Inc.
];


ClearAll[PhFillMonPredictedIndex]
ClearAll[PhFillMonPredictedIndex];
PhFillMonPredictedIndex[][xs_,context_] :=
Block[{},
If[! KeyExistsQ[context["indexPermutation"]],
Expand Down
57 changes: 33 additions & 24 deletions MonadicProgramming/MonadicTextAnalyzer.m
Original file line number Diff line number Diff line change
Expand Up @@ -92,17 +92,17 @@ The Java ARchive (JAR) files of [1,2] are used through JLink. Since full paths a


If[Length[DownValues[StateMonadCodeGenerator`GenerateStateMonadCode]] == 0,
Echo["StateMonadCodeGenerator.m", "Importing from GitHub:"]
Echo["StateMonadCodeGenerator.m", "Importing from GitHub:"];
Import["https://raw.githubusercontent.com/antononcube/MathematicaForPrediction/master/MonadicProgramming/StateMonadCodeGenerator.m"]
];

If[Length[DownValues[CrossTabulate`CrossTabulate]] == 0,
Echo["CrossTabulate.m", "Importing from GitHub:"]
Echo["CrossTabulate.m", "Importing from GitHub:"];
Import["https://raw.githubusercontent.com/antononcube/MathematicaForPrediction/master/CrossTabulate.m"]
];

If[Length[DownValues[OutlierIdentifiers`OutlierPosition]] == 0,
Echo["OutlierIdentifiers.m", "Importing from GitHub:"]
Echo["OutlierIdentifiers.m", "Importing from GitHub:"];
Import["https://raw.githubusercontent.com/antononcube/MathematicaForPrediction/master/OutlierIdentifiers.m"]
];

Expand All @@ -113,7 +113,7 @@ The Java ARchive (JAR) files of [1,2] are used through JLink. Since full paths a
];

If[Length[DownValues[Soundex`Soundex]] == 0,
Echo["Soundex.m", "Importing from GitHub:"]
Echo["Soundex.m", "Importing from GitHub:"];
Import["https://raw.githubusercontent.com/antononcube/MathematicaForPrediction/master/Misc/Soundex.m"]
];

Expand All @@ -135,7 +135,7 @@ The Java ARchive (JAR) files of [1,2] are used through JLink. Since full paths a

(* Generate base functions of TextAMon monad (through StMon.) *)

GenerateStateMonadCode[ "TextAMon", "StringContextNames" -> True ]
GenerateStateMonadCode[ "TextAMon", "FailureSymbol" -> $TextAMonFailure, "StringContextNames" -> True ];


(**************************************************************)
Expand All @@ -148,15 +148,16 @@ The Java ARchive (JAR) files of [1,2] are used through JLink. Since full paths a
];

If[ !StringQ[$POSTaggerPath],
$POSTaggerPath = "/Users/antonov/Java/StanfordPosTagger/stanford-postagger-2015-12-09";
(* $POSTaggerPath = "/Users/antonov/Java/StanfordPosTagger/stanford-postagger-full-2015-12-09";*)
$POSTaggerPath = "/Users/antonov/Java/StanfordPosTagger/stanford-postagger-full-2018-10-16";
];

If[ ( BooleanQ[$LoadJava] && $LoadJava ) || ! BooleanQ[$LoadJava],

Needs["JLink`"];
AddToClassPath[$JavaTriesWithFrequenciesPath];
AddToClassPath[$POSTaggerPath];
ReinstallJava[JVMArguments -> "-Xmx8g -Xms1g"];
ReinstallJava[JVMArguments -> "-Xmx12g -Xms2g"];

LoadJavaClass["java.util.Collections"];
LoadJavaClass["java.util.Arrays"];
Expand All @@ -176,13 +177,13 @@ The Java ARchive (JAR) files of [1,2] are used through JLink. Since full paths a
(**************************************************************)


ClearAll[JavaStanfordTagString]
ClearAll[JavaStanfordTagString];
(* JavaStanfordTagString[str] calls the tagString method of the Java object *)
(* JAVASTABFORDPOSTAGGER on the string str, wrapped in a J/Link JavaBlock. *)
(* JAVASTABFORDPOSTAGGER is not defined in this part of the file; presumably it is the *)
(* Stanford POS tagger instance created when Java is loaded -- TODO confirm. *)
(* The return value is whatever tagString returns (presumably the tagged text as a string). *)
JavaStanfordTagString[str_String] :=
JavaBlock[
JAVASTABFORDPOSTAGGER@tagString[str]
];

Clear[SeparatePOSTags]
Clear[SeparatePOSTags];
SeparatePOSTags[tagged_String] :=
Block[{},
StringCases[tagged,
Expand Down Expand Up @@ -218,24 +219,31 @@ The Java ARchive (JAR) files of [1,2] are used through JLink. Since full paths a
(* Text analysis functions *)
(**************************************************************)

ClearAll[TextAMonSentences]
ClearAll[TextAMonSentences];

(* Default options of TextAMonSentences. *)
(* "Splitter" is a one-argument function that takes the text and returns a list of strings. *)
(* The default splits the text at the delimiters ".", "!", "?", "...", ";" and keeps only *)
(* the pieces that have at least one character. *)
Options[TextAMonSentences] =
{
"Splitter" -> Function[{text}, Select[StringSplit[text, {".", "!", "?", "...", ";"}], StringLength[#] >= 1 &] ]
};

TextAMonSentences[___][None] := None;
TextAMonSentences[][xs_, context_] :=
Block[{text, sentences},

TextAMonSentences[ opts:OptionsPattern[] ][xs_, context_] :=
Block[{text, sentences, splitFunc = OptionValue[TextAMonSentences, "Splitter"]},

Which[

StringQ[xs],
sentences = TextSentences[ xs ];
sentences = splitFunc[ xs ];
TextAMon[ sentences, Join[ context, <|"text"->xs, "sentences"->sentences|> ] ],

VectorQ[xs,StringQ],
text = StringJoin[Riffle[xs," "]];
sentences = TextSentences[ text ];
sentences = splitFunc[ text ];
TextAMon[ sentences, Join[ context, <|"text"->text, "sentences"->sentences|> ] ],

KeyExistsQ[context, "text"],
sentences = TextSentences[ context["text"] ];
sentences = splitFunc[ context["text"] ];
TextAMon[ sentences, Join[ context, <|"sentences"->sentences|> ] ],

True,
Expand All @@ -245,7 +253,7 @@ The Java ARchive (JAR) files of [1,2] are used through JLink. Since full paths a
];


ClearAll[TextAMonWords]
ClearAll[TextAMonWords];

Options[TextAMonWords] = { Method-> TextWords };

Expand All @@ -264,7 +272,7 @@ The Java ARchive (JAR) files of [1,2] are used through JLink. Since full paths a
];


ClearAll[TextAMonComputePOSTags]
ClearAll[TextAMonComputePOSTags];

TextAMonComputePOSTags[___][None] := None;
TextAMonComputePOSTags[args___][xs_, context_] :=
Expand All @@ -274,20 +282,21 @@ The Java ARchive (JAR) files of [1,2] are used through JLink. Since full paths a
];


ClearAll[TextAMonPOSWordsTrie]
ClearAll[TextAMonPOSWordsTrie];

(* Applied to None, return None unchanged regardless of the arguments given. *)
(* NOTE(review): the monad is generated with "FailureSymbol" -> $TextAMonFailure; confirm that *)
(* None is still the value this pass-through rule should match. *)
TextAMonPOSWordsTrie[___][None] := None;
(* TextAMonPOSWordsTrie[separator][xs, context] delegates to TextAMonTagWordsTrie[separator], *)
(* binding it to the monad value TextAMon[xs, context]. The separator defaults to "®". *)
TextAMonPOSWordsTrie[separator_String:"®"][xs_,context_] :=
TextAMonBind[ TextAMon[xs,context], TextAMonTagWordsTrie[separator] ];


ClearAll[TextAMonComputeTagWordPairs]
ClearAll[TextAMonComputeTagWordPairs];

Options[TextAMonComputeTagWordPairs] = {"SentenceToTagWordPairsFunction" -> "StandfordTagger"};
Options[TextAMonComputeTagWordPairs] = { "SentenceToTagWordPairsFunction" -> "StandfordTagger" };

TextAMonComputeTagWordPairs[___][None] := None;

TextAMonComputeTagWordPairs[opts : OptionsPattern[]][xs_, context_] :=
Block[{sentences, tagWordPairs, res, taggerFunc},
Block[{sentences, tagWordPairs, taggerFunc},

taggerFunc = OptionValue[TextAMonComputeTagWordPairs, "SentenceToTagWordPairsFunction"];

Expand Down Expand Up @@ -318,7 +327,7 @@ The Java ARchive (JAR) files of [1,2] are used through JLink. Since full paths a
];


ClearAll[TextAMonTagWordsTrie]
ClearAll[TextAMonTagWordsTrie];

TextAMonTagWordsTrie[___][None] := None;
TextAMonTagWordsTrie[separator_String: "®"][xs_, context_] :=
Expand Down Expand Up @@ -424,7 +433,7 @@ The Java ARchive (JAR) files of [1,2] are used through JLink. Since full paths a
];


ClearAll[TextAMonMakeWordTrie]
ClearAll[TextAMonMakeWordTrie];

TextAMonMakeWordTrie[___][None] := None;
TextAMonMakeWordTrie[ separator_String:"®" ][xs_, context_] :=
Expand All @@ -449,7 +458,7 @@ The Java ARchive (JAR) files of [1,2] are used through JLink. Since full paths a
];


ClearAll[TextAMonMakeNGramTrie]
ClearAll[TextAMonMakeNGramTrie];

TextAMonMakeNGramTrie[___][None] := None;
TextAMonMakeNGramTrie[___][xs_, context_] :=
Expand Down

0 comments on commit 3194242

Please sign in to comment.