-
Notifications
You must be signed in to change notification settings - Fork 0
/
SentencesParserTask.cs
60 lines (56 loc) · 2.12 KB
/
SentencesParserTask.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
using System;
using System.Collections.Generic;
using System.Text;
namespace TextAnalysis
{
/// <summary>
/// Splits raw text into sentences and each sentence into lower-cased words.
/// </summary>
static class SentencesParserTask
{
    /// <summary>Characters that terminate a sentence.</summary>
    private static readonly char[] SentenceSeparators = { '.', '!', '?', ';', ':', '(', ')' };

    /// <summary>
    /// Parses <paramref name="text"/> into a list of sentences, each represented
    /// as the list of its words in order of appearance.
    /// </summary>
    /// <param name="text">Text to parse. Must not be null.</param>
    /// <returns>
    /// One inner list per sentence that contains at least one word. A word is a
    /// maximal run of letters and apostrophes, converted to lower case using the
    /// invariant culture. Sentences without any word are omitted, so the result
    /// may be empty but never contains an empty inner list.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="text"/> is null.
    /// </exception>
    public static List<List<string>> ParseSentences(string text)
    {
        if (text == null)
        {
            throw new ArgumentNullException(nameof(text));
        }

        string[] rawSentences = text.Split(SentenceSeparators, StringSplitOptions.RemoveEmptyEntries);
        var sentences = new List<List<string>>(rawSentences.Length);

        foreach (string rawSentence in rawSentences)
        {
            List<string> words = ExtractWords(rawSentence);

            // A fragment such as " 42, " contains no letters; skip it instead of
            // adding it and filtering the list in a second pass.
            if (words.Count > 0)
            {
                sentences.Add(words);
            }
        }

        return sentences;
    }

    /// <summary>
    /// Extracts the words of a single sentence fragment.
    /// </summary>
    /// <param name="sentence">Sentence text without sentence separators.</param>
    /// <returns>
    /// The lower-cased words in order; empty when the fragment has no letters
    /// or apostrophes.
    /// </returns>
    private static List<string> ExtractWords(string sentence)
    {
        var words = new List<string>();
        var currentWord = new StringBuilder();

        // Invariant lower-casing keeps the output independent of the current
        // thread culture (e.g. "I" must not become a dotless "ı" under tr-TR).
        foreach (char symbol in sentence.ToLowerInvariant())
        {
            if (char.IsLetter(symbol) || symbol == '\'')
            {
                currentWord.Append(symbol);
            }
            else
            {
                // Any other character (digit, space, comma, ...) ends the word.
                FlushWord(currentWord, words);
            }
        }

        // The sentence may end in the middle of a word.
        FlushWord(currentWord, words);
        return words;
    }

    /// <summary>
    /// Appends the accumulated word to <paramref name="words"/> and resets the
    /// buffer; does nothing when the buffer is empty.
    /// </summary>
    private static void FlushWord(StringBuilder currentWord, List<string> words)
    {
        if (currentWord.Length == 0)
        {
            return;
        }

        words.Add(currentWord.ToString());
        currentWord.Clear();
    }
}
}