-
Notifications
You must be signed in to change notification settings - Fork 7
/
isspam-trie.c
110 lines (91 loc) · 2.53 KB
/
isspam-trie.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <string.h>
#include <ctype.h>
#define NPAT 6 // number of entries in pat[] and patlen[]
#define NSTART 1024 // capacity of one trie node's patterns[] list: the most patterns that may share the same first two characters
char *pat[NPAT]; // the spam patterns, as NUL-terminated strings
int patlen[NPAT]; // patlen[i] is the length of pat[i]
// Trie: one node of the pattern index. The index is keyed by character value:
// children[c] is the node reached by character c, or NULL when there is none.
typedef struct Trie Trie;
struct Trie
{
// Indices into pat[] of the patterns filed under this node; only the first
// patternSize entries are meaningful.
int patterns[NSTART];
// Number of entries currently stored in patterns[].
size_t patternSize;
// One slot per possible unsigned char value.
Trie *children[UCHAR_MAX + 1];
};
// initTrie: allocate an empty trie node.
//
// The returned node has no patterns (patternSize == 0) and every child slot
// set to NULL. The contents of patterns[] are left unspecified; only the first
// patternSize entries of a node are ever meaningful.
//
// Returns the new node, or NULL when the allocation fails. The caller owns the
// node.
Trie *initTrie(void)
{
    Trie *trie = malloc(sizeof *trie);
    if (trie == NULL)
    {
        return NULL;
    }

    trie->patternSize = 0;
    // Assign NULL explicitly instead of relying on zeroed memory being a
    // valid null pointer representation.
    for (size_t i = 0; i <= UCHAR_MAX; ++i)
    {
        trie->children[i] = NULL;
    }
    return trie;
}
Trie *buildPatternTrie()
{
Trie *trie = initTrie();
unsigned char c0, c1;
for (int i = 0; i < NPAT; ++i)
{
c0 = tolower(pat[i][0]);
c1 = tolower(pat[i][1]);
Trie *child = NULL;
if (trie->children[c0] == NULL)
{
child = initTrie();
trie->children[c0] = child;
}
if (trie->children[c0]->children[c1] == NULL)
{
child = initTrie();
trie->children[c0]->children[c1] = child;
}
child = trie->children[c0]->children[c1];
child->patterns[child->patternSize++] = i;
}
return trie;
}
// startsWithIgnoreCase: return 1 when text begins with pattern, comparing
// characters case-insensitively, and 0 otherwise. The scan stops at the first
// mismatch, which includes reaching the end of text, so it never reads past
// the terminator of either string.
static int startsWithIgnoreCase(const char *text, const char *pattern)
{
    for (; *pattern != '\0'; ++text, ++pattern)
    {
        if (tolower((unsigned char)*text) != tolower((unsigned char)*pattern))
        {
            return 0;
        }
    }
    return 1;
}

// isspam: test mesg for occurrence of any pat
//
// trie is the index returned by buildPatternTrie(); mesg is a NUL-terminated
// string. At every position of mesg the two characters found there are
// lower-cased and looked up in trie; each pattern filed under that pair is
// then compared against the text at that position, ignoring case so that the
// comparison agrees with the case-folded lookup. The comparison runs to the
// pattern's own terminator and does not depend on patlen[].
//
// Returns 1 on the first match, after printing a "spam: match for ..." line
// to stdout, and 0 when nothing matches or when trie or mesg is NULL.
int isspam(Trie *trie, char *mesg)
{
    if (trie == NULL || mesg == NULL)
    {
        return 0;
    }

    // A lookup needs two characters, so stop one short of the terminator.
    for (size_t j = 0; mesg[j] != '\0' && mesg[j + 1] != '\0'; ++j)
    {
        // <ctype.h> functions require a value representable as unsigned char.
        unsigned char c0 = (unsigned char)tolower((unsigned char)mesg[j]);
        unsigned char c1 = (unsigned char)tolower((unsigned char)mesg[j + 1]);

        Trie *first = trie->children[c0];
        if (first == NULL)
        {
            continue;
        }
        Trie *bucket = first->children[c1];
        if (bucket == NULL)
        {
            continue;
        }

        for (size_t i = 0; i < bucket->patternSize; ++i)
        {
            int k = bucket->patterns[i];
            if (pat[k] != NULL && startsWithIgnoreCase(mesg + j, pat[k]))
            {
                printf("spam: match for '%s'\n", pat[k]);
                return 1;
            }
        }
    }
    return 0;
}
// main: install the sample patterns, build the pattern index, then report for
// each sample message whether isspam() flags it.
int main()
{
    char *samplePatterns[] = {
        "buy!",
        "big bucks",
        "best pictures",
        "Pretty girls",
        "beautiful woman",
        "big boob",
    };
    char *sampleMessages[] = {
        "buy! now!",
        "there are lots of pRetty girls, come on!",
        "TOP secret",
    };
    size_t patternCount = sizeof samplePatterns / sizeof samplePatterns[0];
    size_t messageCount = sizeof sampleMessages / sizeof sampleMessages[0];

    for (size_t p = 0; p < patternCount; ++p)
    {
        pat[p] = samplePatterns[p];
    }

    Trie *trie = buildPatternTrie();

    for (size_t m = 0; m < messageCount; ++m)
    {
        char *mesg = sampleMessages[m];
        // isspam() may print its own "match" line; evaluate it first so that
        // line still appears ahead of the verdict line.
        const char *verdict = (isspam(trie, mesg) == 1) ? "yes" : "no";
        printf("'%s' is spam? %s\n", mesg, verdict);
    }
    return 0;
}