-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathtalkabout.py
executable file
·78 lines (66 loc) · 2.35 KB
/
talkabout.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#!/usr/bin/env python3
import json
import os
import re
import markovify
import nltk
import toml
ignored_users = ['cosnok', 'pinhook', 'quote_bot', 'tracer', 'sedbot']
regex = re.compile(b"\x01|\x1f|\x02|\x12|\x0f|\x16|\x03(?:\d{1,2}(?:,\d{1,2})?)?")
with open('/home/archangelic/irc/log', 'rb') as i:
lines = i.readlines()
with open('/home/pinhook/pinhook/optout') as o:
for x in o.readlines():
if x:
ignored_users.append(x)
class POSifiedText(markovify.NewlineText):
def word_split(self, sentence):
words = re.split(self.word_split_pattern, sentence)
words = [w for w in words if len(w) > 0]
words = [":-:".join(tag) for tag in nltk.pos_tag(words)]
return words
def word_join(self, words):
sentence = " ".join(word.split(":-:")[0] for word in words)
return sentence
def get_patterns():
patterns = toml.load('ebooks_patterns.toml')['patterns']
patterns = {k: re.compile(v, re.IGNORECASE) for k,v in patterns.items()}
return patterns
def make_sentence(sentence):
word_list = sentence.split()
if word_list[0].endswith(':'):
word_list.pop(0)
if word_list[0].startswith(('!', ',')):
word_list = []
sentence = ''.join([i + ' ' for i in word_list if not i.startswith('http')]).strip()
return sentence
def check_line(pattern, sentence):
s = sentence.split('\t')
try:
if s[1] not in ignored_users and not s[2].startswith('pinhook:'):
results = pattern.search(sentence)
if results:
sentence = make_sentence(s[2])
else:
sentence = None
return sentence
except:
return None
def make_model(sentences, filename):
corpus = ''.join([_+'\n' for _ in sentences])
model_json = POSifiedText(corpus).to_json()
with open(os.path.join('plugins', 'ebooks', filename), 'w') as j:
json.dump(model_json, j)
if __name__=='__main__':
patterns = get_patterns()
for p in patterns:
sentences = []
filename = p + '.json'
pattern = patterns[p]
for line in lines:
line = regex.sub(b'', line)
line = line.decode('UTF-8', errors='replace').replace('ACTION', '')
sentence = check_line(pattern, line)
if sentence:
sentences.append(sentence)
make_model(sentences, filename)