Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
zqhZY committed May 13, 2018
1 parent 3d6cdc4 commit 6c88b6f
Show file tree
Hide file tree
Showing 32 changed files with 46,370 additions and 0 deletions.
26 changes: 26 additions & 0 deletions INSTALL.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@

## Install dependencies

#### python3
Install Python 3, or upgrade an existing installation to Python 3.

#### Install the Chinese version of Rasa NLU
```
git clone https://github.com/crownpku/Rasa_NLU_Chi.git
cd Rasa_NLU_Chi
pip install -r requirements.txt
python setup.py install
```

#### install sklearn and MITIE

```
pip install -U scikit-learn sklearn-crfsuite
pip install git+https://github.com/mit-nlp/MITIE.git
```

#### install rasa_core

```
pip install rasa_core
```
Empty file added __init__.py
Empty file.
170 changes: 170 additions & 0 deletions bot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
# -*- coding: UTF-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import argparse
import logging
import warnings

from rasa_core.actions import Action
from rasa_core.agent import Agent
from rasa_core.channels.console import ConsoleInputChannel
from rasa_core.events import SlotSet
from rasa_core.interpreter import RasaNLUInterpreter
from rasa_core.policies.keras_policy import KerasPolicy
from rasa_core.policies.memoization import MemoizationPolicy

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Keywords for the items the bot can look up; extract_item() matches slot
# values against these by substring.
support_search = ["消费", "流量"]


def extract_item(item):
    """
    Map a raw slot value onto one of the supported search items.

    This exists only to compensate for scarce training data: the slot may
    hold a longer phrase that merely contains a supported keyword.

    :param item: item found in the tracker, e.g. "流量" or "查流量"; may be None
    :return: the first entry of ``support_search`` contained in ``item``,
        or None when ``item`` is None or nothing matches
    """
    if item is None:
        return None
    # Lazily scan the supported keywords and stop at the first hit.
    matches = (keyword for keyword in support_search if keyword in item)
    return next(matches, None)


class ActionSearchConsume(Action):
    """Custom action that answers a consumption or data-usage query."""

    def name(self):
        """Return the name this action is registered under."""
        return 'action_search_consume'

    def run(self, dispatcher, tracker, domain):
        """
        Reply to a query using the "item" and "time" slots of the tracker.

        Sends a usage hint when the item is not supported, asks for the
        month when the time slot is empty, and otherwise utters a canned
        answer built from both slots.

        :param dispatcher: object used to send messages back to the user
        :param tracker: conversation tracker the slots are read from
        :param domain: unused
        :return: always an empty list
        """
        item = extract_item(tracker.get_slot("item"))
        if item is None:
            for hint in ("您好,我现在只会查话费和流量",
                         "你可以这样问我:“帮我查话费”"):
                dispatcher.utter_message(hint)
            return []

        period = tracker.get_slot("time")
        if period is None:
            dispatcher.utter_message("您想查询哪个月的消费?")
            return []

        # A real implementation would query a database here, keyed by item
        # and time; the time format may need normalizing first.
        dispatcher.utter_message("好,请稍等")
        if item == "流量":
            answer = "您好,您{}共使用{}二百八十兆,剩余三十兆。".format(period, item)
        else:
            answer = "您好,您{}共消费二十八元。".format(period)
        dispatcher.utter_message(answer)
        return []


class MobilePolicy(KerasPolicy):
    """Keras policy whose network is Masking -> LSTM -> Dense -> softmax."""

    def model_architecture(self, num_features, num_actions, max_history_len):
        """
        Assemble and compile the Keras model used by this policy.

        :param num_features: size of the last dimension of the input batch
        :param num_actions: output size of the final dense layer
        :param max_history_len: size of the middle dimension of the input batch
        :return: the compiled ``Sequential`` model
        """
        from keras.layers import LSTM, Activation, Masking, Dense
        from keras.models import Sequential

        lstm_units = 32  # size of hidden layer in LSTM
        input_shape = (None, max_history_len, num_features)

        model = Sequential()
        layers = (
            Masking(-1, batch_input_shape=input_shape),
            LSTM(lstm_units, batch_input_shape=input_shape),
            Dense(input_dim=lstm_units, output_dim=num_actions),
            Activation("softmax"),
        )
        for layer in layers:
            model.add(layer)

        model.compile(
            loss="categorical_crossentropy",
            optimizer="adam",
            metrics=["accuracy"],
        )

        logger.debug(model.summary())
        return model


def train_dialogue(domain_file="mobile_domain.yml",
                   model_path="models/dialogue",
                   training_data_file="data/mobile_story.md"):
    """
    Train the dialogue model on the story file and persist it.

    :param domain_file: path of the domain definition passed to the agent
    :param model_path: directory the trained agent is persisted to
    :param training_data_file: path of the stories used for training
    :return: the trained agent
    """
    policies = [MemoizationPolicy(), MobilePolicy()]
    agent = Agent(domain_file, policies=policies)

    training_options = dict(
        max_history=2,
        epochs=200,
        batch_size=16,
        augmentation_factor=50,
        validation_split=0.2,
    )
    agent.train(training_data_file, **training_options)

    agent.persist(model_path)
    return agent


def train_nlu():
    """
    Train the NLU model and persist it as project "ivr", model "demo".

    :return: whatever ``Trainer.persist`` returns (bound to the name
        ``model_directory`` in the original code)
    """
    from rasa_nlu.converters import load_data
    from rasa_nlu.config import RasaNLUConfig
    from rasa_nlu.model import Trainer

    examples = load_data("data/mobile_nlu_data.json")
    nlu_config = RasaNLUConfig("mobile_nlu_model_config.json")
    trainer = Trainer(nlu_config)
    trainer.train(examples)

    return trainer.persist("models/", project_name="ivr", fixed_model_name="demo")


def run_ivrbot_online(input_channel=None,
                      interpreter=None,
                      domain_file="mobile_domain.yml",
                      training_data_file="data/mobile_story.md"):
    """
    Start an online (interactive) training session for the dialogue model.

    The channel and interpreter defaults are created inside the function
    rather than in the signature. Default argument values are evaluated once,
    when the ``def`` statement runs, so building them in the signature would
    construct the NLU interpreter for "models/ivr/demo" every time this
    module is imported, even for tasks that never call this function.

    :param input_channel: channel used to talk to the trainer; a new
        ``ConsoleInputChannel`` is used when None
    :param interpreter: NLU interpreter; when None, a ``RasaNLUInterpreter``
        for "models/ivr/demo" is created
    :param domain_file: path of the domain definition passed to the agent
    :param training_data_file: path of the stories used as a starting point
    :return: the agent after online training
    """
    if input_channel is None:
        input_channel = ConsoleInputChannel()
    if interpreter is None:
        interpreter = RasaNLUInterpreter("models/ivr/demo")

    agent = Agent(domain_file,
                  policies=[MemoizationPolicy(), KerasPolicy()],
                  interpreter=interpreter)

    agent.train_online(training_data_file,
                       input_channel=input_channel,
                       max_history=2,
                       batch_size=50,
                       epochs=200,
                       max_training_samples=300)

    return agent


def run(serve_forever=True):
    """
    Load the persisted dialogue agent and optionally serve it on the console.

    :param serve_forever: when true, hand a ``ConsoleInputChannel`` to the
        agent before returning
    :return: the loaded agent
    """
    nlu_interpreter = RasaNLUInterpreter("models/ivr/demo")
    agent = Agent.load("models/dialogue", interpreter=nlu_interpreter)

    if not serve_forever:
        return agent

    agent.handle_channel(ConsoleInputChannel())
    return agent


if __name__ == "__main__":
    logging.basicConfig(level="INFO")

    parser = argparse.ArgumentParser(description="starts the bot")
    parser.add_argument(
        "task",
        choices=["train-nlu", "train-dialogue", "run", "online_train"],
        help="what the bot should do - e.g. run or train?")
    task = parser.parse_args().task

    # Map the first script parameter to the function that carries it out.
    handlers = {
        "train-nlu": train_nlu,
        "train-dialogue": train_dialogue,
        "run": run,
        "online_train": run_ivrbot_online,
    }
    handler = handlers.get(task)
    if handler is not None:
        handler()
    else:
        warnings.warn("Need to pass either 'train-nlu', 'train-dialogue' or "
                      "'run' to use the script.")
        exit(1)
117 changes: 117 additions & 0 deletions chat_detection/corpus.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
import os


# Presumably an upper bound on the character length of one dialog line; it is
# not referenced anywhere in the visible code - TODO confirm where it is used.
DIALOG_MAXIMUM_CHARACTER_LENGTH = 400


class CorpusObject(list):
    """
    A list subclass that can carry extra attributes, so the collections
    of data returned by the corpus reader can be annotated.
    """

    def __init__(self, *args, **kwargs):
        """
        Behave like ``list``: an optional first positional argument supplies
        the initial items. Further positional and keyword arguments are
        accepted but ignored.
        """
        # Forward at most the first positional argument to list.__init__.
        initial = args[:1]
        super(CorpusObject, self).__init__(*initial)

        # Category labels attached to this collection; empty until assigned.
        self.categories = []


class Corpus(object):
    """Locate corpus files on disk and load them as ``CorpusObject`` lists."""

    def __init__(self):
        """Remember the ``data`` directory that sits next to this module."""
        current_directory = os.path.dirname(os.path.abspath(__file__))
        self.data_directory = os.path.join(current_directory, 'data')

    def get_file_path(self, dotted_path, extension='json'):
        """
        Translate a dotted corpus path into a file system path.

        :param dotted_path: either a real path (anything containing a path
            separator, returned unchanged) or a dotted name such as
            "chatterbot.corpus.english"
        :param extension: extension appended when a file with that
            extension exists at the resolved location
        :return: the resolved path
        """
        # If the operating system's path separator (or '/') is in the string,
        # assume it already is a valid file path.
        if os.sep in dotted_path or '/' in dotted_path:
            return dotted_path

        parts = dotted_path.split('.')
        if parts[0] == 'chatterbot':
            # Drop the package name, then replace the component that follows
            # it with the bundled data directory.
            parts.pop(0)
            parts[0] = self.data_directory

        corpus_path = os.path.join(*parts)

        candidate = '{}.{}'.format(corpus_path, extension)
        if os.path.exists(candidate):
            return candidate

        return corpus_path

    def read_corpus(self, file_name):
        """
        Read and return the data from a UTF-8 encoded YAML corpus file.

        :param file_name: path of the file to parse
        :return: the parsed document (None for an empty file)
        """
        import io
        import yaml

        with io.open(file_name, encoding='utf-8') as data_file:
            # safe_load only builds plain Python objects. Bare yaml.load()
            # can construct arbitrary objects from tagged input and requires
            # an explicit Loader argument in current PyYAML releases.
            return yaml.safe_load(data_file)

    def list_corpus_files(self, dotted_path):
        """
        Return a sorted list of file paths to each data file in the
        specified corpus.

        :param dotted_path: corpus name or path, resolved by get_file_path
        :return: every ``.yml`` file below the corpus directory, or a
            one-element list holding the resolved path when it is not a
            directory
        """
        CORPUS_EXTENSION = 'yml'

        corpus_path = self.get_file_path(dotted_path, extension=CORPUS_EXTENSION)
        paths = []

        if os.path.isdir(corpus_path):
            # Require the dot so that names merely ending in the letters
            # "yml" are not mistaken for corpus files.
            suffix = '.' + CORPUS_EXTENSION
            for dirname, _, filenames in os.walk(corpus_path):
                paths.extend(
                    os.path.join(dirname, datafile)
                    for datafile in filenames
                    if datafile.endswith(suffix)
                )
        else:
            paths.append(corpus_path)

        paths.sort()
        return paths

    def load_corpus(self, dotted_path):
        """
        Return the data contained within a specified corpus.

        :param dotted_path: corpus name or path, resolved by get_file_path
        :return: one ``CorpusObject`` per data file, holding that file's
            "conversations" and tagged with its "categories"
        """
        corpora = []

        for file_path in self.list_corpus_files(dotted_path):
            # An empty YAML file parses to None; treat it as an empty mapping
            # instead of failing on the .get() calls below.
            corpus_data = self.read_corpus(file_path) or {}

            corpus = CorpusObject()
            corpus.categories = corpus_data.get('categories', [])
            corpus.extend(corpus_data.get('conversations', []))

            corpora.append(corpus)

        return corpora

# Export every utterance of the Chinese dialog corpus as a "non_task" example
# for chat detection.
# NOTE: this runs whenever the module is imported, and both paths are relative
# to the current working directory.
c = Corpus()
res = c.load_corpus("../data/chinese_dialogs")
texts = []  # never filled in the visible code
# Write UTF-8 explicitly: the corpus is Chinese text, and the platform default
# encoding may be unable to represent it.
with open("../data/chat_detection_non_task.txt", "w", encoding="utf-8") as f:
    # NOTE(review): the header is comma-separated while the rows below use
    # "|"; confirm which delimiter the consumer of this file expects.
    f.write("text,intent\n")
    for corpus in res:  # one CorpusObject per corpus file
        for dialog in corpus:
            for text in dialog:
                f.write(text + "|non_task\n")



Loading

0 comments on commit 6c88b6f

Please sign in to comment.