Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Port to ROS2 #3

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 36 additions & 16 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,21 +1,41 @@
cmake_minimum_required(VERSION 2.8.3)
cmake_minimum_required(VERSION 3.5)
project(japanese_text_to_speech)

find_package(catkin REQUIRED
actionlib_msgs
genmsg
)
# Default to C99
if(NOT CMAKE_C_STANDARD)
set(CMAKE_C_STANDARD 99)
endif()

add_action_files(DIRECTORY action FILES Speak.action)
generate_messages(DEPENDENCIES actionlib_msgs)
# Default to C++14
if(NOT CMAKE_CXX_STANDARD)
set(CMAKE_CXX_STANDARD 14)
endif()

catkin_package(
CATKIN_DEPENDS actionlib actionlib_msgs
)
catkin_install_python(
PROGRAMS
nodes/japanese_text_to_speech
nodes/test_client
DESTINATION
${CATKIN_PACKAGE_BIN_DESTINATION}
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
add_compile_options(-Wall -Wextra -Wpedantic)
endif()

# find dependencies
find_package(ament_cmake REQUIRED)
find_package(ament_cmake_python REQUIRED)
find_package(builtin_interfaces REQUIRED)
find_package(rosidl_default_generators REQUIRED)

rosidl_generate_interfaces(${PROJECT_NAME}
"action/Speak.action"
DEPENDENCIES builtin_interfaces
)

install(PROGRAMS
nodes/japanese_text_to_speech
nodes/test_client
DESTINATION lib/${PROJECT_NAME}
)

if(BUILD_TESTING)
find_package(ament_lint_auto REQUIRED)
ament_lint_auto_find_test_dependencies()
endif()

ament_export_dependencies(rosidl_default_runtime)
ament_package()
12 changes: 6 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,22 @@ Open JTalkとaplayコマンドに依存しています.
sudo apt install open-jtalk open-jtalk-mecab-naist-jdic hts-voice-nitech-jp-atr503-m001 alsa-utils
```

catkin workspace内に設置して `catkin_make` してください.
colcon workspace内に設置して `colcon build` してください.

## 使い方

`japanese_text_to_speech` node を起動してください.
この node がサーバとなり,クライアントから受け取ったテキストを音声出力します.

```
rosrun japanese_text_to_speech japanese_text_to_speech
ros2 run japanese_text_to_speech japanese_text_to_speech
```

この node は [actionlib](http://wiki.ros.org/actionlib) を使って実装されています.
この node は ros2 action を使って実装されています.
簡単なクライアントのサンプルとして `test_client` nodeを用意してあります.

```
rosrun japanese_text_to_speech test_client
ros2 run japanese_text_to_speech test_client
```

### 音声モデルの切り替え
Expand All @@ -37,7 +37,7 @@ rosrun japanese_text_to_speech test_client
`.htsvoice` ファイルを適当な場所に設置し, `~hts_voice_file` rosparam をセットしてください.

```
rosrun japanese_text_to_speech japanese_text_to_speech _hts_voice_file:='/path/to/htsvoice'
ros2 run japanese_text_to_speech japanese_text_to_speech --ros-args -p hts_voice_file:='/path/to/htsvoice'
```

#### メイ&タクミ
Expand All @@ -55,7 +55,7 @@ unzip MMDAgent_Example-1.8.zip
例えば,メイの幸せな声の音声モデルを利用する場合は,次のように引数を追加します.

```bash
rosrun japanese_text_to_speech japanese_text_to_speech _hts_voice_file:='PATH_TO_UNZIPPED_FILES/MMDAgent_Example-1.8/Voice/mei/mei_happy.htsvoice'
ros2 run japanese_text_to_speech japanese_text_to_speech --ros-args -p hts_voice_file:='PATH_TO_UNZIPPED_FILES/MMDAgent_Example-1.8/Voice/mei/mei_happy.htsvoice'
```

参考:https://www.rcnp.osaka-u.ac.jp/~kohda/linux/espeak.html
4 changes: 2 additions & 2 deletions action/Speak.action
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ string text
float64 speed_rate
---
# Result
time elapsed_time
builtin_interfaces/Duration elapsed_time
---
# Feedback
uint8 GENERATING_VOICE=0
uint8 PLAYING_VOICE=1
uint8 state
uint8 state
100 changes: 60 additions & 40 deletions nodes/japanese_text_to_speech
Original file line number Diff line number Diff line change
@@ -1,82 +1,102 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import os
import os.path

import rospy
import actionlib
import japanese_text_to_speech.msg
import rclpy
from rclpy.action import ActionServer
from rclpy.node import Node
from japanese_text_to_speech.action import Speak

class JapaneseTextToSpeech:

class JapaneseTextToSpeech(Node):
_TEMPORALY_DIR = '/tmp/japanese_text_to_speech'
_INPUT_TEXT_FILE = _TEMPORALY_DIR + '/input_text'
_GENERATED_VOICE_FILE = _TEMPORALY_DIR + '/generated_voice.wav'

def __init__(self):
self._action_name = rospy.get_param('~action_name', 'japanese_text_to_speech')
self._dictionary_dir = rospy.get_param('~dictionary_dir', '/var/lib/mecab/dic/open-jtalk/naist-jdic')
self._hts_voice_file = rospy.get_param('~hts_voice_file', '/usr/share/hts-voice/nitech-jp-atr503-m001/nitech_jp_atr503_m001.htsvoice')
self._player_command = rospy.get_param('~player_command', 'aplay')
super().__init__('japanese_text_to_speech')

self.declare_parameters(
namespace='',
parameters=[
('action_name', 'japanese_text_to_speech'),
('dictionary_dir', '/var/lib/mecab/dic/open-jtalk/naist-jdic'),
('hts_voice_file', '/usr/share/hts-voice/nitech-jp-atr503-m001/nitech_jp_atr503_m001.htsvoice'),
('aplayer_command', 'aplay')
]
)
self._action_name = self.get_parameter('action_name').get_parameter_value().string_value
self._dictionary_dir = self.get_parameter('dictionary_dir').get_parameter_value().string_value
self._hts_voice_file = self.get_parameter('hts_voice_file').get_parameter_value().string_value
self._player_command = self.get_parameter('aplayer_command').get_parameter_value().string_value

if not os.path.isdir(self._TEMPORALY_DIR):
os.mkdir(self._TEMPORALY_DIR)
self._action_server = ActionServer(self, Speak, str(self._action_name), self.execute_callback)

self._action_server = actionlib.SimpleActionServer(self._action_name, japanese_text_to_speech.msg.SpeakAction, execute_cb=self.execute_callback, auto_start=False)
self._action_server.start()

def execute_callback(self, goal):
start_time = rospy.Time.now()
def execute_callback(self, goal_handle):
start_time = self.get_clock().now()

is_succeeded = True

self.write_input_text(goal.text)
self.write_input_text(goal_handle.request.text)

if goal.speed_rate > 0.0:
self.generate_voice(goal.speed_rate)
self.play_voice()
if goal_handle.request.speed_rate > 0.0:
self.generate_voice(goal_handle.request.speed_rate, goal_handle)
self.play_voice(goal_handle)
else:
print("WARNING: Do not set 'speed_rate' to 0.0")
is_succeeded = False

end_time = rospy.Time.now()
end_time = self.get_clock().now()
elapsed_time = end_time - start_time

result = japanese_text_to_speech.msg.SpeakResult()
result.elapsed_time = elapsed_time
result = Speak.Result()
result.elapsed_time = elapsed_time.to_msg()

if is_succeeded:
self._action_server.set_succeeded(result)
goal_handle.succeed()
else:
self._action_server.set_aborted(result)
goal_handle.abort()

return result


def write_input_text(self, input_text):
rospy.loginfo('Input text: %s', input_text)
self.get_logger().info(f'Input text: {input_text}')
input_text_file = open(self._INPUT_TEXT_FILE, 'w')
input_text_file.write(input_text)
input_text_file.close()

def generate_voice(self, speed_rate):
feedback = japanese_text_to_speech.msg.SpeakFeedback()
feedback.state = japanese_text_to_speech.msg.SpeakFeedback.GENERATING_VOICE
self._action_server.publish_feedback(feedback)

rospy.loginfo('Converting text to wav file...')
def generate_voice(self, speed_rate, goal_handle):
feedback = Speak.Feedback()
feedback.state = Speak.Feedback.GENERATING_VOICE
goal_handle.publish_feedback(feedback)

self.get_logger().info('Converting text to wav file...')
command = 'open_jtalk -x ' + self._dictionary_dir + ' -m ' + self._hts_voice_file + ' -ow ' + self._GENERATED_VOICE_FILE + ' -r ' + str(speed_rate) + ' ' + self._INPUT_TEXT_FILE
rospy.loginfo('Send command: %s', command)
self.get_logger().info(f'Send command: {command}')
os.system(command)

def play_voice(self):
feedback = japanese_text_to_speech.msg.SpeakFeedback()
feedback.state = japanese_text_to_speech.msg.SpeakFeedback.PLAYING_VOICE
self._action_server.publish_feedback(feedback)
def play_voice(self, goal_handle):
feedback = Speak.Feedback()
feedback.state = Speak.Feedback.PLAYING_VOICE
goal_handle.publish_feedback(feedback)

rospy.loginfo('Playing wav file...')
self.get_logger().info('Playing wav file...')
command = self._player_command + ' ' + self._GENERATED_VOICE_FILE
rospy.loginfo('Send command: %s', command)
self.get_logger().info(f'Send command: {command}')
os.system(command)

if __name__ == '__main__':
rospy.init_node('japanese_text_to_speech')

def main(args=None):

rclpy.init(args=args)
tts = JapaneseTextToSpeech()
rospy.spin()
rclpy.spin(tts)


if __name__ == '__main__':
main()
63 changes: 46 additions & 17 deletions nodes/test_client
Original file line number Diff line number Diff line change
@@ -1,23 +1,52 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import rospy
import actionlib
import japanese_text_to_speech.msg
import rclpy
from rclpy.action import ActionClient
from rclpy.node import Node
from japanese_text_to_speech.action import Speak

if __name__ == '__main__':
rospy.init_node('japanese_tts_test_client')
action_name = rospy.get_param('~action_name', 'japanese_text_to_speech')
text = rospy.get_param('~text', 'テスト')
speed_rate = rospy.get_param('~speed_rate', 0.5)

simple_client = actionlib.SimpleActionClient(action_name, japanese_text_to_speech.msg.SpeakAction)
class TestClient(Node):

def __init__(self):
super().__init__('japanese_tts_test_client')
self.declare_parameters(
namespace='',
parameters=[
('action_name', 'japanese_text_to_speech'),
('text', 'テスト'),
('speed_rate', 0.5)
]
)

self._action_name = self.get_parameter('action_name').get_parameter_value().string_value
self._text = self.get_parameter('text').get_parameter_value().string_value
self._speed_rate = self.get_parameter('speed_rate').get_parameter_value().double_value
self._action_client = ActionClient(self, Speak, self._action_name)

def send_goal(self):
self.get_logger().info('Waiting japanese_text_to_speech server')
self._action_client.wait_for_server()

self.get_logger().info('Sending goal to server')
goal_msg = Speak.Goal()
goal_msg.text = self._text
goal_msg.speed_rate = self._speed_rate

return self._action_client.send_goal_async(goal_msg)


rospy.loginfo('Waiting japanese_text_to_speech server')
simple_client.wait_for_server()
def main(args=None):
rclpy.init(args=args)

action_client = TestClient()

future = action_client.send_goal()

rclpy.spin_until_future_complete(action_client, future)


if __name__ == '__main__':
main()

rospy.loginfo('Sending goal to server')
goal = japanese_text_to_speech.msg.SpeakGoal()
goal.text = text
goal.speed_rate = speed_rate
simple_client.send_goal_and_wait(goal)
19 changes: 16 additions & 3 deletions package.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version="1.0"?>
<package format="2">
<package format="3">
<name>japanese_text_to_speech</name>
<version>0.1.0</version>
<description>The japanese_text_to_speech package</description>
Expand All @@ -8,8 +8,21 @@
<license>TODO</license>
<url type="website">https://github.com/ActiveIntelligentSystemsLab/japanese_tts_ros</url>

<buildtool_depend>catkin</buildtool_depend>
<buildtool_depend>ament_cmake_python</buildtool_depend>
<buildtool_depend>ament_cmake</buildtool_depend>

<depend>actionlib</depend>
<depend>actionlib_msgs</depend>
<exec_depend>rospy</exec_depend>
<depend>builtin_interfaces</depend>
<exec_depend>rclpy</exec_depend>

<!-- for acrion file -->
<buildtool_depend>rosidl_default_generators</buildtool_depend>
<depend>action_msgs</depend>
<exec_depend>rosidl_default_runtime</exec_depend>
<member_of_group>rosidl_interface_packages</member_of_group>

<export>
<build_type>ament_cmake</build_type>
</export>
</package>