Skip to content
This repository has been archived by the owner on Oct 1, 2024. It is now read-only.

Removal of Sphinxbase dependency and update Pocketsphinx to v5.0.3 #183

Closed
wants to merge 17 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,4 @@ CppCheckResults.xml
.eggs
*.egg-info
*.snap
*result
8 changes: 3 additions & 5 deletions doc/install.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ You could download release builds from [download page](https://sc0ty.github.io/s
- C++11 compatible compiler (or better C++14);
- pybind11;
- ffmpeg libraries (4.0 or newer);
- sphinxbase and pocketsphinx;
- pocketsphinx;
- Python interpreter (supporting Python 3.5 or newer);
- Python modules listed in `requirements.txt` file;

Expand All @@ -30,10 +30,9 @@ Activate your virtual environment:
source .env/bin/activate
```

If you have the ffmpeg, sphinxbase and pocketsphinx libraries installed and available via `pkg-config`, the build will be configured automatically. Otherwise, you must provide the paths to these libraries manually, using environment variables:
If you have the ffmpeg and pocketsphinx libraries installed and available via `pkg-config`, the build will be configured automatically. Otherwise, you must provide the paths to these libraries manually, using environment variables:
```
export FFMPEG_DIR=PATH
export SPHINXBASE_DIR=PATH
export POCKETSPHINX_DIR=PATH
export USE_PKG_CONFIG=no
```
Expand Down Expand Up @@ -75,13 +74,12 @@ python -m venv .env
```

To build subsync, you need to provide dependencies first.
Sphinxbase and pocketsphinx are published with Visual Studio solution file, which is [easy to use](https://github.com/cmusphinx/pocketsphinx#ms-windows-ms-visual-studio-2012-or-newer---we-test-with-vc-2012-express).
Pocketsphinx is published with a Visual Studio solution file, which is [easy to use](https://github.com/cmusphinx/pocketsphinx#ms-windows-ms-visual-studio-2012-or-newer---we-test-with-vc-2012-express).
Building ffmpeg, on the other hand, is not that easy. You can use an [official build](https://ffmpeg.zeranoe.com/builds/) instead.

Configure dependencies paths:
```
set FFMPEG_DIR=d:\projects\ffmpeg
set SPHINXBASE_DIR=d:\projects\sphinxbase
set POCKETSPHINX_DIR=d:\projects\pocketsphinx
set USE_PKG_CONFIG=no
```
Expand Down
45 changes: 45 additions & 0 deletions flake.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

64 changes: 64 additions & 0 deletions flake.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Nix flake for subsync: exposes the pocketsphinx and subsync packages,
# a formatter and a development shell for every supported system.
{
  inputs = {
    nixpkgs = {
      type = "github";
      owner = "NixOS";
      repo = "nixpkgs";
      ref = "nixos-unstable";
    };

    # Pocketsphinx sources, fetched as a plain source tree (flake = false).
    pocketsphinx-src = {
      type = "github";
      owner = "cmusphinx";
      repo = "pocketsphinx";
      ref = "v5.0.3";
      flake = false;
    };
  };

  outputs = {
    self,
    nixpkgs,
    pocketsphinx-src,
  }: let
    supportedSystems = ["x86_64-linux"];

    # Build an attribute set keyed by system name; `mk` is called with the
    # system name and the nixpkgs package set for that system.
    forEachSystem = mk:
      nixpkgs.lib.genAttrs supportedSystems
      (system: mk system nixpkgs.legacyPackages.${system});

    # Reformat the leading "YYYYMMDD" of a timestamp string as "YYYY-MM-DD".
    toDashedDate = stamp: let
      piece = start: len: builtins.substring start len stamp;
    in "${piece 0 4}-${piece 4 2}-${piece 6 2}";
  in {
    packages = forEachSystem (system: pkgs: let
      ownPackages = self.packages.${system};
    in {
      pocketsphinx = pkgs.callPackage ./nix/pocketsphinx.nix {
        inherit pocketsphinx-src;
      };

      # The package version is the flake's last-modified date, with a fixed
      # fallback when that attribute is not available.
      subsync = pkgs.callPackage ./nix {
        version = toDashedDate (self.lastModifiedDate or "19700101");
        inherit (ownPackages) pocketsphinx;
      };

      default = ownPackages.subsync;
    });

    formatter = forEachSystem (_: pkgs: pkgs.alejandra);

    devShells = forEachSystem (_: pkgs: {
      default = pkgs.mkShell {
        packages = [
          pkgs.alejandra
          # ... more dev packages
        ];
      };
    });
  };
}
6 changes: 3 additions & 3 deletions gizmo/general/logger.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "logger.h"
#include "text/utf8.h"
#include <sphinxbase/err.h>
#include <pocketsphinx/err.h>
#include <pocketsphinx.h>
#include <string>
#include <cstdarg>

Expand Down Expand Up @@ -60,7 +61,7 @@ static void sphinxLogCb(void *user_data, err_lvl_t level, const char *fmt, ...)
{
(void) user_data;

if (g_loggerCallback && level != ERR_INFOCONT && level >= g_sphinxLogLevel)
if (g_loggerCallback && level >= g_sphinxLogLevel)
{
char line[MAX_LOG_SIZE];
va_list args;
Expand All @@ -73,7 +74,6 @@ static void sphinxLogCb(void *user_data, err_lvl_t level, const char *fmt, ...)
{
case ERR_DEBUG: lvl = LOG_DEBUG; break;
case ERR_INFO: lvl = LOG_DEBUG; break;
case ERR_INFOCONT: lvl = LOG_DEBUG; break;
case ERR_WARN: lvl = LOG_WARNING; break;
case ERR_ERROR: lvl = LOG_ERROR; break;
case ERR_FATAL: lvl = LOG_CRITICAL; break;
Expand Down
43 changes: 21 additions & 22 deletions gizmo/media/speechrec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,35 +17,39 @@ SpeechRecognition::SpeechRecognition() :
m_minProb(1.0f),
m_minLen(0)
{
m_config = cmd_ln_parse_r(NULL, ps_args(), 0, NULL, TRUE);
m_config = ps_config_init(NULL);
if (m_config == NULL)
throw EXCEPTION("can't init Sphinx configuration")
.module("SpeechRecognition", "cmd_ln_parse_r");
.module("SpeechRecognition", "ps_config_init");
}

SpeechRecognition::~SpeechRecognition()
{
cmd_ln_free_r(m_config);
ps_config_free(m_config);
}

void SpeechRecognition::setParam(const string &key, const string &val)
{
arg_t const *args = ps_args();
ps_arg_t const *args = ps_args();

for (size_t i = 0; args[i].name != NULL; i++)
{
if (key == args[i].name)
if (key == string("-").append(args[i].name))
{
int type = args[i].type;
if (type & ARG_INTEGER)
cmd_ln_set_int_r(m_config, key.c_str(), atol(val.c_str()));
ps_config_set_int(m_config, args[i].name, atol(val.c_str()));

else if (type & ARG_FLOATING)
cmd_ln_set_float_r(m_config, key.c_str(), atof(val.c_str()));
ps_config_set_float(m_config, args[i].name, atof(val.c_str()));

else if (type & ARG_STRING)
cmd_ln_set_str_r(m_config, key.c_str(), val.c_str());
ps_config_set_str(m_config, args[i].name, val.c_str());

else if (type & ARG_BOOLEAN)
cmd_ln_set_boolean_r(m_config, key.c_str(),
ps_config_set_bool(m_config, args[i].name,
!(val.empty() || val == "0"));

else
throw EXCEPTION("invalid parameter type")
.module("SpeechRecognition", "setParameter")
Expand Down Expand Up @@ -89,14 +93,14 @@ void SpeechRecognition::start(const AVStream *stream)
throw EXCEPTION("can't init Sphinx engine")
.module("SpeechRecognition", "ps_init");

int32_t frate = cmd_ln_int32_r(m_config, "-frate");
int32_t frate = ps_config_int(m_config, "frate");
m_framePeriod = 1.0 / (double)frate;

if (frate == 0)
throw EXCEPTION("can't get frame rate value")
.module("SpeechRecognition", "cmd_ln_int32_r");
.module("SpeechRecognition", "ps_config_get");

if (ps_start_utt(m_ps))
if (ps_start_utt(m_ps) < 0)
throw EXCEPTION("can't start speech recognition")
.module("SpeechRecognition", "ps_start_utt");

Expand All @@ -108,7 +112,7 @@ void SpeechRecognition::stop()
{
if (m_ps)
{
if (ps_end_utt(m_ps))
if (ps_end_utt(m_ps) < 0)
throw EXCEPTION("can't stop speech recognition")
.module("SpeechRecognition", "ps_end_utt");

Expand Down Expand Up @@ -143,13 +147,13 @@ void SpeechRecognition::feed(const AVFrame *frame)
}
if (!inSpeech && m_utteranceStarted)
{
if (ps_end_utt(m_ps))
if (ps_end_utt(m_ps) < 0)
throw EXCEPTION("can't end utterance")
.module("SpeechRecognition", "ps_end_utt");

parseUtterance();

if (ps_start_utt(m_ps))
if (ps_start_utt(m_ps) < 0)
throw EXCEPTION("can't start utterance")
.module("SpeechRecognition", "ps_start_utt");

Expand All @@ -163,21 +167,16 @@ void SpeechRecognition::flush()

void SpeechRecognition::discontinuity()
{
if (ps_end_utt(m_ps))
if (ps_end_utt(m_ps) < 0)
throw EXCEPTION("can't stop speech recognition")
.module("SpeechRecognition", "ps_end_utt");

if (m_utteranceStarted)
parseUtterance();

m_deltaTime = -1.0;
if (ps_start_stream(m_ps))
{
throw EXCEPTION("can't reset speech recognition engine")
.module("SpeechRecognition", "sphinx", "ps_start_stream");
}

if (ps_start_utt(m_ps))
if (ps_start_utt(m_ps) < 0)
throw EXCEPTION("can't start speech recognition")
.module("SpeechRecognition", "ps_start_utt");

Expand Down
2 changes: 1 addition & 1 deletion gizmo/media/speechrec.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class SpeechRecognition : public AVOutput

private:
ps_decoder_t *m_ps;
cmd_ln_t *m_config;
ps_config_t *m_config;

bool m_utteranceStarted;

Expand Down
1 change: 1 addition & 0 deletions gizmo/text/ssa.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <list>
#include <set>
#include <string>
#include <cstdint>


class SSAParser
Expand Down
1 change: 1 addition & 0 deletions gizmo/text/utf8.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define __UTF8_H__

#include <string>
#include <cstdint>


class Utf8
Expand Down
1 change: 1 addition & 0 deletions gizmo/util
55 changes: 55 additions & 0 deletions nix/default.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Nix derivation for subsync, installed with pip from the repository root.
#
# Arguments:
#   ffmpeg, pocketsphinx  - libraries supplied to the build
#   pkg-config            - build-time tool used to locate those libraries
#   python3Packages       - Python package set providing the build tools and
#                           runtime modules listed below
#   version               - version string assigned to the package
{
  ffmpeg,
  pkg-config,
  pocketsphinx,
  python3Packages,
  version,
  ...
}: let
  inherit (builtins) concatStringsSep;
in
  python3Packages.buildPythonPackage {
    pname = "subsync";
    inherit version;
    # Custom build: the phases defined below drive the installation.
    format = "other";

    src = ../.;

    buildInputs = [
      ffmpeg
      pocketsphinx
    ];

    # Tools that are executed during the build. pkg-config is a build-time
    # program, so it is listed here rather than in buildInputs.
    nativeBuildInputs = [
      pkg-config
      python3Packages.pip
      python3Packages.setuptools
      python3Packages.wheel
    ];

    propagatedBuildInputs = with python3Packages; [
      certifi
      cryptography
      pybind11
      pycryptodome
      pysubs2
      pyyaml
      requests
      utils
    ];

    # The tests are for the GUI
    doCheck = false;

    # 'pip install .' takes care of building the package, so the separate
    # build phase is skipped. dontBuild is used because an empty buildPhase
    # string falls back to the default build phase instead of disabling it.
    dontBuild = true;

    installPhase = ''
      python -m pip install . ${concatStringsSep " " [
        "--no-index"
        "--no-warn-script-location"
        "--prefix=\"$out\""
        "--no-cache"
      ]}
    '';
  }
14 changes: 14 additions & 0 deletions nix/pocketsphinx.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Nix derivation that builds pocketsphinx from the source tree passed in as
# `pocketsphinx-src`; the package version is that source's short revision.
{
  cmake,
  pocketsphinx-src,
  stdenv,
  ...
}:
stdenv.mkDerivation {
  pname = "pocketsphinx";
  version = pocketsphinx-src.shortRev;

  src = pocketsphinx-src;

  # cmake is a tool run during the build, so it is a native build input
  # rather than a library the package links against.
  nativeBuildInputs = [cmake];
}
Loading