From 5036892fa62f242472e2a510ea7c57d62c356650 Mon Sep 17 00:00:00 2001
From: Lars Immisch
Date: Fri, 3 Nov 2023 20:06:39 +0100
Subject: [PATCH] Voice activity detector module.

See https://github.com/baresip/baresip/pull/2783 for history.
---
 cmake/FindFVAD.cmake        |  32 +++
 cmake/modules.cmake         |   6 +
 modules/fvad/CMakeLists.txt |  12 ++
 modules/fvad/fvad.c         | 405 ++++++++++++++++++++++++++++++++++++
 4 files changed, 455 insertions(+)
 create mode 100644 cmake/FindFVAD.cmake
 create mode 100644 modules/fvad/CMakeLists.txt
 create mode 100644 modules/fvad/fvad.c

diff --git a/cmake/FindFVAD.cmake b/cmake/FindFVAD.cmake
new file mode 100644
index 0000000..5565fe0
--- /dev/null
+++ b/cmake/FindFVAD.cmake
@@ -0,0 +1,32 @@
+find_package(PkgConfig QUIET)
+pkg_search_module(FVAD fvad)
+
+find_path(FVAD_INCLUDE_DIR
+  NAMES fvad.h
+  HINTS
+    "${FVAD_INCLUDE_DIRS}"
+    "${FVAD_HINTS}/include"
+  PATHS /usr/local/include /usr/include
+)
+
+find_library(FVAD_LIBRARY
+  NAME libfvad.a
+  HINTS
+    "${FVAD_LIBRARY_DIRS}"
+    "${FVAD_HINTS}/lib"
+  PATHS /usr/local/lib /usr/lib
+)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(FVAD DEFAULT_MSG FVAD_LIBRARY
+    FVAD_INCLUDE_DIR)
+
+if(FVAD_FOUND)
+    set( FVAD_INCLUDE_DIRS ${FVAD_INCLUDE_DIR} )
+    set( FVAD_LIBRARIES ${FVAD_LIBRARY} )
+else()
+    set( FVAD_INCLUDE_DIRS )
+    set( FVAD_LIBRARIES )
+endif()
+
+mark_as_advanced( FVAD_INCLUDE_DIRS FVAD_LIBRARIES )
diff --git a/cmake/modules.cmake b/cmake/modules.cmake
index 610a098..a4215d1 100644
--- a/cmake/modules.cmake
+++ b/cmake/modules.cmake
@@ -3,6 +3,8 @@ if(DEFINED MODULES)
   return()
 endif()
 
+find_package(FVAD)
+
 set(MODULES
   auloop
   autotest
@@ -14,6 +16,10 @@ set(MODULES
   qualify
 )
 
+if(FVAD_FOUND)
+  list(APPEND MODULES fvad)
+endif()
+
 if(DEFINED EXTRA_MODULES)
   list(APPEND MODULES ${EXTRA_MODULES})
 endif()
diff --git a/modules/fvad/CMakeLists.txt b/modules/fvad/CMakeLists.txt
new file mode 100644
index 0000000..904b7c0
--- /dev/null
+++ b/modules/fvad/CMakeLists.txt
@@ -0,0 +1,12 @@
+project(fvad)
+
+set(SRCS fvad.c)
+
+if(STATIC)
+    add_library(${PROJECT_NAME} OBJECT ${SRCS})
+else()
+    add_library(${PROJECT_NAME} MODULE ${SRCS})
+endif()
+
+target_include_directories(${PROJECT_NAME} PRIVATE ${FVAD_INCLUDE_DIRS})
+target_link_libraries(${PROJECT_NAME} PRIVATE ${FVAD_LIBRARIES})
diff --git a/modules/fvad/fvad.c b/modules/fvad/fvad.c
new file mode 100644
index 0000000..854d0c3
--- /dev/null
+++ b/modules/fvad/fvad.c
@@ -0,0 +1,405 @@
+/**
+ * @file fvad.c  Voice Activity Detection
+ *
+ * Uses libfvad from https://github.com/dpirch/libfvad
+ *
+ * Copyright (C) 2023 Lars Immisch
+ */
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <re.h>
+#include <rem.h>
+#include <baresip.h>
+#include "fvad.h"
+
+
+/**
+ * @defgroup fvad fvad
+ *
+ * Voice Activity Detection for the audio-signal.
+ *
+ * It is using the aufilt API to get the audio samples.
+ *
+ * Configuration options (both default to yes):
+ *
+ \verbatim
+  fvad_rx   yes|no   # enable the decode-side filter
+  fvad_tx   yes|no   # enable the encode-side filter
+ \endverbatim
+ *
+ * Each change of the detected state is sent as module event "vad_rx"
+ * or "vad_tx" with the text "on" or "off".
+ */
+
+
+/** Encoder filter state */
+struct vad_enc {
+	struct aufilt_enc_st af;  /* inheritance */
+	bool vad_tx;              /* last reported detection state */
+	Fvad *fvad;               /* libfvad detector instance */
+	struct call *call;        /* call used in events, may be NULL */
+};
+
+
+/** Decoder filter state */
+struct vad_dec {
+	struct aufilt_dec_st af;  /* inheritance */
+	bool vad_rx;              /* last reported detection state */
+	Fvad *fvad;               /* libfvad detector instance */
+	struct call *call;        /* call used in events, may be NULL */
+};
+
+
+/** Argument passed to the call-list handlers */
+struct filter_arg {
+	const struct audio *audio;  /* audio object to match */
+	struct call *call;          /* first matching call, or NULL */
+};
+
+
+static void enc_destructor(void *arg)
+{
+	struct vad_enc *st = arg;
+
+	if (st->fvad)
+		fvad_free(st->fvad);
+
+	list_unlink(&st->af.le);
+}
+
+
+static void dec_destructor(void *arg)
+{
+	struct vad_dec *st = arg;
+
+	if (st->fvad)
+		fvad_free(st->fvad);
+
+	list_unlink(&st->af.le);
+}
+
+
+/* Call-list handler: remember the first call that passed find_call() */
+static void find_first_call(struct call *call, void *arg)
+{
+	struct filter_arg *fa = arg;
+
+	if (!fa->call)
+		fa->call = call;
+}
+
+
+/* Call-match handler: true if the call owns the audio object searched for */
+static bool find_call(const struct call *call, void *arg)
+{
+	struct filter_arg *fa = arg;
+
+	return call_audio(call) == fa->audio;
+}
+
+
+/**
+ * Allocate the encoder filter state
+ *
+ * @param stp  Pointer to the filter state; left untouched if already set
+ * @param ctx  Unused
+ * @param af   Audio filter
+ * @param prm  Audio parameters; must be mono, AUFMT_S16LE
+ * @param au   Audio object, used to look up the call for events
+ *
+ * @return 0 if success, otherwise errorcode
+ */
+static int encode_update(struct aufilt_enc_st **stp, void **ctx,
+			 const struct aufilt *af, struct aufilt_prm *prm,
+			 const struct audio *au)
+{
+	struct vad_enc *st;
+	struct filter_arg fa = { au, NULL };
+	(void)ctx;
+
+	if (!stp || !af || !prm)
+		return EINVAL;
+
+	if (*stp)
+		return 0;
+
+	if (prm->ch != 1) {
+		warning("fvad: only mono is supported\n");
+		return EINVAL;
+	}
+
+	if (prm->fmt != AUFMT_S16LE) {
+		warning("fvad: only AUFMT_S16LE is supported. "
+			"Use the auconv module to fix this\n");
+		return EINVAL;
+	}
+
+	st = mem_zalloc(sizeof(*st), enc_destructor);
+	if (!st)
+		return ENOMEM;
+
+	st->fvad = fvad_new();
+	if (!st->fvad) {
+		mem_deref(st);
+		return ENOMEM;
+	}
+
+	if (fvad_set_sample_rate(st->fvad, (int)prm->srate) < 0) {
+		warning("fvad: sample rate %u is not supported\n",
+			prm->srate);
+		mem_deref(st);
+		return EINVAL;
+	}
+
+	/* stays NULL if no call matches this audio object */
+	uag_filter_calls(find_first_call, find_call, &fa);
+	st->call = fa.call;
+
+	*stp = (struct aufilt_enc_st *)st;
+
+	return 0;
+}
+
+
+/**
+ * Allocate the decoder filter state
+ *
+ * @param stp  Pointer to the filter state; left untouched if already set
+ * @param ctx  Unused
+ * @param af   Audio filter
+ * @param prm  Audio parameters; must be mono, AUFMT_S16LE
+ * @param au   Audio object, used to look up the call for events
+ *
+ * @return 0 if success, otherwise errorcode
+ */
+static int decode_update(struct aufilt_dec_st **stp, void **ctx,
+			 const struct aufilt *af, struct aufilt_prm *prm,
+			 const struct audio *au)
+{
+	struct vad_dec *st;
+	struct filter_arg fa = { au, NULL };
+	(void)ctx;
+
+	if (!stp || !af || !prm)
+		return EINVAL;
+
+	if (*stp)
+		return 0;
+
+	if (prm->ch != 1) {
+		warning("fvad: only mono is supported\n");
+		return EINVAL;
+	}
+
+	if (prm->fmt != AUFMT_S16LE) {
+		warning("fvad: only AUFMT_S16LE is supported. "
+			"Use the auconv module to fix this\n");
+		return EINVAL;
+	}
+
+	st = mem_zalloc(sizeof(*st), dec_destructor);
+	if (!st)
+		return ENOMEM;
+
+	st->fvad = fvad_new();
+	if (!st->fvad) {
+		mem_deref(st);
+		return ENOMEM;
+	}
+
+	if (fvad_set_sample_rate(st->fvad, (int)prm->srate) < 0) {
+		warning("fvad: sample rate %u is not supported\n",
+			prm->srate);
+		mem_deref(st);
+		return EINVAL;
+	}
+
+	/* stays NULL if no call matches this audio object */
+	uag_filter_calls(find_first_call, find_call, &fa);
+	st->call = fa.call;
+
+	*stp = (struct aufilt_dec_st *)st;
+
+	return 0;
+}
+
+
+/**
+ * Run the detector over one audio frame
+ *
+ * The frame is fed to libfvad in chunks of 30, 20 and 10 ms, largest
+ * size first. Samples that fill none of them only trigger a warning.
+ *
+ * @param fvad  Detector instance
+ * @param af    Audio frame, must be AUFMT_S16LE
+ *
+ * @return true as soon as fvad_process() returns a positive value for
+ *         a chunk, false otherwise (including on errors)
+ */
+static bool auframe_vad(Fvad *fvad, struct auframe *af)
+{
+	static const int chunk_times_ms[] = { 30, 20, 10 };
+	size_t pos = 0;
+
+	if (af->fmt != AUFMT_S16LE) {
+		warning("fvad: invalid sample format %d\n",
+			af->fmt);
+
+		return false;
+	}
+
+	/* process all chunk_sizes that fvad accepts */
+	for (size_t i = 0; i < RE_ARRAY_SIZE(chunk_times_ms); ++i) {
+
+		const size_t sampc = af->srate / 1000 * chunk_times_ms[i];
+
+		while (af->sampc - pos >= sampc) {
+
+			int res = fvad_process(fvad,
+					       (int16_t *)af->sampv + pos,
+					       sampc);
+			pos += sampc;
+
+			if (res > 0)
+				return true;
+
+			if (res < 0) {
+				warning("fvad: fvad_process(%zu) failed\n",
+					sampc);
+				return false;
+			}
+		}
+	}
+
+	if (pos != af->sampc) {
+		warning("fvad: fvad_process: samples left over: %zu\n",
+			af->sampc - pos);
+	}
+
+	return false;
+}
+
+
+/**
+ * Encode handler: report changes of the detection state as "vad_tx"
+ *
+ * @param st  Encoder filter state
+ * @param af  Audio frame
+ *
+ * @return 0 if success, otherwise errorcode
+ */
+static int encode(struct aufilt_enc_st *st, struct auframe *af)
+{
+	struct vad_enc *vad = (void *)st;
+	bool vad_tx;
+
+	if (!st || !af)
+		return EINVAL;
+
+	vad_tx = auframe_vad(vad->fvad, af);
+
+	if (vad_tx != vad->vad_tx) {
+		const char *desc = vad_tx ? "on" : "off";
+		vad->vad_tx = vad_tx;
+
+		debug("fvad: vad_tx: %s\n", desc);
+
+		module_event("fvad", "vad_tx", call_get_ua(vad->call),
+			     vad->call, "%s", desc);
+	}
+
+	return 0;
+}
+
+
+/**
+ * Decode handler: report changes of the detection state as "vad_rx"
+ *
+ * @param st  Decoder filter state
+ * @param af  Audio frame
+ *
+ * @return 0 if success, otherwise errorcode
+ */
+static int decode(struct aufilt_dec_st *st, struct auframe *af)
+{
+	struct vad_dec *vad = (void *)st;
+	bool vad_rx;
+
+	if (!st || !af)
+		return EINVAL;
+
+	vad_rx = auframe_vad(vad->fvad, af);
+
+	if (vad_rx != vad->vad_rx) {
+		const char *desc = vad_rx ? "on" : "off";
+		vad->vad_rx = vad_rx;
+
+		debug("fvad: vad_rx: %s\n", desc);
+
+		module_event("fvad", "vad_rx", call_get_ua(vad->call),
+			     vad->call, "%s", desc);
+	}
+
+	return 0;
+}
+
+
+static struct aufilt vad = {
+	.name    = "vad",
+	.encupdh = encode_update,
+	.ench    = encode,
+	.decupdh = decode_update,
+	.dech    = decode
+};
+
+
+/* Register the filter; a disabled direction has its handlers removed */
+static int module_init(void)
+{
+	struct conf *conf = conf_cur();
+	bool rx_enabled = true;
+	bool tx_enabled = true;
+
+	/* both options are optional, so the results are not checked */
+	(void)conf_get_bool(conf, "fvad_rx", &rx_enabled);
+	(void)conf_get_bool(conf, "fvad_tx", &tx_enabled);
+
+	if (!rx_enabled) {
+		vad.dech = NULL;
+		vad.decupdh = NULL;
+	}
+
+	if (!tx_enabled) {
+		vad.ench = NULL;
+		vad.encupdh = NULL;
+	}
+
+	if (!tx_enabled && !rx_enabled) {
+		warning("fvad: neither fvad_rx nor fvad_tx are enabled"
+			", not loading filter\n");
+		return 0;
+	}
+
+	aufilt_register(baresip_aufiltl(), &vad);
+
+	return 0;
+}
+
+
+static int module_close(void)
+{
+	/* module_init() skips registration when both handlers are NULL */
+	if (vad.dech || vad.ench)
+		aufilt_unregister(&vad);
+
+	return 0;
+}
+
+
+EXPORT_SYM const struct mod_export DECL_EXPORTS(fvad) = {
+	"fvad",
+	"filter",
+	module_init,
+	module_close
+};