diff --git a/.github/scripts/test-vad.sh b/.github/scripts/test-vad.sh new file mode 100755 index 00000000..d9678c9b --- /dev/null +++ b/.github/scripts/test-vad.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash + +set -e + +log() { + # This function is from espnet + local fname=${BASH_SOURCE[1]##*/} + echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" +} + +echo "EXE is $EXE" +echo "PATH: $PATH" + +which $EXE + +cd build + +curl -SL -O https://github.com/k2-fsa/sherpa-ncnn/releases/download/models/sherpa-ncnn-silero-vad.tar.bz2 +tar xvf sherpa-ncnn-silero-vad.tar.bz2 +rm sherpa-ncnn-silero-vad.tar.bz2 +ls -lh sherpa-ncnn-silero-vad + +curl -SL -O https://github.com/k2-fsa/sherpa-ncnn/releases/download/models/lei-jun-test.wav + +$EXE + +ls -lh *.wav +rm -rfv sherpa-ncnn-* +rm *.wav diff --git a/.github/workflows/aarch64-linux-gnu.yaml b/.github/workflows/aarch64-linux-gnu.yaml index 5b4dac03..96054349 100644 --- a/.github/workflows/aarch64-linux-gnu.yaml +++ b/.github/workflows/aarch64-linux-gnu.yaml @@ -23,6 +23,8 @@ on: - 'sherpa-ncnn/csrc/*' - 'toolchains/aarch64-linux-gnu.toolchain.cmake' + workflow_dispatch: + concurrency: group: aarch64-linux-gnu-${{ github.ref }} cancel-in-progress: true diff --git a/.github/workflows/android.yaml b/.github/workflows/android.yaml index 8711a3b6..d2d65e64 100644 --- a/.github/workflows/android.yaml +++ b/.github/workflows/android.yaml @@ -53,6 +53,11 @@ jobs: with: fetch-depth: 0 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2 + with: + key: ${{ matrix.os }}-android + - name: Display NDK HOME shell: bash run: | @@ -62,6 +67,9 @@ jobs: - name: build android arm64-v8a shell: bash run: | + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME ./build-android-arm64-v8a.sh mkdir -p jniLibs/arm64-v8a/ @@ -70,6 +78,9 @@ jobs: - name: build android armv7-eabi shell: bash run: | + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME ./build-android-armv7-eabi.sh mkdir -p ./jniLibs/armeabi-v7a/ @@ -78,6 +89,9 @@ jobs: - name: build android x86_64 shell: bash run: | + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME ./build-android-x86-64.sh mkdir -p ./jniLibs/x86_64 @@ -86,6 +100,9 @@ jobs: - name: build android x86 shell: bash run: | + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME ./build-android-x86.sh mkdir -p ./jniLibs/x86 @@ -117,10 +134,11 @@ jobs: git config --global user.email "csukuangfj@gmail.com" git config --global user.name "Fangjun Kuang" + export GIT_CLONE_PROTECTION_ACTIVE=false GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-ncnn-libs huggingface cd huggingface - git lfs pull + git pull cp -v ../sherpa-ncnn-*-android.tar.bz2 ./ diff --git a/.github/workflows/arm-linux-gnueabihf.yaml b/.github/workflows/arm-linux-gnueabihf.yaml index ec79fe47..3a0aaf6c 100644 --- a/.github/workflows/arm-linux-gnueabihf.yaml +++ b/.github/workflows/arm-linux-gnueabihf.yaml @@ -23,6 +23,8 @@ on: - 'sherpa-ncnn/csrc/*' - 'toolchains/arm-linux-gnueabihf.toolchain.cmake' + workflow_dispatch: + concurrency: group: arm-linux-gnueabihf-${{ github.ref }} cancel-in-progress: true diff --git a/.github/workflows/jni.yaml b/.github/workflows/jni.yaml index 8fa253aa..542a50ce 100644 --- a/.github/workflows/jni.yaml +++ b/.github/workflows/jni.yaml @@ -22,6 +22,8 @@ on: - 'sherpa-ncnn/jni/*' - '.github/scripts/test-jni.sh' + workflow_dispatch: + concurrency: group: jni-${{ github.ref }} cancel-in-progress: true @@ -42,6 +44,11 @@ jobs: with: fetch-depth: 0 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2 + with: + key: ${{ matrix.os }}-jni + - name: Display kotlin version shell: bash run: | @@ -56,4 +63,7 @@ jobs: - name: Run JNI test shell: bash run: | + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + .github/scripts/test-jni.sh diff --git a/.github/workflows/linux.yaml b/.github/workflows/linux.yaml index 519bc75a..3a873bbe 100644 --- a/.github/workflows/linux.yaml +++ b/.github/workflows/linux.yaml @@ -7,6 +7,7 @@ on: paths: - '.github/workflows/linux.yaml' - '.github/scripts/run-test.sh' + - '.github/scripts/test-vad.sh' - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-ncnn/csrc/*' @@ -16,11 +17,14 @@ on: paths: - '.github/workflows/linux.yaml' - '.github/scripts/run-test.sh' + - '.github/scripts/test-vad.sh' - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-ncnn/csrc/*' - 'sherpa-ncnn/csrc/*' + workflow_dispatch: + concurrency: group: linux-${{ github.ref }} cancel-in-progress: true @@ -37,19 +41,31 @@ jobs: os: [ubuntu-latest] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 with: fetch-depth: 0 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2 + with: + key: ${{ matrix.os }}-linux + - name: Configure CMake shell: bash run: | + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + cmake --version + mkdir build cd build cmake -D CMAKE_BUILD_TYPE=Release -DSHERPA_NCNN_ENABLE_FFMPEG_EXAMPLES=OFF .. - name: Build sherpa-ncnn for ubuntu run: | + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + cd build make -j2 @@ -59,6 +75,9 @@ jobs: ls -lh bin/sherpa-ncnn file bin/sherpa-ncnn + ls -lh bin/sherpa-ncnn-vad + file bin/sherpa-ncnn-vad + ls -lh bin/sherpa-ncnn-microphone file bin/sherpa-ncnn-microphone @@ -71,6 +90,13 @@ jobs: name: sherpa-ncnn-pre-built-binaries-os-${{ matrix.os }} path: ./build/bin + - name: Test vad + run: | + export PATH=$PWD/build/bin:$PATH + export EXE=sherpa-ncnn-vad + + .github/scripts/test-vad.sh + - name: Test sherpa-ncnn run: | export PATH=$PWD/build/bin:$PATH diff --git a/.github/workflows/macos.yaml b/.github/workflows/macos.yaml index a3df7f2a..7bedae05 100644 --- a/.github/workflows/macos.yaml +++ b/.github/workflows/macos.yaml @@ -7,6 +7,7 @@ on: paths: - '.github/workflows/macos.yaml' - '.github/scripts/run-test.sh' + - '.github/scripts/test-vad.sh' - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-ncnn/csrc/*' @@ -16,10 +17,13 @@ on: paths: - '.github/workflows/macos.yaml' - '.github/scripts/run-test.sh' + - '.github/scripts/test-vad.sh' - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-ncnn/csrc/*' + workflow_dispatch: + concurrency: group: macos-${{ github.ref }} cancel-in-progress: true @@ -33,22 +37,34 @@ jobs: strategy: fail-fast: false matrix: - os: [macos-latest] + os: [macos-latest, macos-13] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 with: fetch-depth: 0 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2 + with: + key: ${{ matrix.os }}-macos + - name: Configure CMake shell: bash run: | + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + cmake --version + mkdir build cd build cmake -D CMAKE_BUILD_TYPE=Release .. - name: Build sherpa-ncnn for macos run: | + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + cd build make -j2 @@ -58,6 +74,9 @@ jobs: ls -lh bin/sherpa-ncnn file bin/sherpa-ncnn + ls -lh bin/sherpa-ncnn-vad + file bin/sherpa-ncnn-vad + ls -lh bin/sherpa-ncnn-microphone file bin/sherpa-ncnn-microphone @@ -70,6 +89,13 @@ jobs: name: sherpa-ncnn-pre-built-binaries-os-${{ matrix.os }} path: ./build/bin + - name: Test vad + run: | + export PATH=$PWD/build/bin:$PATH + export EXE=sherpa-ncnn-vad + + .github/scripts/test-vad.sh + - name: Test sherpa-ncnn run: | export PATH=$PWD/build/bin:$PATH diff --git a/.github/workflows/npm.yaml b/.github/workflows/npm.yaml index 81066ab7..c1935842 100644 --- a/.github/workflows/npm.yaml +++ b/.github/workflows/npm.yaml @@ -25,6 +25,11 @@ jobs: with: fetch-depth: 0 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2 + with: + key: ${{ matrix.os }}-npm + - name: Install emsdk uses: mymindstorm/setup-emsdk@v14 @@ -59,6 +64,9 @@ jobs: env: NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} run: | + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + ./build-wasm-simd-for-nodejs.sh cp -v build-wasm-simd-for-nodejs/install/bin/wasm/sherpa-ncnn-wasm-main.js ./scripts/nodejs diff --git a/.github/workflows/swift-api-test.yaml b/.github/workflows/swift-api-test.yaml index 1353e3f6..eb72fdbf 100644 --- a/.github/workflows/swift-api-test.yaml +++ b/.github/workflows/swift-api-test.yaml @@ -24,6 +24,8 @@ on: - 'sherpa-ncnn/swift-api-examples/*' - 'build-swift-macos.sh' + workflow_dispatch: + concurrency: group: swift-api-test-${{ github.ref }} cancel-in-progress: true @@ -44,7 +46,15 @@ jobs: with: fetch-depth: 0 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2 + with: + key: ${{ matrix.os }}-swift + - name: Run swift-api-test shell: bash run: | + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + .github/scripts/swift-api-test.sh diff --git a/.github/workflows/test-dot-net.yaml b/.github/workflows/test-dot-net.yaml index ccdaf107..d5a329af 100644 --- a/.github/workflows/test-dot-net.yaml +++ b/.github/workflows/test-dot-net.yaml @@ -15,6 +15,8 @@ on: - '.github/workflows/test-dot-net' - 'dotnet-examples/**' + workflow_dispatch: + schedule: # minute (0-59) # hour (0-23) diff --git a/.github/workflows/test-pip-install.yaml b/.github/workflows/test-pip-install.yaml index 42a2851b..b031d351 100644 --- a/.github/workflows/test-pip-install.yaml +++ b/.github/workflows/test-pip-install.yaml @@ -13,6 +13,8 @@ on: # nightly test at 22:50 UTC time every day - cron: "50 22 * * *" + workflow_dispatch: + concurrency: group: test_pip_install-${{ github.ref }} cancel-in-progress: true diff --git a/.github/workflows/wasm-simd-hf-space-en.yaml b/.github/workflows/wasm-simd-hf-space-en.yaml index ba587084..8781439b 100644 --- a/.github/workflows/wasm-simd-hf-space-en.yaml +++ b/.github/workflows/wasm-simd-hf-space-en.yaml @@ -23,6 +23,12 @@ jobs: - uses: actions/checkout@v4 with: fetch-depth: 0 + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2 + with: + key: ${{ matrix.os }}-wasm + - name: Install emsdk uses: mymindstorm/setup-emsdk@v14 @@ -53,6 +59,9 @@ jobs: - name: Build sherpa-ncnn for WebAssembly shell: bash run: | + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + ./build-wasm-simd.sh - name: collect files diff --git a/.github/workflows/wasm-simd-hf-space-zh-en.yaml b/.github/workflows/wasm-simd-hf-space-zh-en.yaml index e13f0711..9c64d7ca 100644 --- a/.github/workflows/wasm-simd-hf-space-zh-en.yaml +++ b/.github/workflows/wasm-simd-hf-space-zh-en.yaml @@ -23,6 +23,12 @@ jobs: - uses: actions/checkout@v4 with: fetch-depth: 0 + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2 + with: + key: ${{ matrix.os }}-wasm + - name: Install emsdk uses: mymindstorm/setup-emsdk@v14 @@ -53,6 +59,9 @@ jobs: - name: Build sherpa-ncnn for WebAssembly shell: bash run: | + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + ./build-wasm-simd.sh - name: collect files diff --git a/.github/workflows/windows-x64.yaml b/.github/workflows/windows-x64.yaml index 95a707a2..b8919f02 100644 --- a/.github/workflows/windows-x64.yaml +++ b/.github/workflows/windows-x64.yaml @@ -7,6 +7,7 @@ on: paths: - '.github/workflows/windows-x64.yaml' - '.github/scripts/run-test.sh' + - '.github/scripts/test-vad.sh' - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-ncnn/csrc/*' @@ -16,10 +17,13 @@ on: paths: - '.github/workflows/windows-x64.yaml' - '.github/scripts/run-test.sh' + - '.github/scripts/test-vad.sh' - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-ncnn/csrc/*' + workflow_dispatch: + concurrency: group: windows-x64-${{ github.ref }} cancel-in-progress: true @@ -70,8 +74,18 @@ jobs: cmake --build . --config Release -- -m:2 ls -lh ./bin/Release/sherpa-ncnn.exe + ls -lh ./bin/Release/sherpa-ncnn-vad.exe + ls -lh ./bin/Release/decode-file-c-api.exe ls -lh ./bin/Release/sherpa-ncnn-microphone.exe + - name: Test VAD + shell: bash + run: | + export PATH=$PWD/build/bin/Release:$PATH + export EXE=sherpa-ncnn-vad.exe + + .github/scripts/test-vad.sh + - name: Test sherpa-ncnn shell: bash run: | diff --git a/.github/workflows/windows-x86.yaml b/.github/workflows/windows-x86.yaml index 2908f746..c7c4c19f 100644 --- a/.github/workflows/windows-x86.yaml +++ b/.github/workflows/windows-x86.yaml @@ -7,6 +7,7 @@ on: paths: - '.github/workflows/windows-x86.yaml' - '.github/scripts/run-test.sh' + - '.github/scripts/test-vad.sh' - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-ncnn/csrc/*' @@ -16,10 +17,13 @@ on: paths: - '.github/workflows/windows-x86.yaml' - '.github/scripts/run-test.sh' + - '.github/scripts/test-vad.sh' - 'CMakeLists.txt' - 'cmake/**' - 'sherpa-ncnn/csrc/*' + workflow_dispatch: + concurrency: group: windows-x86-${{ github.ref }} cancel-in-progress: true @@ -70,9 +74,18 @@ jobs: cmake --build . --config Release -- -m:2 ls -lh ./bin/Release/sherpa-ncnn.exe + ls -lh ./bin/Release/sherpa-ncnn-vad.exe ls -lh ./bin/Release/decode-file-c-api.exe ls -lh ./bin/Release/sherpa-ncnn-microphone.exe + - name: Test VAD + shell: bash + run: | + export PATH=$PWD/build/bin/Release:$PATH + export EXE=sherpa-ncnn-vad.exe + + .github/scripts/test-vad.sh + - name: Test sherpa-ncnn shell: bash run: | diff --git a/README.md b/README.md index ecb6e361..a3ac5db9 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Introduction You can use `sherpa-ncnn` for **real-time** speech recognition (i.e., speech-to-text) -on +and voice activity detection (VAD) on the following platforms - Linux - macOS @@ -10,6 +10,8 @@ on - Android - etc ... +It runs on locally CPU without accessing the network. + We support all platforms that [ncnn](https://github.com/tencent/ncnn) supports. Everything can be compiled from source with static link. The generated diff --git a/sherpa-ncnn/csrc/CMakeLists.txt b/sherpa-ncnn/csrc/CMakeLists.txt index fac36c50..d189cfa0 100644 --- a/sherpa-ncnn/csrc/CMakeLists.txt +++ b/sherpa-ncnn/csrc/CMakeLists.txt @@ -22,6 +22,7 @@ set(sherpa_ncnn_core_srcs symbol-table.cc tensorasstrided.cc wave-reader.cc + wave-writer.cc zipformer-model.cc ) diff --git a/sherpa-ncnn/csrc/sherpa-ncnn-vad.cc b/sherpa-ncnn/csrc/sherpa-ncnn-vad.cc index 5626cfa5..44266a4a 100644 --- a/sherpa-ncnn/csrc/sherpa-ncnn-vad.cc +++ b/sherpa-ncnn/csrc/sherpa-ncnn-vad.cc @@ -18,18 +18,122 @@ #include -#include "sherpa-ncnn/csrc/silero-vad-model.h" +#include "sherpa-ncnn/csrc/file-utils.h" +#include "sherpa-ncnn/csrc/voice-activity-detector.h" +#include "sherpa-ncnn/csrc/wave-reader.h" +#include "sherpa-ncnn/csrc/wave-writer.h" int main() { + std::string usage = R"usage( +This file shows how to use silero vad to remove silences from a file. + +===========Usage============: + +0. Build sherpa-ncnn +-------------------- + +mkdir -p $HOME/open-source +cd $HOME/open-source +git clone https://github.com/k2-fsa/sherpa-ncnn +cd sherpa-ncnn +mkdir build +cd build +cmake .. +make -j3 + +1. Download the vad model +------------------------- + +cd $HOME/open-source/sherpa-ncnn/build +wget https://github.com/k2-fsa/sherpa-ncnn/releases/download/models/sherpa-ncnn-silero-vad.tar.bz2 +tar xvf sherpa-ncnn-silero-vad.tar.bz2 + +2. Download the test data +------------------------- + +cd $HOME/open-source/sherpa-ncnn/build +wget https://github.com/k2-fsa/sherpa-ncnn/releases/download/models/lei-jun-test.wav +wget https://github.com/k2-fsa/sherpa-ncnn/releases/download/models/Obama.wav + +3. Run it! +---------- + +cd $HOME/open-source/sherpa-ncnn/build +./bin/sherpa-ncnn-vad + +**Note**: We only support 16000Hz wav files. + )usage"; + sherpa_ncnn::SileroVadModelConfig config; - config.param = "./silero.ncnn.param"; - config.bin = "./silero.ncnn.bin"; + config.sample_rate = 16000; + config.param = "./sherpa-ncnn-silero-vad/silero.ncnn.param"; + config.bin = "./sherpa-ncnn-silero-vad/silero.ncnn.bin"; + config.window_size = 512; + if (!config.Validate()) { + fprintf(stderr, "%s %d: %s", __FILE__, static_cast(__LINE__), + usage.c_str()); + return -1; + } + + std::string input_wave = "./lei-jun-test.wav"; + // std::string input_wave = "./Obama.wav"; + if (!sherpa_ncnn::FileExists(input_wave)) { + fprintf(stderr, "%s %d: %s", __FILE__, static_cast(__LINE__), + usage.c_str()); return -1; } - sherpa_ncnn::SileroVadModel model(config); + bool is_ok = false; + std::vector samples = + sherpa_ncnn::ReadWave(input_wave, config.sample_rate, &is_ok); + if (!is_ok) { + fprintf(stderr, "%s %d: We support only %d wave files", __FILE__, + static_cast(__LINE__), config.sample_rate); + return -1; + } + + sherpa_ncnn::VoiceActivityDetector vad(config); + int32_t num_samples = static_cast(samples.size()); + + std::vector segments; + + for (int32_t i = 0; i < samples.size(); i += config.window_size) { + vad.AcceptWaveform(samples.data() + i, config.window_size); + while (!vad.Empty()) { + const auto &front = vad.Front(); + segments.push_back(front); + + vad.Pop(); + } + } + + vad.Flush(); + while (!vad.Empty()) { + const auto &front = vad.Front(); + segments.push_back(front); + + vad.Pop(); + } + + std::vector all_samples; + for (const auto &s : segments) { + float start = s.start / static_cast(config.sample_rate); + float duration = s.samples.size() / static_cast(config.sample_rate); + float stop = start + duration; // in seconds + // + fprintf(stderr, "%.3f -- %.3f s\n", start, start + duration); + all_samples.insert(all_samples.end(), s.samples.begin(), s.samples.end()); + } + + std::string out_wave = "./out-without-silence.wav"; + is_ok = sherpa_ncnn::WriteWave(out_wave, config.sample_rate, + all_samples.data(), all_samples.size()); + if (is_ok) { + fprintf(stderr, "Saved to %s\n", out_wave.c_str()); + } else { + fprintf(stderr, "Failed to saved to %s\n", out_wave.c_str()); + } - std::cout << config.ToString() << "\n"; return 0; } diff --git a/sherpa-ncnn/csrc/silero-vad-model-config.cc b/sherpa-ncnn/csrc/silero-vad-model-config.cc index 941e3e1b..f0d75cdb 100644 --- a/sherpa-ncnn/csrc/silero-vad-model-config.cc +++ b/sherpa-ncnn/csrc/silero-vad-model-config.cc @@ -18,6 +18,7 @@ #include "sherpa-ncnn/csrc/silero-vad-model-config.h" #include +#include #include "platform.h" // for NCNN_LOGE, NOLINT #include "sherpa-ncnn/csrc/file-utils.h" diff --git a/sherpa-ncnn/csrc/silero-vad-model-config.h b/sherpa-ncnn/csrc/silero-vad-model-config.h index 05df1c73..a41421c1 100644 --- a/sherpa-ncnn/csrc/silero-vad-model-config.h +++ b/sherpa-ncnn/csrc/silero-vad-model-config.h @@ -19,13 +19,14 @@ #ifndef SHERPA_NCNN_CSRC_SILERO_VAD_MODEL_CONFIG_H_ #define SHERPA_NCNN_CSRC_SILERO_VAD_MODEL_CONFIG_H_ +#include +#include + #if __ANDROID_API__ >= 9 #include "android/asset_manager.h" #include "android/asset_manager_jni.h" #endif -#include - #include "net.h" // NOLINT namespace sherpa_ncnn { diff --git a/sherpa-ncnn/csrc/silero-vad-model.cc b/sherpa-ncnn/csrc/silero-vad-model.cc index 193e3d10..69dfa9ce 100644 --- a/sherpa-ncnn/csrc/silero-vad-model.cc +++ b/sherpa-ncnn/csrc/silero-vad-model.cc @@ -18,6 +18,8 @@ #include "sherpa-ncnn/csrc/silero-vad-model.h" +#include + #include "net.h" // NOLINT #include "sherpa-ncnn/csrc/model.h" #include "sherpa-ncnn/csrc/silero-vad-model-config.h" diff --git a/sherpa-ncnn/csrc/wave-reader.cc b/sherpa-ncnn/csrc/wave-reader.cc index eff0b8da..5c1c8eb5 100644 --- a/sherpa-ncnn/csrc/wave-reader.cc +++ b/sherpa-ncnn/csrc/wave-reader.cc @@ -23,7 +23,7 @@ #include #include -#include "platform.h" +#include "platform.h" // NOLINT namespace sherpa_ncnn { namespace { diff --git a/sherpa-ncnn/csrc/wave-writer.cc b/sherpa-ncnn/csrc/wave-writer.cc new file mode 100644 index 00000000..02b4554e --- /dev/null +++ b/sherpa-ncnn/csrc/wave-writer.cc @@ -0,0 +1,95 @@ +/** + * Copyright (c) 2022-2024 Xiaomi Corporation (authors: Fangjun Kuang) + * + * See LICENSE for clarification regarding multiple authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "sherpa-ncnn/csrc/wave-writer.h" + +#include +#include +#include + +#include "platform.h" //NOLINT + +namespace sherpa_ncnn { +namespace { + +// see http://soundfile.sapp.org/doc/WaveFormat/ +// +// Note: We assume little endian here +// TODO(fangjun): Support big endian +struct WaveHeader { + int32_t chunk_id; + int32_t chunk_size; + int32_t format; + int32_t subchunk1_id; + int32_t subchunk1_size; + int16_t audio_format; + int16_t num_channels; + int32_t sample_rate; + int32_t byte_rate; + int16_t block_align; + int16_t bits_per_sample; + int32_t subchunk2_id; // a tag of this chunk + int32_t subchunk2_size; // size of subchunk2 +}; + +} // namespace + +bool WriteWave(const std::string &filename, int32_t sampling_rate, + const float *samples, int32_t n) { + WaveHeader header{}; + header.chunk_id = 0x46464952; // FFIR + header.format = 0x45564157; // EVAW + header.subchunk1_id = 0x20746d66; // "fmt " + header.subchunk1_size = 16; // 16 for PCM + header.audio_format = 1; // PCM =1 + + int32_t num_channels = 1; + int32_t bits_per_sample = 16; // int16_t + header.num_channels = num_channels; + header.sample_rate = sampling_rate; + header.byte_rate = sampling_rate * num_channels * bits_per_sample / 8; + header.block_align = num_channels * bits_per_sample / 8; + header.bits_per_sample = bits_per_sample; + header.subchunk2_id = 0x61746164; // atad + header.subchunk2_size = n * num_channels * bits_per_sample / 8; + + header.chunk_size = 36 + header.subchunk2_size; + + std::vector samples_int16(n); + for (int32_t i = 0; i != n; ++i) { + samples_int16[i] = samples[i] * 32676; + } + + std::ofstream os(filename, std::ios::binary); + if (!os) { + NCNN_LOGE("Failed to create %s", filename.c_str()); + return false; + } + + os.write(reinterpret_cast(&header), sizeof(header)); + os.write(reinterpret_cast(samples_int16.data()), + samples_int16.size() * sizeof(int16_t)); + + if (!os) { + NCNN_LOGE("Write %s failed", filename.c_str()); + return false; + } + + return true; +} + +} // namespace sherpa_ncnn diff --git a/sherpa-ncnn/csrc/wave-writer.h b/sherpa-ncnn/csrc/wave-writer.h new file mode 100644 index 00000000..deb9fa75 --- /dev/null +++ b/sherpa-ncnn/csrc/wave-writer.h @@ -0,0 +1,40 @@ +/** + * Copyright (c) 2022-2024 Xiaomi Corporation (authors: Fangjun Kuang) + * + * See LICENSE for clarification regarding multiple authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef SHERPA_NCNN_CSRC_WAVE_WRITER_H_ +#define SHERPA_NCNN_CSRC_WAVE_WRITER_H_ + +#include +#include + +namespace sherpa_ncnn { + +// Write a single channel wave file. +// Note that the input samples are in the range [-1, 1]. It will be multiplied +// by 32767 and saved in int16_t format in the wave file. +// +// @param filename Path to save the samples. +// @param sampling_rate Sample rate of the samples. +// @param samples Pointer to the samples +// @param n Number of samples +// @return Return true if the write succeeds; return false otherwise. +bool WriteWave(const std::string &filename, int32_t sampling_rate, + const float *samples, int32_t n); + +} // namespace sherpa_ncnn + +#endif // SHERPA_NCNN_CSRC_WAVE_WRITER_H_