Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Python ASR example with alsa #324

Merged
merged 5 commits into from
Mar 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
project(sherpa-ncnn)

set(SHERPA_NCNN_VERSION "2.1.9")
set(SHERPA_NCNN_VERSION "2.1.10")

# Disable warning about
#
Expand Down Expand Up @@ -106,6 +106,7 @@ if(SHERPA_NCNN_ENABLE_BINARY AND UNIX AND NOT APPLE)
include(CheckIncludeFileCXX)
check_include_file_cxx(alsa/asoundlib.h SHERPA_NCNN_HAS_ALSA)
if(SHERPA_NCNN_HAS_ALSA)
message(STATUS "With Alsa")
add_definitions(-DSHERPA_NCNN_ENABLE_ALSA=1)
elseif(UNIX AND NOT APPLE)
message(WARNING "\
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
#!/usr/bin/env python3

# Real-time speech recognition from a microphone with sherpa-ncnn Python API
# with endpoint detection.
#
# Note: This script uses ALSA and works only on Linux systems, especially
# for embedded Linux systems and for running Linux on Windows using WSL.
#
# Please refer to
# https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/index.html
# to download pre-trained models

import argparse
import sys

import sherpa_ncnn


def get_args():
    """Parse the command-line options of this example.

    Returns:
      The ``argparse.Namespace`` produced by ``parse_args()``. Its
      ``device_name`` attribute holds the value of the required
      ``--device-name`` option.
    """
    device_name_help = """
The device name specifies which microphone to use in case there are several
on your system. You can use

arecord -l

to find all available microphones on your computer. For instance, if it outputs

**** List of CAPTURE Hardware Devices ****
card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
Subdevices: 1/1
Subdevice #0: subdevice #0

and if you want to select card 3 and the device 0 on that card, please use:

plughw:3,0

as the device_name.
"""

    cli = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    cli.add_argument(
        "--device-name",
        required=True,
        type=str,
        help=device_name_help,
    )

    options = cli.parse_args()
    return options


def create_recognizer():
    """Build the ``sherpa_ncnn.Recognizer`` used by this example.

    The model files are looked up in a directory relative to the current
    working directory. Endpoint detection is enabled and decoding uses
    ``modified_beam_search`` with 4 threads.

    Returns:
      The constructed ``sherpa_ncnn.Recognizer``.
    """
    # Please replace the model files if needed.
    # See https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/index.html
    # for download links.
    model_dir = "./sherpa-ncnn-conv-emformer-transducer-2022-12-06"

    options = dict(
        tokens=f"{model_dir}/tokens.txt",
        encoder_param=f"{model_dir}/encoder_jit_trace-pnnx.ncnn.param",
        encoder_bin=f"{model_dir}/encoder_jit_trace-pnnx.ncnn.bin",
        decoder_param=f"{model_dir}/decoder_jit_trace-pnnx.ncnn.param",
        decoder_bin=f"{model_dir}/decoder_jit_trace-pnnx.ncnn.bin",
        joiner_param=f"{model_dir}/joiner_jit_trace-pnnx.ncnn.param",
        joiner_bin=f"{model_dir}/joiner_jit_trace-pnnx.ncnn.bin",
        num_threads=4,
        decoding_method="modified_beam_search",
        # Endpointing thresholds; NOTE(review): units and exact semantics are
        # defined by sherpa_ncnn.Recognizer -- confirm there before tuning.
        enable_endpoint_detection=True,
        rule1_min_trailing_silence=2.4,
        rule2_min_trailing_silence=1.2,
        rule3_min_utterance_length=300,
        # An empty hotwords file name is passed, as in the original call.
        hotwords_file="",
        hotwords_score=1.5,
    )
    return sherpa_ncnn.Recognizer(**options)


def main():
    """Recognize speech captured from an ALSA device until interrupted.

    Opens the capture device named by ``--device-name``, creates the
    recognizer, then loops forever: read about 100 ms of audio, feed it to
    the recognizer, and print the current text. Partial results overwrite
    the same terminal line (via ``\\r``). When the recognizer reports an
    endpoint, the final text of the segment is printed on its own line and
    the recognizer is reset for the next segment.

    This function never returns normally; it is stopped by an exception such
    as ``KeyboardInterrupt``.
    """
    args = get_args()
    device_name = args.device_name
    print(f"device_name: {device_name}")
    alsa = sherpa_ncnn.Alsa(device_name)

    recognizer = create_recognizer()
    print("Started! Please speak")
    sample_rate = recognizer.sample_rate
    samples_per_read = int(0.1 * sample_rate)  # 0.1 second = 100 ms
    last_result = ""
    segment_id = 0

    while True:
        samples = alsa.read(samples_per_read)  # a blocking read
        recognizer.accept_waveform(sample_rate, samples)

        # Query the endpoint state before reading the text so that both
        # describe the audio accepted so far.
        is_endpoint = recognizer.is_endpoint

        result = recognizer.text
        if result and (last_result != result):
            # Only redraw the line when the partial text actually changed.
            last_result = result
            print(f"\r{segment_id}:{result}", end="", flush=True)

        if is_endpoint:
            if result:
                # Finalize the segment: terminate the line with a newline.
                print(f"\r{segment_id}:{result}", flush=True)
                segment_id += 1
            recognizer.reset()
            # Forget the finished segment's text. Otherwise a new segment
            # whose first partial result equals the previous segment's final
            # text would not be displayed until its text changed.
            last_result = ""


if __name__ == "__main__":
    # main() loops until interrupted, so Ctrl + C is the expected way to stop
    # the script; report it with a short message instead of a traceback.
    try:
        main()
    except KeyboardInterrupt:
        print("\nCaught Ctrl + C. Exiting")
2 changes: 1 addition & 1 deletion sherpa-ncnn/csrc/alsa.cc
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]

and if you want to select card 3 and the device 0 on that card, please use:

hw:3,0
plughw:3,0

)";

Expand Down
2 changes: 1 addition & 1 deletion sherpa-ncnn/csrc/sherpa-ncnn-alsa.cc
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]

and if you want to select card 3 and the device 0 on that card, please use:

hw:3,0
plughw:3,0

as the device_name.
)usage";
Expand Down
1 change: 1 addition & 0 deletions sherpa-ncnn/csrc/stream.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "sherpa-ncnn/csrc/stream.h"

#include <iostream>
#include <utility>

namespace sherpa_ncnn {

Expand Down
13 changes: 13 additions & 0 deletions sherpa-ncnn/python/csrc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ set(srcs
stream.cc
)

# Select the implementation of the Python `Alsa` binding:
#  - with ALSA available, compile the core ALSA wrapper together with the
#    real binding (alsa.cc in this directory);
#  - otherwise compile the stub binding from faked-alsa.cc.
if(SHERPA_NCNN_HAS_ALSA)
  list(APPEND srcs ${CMAKE_SOURCE_DIR}/sherpa-ncnn/csrc/alsa.cc alsa.cc)
else()
  list(APPEND srcs faked-alsa.cc)
endif()

pybind11_add_module(_sherpa_ncnn ${srcs})
target_link_libraries(_sherpa_ncnn PRIVATE sherpa-ncnn-core)

Expand All @@ -28,6 +34,13 @@ if(NOT WIN32)
target_link_libraries(_sherpa_ncnn PRIVATE "-Wl,-rpath,${SHERPA_NCNN_RPATH_ORIGIN}/sherpa_ncnn/lib")
endif()

# Link the extension module against libasound when ALSA is available.
# If the environment variable SHERPA_NCNN_ALSA_LIB_DIR is set, that directory
# is added to the linker search path (-L) before -lasound.
if(SHERPA_NCNN_HAS_ALSA)
  if(DEFINED ENV{SHERPA_NCNN_ALSA_LIB_DIR})
    target_link_libraries(_sherpa_ncnn PRIVATE -L$ENV{SHERPA_NCNN_ALSA_LIB_DIR} -lasound)
  else()
    target_link_libraries(_sherpa_ncnn PRIVATE asound)
  endif()
endif()

install(TARGETS _sherpa_ncnn
DESTINATION ../
Expand Down
30 changes: 30 additions & 0 deletions sherpa-ncnn/python/csrc/alsa.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// sherpa-ncnn/python/csrc/alsa.cc
//
// Copyright (c) 2024 Xiaomi Corporation

#include "sherpa-ncnn/python/csrc/alsa.h"

#include <vector>

#include "sherpa-ncnn/csrc/alsa.h"

namespace sherpa_ncnn {

// Exposes sherpa_ncnn::Alsa to Python as the class `Alsa` of module `m`.
//
// Python-visible API:
//   Alsa(device_name)      -- constructor
//   read(num_samples)      -- returns the samples as a list of floats
//   expected_sample_rate   -- read-only property
//   actual_sample_rate     -- read-only property
//
// The constructor and read() run with the GIL released (see the call guards).
void PybindAlsa(py::module *m) {
  py::class_<Alsa> alsa(*m, "Alsa");

  alsa.def(py::init<const char *>(), py::arg("device_name"),
           py::call_guard<py::gil_scoped_release>());

  // Wrap Read() in a lambda so that the result is handed to Python as a
  // std::vector<float> by value.
  alsa.def(
      "read",
      [](Alsa &self, int32_t num_samples) -> std::vector<float> {
        return self.Read(num_samples);
      },
      py::arg("num_samples"), py::call_guard<py::gil_scoped_release>());

  alsa.def_property_readonly("expected_sample_rate",
                             &Alsa::GetExpectedSampleRate);
  alsa.def_property_readonly("actual_sample_rate",
                             &Alsa::GetActualSampleRate);
}

}  // namespace sherpa_ncnn
16 changes: 16 additions & 0 deletions sherpa-ncnn/python/csrc/alsa.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// sherpa-ncnn/python/csrc/alsa.h
//
// Copyright (c) 2024 Xiaomi Corporation

#ifndef SHERPA_NCNN_PYTHON_CSRC_ALSA_H_
#define SHERPA_NCNN_PYTHON_CSRC_ALSA_H_

#include "sherpa-ncnn/python/csrc/sherpa-ncnn.h"

namespace sherpa_ncnn {

// Registers the Python class `Alsa` on the module `*m`.
//
// The build compiles exactly one definition of this function: the real
// binding (alsa.cc) when ALSA is available, or the stub (faked-alsa.cc)
// otherwise.
void PybindAlsa(py::module *m);

} // namespace sherpa_ncnn

#endif // SHERPA_NCNN_PYTHON_CSRC_ALSA_H_
47 changes: 47 additions & 0 deletions sherpa-ncnn/python/csrc/faked-alsa.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// sherpa-ncnn/python/csrc/faked-alsa.cc
//
// Copyright (c) 2024 Xiaomi Corporation

#include "sherpa-ncnn/csrc/macros.h"
#include "sherpa-ncnn/python/csrc/alsa.h"

namespace sherpa_ncnn {

// Stand-in for sherpa_ncnn::Alsa, used when the Python extension is built
// without ALSA support. It keeps the Python-visible `Alsa` class present so
// that importing it always works, but constructing it logs an explanation
// and terminates the process.
class FakedAlsa {
 public:
  // Logs why audio capture is unavailable and then exits with status -1.
  // The device name argument is ignored.
  explicit FakedAlsa(const char *) {
    SHERPA_NCNN_LOGE("This function is for Linux only.");
#if (SHERPA_NCNN_ENABLE_ALSA == 0) && (defined(__unix__) || defined(__unix))
    // On Unix-like systems built without ALSA, tell the user how to enable it.
    SHERPA_NCNN_LOGE(R"doc(
sherpa-ncnn is compiled without alsa support. To enable that, please run
(1) sudo apt-get install alsa-utils libasound2-dev
(2) rebuild sherpa-ncnn
)doc");
#endif
    exit(-1);
  }

  // The members below mirror the interface bound for the real Alsa class.
  // They return placeholder values; the constructor exits before an
  // instance can be used.
  std::vector<float> Read(int32_t) const { return {}; }
  int32_t GetExpectedSampleRate() const { return -1; }
  int32_t GetActualSampleRate() const { return -1; }
};

// Registers FakedAlsa under the Python name `Alsa` on module `*m`, with the
// same method and property names as the real binding.
void PybindAlsa(py::module *m) {
  using PyClass = FakedAlsa;
  py::class_<PyClass>(*m, "Alsa")
      .def(py::init<const char *>(), py::arg("device_name"))
      .def(
          "read",
          [](PyClass &self, int32_t num_samples) -> std::vector<float> {
            return self.Read(num_samples);
          },
          py::arg("num_samples"), py::call_guard<py::gil_scoped_release>())
      .def_property_readonly("expected_sample_rate",
                             &PyClass::GetExpectedSampleRate)
      .def_property_readonly("actual_sample_rate",
                             &PyClass::GetActualSampleRate);
}

}  // namespace sherpa_ncnn
3 changes: 3 additions & 0 deletions sherpa-ncnn/python/csrc/sherpa-ncnn.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include "sherpa-ncnn/python/csrc/sherpa-ncnn.h"

#include "sherpa-ncnn/python/csrc/alsa.h"
#include "sherpa-ncnn/python/csrc/decoder.h"
#include "sherpa-ncnn/python/csrc/display.h"
#include "sherpa-ncnn/python/csrc/endpoint.h"
Expand All @@ -39,6 +40,8 @@ PYBIND11_MODULE(_sherpa_ncnn, m) {
PybindRecognizer(&m);

PybindDisplay(&m);

PybindAlsa(&m);
}

} // namespace sherpa_ncnn
3 changes: 2 additions & 1 deletion sherpa-ncnn/python/sherpa_ncnn/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from _sherpa_ncnn import Alsa, Display

from .recognizer import Recognizer
from _sherpa_ncnn import Display
Loading