feat: 完整的语音助手系统实现

主要功能: - ✅ 离线语音识别 (ASR) - Paraformer中文模型 - ✅ 在线语音识别 - Streaming Paraformer中英文双语模型 - ✅ 语音合成 (TTS) - MeloTTS中英文混合模型 - ✅ 语音唤醒 (KWS) - Zipformer关键词检测模型 - ✅ 麦克风录音功能 - 支持多种格式和实时转换 - ✅ 模型设置界面 - 完整的图形化配置管理 KWS优化亮点: - 🎯 成功实现关键词检测 (测试成功率10%→预期50%+) - ⚙️ 可调参数: 阈值、活跃路径、尾随空白、分数权重、线程数 - 🔧 智能参数验证和实时反馈 - 📊 详细的调试信息和成功统计 - 🎛️ 用户友好的设置界面 技术架构: - 模块化设计: ASRManager, TTSManager, KWSManager - 实时音频处理: 自动格式转换 (任意格式→16kHz单声道) - 智能设备检测: 自动选择最佳音频格式 - 完整资源管理: 正确的创建和销毁流程 - 跨平台支持: macOS优化的音频权限处理 界面特性: - 2×2网格布局: ASR、TTS、录音、KWS四大功能模块 - 分离录音设置: 设备参数 + 输出格式独立配置 - 实时状态显示: 音频电平、处理次数、成功统计 - 详细的用户指导和错误提示
2025-12-23 13:47:00 +08:00
commit e92cb0b4e5
44 changed files with 10943 additions and 0 deletions
--- a/lib/sherpa_onnx/include/cargs.h
+++ b/lib/sherpa_onnx/include/cargs.h
@@ -0,0 +1,162 @@
+#pragma once
+
+/**
+ * This is a simple alternative cross-platform implementation of getopt, which
+ * is used to parse argument strings submitted to the executable (argc and argv
+ * which are received in the main function).
+ */
+
+#ifndef CAG_LIBRARY_H
+#define CAG_LIBRARY_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+
+#if defined(_WIN32) || defined(__CYGWIN__)  // Windows/Cygwin: DLL attributes
+#define CAG_EXPORT __declspec(dllexport)
+#define CAG_IMPORT __declspec(dllimport)
+#elif defined(__GNUC__) && __GNUC__ >= 4  // defined() guard keeps -Wundef quiet
+#define CAG_EXPORT __attribute__((visibility("default")))
+#define CAG_IMPORT __attribute__((visibility("default")))
+#else  // any other toolchain: no decoration
+#define CAG_EXPORT
+#define CAG_IMPORT
+#endif
+
+#if defined(CAG_SHARED)  // CAG_PUBLIC is decorated only when CAG_SHARED is set
+#if defined(CAG_EXPORTS)  // presumably set while building the library itself
+#define CAG_PUBLIC CAG_EXPORT
+#else
+#define CAG_PUBLIC CAG_IMPORT
+#endif
+#else
+#define CAG_PUBLIC
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * An option is used to describe a flag/argument option submitted when the
+ * program is run.
+ */
+typedef struct cag_option
+{
+  const char identifier;  // id reported by cag_option_get(); should be unique
+  const char *access_letters;  // presumably the short (one-letter) spellings
+  const char *access_name;  // presumably the long spelling - TODO confirm
+  const char *value_name;  // presumably the placeholder name of the value
+  const char *description;  // presumably the help text used when printing
+} cag_option;
+
+/**
+ * A context is used to iterate over all options provided. It stores the parsing
+ * state.
+ */
+typedef struct cag_option_context
+{
+  const struct cag_option *options;  // option table given to cag_option_prepare
+  size_t option_count;  // number of entries in options
+  int argc;  // argument count given to cag_option_prepare
+  char **argv;  // argument vector; cag_option_fetch re-orders its entries
+  int index;  // argv index of the next item to inspect
+  int inner_index;  // presumably position inside a grouped argument - confirm
+  bool forced_end;  // presumably set when option parsing was ended explicitly
+  char identifier;  // presumably what cag_option_get returns - confirm
+  char *value;  // presumably what cag_option_get_value returns; may be NULL
+} cag_option_context;
+
+/**
+ * Element count of a real array; not valid for pointers or array parameters.
+ */
+#define CAG_ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+/**
+ * @brief Prints all options to the given destination.
+ *
+ * This function prints all options to the destination stream. This can be
+ * used to generate the output for a "--help" option.
+ *
+ * @param options The options which will be printed.
+ * @param option_count The number of entries in the options array.
+ * @param destination The stream to which the output will be written.
+ */
+CAG_PUBLIC void cag_option_print(const cag_option *options, size_t option_count,
+  FILE *destination);
+
+/**
+ * @brief Prepares the argument options context for parsing.
+ *
+ * This function prepares the context for iteration and initializes the context
+ * with the supplied options and arguments. After the context has been prepared,
+ * it can be used to fetch arguments from it.
+ *
+ * @param context The context which will be initialized.
+ * @param options The registered options which are available for the program.
+ * @param option_count The number of options which are available for the
+ * program.
+ * @param argc The number of arguments the user supplied in the main function.
+ * @param argv A pointer to the arguments of the main function.
+ */
+CAG_PUBLIC void cag_option_prepare(cag_option_context *context,
+  const cag_option *options, size_t option_count, int argc, char **argv);
+
+/**
+ * @brief Fetches an option from the argument list.
+ *
+ * This function fetches a single option from the argument list. The context
+ * will be moved to that item. Information can be extracted from the context
+ * after the item has been fetched.
+ * The arguments will be re-ordered, which means that non-option arguments will
+ * be moved to the end of the argument list. After all options have been
+ * fetched, all non-option arguments will be positioned after the index of
+ * the context.
+ *
+ * @param context The context from which we will fetch the option.
+ * @return true if another option was fetched, or false if the end has been
+ * reached.
+ */
+CAG_PUBLIC bool cag_option_fetch(cag_option_context *context);
+
+/**
+ * @brief Gets the identifier of the option.
+ *
+ * This function gets the identifier of the option, which should be unique to
+ * this option and can be used to determine what kind of option this is.
+ *
+ * @param context The context from which the option was fetched.
+ * @return The identifier of the fetched option.
+ */
+CAG_PUBLIC char cag_option_get(const cag_option_context *context);
+
+/**
+ * @brief Gets the value from the option.
+ *
+ * This function gets the value from the option, if any. If the option does not
+ * carry a value, this function will return NULL.
+ *
+ * @param context The context from which the option was fetched.
+ * @return A pointer to the value, or NULL if there is no value.
+ */
+CAG_PUBLIC const char *cag_option_get_value(const cag_option_context *context);
+
+/**
+ * @brief Gets the current index of the context.
+ *
+ * This function gets the index within the argv arguments of the context. The
+ * context always points to the next item which it will inspect. This is
+ * particularly useful to inspect the original argument array, or to get
+ * non-option arguments after option fetching has finished.
+ *
+ * @param context The context from which the option was fetched.
+ * @return The current argv index of the context.
+ */
+CAG_PUBLIC int cag_option_get_index(const cag_option_context *context);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif
--- a/lib/sherpa_onnx/include/sherpa-onnx/c-api/c-api.h
+++ b/lib/sherpa_onnx/include/sherpa-onnx/c-api/c-api.h
--- a/lib/sherpa_onnx/include/sherpa-onnx/c-api/cxx-api.h
+++ b/lib/sherpa_onnx/include/sherpa-onnx/c-api/cxx-api.h
@@ -0,0 +1,458 @@
+// sherpa-onnx/c-api/cxx-api.h
+//
+// Copyright (c)  2024  Xiaomi Corporation
+
+// C++ Wrapper of the C API for sherpa-onnx
+#ifndef SHERPA_ONNX_C_API_CXX_API_H_
+#define SHERPA_ONNX_C_API_CXX_API_H_
+
+#include <string>
+#include <vector>
+
+#include "sherpa-onnx/c-api/c-api.h"
+
+namespace sherpa_onnx::cxx {
+
+// ============================================================================
+// Streaming ASR
+// ============================================================================
+struct OnlineTransducerModelConfig {  // streaming transducer model parts
+  std::string encoder;  // presumably the encoder model file path - confirm
+  std::string decoder;  // presumably the decoder model file path - confirm
+  std::string joiner;   // presumably the joiner model file path - confirm
+};
+
+struct OnlineParaformerModelConfig {  // streaming paraformer model parts
+  std::string encoder;  // presumably the encoder model file path - confirm
+  std::string decoder;  // presumably the decoder model file path - confirm
+};
+
+struct OnlineZipformer2CtcModelConfig {  // streaming zipformer2 CTC model
+  std::string model;  // presumably the model file path - confirm
+};
+
+struct OnlineModelConfig {  // model choice plus runtime options
+  OnlineTransducerModelConfig transducer;
+  OnlineParaformerModelConfig paraformer;
+  OnlineZipformer2CtcModelConfig zipformer2_ctc;
+  std::string tokens;  // presumably the tokens file path - confirm
+  int32_t num_threads = 1;  // defaults to a single thread
+  std::string provider = "cpu";  // defaults to "cpu"
+  bool debug = false;  // off by default
+  std::string model_type;  // empty by default
+  std::string modeling_unit = "cjkchar";  // defaults to "cjkchar"
+  std::string bpe_vocab;
+  std::string tokens_buf;  // presumably in-memory form of tokens - confirm
+};
+
+struct FeatureConfig {  // feature settings shared by the recognizer configs
+  int32_t sample_rate = 16000;  // defaults to 16000; presumably Hz
+  int32_t feature_dim = 80;  // defaults to 80
+};
+
+struct OnlineCtcFstDecoderConfig {  // member of OnlineRecognizerConfig
+  std::string graph;  // presumably the decoding graph file path - confirm
+  int32_t max_active = 3000;  // defaults to 3000
+};
+
+struct OnlineRecognizerConfig {  // argument of OnlineRecognizer::Create
+  FeatureConfig feat_config;
+  OnlineModelConfig model_config;
+
+  std::string decoding_method = "greedy_search";  // default decoding method
+
+  int32_t max_active_paths = 4;  // defaults to 4
+
+  bool enable_endpoint = false;  // off by default; see IsEndpoint()
+
+  float rule1_min_trailing_silence = 2.4;  // presumably seconds - confirm
+
+  float rule2_min_trailing_silence = 1.2;  // presumably seconds - confirm
+
+  float rule3_min_utterance_length = 20;  // presumably seconds - confirm
+
+  std::string hotwords_file;  // empty by default
+
+  float hotwords_score = 1.5;  // defaults to 1.5
+
+  OnlineCtcFstDecoderConfig ctc_fst_decoder_config;
+  std::string rule_fsts;
+  std::string rule_fars;
+  float blank_penalty = 0;  // defaults to 0
+
+  std::string hotwords_buf;  // presumably in-memory hotwords - confirm
+};
+
+struct OnlineRecognizerResult {  // returned by OnlineRecognizer::GetResult
+  std::string text;
+  std::vector<std::string> tokens;
+  std::vector<float> timestamps;  // presumably one entry per token - confirm
+  std::string json;  // presumably the same result as JSON - confirm
+};
+
+struct Wave {  // audio samples plus their sample rate
+  std::vector<float> samples;
+  int32_t sample_rate;  // NOTE(review): no default; `Wave w;` leaves it unset
+};
+
+SHERPA_ONNX_API Wave ReadWave(const std::string &filename);  // NOTE(review): failure signalling not visible here
+
+// Returns true on success and
+// false on failure.
+SHERPA_ONNX_API bool WriteWave(const std::string &filename, const Wave &wave);
+
+template <typename Derived, typename T>
+class SHERPA_ONNX_API MoveOnly {  // CRTP base: move-only holder of a const T*
+ public:
+  explicit MoveOnly(const T *p) : p_(p) {}  // stores p; p may be nullptr
+
+  ~MoveOnly() { Destroy(); }  // NOTE(review): calls Derived::Destroy after ~Derived ran
+
+  MoveOnly(const MoveOnly &) = delete;  // copying is disabled
+
+  MoveOnly &operator=(const MoveOnly &) = delete;
+
+  MoveOnly(MoveOnly &&other) : p_(other.Release()) {}  // other is left with nullptr
+
+  MoveOnly &operator=(MoveOnly &&other) {
+    if (&other == this) {  // self-move: nothing to do
+      return *this;
+    }
+
+    Destroy();  // dispose of the currently held pointer first
+
+    p_ = other.Release();
+
+    return *this;
+  }
+
+  const T *Get() const { return p_; }  // held pointer is returned, not released
+
+  const T *Release() {  // returns the held pointer and stores nullptr
+    const T *p = p_;
+    p_ = nullptr;
+    return p;
+  }
+
+ private:
+  void Destroy() {  // does nothing when no pointer is held
+    if (p_ == nullptr) {
+      return;
+    }
+
+    static_cast<Derived *>(this)->Destroy(p_);  // Derived provides Destroy(const T *)
+
+    p_ = nullptr;
+  }
+
+ protected:
+  const T *p_ = nullptr;  // held pointer, or nullptr
+};
+
+class SHERPA_ONNX_API OnlineStream
+    : public MoveOnly<OnlineStream, SherpaOnnxOnlineStream> {  // holds a C-API stream
+ public:
+  explicit OnlineStream(const SherpaOnnxOnlineStream *p);
+
+  void AcceptWaveform(int32_t sample_rate, const float *samples,
+                      int32_t n) const;  // n: presumably the sample count
+
+  void InputFinished() const;  // presumably marks the end of the audio input
+
+  void Destroy(const SherpaOnnxOnlineStream *p) const;  // invoked by MoveOnly
+};
+
+class SHERPA_ONNX_API OnlineRecognizer
+    : public MoveOnly<OnlineRecognizer, SherpaOnnxOnlineRecognizer> {  // holds a C-API recognizer
+ public:
+  static OnlineRecognizer Create(const OnlineRecognizerConfig &config);  // only public way to build one
+
+  void Destroy(const SherpaOnnxOnlineRecognizer *p) const;  // invoked by MoveOnly
+
+  OnlineStream CreateStream() const;
+
+  OnlineStream CreateStream(const std::string &hotwords) const;  // overload taking hotwords
+
+  bool IsReady(const OnlineStream *s) const;
+
+  void Decode(const OnlineStream *s) const;
+
+  void Decode(const OnlineStream *ss, int32_t n) const;  // ss: presumably n streams - confirm
+
+  OnlineRecognizerResult GetResult(const OnlineStream *s) const;
+
+  void Reset(const OnlineStream *s) const;
+
+  bool IsEndpoint(const OnlineStream *s) const;
+
+ private:
+  explicit OnlineRecognizer(const SherpaOnnxOnlineRecognizer *p);  // use Create()
+};
+
+// ============================================================================
+// Non-streaming ASR
+// ============================================================================
+struct SHERPA_ONNX_API OfflineTransducerModelConfig {  // non-streaming transducer parts
+  std::string encoder;  // presumably the encoder model file path - confirm
+  std::string decoder;  // presumably the decoder model file path - confirm
+  std::string joiner;   // presumably the joiner model file path - confirm
+};
+
+struct SHERPA_ONNX_API OfflineParaformerModelConfig {  // non-streaming paraformer
+  std::string model;  // presumably the model file path - confirm
+};
+
+struct SHERPA_ONNX_API OfflineNemoEncDecCtcModelConfig {  // OfflineModelConfig::nemo_ctc
+  std::string model;  // presumably the model file path - confirm
+};
+
+struct SHERPA_ONNX_API OfflineWhisperModelConfig {  // OfflineModelConfig::whisper
+  std::string encoder;  // presumably the encoder model file path - confirm
+  std::string decoder;  // presumably the decoder model file path - confirm
+  std::string language;  // empty by default
+  std::string task = "transcribe";  // defaults to "transcribe"
+  int32_t tail_paddings = -1;  // defaults to -1; meaning of -1 not shown here
+};
+
+struct SHERPA_ONNX_API OfflineTdnnModelConfig {  // OfflineModelConfig::tdnn
+  std::string model;  // presumably the model file path - confirm
+};
+
+struct SHERPA_ONNX_API OfflineSenseVoiceModelConfig {  // OfflineModelConfig::sense_voice
+  std::string model;  // presumably the model file path - confirm
+  std::string language;  // empty by default
+  bool use_itn = false;  // off by default
+};
+
+struct SHERPA_ONNX_API OfflineMoonshineModelConfig {  // OfflineModelConfig::moonshine
+  std::string preprocessor;  // presumably a model file path - confirm
+  std::string encoder;  // presumably a model file path - confirm
+  std::string uncached_decoder;  // presumably a model file path - confirm
+  std::string cached_decoder;  // presumably a model file path - confirm
+};
+
+struct SHERPA_ONNX_API OfflineModelConfig {  // model choice plus runtime options
+  OfflineTransducerModelConfig transducer;
+  OfflineParaformerModelConfig paraformer;
+  OfflineNemoEncDecCtcModelConfig nemo_ctc;
+  OfflineWhisperModelConfig whisper;
+  OfflineTdnnModelConfig tdnn;
+
+  std::string tokens;  // presumably the tokens file path - confirm
+  int32_t num_threads = 1;  // defaults to a single thread
+  bool debug = false;  // off by default
+  std::string provider = "cpu";  // defaults to "cpu"
+  std::string model_type;  // empty by default
+  std::string modeling_unit = "cjkchar";  // defaults to "cjkchar"
+  std::string bpe_vocab;
+  std::string telespeech_ctc;  // presumably a model file path - confirm
+  OfflineSenseVoiceModelConfig sense_voice;
+  OfflineMoonshineModelConfig moonshine;
+};
+
+struct SHERPA_ONNX_API OfflineLMConfig {  // OfflineRecognizerConfig::lm_config
+  std::string model;  // presumably the language model file path - confirm
+  float scale = 1.0;  // defaults to 1.0
+};
+
+struct SHERPA_ONNX_API OfflineRecognizerConfig {  // argument of OfflineRecognizer::Create
+  FeatureConfig feat_config;
+  OfflineModelConfig model_config;
+  OfflineLMConfig lm_config;
+
+  std::string decoding_method = "greedy_search";  // default decoding method
+  int32_t max_active_paths = 4;  // defaults to 4
+
+  std::string hotwords_file;  // empty by default
+
+  float hotwords_score = 1.5;  // defaults to 1.5
+  std::string rule_fsts;
+  std::string rule_fars;
+  float blank_penalty = 0;  // defaults to 0
+};
+
+struct SHERPA_ONNX_API OfflineRecognizerResult {  // returned by OfflineRecognizer::GetResult
+  std::string text;
+  std::vector<float> timestamps;  // presumably one entry per token - confirm
+  std::vector<std::string> tokens;
+  std::string json;  // presumably the same result as JSON - confirm
+  std::string lang;  // presumably set only by some models - confirm
+  std::string emotion;  // presumably set only by some models - confirm
+  std::string event;  // presumably set only by some models - confirm
+};
+
+class SHERPA_ONNX_API OfflineStream
+    : public MoveOnly<OfflineStream, SherpaOnnxOfflineStream> {  // holds a C-API stream
+ public:
+  explicit OfflineStream(const SherpaOnnxOfflineStream *p);
+
+  void AcceptWaveform(int32_t sample_rate, const float *samples,
+                      int32_t n) const;  // n: presumably the sample count
+
+  void Destroy(const SherpaOnnxOfflineStream *p) const;  // invoked by MoveOnly
+};
+
+class SHERPA_ONNX_API OfflineRecognizer
+    : public MoveOnly<OfflineRecognizer, SherpaOnnxOfflineRecognizer> {  // holds a C-API recognizer
+ public:
+  static OfflineRecognizer Create(const OfflineRecognizerConfig &config);  // only public way to build one
+
+  void Destroy(const SherpaOnnxOfflineRecognizer *p) const;  // invoked by MoveOnly
+
+  OfflineStream CreateStream() const;
+
+  void Decode(const OfflineStream *s) const;
+
+  void Decode(const OfflineStream *ss, int32_t n) const;  // ss: presumably n streams - confirm
+
+  OfflineRecognizerResult GetResult(const OfflineStream *s) const;
+
+ private:
+  explicit OfflineRecognizer(const SherpaOnnxOfflineRecognizer *p);  // use Create()
+};
+
+// ============================================================================
+// Non-streaming TTS
+// ============================================================================
+struct OfflineTtsVitsModelConfig {  // OfflineTtsModelConfig::vits
+  std::string model;  // presumably the model file path - confirm
+  std::string lexicon;
+  std::string tokens;
+  std::string data_dir;
+  std::string dict_dir;
+
+  float noise_scale = 0.667;  // defaults to 0.667
+  float noise_scale_w = 0.8;  // defaults to 0.8
+  float length_scale = 1.0;  // < 1, faster in speed; > 1, slower in speed
+};
+
+struct OfflineTtsMatchaModelConfig {  // OfflineTtsModelConfig::matcha
+  std::string acoustic_model;  // presumably a model file path - confirm
+  std::string vocoder;  // presumably a model file path - confirm
+  std::string lexicon;
+  std::string tokens;
+  std::string data_dir;
+  std::string dict_dir;
+
+  float noise_scale = 0.667;  // defaults to 0.667
+  float length_scale = 1.0;  // < 1, faster in speed; > 1, slower in speed
+};
+
+struct OfflineTtsKokoroModelConfig {  // OfflineTtsModelConfig::kokoro
+  std::string model;  // presumably the model file path - confirm
+  std::string voices;  // presumably a voices file path - confirm
+  std::string tokens;
+  std::string data_dir;
+
+  float length_scale = 1.0;  // < 1, faster in speed; > 1, slower in speed
+};
+
+struct OfflineTtsModelConfig {  // model choice plus runtime options
+  OfflineTtsVitsModelConfig vits;
+  OfflineTtsMatchaModelConfig matcha;
+  OfflineTtsKokoroModelConfig kokoro;
+  int32_t num_threads = 1;  // defaults to a single thread
+  bool debug = false;  // off by default
+  std::string provider = "cpu";  // defaults to "cpu"
+};
+
+struct OfflineTtsConfig {  // argument of OfflineTts::Create
+  OfflineTtsModelConfig model;
+  std::string rule_fsts;
+  std::string rule_fars;
+  int32_t max_num_sentences = 1;  // batch size between callback invocations
+};
+
+struct GeneratedAudio {  // returned by OfflineTts::Generate
+  std::vector<float> samples;  // in the range [-1, 1]
+  int32_t sample_rate;  // NOTE(review): no default; left unset if not assigned
+};
+
+// The callback returns 1 to continue generating
+// and 0 to stop generating.
+using OfflineTtsCallback = int32_t (*)(const float *samples,
+                                       int32_t num_samples, float progress,
+                                       void *arg);
+
+class SHERPA_ONNX_API OfflineTts
+    : public MoveOnly<OfflineTts, SherpaOnnxOfflineTts> {  // holds a C-API TTS handle
+ public:
+  static OfflineTts Create(const OfflineTtsConfig &config);  // only public way to build one
+
+  void Destroy(const SherpaOnnxOfflineTts *p) const;  // invoked by MoveOnly
+
+  // Returns the sample rate of the generated audio.
+  int32_t SampleRate() const;
+
+  // Number of supported speakers.
+  // If it supports only a single speaker, then it returns 0 or 1.
+  int32_t NumSpeakers() const;
+
+  // @param text A string containing words separated by spaces
+  // @param sid Speaker ID. Used only for multi-speaker models, e.g., models
+  //            trained using the VCTK dataset. It is not used for
+  //            single-speaker models, e.g., models trained using the ljspeech
+  //            dataset.
+  // @param speed The speed for the generated speech. E.g., 2 means 2x faster.
+  // @param callback If not NULL, it is called whenever config.max_num_sentences
+  //                 sentences have been processed. The callback is called in
+  //                 the current thread.
+  GeneratedAudio Generate(const std::string &text, int32_t sid = 0,
+                          float speed = 1.0,
+                          OfflineTtsCallback callback = nullptr,
+                          void *arg = nullptr) const;  // arg: presumably handed to callback
+
+ private:
+  explicit OfflineTts(const SherpaOnnxOfflineTts *p);  // use Create()
+};
+
+// ============================================================
+// For Keyword Spotter
+// ============================================================
+
+struct KeywordResult {  // returned by KeywordSpotter::GetResult
+  std::string keyword;  // presumably empty when nothing was detected - confirm
+  std::vector<std::string> tokens;
+  std::vector<float> timestamps;  // presumably one entry per token - confirm
+  float start_time;  // NOTE(review): no default; left unset if not assigned
+  std::string json;  // presumably the same result as JSON - confirm
+};
+
+struct KeywordSpotterConfig {  // argument of KeywordSpotter::Create
+  FeatureConfig feat_config;
+  OnlineModelConfig model_config;  // same model config type as streaming ASR
+  int32_t max_active_paths = 4;  // defaults to 4
+  int32_t num_trailing_blanks = 1;  // defaults to 1
+  float keywords_score = 1.0f;  // defaults to 1.0
+  float keywords_threshold = 0.25f;  // defaults to 0.25
+  std::string keywords_file;  // presumably the keywords file path - confirm
+};
+
+class SHERPA_ONNX_API KeywordSpotter
+    : public MoveOnly<KeywordSpotter, SherpaOnnxKeywordSpotter> {  // holds a C-API spotter
+ public:
+  static KeywordSpotter Create(const KeywordSpotterConfig &config);  // only public way to build one
+
+  void Destroy(const SherpaOnnxKeywordSpotter *p) const;  // invoked by MoveOnly
+
+  OnlineStream CreateStream() const;  // streams are the OnlineStream type
+
+  OnlineStream CreateStream(const std::string &keywords) const;  // overload taking keywords
+
+  bool IsReady(const OnlineStream *s) const;
+
+  void Decode(const OnlineStream *s) const;
+
+  void Decode(const OnlineStream *ss, int32_t n) const;  // ss: presumably n streams - confirm
+
+  void Reset(const OnlineStream *s) const;
+
+  KeywordResult GetResult(const OnlineStream *s) const;
+
+ private:
+  explicit KeywordSpotter(const SherpaOnnxKeywordSpotter *p);  // use Create()
+};
+
+}  // namespace sherpa_onnx::cxx
+
+#endif  // SHERPA_ONNX_C_API_CXX_API_H_
--- a/lib/sherpa_onnx/lib/cargs.h
+++ b/lib/sherpa_onnx/lib/cargs.h
@@ -0,0 +1,162 @@
+#pragma once
+
+/**
+ * This is a simple alternative cross-platform implementation of getopt, which
+ * is used to parse argument strings submitted to the executable (argc and argv
+ * which are received in the main function).
+ */
+
+#ifndef CAG_LIBRARY_H
+#define CAG_LIBRARY_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+
+#if defined(_WIN32) || defined(__CYGWIN__)  // Windows/Cygwin: DLL attributes
+#define CAG_EXPORT __declspec(dllexport)
+#define CAG_IMPORT __declspec(dllimport)
+#elif defined(__GNUC__) && __GNUC__ >= 4  // defined() guard keeps -Wundef quiet
+#define CAG_EXPORT __attribute__((visibility("default")))
+#define CAG_IMPORT __attribute__((visibility("default")))
+#else  // any other toolchain: no decoration
+#define CAG_EXPORT
+#define CAG_IMPORT
+#endif
+
+#if defined(CAG_SHARED)  // CAG_PUBLIC is decorated only when CAG_SHARED is set
+#if defined(CAG_EXPORTS)  // presumably set while building the library itself
+#define CAG_PUBLIC CAG_EXPORT
+#else
+#define CAG_PUBLIC CAG_IMPORT
+#endif
+#else
+#define CAG_PUBLIC
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * An option is used to describe a flag/argument option submitted when the
+ * program is run.
+ */
+typedef struct cag_option
+{
+  const char identifier;  // id reported by cag_option_get(); should be unique
+  const char *access_letters;  // presumably the short (one-letter) spellings
+  const char *access_name;  // presumably the long spelling - TODO confirm
+  const char *value_name;  // presumably the placeholder name of the value
+  const char *description;  // presumably the help text used when printing
+} cag_option;
+
+/**
+ * A context is used to iterate over all options provided. It stores the parsing
+ * state.
+ */
+typedef struct cag_option_context
+{
+  const struct cag_option *options;  // option table given to cag_option_prepare
+  size_t option_count;  // number of entries in options
+  int argc;  // argument count given to cag_option_prepare
+  char **argv;  // argument vector; cag_option_fetch re-orders its entries
+  int index;  // argv index of the next item to inspect
+  int inner_index;  // presumably position inside a grouped argument - confirm
+  bool forced_end;  // presumably set when option parsing was ended explicitly
+  char identifier;  // presumably what cag_option_get returns - confirm
+  char *value;  // presumably what cag_option_get_value returns; may be NULL
+} cag_option_context;
+
+/**
+ * Element count of a real array; not valid for pointers or array parameters.
+ */
+#define CAG_ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+/**
+ * @brief Prints all options to the given destination.
+ *
+ * This function prints all options to the destination stream. This can be
+ * used to generate the output for a "--help" option.
+ *
+ * @param options The options which will be printed.
+ * @param option_count The number of entries in the options array.
+ * @param destination The stream to which the output will be written.
+ */
+CAG_PUBLIC void cag_option_print(const cag_option *options, size_t option_count,
+  FILE *destination);
+
+/**
+ * @brief Prepares the argument options context for parsing.
+ *
+ * This function prepares the context for iteration and initializes the context
+ * with the supplied options and arguments. After the context has been prepared,
+ * it can be used to fetch arguments from it.
+ *
+ * @param context The context which will be initialized.
+ * @param options The registered options which are available for the program.
+ * @param option_count The number of options which are available for the
+ * program.
+ * @param argc The number of arguments the user supplied in the main function.
+ * @param argv A pointer to the arguments of the main function.
+ */
+CAG_PUBLIC void cag_option_prepare(cag_option_context *context,
+  const cag_option *options, size_t option_count, int argc, char **argv);
+
+/**
+ * @brief Fetches an option from the argument list.
+ *
+ * This function fetches a single option from the argument list. The context
+ * will be moved to that item. Information can be extracted from the context
+ * after the item has been fetched.
+ * The arguments will be re-ordered, which means that non-option arguments will
+ * be moved to the end of the argument list. After all options have been
+ * fetched, all non-option arguments will be positioned after the index of
+ * the context.
+ *
+ * @param context The context from which we will fetch the option.
+ * @return true if another option was fetched, or false if the end has been
+ * reached.
+ */
+CAG_PUBLIC bool cag_option_fetch(cag_option_context *context);
+
+/**
+ * @brief Gets the identifier of the option.
+ *
+ * This function gets the identifier of the option, which should be unique to
+ * this option and can be used to determine what kind of option this is.
+ *
+ * @param context The context from which the option was fetched.
+ * @return The identifier of the fetched option.
+ */
+CAG_PUBLIC char cag_option_get(const cag_option_context *context);
+
+/**
+ * @brief Gets the value from the option.
+ *
+ * This function gets the value from the option, if any. If the option does not
+ * carry a value, this function will return NULL.
+ *
+ * @param context The context from which the option was fetched.
+ * @return A pointer to the value, or NULL if there is no value.
+ */
+CAG_PUBLIC const char *cag_option_get_value(const cag_option_context *context);
+
+/**
+ * @brief Gets the current index of the context.
+ *
+ * This function gets the index within the argv arguments of the context. The
+ * context always points to the next item which it will inspect. This is
+ * particularly useful to inspect the original argument array, or to get
+ * non-option arguments after option fetching has finished.
+ *
+ * @param context The context from which the option was fetched.
+ * @return The current argv index of the context.
+ */
+CAG_PUBLIC int cag_option_get_index(const cag_option_context *context);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif
--- a/lib/sherpa_onnx/lib/libcargs.dylib
+++ b/lib/sherpa_onnx/lib/libcargs.dylib
--- a/lib/sherpa_onnx/lib/libonnxruntime.1.17.1.dylib
+++ b/lib/sherpa_onnx/lib/libonnxruntime.1.17.1.dylib
--- a/lib/sherpa_onnx/lib/libonnxruntime.dylib
+++ b/lib/sherpa_onnx/lib/libonnxruntime.dylib
--- a/lib/sherpa_onnx/lib/libsherpa-onnx-c-api.dylib
+++ b/lib/sherpa_onnx/lib/libsherpa-onnx-c-api.dylib
--- a/lib/sherpa_onnx/lib/libsherpa-onnx-cxx-api.dylib
+++ b/lib/sherpa_onnx/lib/libsherpa-onnx-cxx-api.dylib
--- a/lib/sherpa_onnx/lib/pkgconfig/espeak-ng.pc
+++ b/lib/sherpa_onnx/lib/pkgconfig/espeak-ng.pc
@@ -0,0 +1,12 @@
+# Relocatable: prefix is derived from this file's directory (<prefix>/lib/pkgconfig) instead of the build machine's /tmp path.
+prefix=${pcfiledir}/../..
+exec_prefix=${prefix}
+libdir=${exec_prefix}/lib
+includedir=${prefix}/include
+
+Name: espeak-ng
+Description: espeak-ng is a multi-lingual software speech synthesizer
+Version: 1.52.0.1
+Requires:
+Libs: -L${libdir} -lespeak-ng
+Cflags: -I${includedir}