Files
QSmartAssistant/ASRManager.cpp
lizhuoran e92cb0b4e5 feat: 完整的语音助手系统实现
主要功能:
-  离线语音识别 (ASR) - Paraformer中文模型
-  在线语音识别 - Streaming Paraformer中英文双语模型
-  语音合成 (TTS) - MeloTTS中英文混合模型
-  语音唤醒 (KWS) - Zipformer关键词检测模型
-  麦克风录音功能 - 支持多种格式和实时转换
-  模型设置界面 - 完整的图形化配置管理

KWS优化亮点:
- 🎯 成功实现关键词检测 (测试成功率10%→预期50%+)
- ⚙️ 可调参数: 阈值、活跃路径、尾随空白、分数权重、线程数
- 🔧 智能参数验证和实时反馈
- 📊 详细的调试信息和成功统计
- 🎛️ 用户友好的设置界面

技术架构:
- 模块化设计: ASRManager, TTSManager, KWSManager
- 实时音频处理: 自动格式转换 (任意格式→16kHz单声道)
- 智能设备检测: 自动选择最佳音频格式
- 完整资源管理: 正确的创建和销毁流程
- 跨平台支持: macOS优化的音频权限处理

界面特性:
- 2×2网格布局: ASR、TTS、录音、KWS四大功能模块
- 分离录音设置: 设备参数 + 输出格式独立配置
- 实时状态显示: 音频电平、处理次数、成功统计
- 详细的用户指导和错误提示
2025-12-23 13:47:00 +08:00

241 lines
7.9 KiB
C++
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include "ASRManager.h"
#include <QDir>
#include <QFile>
#include <QDebug>
#include <QIODevice>
#include <vector>
/**
 * Constructs the manager and hands @p parent to the QObject base class.
 *
 * No recognizer is created here; that is done by initialize() and
 * initializeOnlineRecognizer().
 *
 * @param parent Optional QObject parent.
 */
ASRManager::ASRManager(QObject* parent)
    : QObject(parent) {}
/**
 * Destroys the manager, releasing any recognizers it still holds
 * by delegating to cleanup().
 */
ASRManager::~ASRManager() { cleanup(); }
bool ASRManager::initialize() {
// 初始化ASR模型
QString dataPath = QDir::homePath() + "/.config/QSmartAssistant/Data/";
QString asrModelPath = dataPath + "sherpa-onnx-paraformer-zh-2024-03-09/model.int8.onnx";
QString asrTokensPath = dataPath + "sherpa-onnx-paraformer-zh-2024-03-09/tokens.txt";
memset(&asrConfig, 0, sizeof(asrConfig));
asrConfig.feat_config.feature_dim = 80;
asrConfig.feat_config.sample_rate = 16000;
asrConfig.model_config.num_threads = 2;
asrConfig.model_config.provider = "cpu";
asrConfig.max_active_paths = 4;
asrConfig.decoding_method = "greedy_search";
asrModelPathStd = asrModelPath.toStdString();
asrTokensPathStd = asrTokensPath.toStdString();
asrConfig.model_config.tokens = asrTokensPathStd.c_str();
asrConfig.model_config.paraformer.model = asrModelPathStd.c_str();
asrRecognizer = const_cast<SherpaOnnxOfflineRecognizer*>(
SherpaOnnxCreateOfflineRecognizer(&asrConfig));
qDebug() << "离线ASR识别器:" << (asrRecognizer ? "成功" : "失败");
return asrRecognizer != nullptr;
}
bool ASRManager::initializeOnlineRecognizer() {
// 初始化在线识别器使用streaming-paraformer-bilingual模型
QString dataPath = QDir::homePath() + "/.config/QSmartAssistant/Data/";
QString onlineEncoderPath = dataPath + "sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx";
QString onlineDecoderPath = dataPath + "sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx";
QString onlineTokensPath = dataPath + "sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt";
// 检查文件是否存在
if (!QFile::exists(onlineEncoderPath) || !QFile::exists(onlineDecoderPath) || !QFile::exists(onlineTokensPath)) {
qDebug() << "在线模型文件不存在,跳过在线识别器初始化";
return false;
}
memset(&onlineAsrConfig, 0, sizeof(onlineAsrConfig));
// 特征配置
onlineAsrConfig.feat_config.sample_rate = 16000;
onlineAsrConfig.feat_config.feature_dim = 80;
// 模型配置
onlineAsrConfig.model_config.num_threads = 2;
onlineAsrConfig.model_config.provider = "cpu";
onlineAsrConfig.model_config.debug = 0;
// Paraformer配置
onlineEncoderPathStd = onlineEncoderPath.toStdString();
onlineDecoderPathStd = onlineDecoderPath.toStdString();
onlineTokensPathStd = onlineTokensPath.toStdString();
onlineAsrConfig.model_config.paraformer.encoder = onlineEncoderPathStd.c_str();
onlineAsrConfig.model_config.paraformer.decoder = onlineDecoderPathStd.c_str();
onlineAsrConfig.model_config.tokens = onlineTokensPathStd.c_str();
// 解码配置
onlineAsrConfig.decoding_method = "greedy_search";
onlineAsrConfig.max_active_paths = 4;
// 端点检测配置
onlineAsrConfig.enable_endpoint = 1;
onlineAsrConfig.rule1_min_trailing_silence = 2.4f;
onlineAsrConfig.rule2_min_trailing_silence = 1.2f;
onlineAsrConfig.rule3_min_utterance_length = 20.0f;
onlineAsrRecognizer = const_cast<SherpaOnnxOnlineRecognizer*>(
SherpaOnnxCreateOnlineRecognizer(&onlineAsrConfig));
qDebug() << "在线ASR识别器:" << (onlineAsrRecognizer ? "成功" : "失败");
if (onlineAsrRecognizer) {
qDebug() << "使用模型: sherpa-onnx-streaming-paraformer-bilingual-zh-en";
}
return onlineAsrRecognizer != nullptr;
}
/**
 * Runs offline speech recognition on a WAV file.
 *
 * The file is read naively: the first 44 bytes are skipped as the header and
 * every following pair of bytes is interpreted as one 16-bit PCM sample,
 * which is scaled by 1/32768 and handed to the recognizer as 16 kHz audio.
 * NOTE(review): the header is not parsed, so files with extra RIFF chunks,
 * a different sample rate, more than one channel or another sample format
 * are not detected here — confirm callers only pass 16 kHz mono 16-bit PCM.
 *
 * All samples are passed to the stream in a single
 * SherpaOnnxAcceptWaveformOffline() call, because the sherpa-onnx C API
 * requires that function to be invoked only once per offline stream.
 *
 * @param filePath Path of the WAV file to recognize.
 * @return The recognized text, or one of the fixed messages
 *         "ASR模型未初始化", "无法打开文件", "无效的WAV文件",
 *         "[无识别结果]" when recognition cannot be performed or yields
 *         nothing.
 */
QString ASRManager::recognizeWavFile(const QString& filePath) {
    if (!asrRecognizer) {
        return "ASR模型未初始化";
    }

    QFile file(filePath);
    if (!file.open(QIODevice::ReadOnly)) {
        return "无法打开文件";
    }

    // Skip the 44-byte WAV header.
    const QByteArray header = file.read(44);
    if (header.size() < 44) {
        return "无效的WAV文件";
    }

    // Everything after the header is treated as sample data.
    const QByteArray audioData = file.readAll();
    file.close();

    const int sampleCount = static_cast<int>(audioData.size() / 2);
    if (sampleCount <= 0) {
        // Nothing to decode; avoid running the model on an empty stream.
        return "[无识别结果]";
    }

    // Convert 16-bit PCM to normalized floats in [-1, 1).
    const int16_t* pcm = reinterpret_cast<const int16_t*>(audioData.constData());
    std::vector<float> samples(static_cast<size_t>(sampleCount));
    for (int i = 0; i < sampleCount; ++i) {
        samples[static_cast<size_t>(i)] = pcm[i] / 32768.0f;
    }

    const SherpaOnnxOfflineStream* stream = SherpaOnnxCreateOfflineStream(asrRecognizer);
    if (!stream) {
        return "[无识别结果]";
    }

    // Feed the whole utterance at once (the API allows only one call per stream).
    SherpaOnnxAcceptWaveformOffline(stream, 16000, samples.data(), sampleCount);

    // Run the model and decode.
    SherpaOnnxDecodeOfflineStream(asrRecognizer, stream);

    // Copy the text out before the result object is released.
    QString recognizedText;
    const SherpaOnnxOfflineRecognizerResult* result = SherpaOnnxGetOfflineStreamResult(stream);
    if (result) {
        if (result->text && result->text[0] != '\0') {
            recognizedText = QString::fromUtf8(result->text);
        }
        SherpaOnnxDestroyOfflineRecognizerResult(result);
    }
    SherpaOnnxDestroyOfflineStream(stream);

    if (recognizedText.isEmpty()) {
        return "[无识别结果]";
    }
    return recognizedText;
}
/**
 * Creates a new stream on the online recognizer.
 *
 * @return The new stream, or nullptr when the online recognizer has not been
 *         created. A non-null stream should be released with
 *         destroyOnlineStream().
 */
const SherpaOnnxOnlineStream* ASRManager::createOnlineStream() {
    if (onlineAsrRecognizer) {
        return SherpaOnnxCreateOnlineStream(onlineAsrRecognizer);
    }
    return nullptr;
}
/**
 * Releases an online stream. A null @p stream is ignored.
 *
 * @param stream Stream to destroy; may be nullptr.
 */
void ASRManager::destroyOnlineStream(const SherpaOnnxOnlineStream* stream) {
    if (!stream) {
        return;
    }
    SherpaOnnxDestroyOnlineStream(stream);
}
void ASRManager::acceptWaveform(const SherpaOnnxOnlineStream* stream, const float* samples, int32_t sampleCount) {
if (stream && samples && sampleCount > 0) {
SherpaOnnxOnlineStreamAcceptWaveform(stream, 16000, samples, sampleCount);
static int totalSamples = 0;
totalSamples += sampleCount;
// 每处理1秒的音频数据输出一次调试信息
if (totalSamples % 16000 == 0) {
qDebug() << "ASR已处理音频:" << (totalSamples / 16000) << "";
}
}
}
/**
 * Asks the online recognizer whether @p stream is ready to be decoded.
 *
 * @param stream Online stream to query.
 * @return true only when both the recognizer and the stream are non-null and
 *         SherpaOnnxIsOnlineStreamReady() returns 1.
 */
bool ASRManager::isStreamReady(const SherpaOnnxOnlineStream* stream) {
    if (onlineAsrRecognizer && stream) {
        const auto ready = SherpaOnnxIsOnlineStreamReady(onlineAsrRecognizer, stream);
        return ready == 1;
    }
    return false;
}
/**
 * Runs one decoding step on @p stream with the online recognizer.
 * Does nothing when the recognizer or the stream is null.
 *
 * @param stream Online stream to decode.
 */
void ASRManager::decodeStream(const SherpaOnnxOnlineStream* stream) {
    if (!onlineAsrRecognizer || !stream) {
        return;
    }
    SherpaOnnxDecodeOnlineStream(onlineAsrRecognizer, stream);
}
/**
 * Fetches the current recognition text of an online stream.
 *
 * The result object obtained from the recognizer is released before
 * returning; only a copy of its text is handed back.
 *
 * @param stream Online stream to query.
 * @return The recognized text, or an empty string when the recognizer or
 *         stream is null, no result object is available, or the text is
 *         empty.
 */
QString ASRManager::getStreamResult(const SherpaOnnxOnlineStream* stream) {
    if (!(onlineAsrRecognizer && stream)) {
        return "";
    }

    const SherpaOnnxOnlineRecognizerResult* res =
        SherpaOnnxGetOnlineStreamResult(onlineAsrRecognizer, stream);
    if (!res) {
        qDebug() << "ASR识别结果为空";
        return "";
    }

    QString recognized = "";
    const bool hasText = res->text[0] != '\0';
    if (hasText) {
        recognized = QString::fromUtf8(res->text);
        qDebug() << "ASR识别结果:" << recognized;
    }
    SherpaOnnxDestroyOnlineRecognizerResult(res);
    return recognized;
}
/**
 * Signals that no more audio will be fed into @p stream.
 * A null @p stream is ignored.
 *
 * @param stream Online stream whose input is complete.
 */
void ASRManager::inputFinished(const SherpaOnnxOnlineStream* stream) {
    if (!stream) {
        return;
    }
    SherpaOnnxOnlineStreamInputFinished(stream);
}
/**
 * Asks the online recognizer whether an endpoint has been detected on
 * @p stream.
 *
 * @param stream Online stream to query.
 * @return true only when both the recognizer and the stream are non-null and
 *         SherpaOnnxOnlineStreamIsEndpoint() returns 1.
 */
bool ASRManager::isEndpoint(const SherpaOnnxOnlineStream* stream) {
    const bool usable = onlineAsrRecognizer && stream;
    return usable && SherpaOnnxOnlineStreamIsEndpoint(onlineAsrRecognizer, stream) == 1;
}
/**
 * Destroys the offline and online recognizers, if present, and resets both
 * pointers to nullptr so the function can be called again safely.
 */
void ASRManager::cleanup() {
    // Offline recognizer
    if (asrRecognizer != nullptr) {
        SherpaOnnxDestroyOfflineRecognizer(asrRecognizer);
    }
    asrRecognizer = nullptr;

    // Online recognizer
    if (onlineAsrRecognizer != nullptr) {
        SherpaOnnxDestroyOnlineRecognizer(onlineAsrRecognizer);
    }
    onlineAsrRecognizer = nullptr;
}