#include "ASRManager.h"

#include <QDebug>
#include <QDir>
#include <QFile>

#include <algorithm>
#include <cstring>
#include <vector>

namespace {

/// Sample rate (Hz) written into both recognizer configs and passed along
/// with every block of samples handed to sherpa-onnx.
constexpr int kSampleRate = 16000;

/// Number of leading bytes skipped in a WAV file before reading PCM data.
constexpr int kWavHeaderBytes = 44;

/// Returns the directory under the user's home that holds the model folders.
QString modelDataDir()
{
    return QDir::homePath() + "/.config/QSmartAssistant/Data/";
}

}  // namespace

/// Constructs the manager; recognizers are created later by initialize() and
/// initializeOnlineRecognizer().
ASRManager::ASRManager(QObject* parent)
    : QObject(parent)
{
}

/// Releases both recognizers via cleanup().
ASRManager::~ASRManager()
{
    cleanup();
}

/// Creates the offline (non-streaming) Paraformer recognizer.
///
/// @return true when the recognizer was created, false when the model files
///         are missing or sherpa-onnx failed to create the recognizer.
bool ASRManager::initialize()
{
    const QString dataPath = modelDataDir();
    const QString asrModelPath =
        dataPath + "sherpa-onnx-paraformer-zh-2024-03-09/model.int8.onnx";
    const QString asrTokensPath =
        dataPath + "sherpa-onnx-paraformer-zh-2024-03-09/tokens.txt";

    // Same pre-flight check as initializeOnlineRecognizer(): fail with a clear
    // message instead of handing non-existent paths to sherpa-onnx.
    if (!QFile::exists(asrModelPath) || !QFile::exists(asrTokensPath)) {
        qDebug() << "离线模型文件不存在,跳过离线识别器初始化";
        return false;
    }

    // Offline streams only live inside recognizeWavFile(), so an earlier
    // recognizer can be released here instead of being leaked on re-init.
    if (asrRecognizer) {
        SherpaOnnxDestroyOfflineRecognizer(asrRecognizer);
        asrRecognizer = nullptr;
    }

    memset(&asrConfig, 0, sizeof(asrConfig));

    asrConfig.feat_config.feature_dim = 80;
    asrConfig.feat_config.sample_rate = kSampleRate;

    asrConfig.model_config.num_threads = 2;
    asrConfig.model_config.provider = "cpu";

    asrConfig.max_active_paths = 4;
    asrConfig.decoding_method = "greedy_search";

    // The config stores raw char pointers, so the std::string copies are kept
    // in members and outlive this function.
    asrModelPathStd = asrModelPath.toStdString();
    asrTokensPathStd = asrTokensPath.toStdString();
    asrConfig.model_config.tokens = asrTokensPathStd.c_str();
    asrConfig.model_config.paraformer.model = asrModelPathStd.c_str();

    asrRecognizer = const_cast<SherpaOnnxOfflineRecognizer*>(
        SherpaOnnxCreateOfflineRecognizer(&asrConfig));

    qDebug() << "离线ASR识别器:" << (asrRecognizer ? "成功" : "失败");
    return asrRecognizer != nullptr;
}

/// Creates the online (streaming) recognizer using the
/// streaming-paraformer-bilingual model.
///
/// @return true when the recognizer was created, false when any model file is
///         missing or sherpa-onnx failed to create the recognizer.
bool ASRManager::initializeOnlineRecognizer()
{
    const QString dataPath = modelDataDir();
    const QString onlineEncoderPath =
        dataPath + "sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx";
    const QString onlineDecoderPath =
        dataPath + "sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx";
    const QString onlineTokensPath =
        dataPath + "sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt";

    // Skip initialization when any of the model files is missing.
    if (!QFile::exists(onlineEncoderPath) ||
        !QFile::exists(onlineDecoderPath) ||
        !QFile::exists(onlineTokensPath)) {
        qDebug() << "在线模型文件不存在,跳过在线识别器初始化";
        return false;
    }

    // NOTE(review): a previously created online recognizer is not released
    // here, because streams handed out by createOnlineStream() may still refer
    // to it. Confirm the intended re-initialization policy.

    memset(&onlineAsrConfig, 0, sizeof(onlineAsrConfig));

    // Feature extraction settings.
    onlineAsrConfig.feat_config.sample_rate = kSampleRate;
    onlineAsrConfig.feat_config.feature_dim = 80;

    // Model runtime settings.
    onlineAsrConfig.model_config.num_threads = 2;
    onlineAsrConfig.model_config.provider = "cpu";
    onlineAsrConfig.model_config.debug = 0;

    // Paraformer model paths; the std::string members keep the pointed-to
    // buffers alive after this function returns.
    onlineEncoderPathStd = onlineEncoderPath.toStdString();
    onlineDecoderPathStd = onlineDecoderPath.toStdString();
    onlineTokensPathStd = onlineTokensPath.toStdString();
    onlineAsrConfig.model_config.paraformer.encoder = onlineEncoderPathStd.c_str();
    onlineAsrConfig.model_config.paraformer.decoder = onlineDecoderPathStd.c_str();
    onlineAsrConfig.model_config.tokens = onlineTokensPathStd.c_str();

    // Decoding settings.
    onlineAsrConfig.decoding_method = "greedy_search";
    onlineAsrConfig.max_active_paths = 4;

    // Endpoint detection settings.
    onlineAsrConfig.enable_endpoint = 1;
    onlineAsrConfig.rule1_min_trailing_silence = 2.4f;
    onlineAsrConfig.rule2_min_trailing_silence = 1.2f;
    onlineAsrConfig.rule3_min_utterance_length = 20.0f;

    onlineAsrRecognizer = const_cast<SherpaOnnxOnlineRecognizer*>(
        SherpaOnnxCreateOnlineRecognizer(&onlineAsrConfig));

    qDebug() << "在线ASR识别器:" << (onlineAsrRecognizer ? "成功" : "失败");
    if (onlineAsrRecognizer) {
        qDebug() << "使用模型: sherpa-onnx-streaming-paraformer-bilingual-zh-en";
    }

    return onlineAsrRecognizer != nullptr;
}

/// Runs offline recognition on a WAV file.
///
/// @param filePath Path of the WAV file to recognize.
/// @return The recognized text, "[无识别结果]" when nothing was recognized, or
///         a short error message when the recognizer, the file, or the stream
///         is unavailable.
QString ASRManager::recognizeWavFile(const QString& filePath)
{
    if (!asrRecognizer) {
        return "ASR模型未初始化";
    }

    QFile file(filePath);
    if (!file.open(QIODevice::ReadOnly)) {
        return "无法打开文件";
    }

    // Skip the WAV header.
    // NOTE(review): this assumes a fixed 44-byte header followed by 16-bit
    // samples in host byte order at 16 kHz; files with extra RIFF chunks or a
    // different format are not detected — confirm against the expected inputs.
    const QByteArray header = file.read(kWavHeaderBytes);
    if (header.size() < kWavHeaderBytes) {
        return "无效的WAV文件";
    }

    // Read the PCM payload.
    const QByteArray audioData = file.readAll();
    file.close();

    // Two bytes per sample; a trailing odd byte is ignored by the division.
    const int16_t* pcm = reinterpret_cast<const int16_t*>(audioData.constData());
    const int sampleTotal = static_cast<int>(audioData.size() / 2);
    if (sampleTotal == 0) {
        // Nothing to decode; avoid running the decoder on an empty stream.
        return "[无识别结果]";
    }

    const SherpaOnnxOfflineStream* stream =
        SherpaOnnxCreateOfflineStream(asrRecognizer);
    if (!stream) {
        return "无法创建识别流";
    }

    // Convert to normalized floats and feed the stream in blocks of at most
    // kSampleRate samples so the scratch buffer stays small.
    std::vector<float> block(kSampleRate);
    for (int offset = 0; offset < sampleTotal;) {
        const int count = std::min(kSampleRate, sampleTotal - offset);
        for (int i = 0; i < count; ++i) {
            block[i] = pcm[offset + i] / 32768.0f;
        }
        SherpaOnnxAcceptWaveformOffline(stream, kSampleRate, block.data(), count);
        offset += count;
    }

    // Run recognition.
    SherpaOnnxDecodeOfflineStream(asrRecognizer, stream);

    // Fetch the result; both the result and its text may be null.
    const SherpaOnnxOfflineRecognizerResult* result =
        SherpaOnnxGetOfflineStreamResult(stream);

    QString recognizedText;
    if (result && result->text && strlen(result->text) > 0) {
        recognizedText = QString::fromUtf8(result->text);
    }

    // Release sherpa-onnx resources.
    if (result) {
        SherpaOnnxDestroyOfflineRecognizerResult(result);
    }
    SherpaOnnxDestroyOfflineStream(stream);

    if (recognizedText.isEmpty()) {
        return "[无识别结果]";
    }
    return recognizedText;
}

/// Creates a stream bound to the online recognizer.
///
/// @return The new stream, or nullptr when the online recognizer is not
///         initialized. Release it with destroyOnlineStream().
const SherpaOnnxOnlineStream* ASRManager::createOnlineStream()
{
    if (!onlineAsrRecognizer) {
        return nullptr;
    }
    return SherpaOnnxCreateOnlineStream(onlineAsrRecognizer);
}

/// Destroys a stream created by createOnlineStream(); null is ignored.
void ASRManager::destroyOnlineStream(const SherpaOnnxOnlineStream* stream)
{
    if (stream) {
        SherpaOnnxDestroyOnlineStream(stream);
    }
}

/// Feeds audio samples into an online stream.
///
/// @param stream      Target stream; the call is a no-op when null.
/// @param samples     Pointer to the samples; the call is a no-op when null.
/// @param sampleCount Number of samples; the call is a no-op when <= 0.
void ASRManager::acceptWaveform(const SherpaOnnxOnlineStream* stream,
                                const float* samples,
                                int32_t sampleCount)
{
    if (!stream || !samples || sampleCount <= 0) {
        return;
    }

    SherpaOnnxOnlineStreamAcceptWaveform(stream, kSampleRate, samples, sampleCount);

    // Debug progress counter. It is a function-local static, so it is shared
    // by every stream and every ASRManager instance. A 64-bit counter avoids
    // overflow on long-running sessions.
    static qint64 totalSamples = 0;
    const qint64 secondsBefore = totalSamples / kSampleRate;
    totalSamples += sampleCount;
    const qint64 secondsAfter = totalSamples / kSampleRate;

    // Log whenever a whole-second boundary is crossed. Testing for an exact
    // multiple of the sample rate would miss it for arbitrary chunk sizes.
    if (secondsAfter != secondsBefore) {
        qDebug() << "ASR已处理音频:" << secondsAfter << "秒";
    }
}

/// Reports whether the stream has enough data to be decoded.
///
/// @return false when the recognizer or the stream is null; otherwise whether
///         SherpaOnnxIsOnlineStreamReady() returned 1.
bool ASRManager::isStreamReady(const SherpaOnnxOnlineStream* stream)
{
    if (!onlineAsrRecognizer || !stream) {
        return false;
    }
    return SherpaOnnxIsOnlineStreamReady(onlineAsrRecognizer, stream) == 1;
}

/// Runs one decoding step on the stream; no-op when either handle is null.
void ASRManager::decodeStream(const SherpaOnnxOnlineStream* stream)
{
    if (onlineAsrRecognizer && stream) {
        SherpaOnnxDecodeOnlineStream(onlineAsrRecognizer, stream);
    }
}

/// Returns the current recognition text of the stream.
///
/// @return The recognized text, or an empty string when the recognizer or
///         stream is null, no result is available, or the text is empty.
QString ASRManager::getStreamResult(const SherpaOnnxOnlineStream* stream)
{
    if (!onlineAsrRecognizer || !stream) {
        return "";
    }

    const SherpaOnnxOnlineRecognizerResult* result =
        SherpaOnnxGetOnlineStreamResult(onlineAsrRecognizer, stream);
    if (!result) {
        qDebug() << "ASR识别结果为空";
        return "";
    }

    QString text = "";
    // Guard against a null text pointer before measuring its length.
    if (result->text && strlen(result->text) > 0) {
        text = QString::fromUtf8(result->text);
        qDebug() << "ASR识别结果:" << text;
    }
    SherpaOnnxDestroyOnlineRecognizerResult(result);

    return text;
}

/// Tells the stream that no more audio will be supplied; null is ignored.
void ASRManager::inputFinished(const SherpaOnnxOnlineStream* stream)
{
    if (stream) {
        SherpaOnnxOnlineStreamInputFinished(stream);
    }
}

/// Reports whether endpoint detection fired for the stream.
///
/// @return false when the recognizer or the stream is null; otherwise whether
///         SherpaOnnxOnlineStreamIsEndpoint() returned 1.
bool ASRManager::isEndpoint(const SherpaOnnxOnlineStream* stream)
{
    if (!onlineAsrRecognizer || !stream) {
        return false;
    }
    return SherpaOnnxOnlineStreamIsEndpoint(onlineAsrRecognizer, stream) == 1;
}

/// Destroys both recognizers (when present) and resets the handles to null,
/// so calling it more than once is harmless.
void ASRManager::cleanup()
{
    // Release the offline recognizer.
    if (asrRecognizer) {
        SherpaOnnxDestroyOfflineRecognizer(asrRecognizer);
        asrRecognizer = nullptr;
    }

    // Release the online recognizer.
    if (onlineAsrRecognizer) {
        SherpaOnnxDestroyOnlineRecognizer(onlineAsrRecognizer);
        onlineAsrRecognizer = nullptr;
    }
}