主要功能: - ✅ 离线语音识别 (ASR) - Paraformer中文模型 - ✅ 在线语音识别 - Streaming Paraformer中英文双语模型 - ✅ 语音合成 (TTS) - MeloTTS中英文混合模型 - ✅ 语音唤醒 (KWS) - Zipformer关键词检测模型 - ✅ 麦克风录音功能 - 支持多种格式和实时转换 - ✅ 模型设置界面 - 完整的图形化配置管理 KWS优化亮点: - 🎯 成功实现关键词检测 (测试成功率10%→预期50%+) - ⚙️ 可调参数: 阈值、活跃路径、尾随空白、分数权重、线程数 - 🔧 智能参数验证和实时反馈 - 📊 详细的调试信息和成功统计 - 🎛️ 用户友好的设置界面 技术架构: - 模块化设计: ASRManager, TTSManager, KWSManager - 实时音频处理: 自动格式转换 (任意格式→16kHz单声道) - 智能设备检测: 自动选择最佳音频格式 - 完整资源管理: 正确的创建和销毁流程 - 跨平台支持: macOS优化的音频权限处理 界面特性: - 2×2网格布局: ASR、TTS、录音、KWS四大功能模块 - 分离录音设置: 设备参数 + 输出格式独立配置 - 实时状态显示: 音频电平、处理次数、成功统计 - 详细的用户指导和错误提示
1854 lines
71 KiB
C++
1854 lines
71 KiB
C++
#include "SpeechTestMainWindow.h"
|
||
#include <QApplication>
|
||
#include <QFileDialog>
|
||
#include <QMessageBox>
|
||
#include <QDebug>
|
||
#include <QDir>
|
||
#include <QDateTime>
|
||
#include <QProcess>
|
||
#include <QMediaDevices>
|
||
#include <QAudioFormat>
|
||
#include <QMenuBar>
|
||
#include <QAction>
|
||
#include <QCheckBox>
|
||
#include <QMenu>
|
||
#include <QGridLayout>
|
||
|
||
/// Builds the main window: creates the three speech managers, assembles the
/// UI, initialises the models and reflects the outcome in the microphone
/// button and the status bar.
SpeechTestMainWindow::SpeechTestMainWindow(QWidget* parent) : QMainWindow(parent) {
    // The managers are created with this window as their QObject parent.
    asrManager = new ASRManager(this);
    ttsManager = new TTSManager(this);
    kwsManager = new KWSManager(this);

    // The widgets must exist before connectSignals() wires them up.
    setupUI();
    setupMenuBar();
    createOutputDirectories();
    connectSignals();

    // Load the models; each flag records whether that manager reported success.
    const bool offlineAsrReady = asrManager->initialize();
    const bool ttsReady = ttsManager->initialize();
    const bool kwsReady = kwsManager->initialize();

    // Also try the streaming recognizer (the original note says this is
    // currently expected to fail); its state is queried below.
    asrManager->initializeOnlineRecognizer();

    setWindowTitle("QSmartAssistant 语音测试工具");
    setMinimumSize(1200, 800);  // large enough for the 2x2 grid layout
    resize(1400, 900);

    // The microphone button is only usable when the streaming recognizer loaded.
    const bool micUsable = asrManager->isOnlineInitialized();
    micRecordBtn->setEnabled(micUsable);
    micRecordBtn->setText(micUsable ? "开始麦克风识别" : "麦克风识别(模型未加载)");
    micRecordBtn->setStyleSheet(
        micUsable
            ? "QPushButton { background-color: #FF5722; color: white; font-weight: bold; }"
            : "QPushButton { background-color: #9E9E9E; color: white; font-weight: bold; }");

    // Status bar: report failure (with per-model debug output) and stop here.
    if (!(offlineAsrReady && ttsReady && kwsReady)) {
        statusBar()->showMessage("模型初始化失败");
        if (!offlineAsrReady) qDebug() << "离线ASR初始化失败";
        if (!ttsReady) qDebug() << "TTS初始化失败";
        if (!kwsReady) qDebug() << "KWS初始化失败";
        return;
    }

    // All three models loaded: compose the capability summary.
    QString modelInfo = ttsManager->isMultilingualModel() ? "(支持中英文混合)" : "(仅支持中文)";
    QString micInfo = asrManager->isOnlineInitialized() ? ",支持实时识别" : ",麦克风识别不可用";
    QString kwsInfo = kwsReady ? ",支持语音唤醒" : ",语音唤醒不可用";
    statusBar()->showMessage("模型初始化成功,就绪 " + modelInfo + micInfo + kwsInfo);
}
|
||
|
||
/// Stops any running capture session and releases the audio sources, the
/// polling timers and the streaming-recognition stream owned by the window.
SpeechTestMainWindow::~SpeechTestMainWindow() {
    // Stop live microphone recognition if it is running.
    if (isRecording) {
        stopMicRecognition();
    }

    // A stream may still be alive here: stopMicRecognition() can hand its
    // destruction to a delayed single-shot callback bound to this object,
    // which will never run once the window is being destroyed. Release it
    // explicitly so it is not leaked. asrManager is still valid at this
    // point (it is assigned in the constructor and not deleted above).
    if (onlineStream) {
        asrManager->destroyOnlineStream(onlineStream);
        onlineStream = nullptr;
    }

    // Stop WAV recording if it is running.
    if (isRecordingWav) {
        stopRecording();
    }

    // Shared teardown for the audio sources and timers: stop, delete, null.
    const auto stopAndDelete = [](auto& resource) {
        if (resource) {
            resource->stop();
            delete resource;
            resource = nullptr;
        }
    };

    // Live-recognition capture.
    stopAndDelete(audioSource);
    stopAndDelete(audioTimer);

    // WAV recording capture.
    stopAndDelete(recordAudioSource);
    stopAndDelete(recordTimer);

    // Keyword-spotting capture.
    stopAndDelete(kwsAudioSource);
    stopAndDelete(kwsTimer);
}
|
||
|
||
void SpeechTestMainWindow::setupUI() {
|
||
auto* centralWidget = new QWidget(this);
|
||
setCentralWidget(centralWidget);
|
||
|
||
auto* mainLayout = new QVBoxLayout(centralWidget);
|
||
|
||
// 创建网格布局(两行两列)
|
||
auto* gridLayout = new QGridLayout();
|
||
gridLayout->setSpacing(10);
|
||
gridLayout->setContentsMargins(10, 10, 10, 10);
|
||
|
||
// 设置行列拉伸策略,让各模块均匀分配空间
|
||
gridLayout->setRowStretch(0, 1); // 第一行拉伸因子为1
|
||
gridLayout->setRowStretch(1, 1); // 第二行拉伸因子为1
|
||
gridLayout->setColumnStretch(0, 1); // 第一列拉伸因子为1
|
||
gridLayout->setColumnStretch(1, 1); // 第二列拉伸因子为1
|
||
|
||
// 创建一个容器widget来包含网格布局
|
||
auto* gridWidget = new QWidget(this);
|
||
gridWidget->setLayout(gridLayout);
|
||
mainLayout->addWidget(gridWidget);
|
||
|
||
// ASR部分
|
||
auto* asrGroup = new QGroupBox("语音识别 (ASR)", this);
|
||
auto* asrLayout = new QVBoxLayout(asrGroup);
|
||
|
||
// 文件选择
|
||
auto* fileLayout = new QHBoxLayout();
|
||
filePathEdit = new QLineEdit(this);
|
||
filePathEdit->setPlaceholderText("选择WAV音频文件...");
|
||
auto* browseBtn = new QPushButton("浏览", this);
|
||
browseBtn->setObjectName("browseBtn");
|
||
auto* recognizeBtn = new QPushButton("开始识别", this);
|
||
recognizeBtn->setObjectName("recognizeBtn");
|
||
recognizeBtn->setStyleSheet("QPushButton { background-color: #4CAF50; color: white; font-weight: bold; }");
|
||
|
||
fileLayout->addWidget(new QLabel("音频文件:", this));
|
||
fileLayout->addWidget(filePathEdit, 1);
|
||
fileLayout->addWidget(browseBtn);
|
||
fileLayout->addWidget(recognizeBtn);
|
||
|
||
asrLayout->addLayout(fileLayout);
|
||
|
||
// 麦克风识别控件
|
||
auto* micLayout = new QHBoxLayout();
|
||
micRecordBtn = new QPushButton("开始麦克风识别", this);
|
||
micRecordBtn->setStyleSheet("QPushButton { background-color: #FF5722; color: white; font-weight: bold; }");
|
||
micStopBtn = new QPushButton("停止识别", this);
|
||
micStopBtn->setStyleSheet("QPushButton { background-color: #9E9E9E; color: white; font-weight: bold; }");
|
||
micStopBtn->setEnabled(false);
|
||
|
||
micLayout->addWidget(new QLabel("实时识别:", this));
|
||
micLayout->addStretch();
|
||
micLayout->addWidget(micRecordBtn);
|
||
micLayout->addWidget(micStopBtn);
|
||
|
||
asrLayout->addLayout(micLayout);
|
||
|
||
// 识别结果
|
||
asrResultEdit = new QTextEdit(this);
|
||
asrResultEdit->setPlaceholderText("识别结果将显示在这里...");
|
||
asrResultEdit->setMinimumHeight(100);
|
||
asrResultEdit->setMaximumHeight(200);
|
||
asrLayout->addWidget(new QLabel("识别结果:", this));
|
||
asrLayout->addWidget(asrResultEdit);
|
||
|
||
// 将ASR组件添加到网格布局的第一行第一列
|
||
gridLayout->addWidget(asrGroup, 0, 0);
|
||
|
||
// TTS部分
|
||
auto* ttsGroup = new QGroupBox("文字转语音 (TTS)", this);
|
||
auto* ttsLayout = new QVBoxLayout(ttsGroup);
|
||
|
||
// 文本输入
|
||
ttsTextEdit = new QTextEdit(this);
|
||
ttsTextEdit->setPlaceholderText("请输入要合成的文本(支持中英文混合)...");
|
||
ttsTextEdit->setMinimumHeight(80);
|
||
ttsTextEdit->setMaximumHeight(120);
|
||
ttsLayout->addWidget(new QLabel("输入文本:", this));
|
||
ttsLayout->addWidget(ttsTextEdit);
|
||
|
||
// TTS设置
|
||
auto* ttsSettingsLayout = new QHBoxLayout();
|
||
|
||
speakerIdSpinBox = new QSpinBox(this);
|
||
speakerIdSpinBox->setRange(0, 100);
|
||
speakerIdSpinBox->setValue(0);
|
||
|
||
auto* synthesizeBtn = new QPushButton("开始合成", this);
|
||
synthesizeBtn->setObjectName("synthesizeBtn");
|
||
synthesizeBtn->setStyleSheet("QPushButton { background-color: #2196F3; color: white; font-weight: bold; }");
|
||
|
||
ttsSettingsLayout->addWidget(new QLabel("说话人ID:", this));
|
||
ttsSettingsLayout->addWidget(speakerIdSpinBox);
|
||
ttsSettingsLayout->addStretch();
|
||
ttsSettingsLayout->addWidget(synthesizeBtn);
|
||
|
||
ttsLayout->addLayout(ttsSettingsLayout);
|
||
|
||
// TTS结果
|
||
ttsResultEdit = new QTextEdit(this);
|
||
ttsResultEdit->setPlaceholderText("合成结果将显示在这里...");
|
||
ttsResultEdit->setMinimumHeight(80);
|
||
ttsResultEdit->setMaximumHeight(120);
|
||
ttsLayout->addWidget(new QLabel("合成结果:", this));
|
||
ttsLayout->addWidget(ttsResultEdit);
|
||
|
||
// 将TTS组件添加到网格布局的第一行第二列
|
||
gridLayout->addWidget(ttsGroup, 0, 1);
|
||
|
||
// 录音功能部分
|
||
auto* recordGroup = new QGroupBox("麦克风录音", this);
|
||
auto* recordLayout = new QVBoxLayout(recordGroup);
|
||
|
||
// 录音设置区域(设备参数)
|
||
auto* recordSettingsGroup = new QGroupBox("录音设置(设备参数)", this);
|
||
auto* recordSettingsLayout = new QHBoxLayout(recordSettingsGroup);
|
||
|
||
// 录音采样率设置
|
||
recordSampleRateComboBox = new QComboBox(this);
|
||
recordSampleRateComboBox->addItem("自动检测最佳", -1);
|
||
recordSampleRateComboBox->addItem("48000 Hz (专业)", 48000);
|
||
recordSampleRateComboBox->addItem("44100 Hz (CD质量)", 44100);
|
||
recordSampleRateComboBox->addItem("22050 Hz", 22050);
|
||
recordSampleRateComboBox->addItem("16000 Hz", 16000);
|
||
recordSampleRateComboBox->setCurrentIndex(0); // 默认自动检测
|
||
recordSampleRateComboBox->setToolTip("选择录音时使用的采样率,自动检测会选择设备支持的最佳格式");
|
||
|
||
// 录音声道设置
|
||
recordChannelComboBox = new QComboBox(this);
|
||
recordChannelComboBox->addItem("自动检测最佳", -1);
|
||
recordChannelComboBox->addItem("立体声 (Stereo)", 2);
|
||
recordChannelComboBox->addItem("单声道 (Mono)", 1);
|
||
recordChannelComboBox->setCurrentIndex(0); // 默认自动检测
|
||
recordChannelComboBox->setToolTip("选择录音时使用的声道数,自动检测会选择设备支持的最佳格式");
|
||
|
||
recordSettingsLayout->addWidget(new QLabel("录音采样率:", this));
|
||
recordSettingsLayout->addWidget(recordSampleRateComboBox);
|
||
recordSettingsLayout->addWidget(new QLabel("录音声道:", this));
|
||
recordSettingsLayout->addWidget(recordChannelComboBox);
|
||
recordSettingsLayout->addStretch();
|
||
|
||
recordLayout->addWidget(recordSettingsGroup);
|
||
|
||
// 输出设置区域(保存格式)
|
||
auto* outputSettingsGroup = new QGroupBox("输出设置(保存格式)", this);
|
||
auto* outputSettingsLayout = new QHBoxLayout(outputSettingsGroup);
|
||
|
||
// 输出采样率设置
|
||
outputSampleRateComboBox = new QComboBox(this);
|
||
outputSampleRateComboBox->addItem("8000 Hz", 8000);
|
||
outputSampleRateComboBox->addItem("16000 Hz (语音识别)", 16000);
|
||
outputSampleRateComboBox->addItem("22050 Hz", 22050);
|
||
outputSampleRateComboBox->addItem("44100 Hz (CD质量)", 44100);
|
||
outputSampleRateComboBox->addItem("48000 Hz (专业)", 48000);
|
||
outputSampleRateComboBox->setCurrentIndex(1); // 默认选择16000 Hz
|
||
outputSampleRateComboBox->setToolTip("选择最终保存文件的采样率");
|
||
|
||
// 输出声道设置
|
||
outputChannelComboBox = new QComboBox(this);
|
||
outputChannelComboBox->addItem("单声道 (Mono)", 1);
|
||
outputChannelComboBox->addItem("立体声 (Stereo)", 2);
|
||
outputChannelComboBox->setCurrentIndex(0); // 默认选择单声道
|
||
outputChannelComboBox->setToolTip("选择最终保存文件的声道数");
|
||
|
||
// 添加预设配置按钮
|
||
auto* presetBtn = new QPushButton("预设", this);
|
||
presetBtn->setToolTip("选择常用输出预设配置");
|
||
presetBtn->setMaximumWidth(60);
|
||
|
||
// 连接预设按钮信号
|
||
connect(presetBtn, &QPushButton::clicked, this, [this, presetBtn]() {
|
||
QMenu* presetMenu = new QMenu(this);
|
||
|
||
QAction* voiceAction = presetMenu->addAction("🎤 语音识别 (16kHz 单声道)");
|
||
connect(voiceAction, &QAction::triggered, this, [this]() {
|
||
outputSampleRateComboBox->setCurrentIndex(1); // 16000 Hz
|
||
outputChannelComboBox->setCurrentIndex(0); // 单声道
|
||
});
|
||
|
||
QAction* musicAction = presetMenu->addAction("🎵 音乐保存 (44.1kHz 立体声)");
|
||
connect(musicAction, &QAction::triggered, this, [this]() {
|
||
outputSampleRateComboBox->setCurrentIndex(3); // 44100 Hz
|
||
outputChannelComboBox->setCurrentIndex(1); // 立体声
|
||
});
|
||
|
||
QAction* professionalAction = presetMenu->addAction("🎙️ 专业保存 (48kHz 立体声)");
|
||
connect(professionalAction, &QAction::triggered, this, [this]() {
|
||
outputSampleRateComboBox->setCurrentIndex(4); // 48000 Hz
|
||
outputChannelComboBox->setCurrentIndex(1); // 立体声
|
||
});
|
||
|
||
QAction* compactAction = presetMenu->addAction("📱 紧凑保存 (22kHz 单声道)");
|
||
connect(compactAction, &QAction::triggered, this, [this]() {
|
||
outputSampleRateComboBox->setCurrentIndex(2); // 22050 Hz
|
||
outputChannelComboBox->setCurrentIndex(0); // 单声道
|
||
});
|
||
|
||
presetMenu->exec(presetBtn->mapToGlobal(QPoint(0, presetBtn->height())));
|
||
presetMenu->deleteLater();
|
||
});
|
||
|
||
outputSettingsLayout->addWidget(new QLabel("输出采样率:", this));
|
||
outputSettingsLayout->addWidget(outputSampleRateComboBox);
|
||
outputSettingsLayout->addWidget(new QLabel("输出声道:", this));
|
||
outputSettingsLayout->addWidget(outputChannelComboBox);
|
||
outputSettingsLayout->addWidget(presetBtn);
|
||
|
||
// 添加文件大小预估标签
|
||
auto* fileSizeLabel = new QLabel(this);
|
||
fileSizeLabel->setStyleSheet("QLabel { color: #888; font-size: 10px; }");
|
||
fileSizeLabel->setObjectName("fileSizeLabel");
|
||
|
||
// 连接设置变化信号来更新文件大小预估
|
||
auto updateFileSizeEstimate = [this, fileSizeLabel]() {
|
||
int sampleRate = outputSampleRateComboBox->currentData().toInt();
|
||
int channels = outputChannelComboBox->currentData().toInt();
|
||
|
||
// 计算每秒的字节数 (采样率 × 声道数 × 2字节/样本)
|
||
int bytesPerSecond = sampleRate * channels * 2;
|
||
double mbPerMinute = (bytesPerSecond * 60.0) / (1024.0 * 1024.0);
|
||
|
||
QString sizeText = QString("预估输出文件大小: ~%1 MB/分钟").arg(mbPerMinute, 0, 'f', 1);
|
||
fileSizeLabel->setText(sizeText);
|
||
};
|
||
|
||
connect(outputSampleRateComboBox, QOverload<int>::of(&QComboBox::currentIndexChanged), updateFileSizeEstimate);
|
||
connect(outputChannelComboBox, QOverload<int>::of(&QComboBox::currentIndexChanged), updateFileSizeEstimate);
|
||
|
||
// 初始计算
|
||
updateFileSizeEstimate();
|
||
|
||
outputSettingsLayout->addWidget(fileSizeLabel);
|
||
outputSettingsLayout->addStretch();
|
||
|
||
recordLayout->addWidget(outputSettingsGroup);
|
||
|
||
// 录音控制按钮
|
||
auto* recordControlLayout = new QHBoxLayout();
|
||
recordBtn = new QPushButton("开始录音", this);
|
||
recordBtn->setStyleSheet("QPushButton { background-color: #E91E63; color: white; font-weight: bold; }");
|
||
recordStopBtn = new QPushButton("停止录音", this);
|
||
recordStopBtn->setStyleSheet("QPushButton { background-color: #9E9E9E; color: white; font-weight: bold; }");
|
||
recordStopBtn->setEnabled(false);
|
||
|
||
recordControlLayout->addWidget(new QLabel("WAV录音:", this));
|
||
recordControlLayout->addStretch();
|
||
recordControlLayout->addWidget(recordBtn);
|
||
recordControlLayout->addWidget(recordStopBtn);
|
||
|
||
recordLayout->addLayout(recordControlLayout);
|
||
|
||
// 录音结果显示
|
||
recordResultEdit = new QTextEdit(this);
|
||
recordResultEdit->setPlaceholderText("录音文件信息将显示在这里...");
|
||
recordResultEdit->setMinimumHeight(80);
|
||
recordResultEdit->setMaximumHeight(120);
|
||
recordLayout->addWidget(new QLabel("录音结果:", this));
|
||
recordLayout->addWidget(recordResultEdit);
|
||
|
||
// 将录音组件添加到网格布局的第二行第一列
|
||
gridLayout->addWidget(recordGroup, 1, 0);
|
||
|
||
// 语音唤醒功能部分
|
||
auto* kwsGroup = new QGroupBox("语音唤醒 (KWS)", this);
|
||
auto* kwsLayout = new QVBoxLayout(kwsGroup);
|
||
|
||
// 语音唤醒控制按钮
|
||
auto* kwsControlLayout = new QHBoxLayout();
|
||
kwsStartBtn = new QPushButton("开始语音唤醒", this);
|
||
kwsStartBtn->setStyleSheet("QPushButton { background-color: #9C27B0; color: white; font-weight: bold; }");
|
||
kwsStopBtn = new QPushButton("停止唤醒", this);
|
||
kwsStopBtn->setStyleSheet("QPushButton { background-color: #9E9E9E; color: white; font-weight: bold; }");
|
||
kwsStopBtn->setEnabled(false);
|
||
|
||
kwsControlLayout->addWidget(new QLabel("关键词检测:", this));
|
||
kwsControlLayout->addStretch();
|
||
kwsControlLayout->addWidget(kwsStartBtn);
|
||
kwsControlLayout->addWidget(kwsStopBtn);
|
||
|
||
kwsLayout->addLayout(kwsControlLayout);
|
||
|
||
// 语音唤醒结果显示
|
||
kwsResultEdit = new QTextEdit(this);
|
||
kwsResultEdit->setPlaceholderText("语音唤醒检测结果将显示在这里...");
|
||
kwsResultEdit->setMinimumHeight(80);
|
||
kwsResultEdit->setMaximumHeight(120);
|
||
kwsLayout->addWidget(new QLabel("唤醒结果:", this));
|
||
kwsLayout->addWidget(kwsResultEdit);
|
||
|
||
// 将语音唤醒组件添加到网格布局的第二行第二列
|
||
gridLayout->addWidget(kwsGroup, 1, 1);
|
||
|
||
// 设置一些示例文本(中英文混合)
|
||
ttsTextEdit->setPlainText("你好,这是语音合成测试。Hello, this is a speech synthesis test. 今天天气很好,适合出门散步。The weather is nice today.");
|
||
}
|
||
|
||
void SpeechTestMainWindow::setupMenuBar() {
|
||
// 创建菜单栏
|
||
QMenuBar* menuBar = this->menuBar();
|
||
|
||
// 文件菜单
|
||
QMenu* fileMenu = menuBar->addMenu("文件(&F)");
|
||
|
||
QAction* exitAction = new QAction("退出(&X)", this);
|
||
exitAction->setShortcut(QKeySequence::Quit);
|
||
connect(exitAction, &QAction::triggered, this, &QWidget::close);
|
||
fileMenu->addAction(exitAction);
|
||
|
||
// 设置菜单
|
||
QMenu* settingsMenu = menuBar->addMenu("设置(&S)");
|
||
|
||
QAction* modelSettingsAction = new QAction("模型设置(&M)...", this);
|
||
modelSettingsAction->setShortcut(QKeySequence("Ctrl+M"));
|
||
modelSettingsAction->setToolTip("配置ASR和TTS模型");
|
||
connect(modelSettingsAction, &QAction::triggered, this, &SpeechTestMainWindow::openModelSettings);
|
||
settingsMenu->addAction(modelSettingsAction);
|
||
|
||
// 帮助菜单
|
||
QMenu* helpMenu = menuBar->addMenu("帮助(&H)");
|
||
|
||
QAction* aboutAction = new QAction("关于(&A)...", this);
|
||
connect(aboutAction, &QAction::triggered, [this]() {
|
||
QMessageBox::about(this, "关于",
|
||
"QSmartAssistant 语音测试工具 v1.0\n\n"
|
||
"基于sherpa-onnx的语音识别和合成工具\n"
|
||
"支持中英文混合语音合成");
|
||
});
|
||
helpMenu->addAction(aboutAction);
|
||
}
|
||
|
||
void SpeechTestMainWindow::createOutputDirectories() {
|
||
// 创建TTS输出目录
|
||
QString ttsOutputDir = QDir::currentPath() + "/tts_output";
|
||
if (!QDir().exists(ttsOutputDir)) {
|
||
QDir().mkpath(ttsOutputDir);
|
||
qDebug() << "创建TTS输出目录:" << ttsOutputDir;
|
||
}
|
||
|
||
// 创建录音输出目录
|
||
QString recordOutputDir = QDir::currentPath() + "/recordings";
|
||
if (!QDir().exists(recordOutputDir)) {
|
||
QDir().mkpath(recordOutputDir);
|
||
qDebug() << "创建录音输出目录:" << recordOutputDir;
|
||
}
|
||
}
|
||
|
||
void SpeechTestMainWindow::connectSignals() {
|
||
// 通过对象名称查找按钮并连接信号
|
||
QPushButton* browseBtn = findChild<QPushButton*>("browseBtn");
|
||
QPushButton* recognizeBtn = findChild<QPushButton*>("recognizeBtn");
|
||
QPushButton* synthesizeBtn = findChild<QPushButton*>("synthesizeBtn");
|
||
|
||
if (browseBtn) {
|
||
connect(browseBtn, &QPushButton::clicked, this, &SpeechTestMainWindow::browseFile);
|
||
}
|
||
if (recognizeBtn) {
|
||
connect(recognizeBtn, &QPushButton::clicked, this, &SpeechTestMainWindow::startRecognition);
|
||
}
|
||
if (synthesizeBtn) {
|
||
connect(synthesizeBtn, &QPushButton::clicked, this, &SpeechTestMainWindow::startSynthesis);
|
||
}
|
||
|
||
// 连接麦克风按钮信号
|
||
connect(micRecordBtn, &QPushButton::clicked, this, &SpeechTestMainWindow::startMicRecognition);
|
||
connect(micStopBtn, &QPushButton::clicked, this, &SpeechTestMainWindow::stopMicRecognition);
|
||
|
||
// 连接录音按钮信号
|
||
connect(recordBtn, &QPushButton::clicked, this, &SpeechTestMainWindow::startRecording);
|
||
connect(recordStopBtn, &QPushButton::clicked, this, &SpeechTestMainWindow::stopRecording);
|
||
|
||
// 连接语音唤醒按钮信号
|
||
connect(kwsStartBtn, &QPushButton::clicked, this, &SpeechTestMainWindow::startKWS);
|
||
connect(kwsStopBtn, &QPushButton::clicked, this, &SpeechTestMainWindow::stopKWS);
|
||
}
|
||
|
||
void SpeechTestMainWindow::browseFile() {
|
||
QString fileName = QFileDialog::getOpenFileName(
|
||
this, "选择WAV音频文件", "", "WAV Files (*.wav)");
|
||
if (!fileName.isEmpty()) {
|
||
filePathEdit->setText(fileName);
|
||
}
|
||
}
|
||
|
||
void SpeechTestMainWindow::startRecognition() {
|
||
QString filePath = filePathEdit->text().trimmed();
|
||
if (filePath.isEmpty()) {
|
||
QMessageBox::warning(this, "警告", "请先选择音频文件");
|
||
return;
|
||
}
|
||
|
||
if (!QFile::exists(filePath)) {
|
||
QMessageBox::warning(this, "警告", "文件不存在: " + filePath);
|
||
return;
|
||
}
|
||
|
||
if (!asrManager->isInitialized()) {
|
||
QMessageBox::critical(this, "错误", "ASR模型未初始化");
|
||
return;
|
||
}
|
||
|
||
asrResultEdit->clear();
|
||
asrResultEdit->append("正在识别,请稍候...");
|
||
statusBar()->showMessage("正在进行语音识别...");
|
||
|
||
// 使用QTimer延迟执行,避免界面卡顿
|
||
QTimer::singleShot(100, this, [this, filePath]() {
|
||
QString result = asrManager->recognizeWavFile(filePath);
|
||
asrResultEdit->clear();
|
||
asrResultEdit->append("识别结果: " + result);
|
||
statusBar()->showMessage("语音识别完成");
|
||
});
|
||
}
|
||
|
||
void SpeechTestMainWindow::startSynthesis() {
|
||
QString text = ttsTextEdit->toPlainText().trimmed();
|
||
if (text.isEmpty()) {
|
||
QMessageBox::warning(this, "警告", "请输入要合成的文本");
|
||
return;
|
||
}
|
||
|
||
if (!ttsManager->isInitialized()) {
|
||
QMessageBox::critical(this, "错误", "TTS模型未初始化");
|
||
return;
|
||
}
|
||
|
||
int speakerId = speakerIdSpinBox->value();
|
||
|
||
// 创建项目目录下的输出文件夹
|
||
QString outputDir = QDir::currentPath() + "/tts_output";
|
||
QDir().mkpath(outputDir);
|
||
|
||
QString outputPath = outputDir + "/tts_" +
|
||
QDateTime::currentDateTime().toString("yyyyMMdd_hhmmss") +
|
||
"_speaker" + QString::number(speakerId) + ".wav";
|
||
|
||
ttsResultEdit->clear();
|
||
ttsResultEdit->append("正在合成,请稍候...");
|
||
statusBar()->showMessage("正在进行语音合成...");
|
||
|
||
// 使用QTimer延迟执行,避免界面卡顿
|
||
QTimer::singleShot(100, this, [this, text, speakerId, outputPath]() {
|
||
bool success = ttsManager->synthesizeText(text, speakerId, outputPath);
|
||
|
||
ttsResultEdit->clear();
|
||
if (success) {
|
||
ttsResultEdit->append("语音合成成功");
|
||
|
||
// 显示相对路径,更简洁
|
||
QString relativePath = QDir::current().relativeFilePath(outputPath);
|
||
ttsResultEdit->append("输出文件: " + relativePath);
|
||
ttsResultEdit->append("完整路径: " + outputPath);
|
||
|
||
statusBar()->showMessage("语音合成完成,保存至: " + relativePath);
|
||
|
||
// 询问是否播放
|
||
int ret = QMessageBox::question(this, "合成完成",
|
||
"语音合成完成!是否要播放生成的音频?\n\n文件位置: " + outputPath,
|
||
QMessageBox::Yes | QMessageBox::No);
|
||
|
||
if (ret == QMessageBox::Yes) {
|
||
// 在macOS上使用afplay播放音频
|
||
QProcess::startDetached("afplay", QStringList() << outputPath);
|
||
}
|
||
} else {
|
||
ttsResultEdit->append("语音合成失败");
|
||
statusBar()->showMessage("语音合成失败");
|
||
}
|
||
});
|
||
}
|
||
|
||
void SpeechTestMainWindow::startMicRecognition() {
|
||
if (!asrManager->isOnlineInitialized()) {
|
||
QMessageBox::information(this, "功能不可用",
|
||
"在线识别模型未初始化。\n"
|
||
"请确保sherpa-onnx-streaming-paraformer-bilingual-zh-en模型已正确安装。");
|
||
return;
|
||
}
|
||
|
||
if (isRecording) {
|
||
return;
|
||
}
|
||
|
||
// 提示用户检查麦克风权限
|
||
qDebug() << "开始麦克风识别,请确保已授予麦克风权限";
|
||
|
||
// 获取默认音频设备
|
||
QAudioDevice defaultDevice = QMediaDevices::defaultAudioInput();
|
||
qDebug() << "默认音频设备:" << defaultDevice.description();
|
||
qDebug() << "设备ID:" << defaultDevice.id();
|
||
|
||
// 首先尝试使用设备的首选格式
|
||
QAudioFormat preferredFormat = defaultDevice.preferredFormat();
|
||
qDebug() << "设备首选格式 - 采样率:" << preferredFormat.sampleRate()
|
||
<< "声道:" << preferredFormat.channelCount()
|
||
<< "格式:" << static_cast<int>(preferredFormat.sampleFormat());
|
||
|
||
// 使用设备支持的最佳格式进行录制,然后转换为16kHz单声道
|
||
QAudioFormat format;
|
||
|
||
// 优先尝试高质量格式
|
||
QList<int> preferredSampleRates = {48000, 44100, 22050, 16000};
|
||
QList<int> preferredChannels = {2, 1}; // 优先立体声
|
||
QList<QAudioFormat::SampleFormat> preferredFormats = {QAudioFormat::Int16, QAudioFormat::Float};
|
||
|
||
bool formatFound = false;
|
||
|
||
// 寻找设备支持的最佳格式
|
||
for (int sampleRate : preferredSampleRates) {
|
||
for (int channels : preferredChannels) {
|
||
for (QAudioFormat::SampleFormat sampleFormat : preferredFormats) {
|
||
format.setSampleRate(sampleRate);
|
||
format.setChannelCount(channels);
|
||
format.setSampleFormat(sampleFormat);
|
||
|
||
if (defaultDevice.isFormatSupported(format)) {
|
||
qDebug() << "找到最佳支持格式 - 采样率:" << sampleRate
|
||
<< "声道:" << channels
|
||
<< "格式:" << static_cast<int>(sampleFormat);
|
||
formatFound = true;
|
||
break;
|
||
}
|
||
}
|
||
if (formatFound) break;
|
||
}
|
||
if (formatFound) break;
|
||
}
|
||
|
||
if (!formatFound) {
|
||
// 如果都不支持,使用设备首选格式
|
||
format = preferredFormat;
|
||
qDebug() << "使用设备首选格式";
|
||
}
|
||
|
||
qDebug() << "最终使用的音频格式 - 采样率:" << format.sampleRate()
|
||
<< "声道:" << format.channelCount()
|
||
<< "格式:" << static_cast<int>(format.sampleFormat());
|
||
|
||
// 创建在线流
|
||
onlineStream = asrManager->createOnlineStream();
|
||
if (!onlineStream) {
|
||
QMessageBox::critical(this, "错误", "无法创建在线识别流");
|
||
return;
|
||
}
|
||
qDebug() << "在线识别流创建成功";
|
||
|
||
// 保存音频格式信息用于后续处理
|
||
currentAudioFormat = format;
|
||
originalSampleRate = format.sampleRate();
|
||
originalChannelCount = format.channelCount();
|
||
|
||
// 创建音频源 - 使用更保守的设置
|
||
audioSource = new QAudioSource(defaultDevice, format, this);
|
||
|
||
// 使用较小的缓冲区,有时大缓冲区会导致问题
|
||
audioSource->setBufferSize(4096);
|
||
|
||
// 设置音量
|
||
audioSource->setVolume(1.0);
|
||
|
||
// 连接状态变化信号
|
||
connect(audioSource, &QAudioSource::stateChanged, this, [this](QAudio::State state) {
|
||
qDebug() << "音频源状态变化:" << state;
|
||
if (state == QAudio::StoppedState) {
|
||
qDebug() << "音频源错误:" << audioSource->error();
|
||
} else if (state == QAudio::ActiveState) {
|
||
qDebug() << "音频源已激活!";
|
||
}
|
||
});
|
||
|
||
qDebug() << "尝试启动音频输入...";
|
||
|
||
// 启动音频输入
|
||
audioDevice = audioSource->start();
|
||
if (!audioDevice) {
|
||
qDebug() << "第一次启动失败,尝试其他方法...";
|
||
|
||
// 尝试使用pull模式
|
||
QByteArray buffer;
|
||
buffer.resize(4096);
|
||
audioDevice = audioSource->start();
|
||
|
||
if (!audioDevice) {
|
||
QMessageBox::critical(this, "错误", "无法启动音频输入,请检查麦克风权限");
|
||
asrManager->destroyOnlineStream(onlineStream);
|
||
onlineStream = nullptr;
|
||
delete audioSource;
|
||
audioSource = nullptr;
|
||
return;
|
||
}
|
||
}
|
||
|
||
qDebug() << "音频输入启动成功";
|
||
qDebug() << "初始音频源状态:" << audioSource->state();
|
||
qDebug() << "音频源错误:" << audioSource->error();
|
||
qDebug() << "缓冲区大小:" << audioSource->bufferSize();
|
||
|
||
// 等待音频源状态稳定并进行测试
|
||
QTimer::singleShot(200, this, [this]() {
|
||
if (audioSource) {
|
||
qDebug() << "音频源最终状态:" << audioSource->state();
|
||
qDebug() << "音频源错误状态:" << audioSource->error();
|
||
|
||
// 尝试强制激活音频源
|
||
if (audioSource->state() == QAudio::IdleState) {
|
||
qDebug() << "音频源处于空闲状态,尝试多种激活方法...";
|
||
|
||
// 方法1:暂停和恢复
|
||
audioSource->suspend();
|
||
QTimer::singleShot(50, this, [this]() {
|
||
if (audioSource) {
|
||
audioSource->resume();
|
||
qDebug() << "方法1恢复后状态:" << audioSource->state();
|
||
|
||
// 方法2:如果仍然是IdleState,尝试重新创建
|
||
if (audioSource->state() == QAudio::IdleState) {
|
||
QTimer::singleShot(100, this, [this]() {
|
||
if (audioSource) {
|
||
qDebug() << "尝试重新创建音频源...";
|
||
audioSource->stop();
|
||
delete audioSource;
|
||
|
||
// 重新创建音频源
|
||
QAudioDevice device = QMediaDevices::defaultAudioInput();
|
||
audioSource = new QAudioSource(device, currentAudioFormat, this);
|
||
audioSource->setBufferSize(16384);
|
||
|
||
// 重新连接信号
|
||
connect(audioSource, &QAudioSource::stateChanged, this, [this](QAudio::State state) {
|
||
qDebug() << "重新创建后音频源状态变化:" << state;
|
||
});
|
||
|
||
audioDevice = audioSource->start();
|
||
qDebug() << "重新创建后音频源状态:" << audioSource->state();
|
||
}
|
||
});
|
||
}
|
||
}
|
||
});
|
||
}
|
||
|
||
// 显示麦克风权限提示
|
||
if (audioSource->state() != QAudio::ActiveState) {
|
||
statusBar()->showMessage("提示:如果没有声音输入,请检查系统设置中的麦克风权限");
|
||
asrResultEdit->append("提示:请确保已在系统设置 → 安全性与隐私 → 麦克风中授予权限");
|
||
}
|
||
}
|
||
});
|
||
|
||
// 创建定时器读取音频数据
|
||
audioTimer = new QTimer(this);
|
||
connect(audioTimer, &QTimer::timeout, this, &SpeechTestMainWindow::processAudioData);
|
||
audioTimer->start(100); // 每100ms处理一次音频数据
|
||
|
||
// 添加一个备用定时器,用于强制检查音频状态
|
||
QTimer* statusTimer = new QTimer(this);
|
||
connect(statusTimer, &QTimer::timeout, this, [this]() {
|
||
if (audioSource && isRecording) {
|
||
static int checkCount = 0;
|
||
checkCount++;
|
||
|
||
if (checkCount % 10 == 0) { // 每秒检查一次
|
||
qDebug() << "状态检查 - 音频源状态:" << audioSource->state()
|
||
<< "错误:" << audioSource->error()
|
||
<< "可用字节:" << (audioDevice ? audioDevice->bytesAvailable() : 0);
|
||
|
||
// 如果长时间处于IdleState,尝试重新启动
|
||
if (audioSource->state() == QAudio::IdleState && checkCount > 50) {
|
||
qDebug() << "长时间空闲,尝试重新启动音频源...";
|
||
audioSource->stop();
|
||
QTimer::singleShot(100, this, [this]() {
|
||
if (audioSource && isRecording) {
|
||
audioDevice = audioSource->start();
|
||
}
|
||
});
|
||
checkCount = 0;
|
||
}
|
||
}
|
||
}
|
||
});
|
||
statusTimer->start(100);
|
||
|
||
isRecording = true;
|
||
micRecordBtn->setEnabled(false);
|
||
micStopBtn->setEnabled(true);
|
||
micRecordBtn->setText("识别中...");
|
||
|
||
asrResultEdit->clear();
|
||
asrResultEdit->append("开始麦克风识别,请说话...");
|
||
statusBar()->showMessage("正在进行麦克风识别...");
|
||
|
||
qDebug() << "麦克风识别已启动";
|
||
}
|
||
|
||
void SpeechTestMainWindow::stopMicRecognition() {
|
||
if (!isRecording) {
|
||
return;
|
||
}
|
||
|
||
isRecording = false;
|
||
|
||
// 停止音频输入
|
||
if (audioSource) {
|
||
audioSource->stop();
|
||
delete audioSource;
|
||
audioSource = nullptr;
|
||
}
|
||
|
||
// 停止定时器
|
||
if (audioTimer) {
|
||
audioTimer->stop();
|
||
delete audioTimer;
|
||
audioTimer = nullptr;
|
||
}
|
||
|
||
// 获取最终识别结果
|
||
if (onlineStream) {
|
||
asrManager->inputFinished(onlineStream);
|
||
|
||
// 等待最后的识别结果
|
||
QTimer::singleShot(500, this, [this]() {
|
||
if (onlineStream) {
|
||
QString finalText = asrManager->getStreamResult(onlineStream);
|
||
if (!finalText.isEmpty()) {
|
||
asrResultEdit->append("最终识别结果: " + finalText);
|
||
}
|
||
|
||
asrManager->destroyOnlineStream(onlineStream);
|
||
onlineStream = nullptr;
|
||
}
|
||
});
|
||
}
|
||
|
||
micRecordBtn->setEnabled(true);
|
||
micStopBtn->setEnabled(false);
|
||
micRecordBtn->setText("开始麦克风识别");
|
||
|
||
statusBar()->showMessage("麦克风识别已停止");
|
||
qDebug() << "麦克风识别已停止";
|
||
}
|
||
|
||
void SpeechTestMainWindow::processAudioData() {
|
||
if (!audioDevice || !onlineStream || !isRecording) {
|
||
return;
|
||
}
|
||
|
||
// 检查音频源状态,但不立即返回
|
||
if (audioSource->state() != QAudio::ActiveState) {
|
||
static int idleCount = 0;
|
||
idleCount++;
|
||
if (idleCount % 50 == 0) { // 每50次输出一次警告
|
||
qDebug() << "音频源状态异常:" << audioSource->state() << "错误:" << audioSource->error();
|
||
}
|
||
|
||
// 尝试重新启动音频源
|
||
if (idleCount > 100 && audioSource->state() == QAudio::IdleState) {
|
||
qDebug() << "尝试重新启动音频源...";
|
||
audioSource->stop();
|
||
audioDevice = audioSource->start();
|
||
idleCount = 0;
|
||
}
|
||
|
||
// 即使状态异常,也尝试读取数据
|
||
}
|
||
|
||
// 强制读取音频数据,即使状态不是Active
|
||
QByteArray audioData;
|
||
|
||
if (audioDevice) {
|
||
audioData = audioDevice->readAll();
|
||
|
||
// 如果没有数据,尝试直接从音频源读取
|
||
if (audioData.isEmpty() && audioSource) {
|
||
qint64 bytesAvailable = audioDevice->bytesAvailable();
|
||
if (bytesAvailable > 0) {
|
||
audioData = audioDevice->read(std::min(bytesAvailable, qint64(4096)));
|
||
}
|
||
}
|
||
}
|
||
|
||
if (audioData.isEmpty()) {
|
||
return;
|
||
}
|
||
|
||
static int totalSamples = 0;
|
||
static int callCount = 0;
|
||
callCount++;
|
||
|
||
// 每100次调用输出一次调试信息
|
||
if (callCount % 100 == 0) {
|
||
qDebug() << "原始音频数据 - 调用次数:" << callCount
|
||
<< "数据大小:" << audioData.size() << "字节"
|
||
<< "格式:" << currentAudioFormat.sampleRate() << "Hz"
|
||
<< currentAudioFormat.channelCount() << "声道";
|
||
}
|
||
|
||
// 定义目标格式(语音识别需要的格式)
|
||
QAudioFormat targetFormat;
|
||
targetFormat.setSampleRate(16000);
|
||
targetFormat.setChannelCount(1);
|
||
targetFormat.setSampleFormat(QAudioFormat::Float);
|
||
|
||
// 使用音频格式转换方法
|
||
QByteArray convertedData = convertAudioFormat(audioData, currentAudioFormat, targetFormat);
|
||
|
||
if (convertedData.isEmpty()) {
|
||
return;
|
||
}
|
||
|
||
// 转换后的数据已经是16kHz单声道浮点格式
|
||
const float* samples = reinterpret_cast<const float*>(convertedData.data());
|
||
int sampleCount = convertedData.size() / sizeof(float);
|
||
|
||
totalSamples += sampleCount;
|
||
|
||
if (callCount % 100 == 0) {
|
||
qDebug() << "转换后音频数据 - 样本数:" << sampleCount
|
||
<< "总样本数:" << totalSamples;
|
||
}
|
||
|
||
// 发送音频数据到识别器
|
||
if (sampleCount > 0) {
|
||
asrManager->acceptWaveform(onlineStream, samples, sampleCount);
|
||
}
|
||
|
||
// 检查是否有识别结果
|
||
int decodeCount = 0;
|
||
while (asrManager->isStreamReady(onlineStream)) {
|
||
asrManager->decodeStream(onlineStream);
|
||
decodeCount++;
|
||
if (decodeCount > 10) break; // 防止无限循环
|
||
}
|
||
|
||
// 获取部分识别结果
|
||
QString partialText = asrManager->getStreamResult(onlineStream);
|
||
if (!partialText.isEmpty()) {
|
||
qDebug() << "识别到文本:" << partialText;
|
||
|
||
// 更新显示(这里显示实时识别结果)
|
||
statusBar()->showMessage("识别中: " + partialText);
|
||
|
||
// 检查是否检测到端点
|
||
if (asrManager->isEndpoint(onlineStream)) {
|
||
asrResultEdit->append("识别片段: " + partialText);
|
||
qDebug() << "检测到端点,重置流";
|
||
|
||
// 重置流以继续识别
|
||
asrManager->destroyOnlineStream(onlineStream);
|
||
onlineStream = asrManager->createOnlineStream();
|
||
}
|
||
} else {
|
||
// 即使没有文本,也显示正在处理的状态
|
||
if (callCount % 20 == 0) { // 每20次调用更新一次状态
|
||
// 计算音频电平
|
||
float maxLevel = 0.0f;
|
||
for (int i = 0; i < sampleCount; i++) {
|
||
maxLevel = std::max(maxLevel, std::abs(samples[i]));
|
||
}
|
||
|
||
QString statusMsg = QString("正在监听... (样本: %1, 电平: %2)")
|
||
.arg(totalSamples)
|
||
.arg(maxLevel, 0, 'f', 3);
|
||
statusBar()->showMessage(statusMsg);
|
||
|
||
// 如果检测到音频信号
|
||
if (maxLevel > 0.01f) {
|
||
qDebug() << "检测到音频信号,电平:" << maxLevel;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
void SpeechTestMainWindow::openModelSettings() {
|
||
ModelSettingsDialog dialog(this);
|
||
|
||
// 设置当前配置
|
||
ModelConfig offlineAsrConfig;
|
||
offlineAsrConfig.modelPath = ""; // 从ASRManager获取当前配置
|
||
dialog.setCurrentOfflineASRConfig(offlineAsrConfig);
|
||
|
||
ModelConfig onlineAsrConfig;
|
||
onlineAsrConfig.modelPath = ""; // 从ASRManager获取当前配置
|
||
dialog.setCurrentOnlineASRConfig(onlineAsrConfig);
|
||
|
||
ModelConfig kwsConfig;
|
||
kwsConfig.modelPath = ""; // 从KWS管理器获取当前配置
|
||
dialog.setCurrentKWSConfig(kwsConfig);
|
||
|
||
ModelConfig ttsConfig;
|
||
ttsConfig.modelPath = ""; // 从TTSManager获取当前配置
|
||
dialog.setCurrentTTSConfig(ttsConfig);
|
||
|
||
// 连接信号
|
||
connect(&dialog, &ModelSettingsDialog::modelsChanged,
|
||
this, &SpeechTestMainWindow::onModelsChanged);
|
||
|
||
dialog.exec();
|
||
}
|
||
|
||
void SpeechTestMainWindow::onModelsChanged() {
|
||
// 重新初始化模型
|
||
reinitializeModels();
|
||
|
||
// 更新状态栏
|
||
bool asrOk = asrManager->isInitialized();
|
||
bool ttsOk = ttsManager->isInitialized();
|
||
bool kwsOk = kwsManager->isInitialized();
|
||
|
||
if (asrOk && ttsOk && kwsOk) {
|
||
QString modelInfo = ttsManager->isMultilingualModel() ? "(支持中英文混合)" : "(仅支持中文)";
|
||
QString micInfo = asrManager->isOnlineInitialized() ? "" : ",麦克风识别暂不可用";
|
||
QString kwsInfo = kwsOk ? ",语音唤醒可用" : ",语音唤醒不可用";
|
||
statusBar()->showMessage("模型重新加载成功 " + modelInfo + micInfo + kwsInfo);
|
||
} else {
|
||
statusBar()->showMessage("模型重新加载失败");
|
||
}
|
||
}
|
||
|
||
void SpeechTestMainWindow::reinitializeModels() {
|
||
// 如果KWS正在运行,先停止它
|
||
bool wasKWSActive = isKWSActive;
|
||
if (isKWSActive) {
|
||
stopKWS();
|
||
}
|
||
|
||
// 重新初始化ASR管理器
|
||
bool asrOk = asrManager->initialize();
|
||
|
||
// 重新初始化TTS管理器
|
||
bool ttsOk = ttsManager->initialize();
|
||
|
||
// 重新初始化KWS管理器
|
||
bool kwsOk = kwsManager->initialize();
|
||
|
||
// 尝试初始化在线识别器
|
||
asrManager->initializeOnlineRecognizer();
|
||
|
||
qDebug() << "模型重新初始化 - ASR:" << (asrOk ? "成功" : "失败")
|
||
<< "TTS:" << (ttsOk ? "成功" : "失败")
|
||
<< "KWS:" << (kwsOk ? "成功" : "失败");
|
||
|
||
// 如果之前KWS是激活的,重新启动它
|
||
if (wasKWSActive && kwsOk) {
|
||
QTimer::singleShot(1000, this, &SpeechTestMainWindow::startKWS);
|
||
qDebug() << "将在1秒后重新启动KWS";
|
||
}
|
||
}
|
||
|
||
|
||
|
||
void SpeechTestMainWindow::startRecording() {
|
||
if (isRecordingWav) {
|
||
return;
|
||
}
|
||
|
||
// 检查是否正在进行语音识别
|
||
if (isRecording) {
|
||
QMessageBox::information(this, "提示", "请先停止语音识别再开始录音");
|
||
return;
|
||
}
|
||
|
||
qDebug() << "开始WAV录音";
|
||
|
||
// 获取默认音频设备
|
||
QAudioDevice defaultDevice = QMediaDevices::defaultAudioInput();
|
||
qDebug() << "录音设备:" << defaultDevice.description();
|
||
|
||
// 获取录音设置(设备参数)
|
||
int recordSampleRate = recordSampleRateComboBox->currentData().toInt();
|
||
int recordChannels = recordChannelComboBox->currentData().toInt();
|
||
|
||
// 获取输出设置(保存格式)
|
||
int outputSampleRate = outputSampleRateComboBox->currentData().toInt();
|
||
int outputChannels = outputChannelComboBox->currentData().toInt();
|
||
|
||
qDebug() << "录音设置 - 采样率:" << recordSampleRate << "Hz, 声道:" << recordChannels;
|
||
qDebug() << "输出设置 - 采样率:" << outputSampleRate << "Hz, 声道:" << outputChannels;
|
||
|
||
// 确定实际录音格式
|
||
QAudioFormat deviceOptimalFormat;
|
||
|
||
if (recordSampleRate == -1 || recordChannels == -1) {
|
||
// 自动检测设备最佳格式
|
||
qDebug() << "自动检测设备最佳录音格式...";
|
||
|
||
QList<int> deviceSampleRates = {48000, 44100, 22050, 16000};
|
||
QList<int> deviceChannels = {2, 1};
|
||
QList<QAudioFormat::SampleFormat> deviceFormats = {QAudioFormat::Int16, QAudioFormat::Float};
|
||
|
||
bool foundDeviceFormat = false;
|
||
for (int sampleRate : deviceSampleRates) {
|
||
for (int channels : deviceChannels) {
|
||
for (QAudioFormat::SampleFormat format : deviceFormats) {
|
||
deviceOptimalFormat.setSampleRate(sampleRate);
|
||
deviceOptimalFormat.setChannelCount(channels);
|
||
deviceOptimalFormat.setSampleFormat(format);
|
||
|
||
if (defaultDevice.isFormatSupported(deviceOptimalFormat)) {
|
||
qDebug() << "找到设备最佳格式:" << sampleRate << "Hz,"
|
||
<< channels << "声道," << static_cast<int>(format);
|
||
foundDeviceFormat = true;
|
||
break;
|
||
}
|
||
}
|
||
if (foundDeviceFormat) break;
|
||
}
|
||
if (foundDeviceFormat) break;
|
||
}
|
||
|
||
if (!foundDeviceFormat) {
|
||
deviceOptimalFormat = defaultDevice.preferredFormat();
|
||
qDebug() << "使用设备首选格式";
|
||
}
|
||
} else {
|
||
// 使用用户指定的录音格式
|
||
deviceOptimalFormat.setSampleRate(recordSampleRate);
|
||
deviceOptimalFormat.setChannelCount(recordChannels);
|
||
deviceOptimalFormat.setSampleFormat(QAudioFormat::Int16);
|
||
|
||
// 检查用户指定格式是否被支持
|
||
if (!defaultDevice.isFormatSupported(deviceOptimalFormat)) {
|
||
qDebug() << "用户指定的录音格式不被支持,自动寻找最佳格式...";
|
||
|
||
// 回退到自动检测
|
||
QList<int> deviceSampleRates = {recordSampleRate, 48000, 44100, 22050, 16000};
|
||
QList<int> deviceChannels = {recordChannels, 2, 1};
|
||
QList<QAudioFormat::SampleFormat> deviceFormats = {QAudioFormat::Int16, QAudioFormat::Float};
|
||
|
||
bool foundDeviceFormat = false;
|
||
for (int sampleRate : deviceSampleRates) {
|
||
for (int channels : deviceChannels) {
|
||
for (QAudioFormat::SampleFormat format : deviceFormats) {
|
||
deviceOptimalFormat.setSampleRate(sampleRate);
|
||
deviceOptimalFormat.setChannelCount(channels);
|
||
deviceOptimalFormat.setSampleFormat(format);
|
||
|
||
if (defaultDevice.isFormatSupported(deviceOptimalFormat)) {
|
||
qDebug() << "找到兼容格式:" << sampleRate << "Hz,"
|
||
<< channels << "声道," << static_cast<int>(format);
|
||
foundDeviceFormat = true;
|
||
break;
|
||
}
|
||
}
|
||
if (foundDeviceFormat) break;
|
||
}
|
||
if (foundDeviceFormat) break;
|
||
}
|
||
|
||
if (!foundDeviceFormat) {
|
||
deviceOptimalFormat = defaultDevice.preferredFormat();
|
||
qDebug() << "使用设备首选格式";
|
||
}
|
||
}
|
||
}
|
||
|
||
// 使用确定的设备格式进行录制
|
||
recordAudioFormat = deviceOptimalFormat;
|
||
|
||
// 检查格式支持并智能降级
|
||
QString formatInfo = QString("尝试格式: %1 Hz, %2声道")
|
||
.arg(recordAudioFormat.sampleRate())
|
||
.arg(recordAudioFormat.channelCount() == 1 ? "单" : "立体");
|
||
qDebug() << formatInfo;
|
||
|
||
if (!defaultDevice.isFormatSupported(recordAudioFormat)) {
|
||
qDebug() << "设备不支持选择的格式,尝试降级...";
|
||
|
||
// 如果是立体声,尝试单声道
|
||
if (recordAudioFormat.channelCount() == 2) {
|
||
recordAudioFormat.setChannelCount(1);
|
||
qDebug() << "尝试单声道格式";
|
||
|
||
if (!defaultDevice.isFormatSupported(recordAudioFormat)) {
|
||
// 尝试降低采样率
|
||
QList<int> fallbackRates = {44100, 22050, 16000, 8000};
|
||
bool foundSupported = false;
|
||
|
||
for (int rate : fallbackRates) {
|
||
if (rate < recordSampleRate) {
|
||
recordAudioFormat.setSampleRate(rate);
|
||
if (defaultDevice.isFormatSupported(recordAudioFormat)) {
|
||
qDebug() << "降级到采样率:" << rate << "Hz";
|
||
foundSupported = true;
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
|
||
if (!foundSupported) {
|
||
// 最后使用设备首选格式
|
||
recordAudioFormat = defaultDevice.preferredFormat();
|
||
qDebug() << "使用设备首选录音格式";
|
||
}
|
||
}
|
||
} else {
|
||
// 单声道情况下,尝试降低采样率
|
||
QList<int> fallbackRates = {44100, 22050, 16000, 8000};
|
||
bool foundSupported = false;
|
||
|
||
for (int rate : fallbackRates) {
|
||
if (rate < recordSampleRate) {
|
||
recordAudioFormat.setSampleRate(rate);
|
||
if (defaultDevice.isFormatSupported(recordAudioFormat)) {
|
||
qDebug() << "降级到采样率:" << rate << "Hz";
|
||
foundSupported = true;
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
|
||
if (!foundSupported) {
|
||
recordAudioFormat = defaultDevice.preferredFormat();
|
||
qDebug() << "使用设备首选录音格式";
|
||
}
|
||
}
|
||
|
||
// 显示实际使用的格式
|
||
QString actualFormat = QString("实际使用格式: %1 Hz, %2声道")
|
||
.arg(recordAudioFormat.sampleRate())
|
||
.arg(recordAudioFormat.channelCount() == 1 ? "单" : "立体");
|
||
qDebug() << actualFormat;
|
||
|
||
// 如果格式发生了变化,通知用户
|
||
if (recordAudioFormat.sampleRate() != recordSampleRate ||
|
||
recordAudioFormat.channelCount() != recordChannels) {
|
||
|
||
recordResultEdit->append("注意:设备不支持选择的格式,已自动调整");
|
||
}
|
||
}
|
||
|
||
qDebug() << "录音格式 - 采样率:" << recordAudioFormat.sampleRate()
|
||
<< "声道:" << recordAudioFormat.channelCount()
|
||
<< "格式:" << static_cast<int>(recordAudioFormat.sampleFormat());
|
||
|
||
// 创建输出文件路径
|
||
QString outputDir = QDir::currentPath() + "/recordings";
|
||
QDir().mkpath(outputDir);
|
||
|
||
currentRecordingPath = outputDir + "/recording_" +
|
||
QDateTime::currentDateTime().toString("yyyyMMdd_hhmmss") +
|
||
".wav";
|
||
|
||
// 清空录音数据缓冲区
|
||
recordedData.clear();
|
||
|
||
// 创建音频源
|
||
recordAudioSource = new QAudioSource(defaultDevice, recordAudioFormat, this);
|
||
recordAudioSource->setBufferSize(8192);
|
||
recordAudioSource->setVolume(1.0);
|
||
|
||
// 连接状态变化信号
|
||
connect(recordAudioSource, &QAudioSource::stateChanged, this, [this](QAudio::State state) {
|
||
qDebug() << "录音音频源状态变化:" << state;
|
||
if (state == QAudio::StoppedState) {
|
||
qDebug() << "录音音频源错误:" << recordAudioSource->error();
|
||
} else if (state == QAudio::ActiveState) {
|
||
qDebug() << "录音音频源已激活!";
|
||
}
|
||
});
|
||
|
||
// 启动音频输入
|
||
recordAudioDevice = recordAudioSource->start();
|
||
if (!recordAudioDevice) {
|
||
QMessageBox::critical(this, "错误", "无法启动录音,请检查麦克风权限");
|
||
delete recordAudioSource;
|
||
recordAudioSource = nullptr;
|
||
return;
|
||
}
|
||
|
||
// 创建定时器读取音频数据
|
||
recordTimer = new QTimer(this);
|
||
connect(recordTimer, &QTimer::timeout, this, &SpeechTestMainWindow::processRecordingData);
|
||
recordTimer->start(100); // 每100ms处理一次音频数据
|
||
|
||
isRecordingWav = true;
|
||
recordBtn->setEnabled(false);
|
||
recordStopBtn->setEnabled(true);
|
||
recordBtn->setText("录音中...");
|
||
|
||
// 录音期间禁用设置选项
|
||
recordSampleRateComboBox->setEnabled(false);
|
||
recordChannelComboBox->setEnabled(false);
|
||
outputSampleRateComboBox->setEnabled(false);
|
||
outputChannelComboBox->setEnabled(false);
|
||
|
||
recordResultEdit->clear();
|
||
recordResultEdit->append("开始录音,请说话...");
|
||
recordResultEdit->append(QString("录音格式: %1 Hz, %2")
|
||
.arg(recordAudioFormat.sampleRate())
|
||
.arg(recordAudioFormat.channelCount() == 1 ? "单声道" : "立体声"));
|
||
recordResultEdit->append(QString("输出格式: %1 Hz, %2")
|
||
.arg(outputSampleRate)
|
||
.arg(outputChannels == 1 ? "单声道" : "立体声"));
|
||
recordResultEdit->append("输出文件: " + QDir::current().relativeFilePath(currentRecordingPath));
|
||
|
||
statusBar()->showMessage("正在录音...");
|
||
qDebug() << "WAV录音已启动,输出文件:" << currentRecordingPath;
|
||
}
|
||
|
||
void SpeechTestMainWindow::stopRecording() {
|
||
if (!isRecordingWav) {
|
||
return;
|
||
}
|
||
|
||
isRecordingWav = false;
|
||
|
||
// 停止音频输入
|
||
if (recordAudioSource) {
|
||
recordAudioSource->stop();
|
||
delete recordAudioSource;
|
||
recordAudioSource = nullptr;
|
||
}
|
||
|
||
// 停止定时器
|
||
if (recordTimer) {
|
||
recordTimer->stop();
|
||
delete recordTimer;
|
||
recordTimer = nullptr;
|
||
}
|
||
|
||
recordBtn->setEnabled(true);
|
||
recordStopBtn->setEnabled(false);
|
||
recordBtn->setText("开始录音");
|
||
|
||
// 重新启用设置选项
|
||
recordSampleRateComboBox->setEnabled(true);
|
||
recordChannelComboBox->setEnabled(true);
|
||
outputSampleRateComboBox->setEnabled(true);
|
||
outputChannelComboBox->setEnabled(true);
|
||
|
||
// 保存WAV文件
|
||
if (!recordedData.isEmpty()) {
|
||
// 获取输出设置
|
||
int outputSampleRate = outputSampleRateComboBox->currentData().toInt();
|
||
int outputChannels = outputChannelComboBox->currentData().toInt();
|
||
|
||
QAudioFormat outputFormat;
|
||
outputFormat.setSampleRate(outputSampleRate);
|
||
outputFormat.setChannelCount(outputChannels);
|
||
outputFormat.setSampleFormat(QAudioFormat::Int16);
|
||
|
||
QByteArray finalAudioData = recordedData;
|
||
QAudioFormat finalFormat = recordAudioFormat;
|
||
|
||
// 如果录制格式与输出格式不同,进行转换
|
||
if (recordAudioFormat.sampleRate() != outputSampleRate ||
|
||
recordAudioFormat.channelCount() != outputChannels) {
|
||
|
||
qDebug() << "转换录音格式从" << recordAudioFormat.sampleRate() << "Hz"
|
||
<< recordAudioFormat.channelCount() << "声道到"
|
||
<< outputSampleRate << "Hz" << outputChannels << "声道";
|
||
|
||
finalAudioData = convertAudioFormat(recordedData, recordAudioFormat, outputFormat);
|
||
finalFormat = outputFormat;
|
||
|
||
if (finalAudioData.isEmpty()) {
|
||
recordResultEdit->append("音频格式转换失败!");
|
||
statusBar()->showMessage("录音保存失败 - 格式转换错误");
|
||
return;
|
||
}
|
||
|
||
recordResultEdit->append("✅ 音频格式转换完成");
|
||
} else {
|
||
recordResultEdit->append("✅ 录音格式与输出格式一致,无需转换");
|
||
}
|
||
|
||
// 保存输出格式的文件
|
||
bool success = saveWavFile(currentRecordingPath, finalAudioData, finalFormat);
|
||
|
||
if (success) {
|
||
QFileInfo fileInfo(currentRecordingPath);
|
||
double durationSeconds = (double)finalAudioData.size() /
|
||
(finalFormat.sampleRate() *
|
||
finalFormat.channelCount() *
|
||
(finalFormat.sampleFormat() == QAudioFormat::Int16 ? 2 : 4));
|
||
|
||
recordResultEdit->append(QString("🎉 录音完成!时长: %1 秒").arg(durationSeconds, 0, 'f', 1));
|
||
recordResultEdit->append(QString("📊 最终格式: %1 Hz, %2, 16位")
|
||
.arg(finalFormat.sampleRate())
|
||
.arg(finalFormat.channelCount() == 1 ? "单声道" : "立体声"));
|
||
recordResultEdit->append(QString("📁 文件大小: %1 KB").arg(fileInfo.size() / 1024.0, 0, 'f', 1));
|
||
recordResultEdit->append("📂 完整路径: " + currentRecordingPath);
|
||
|
||
statusBar()->showMessage("录音已保存: " + QDir::current().relativeFilePath(currentRecordingPath));
|
||
|
||
// 询问是否播放录音
|
||
int ret = QMessageBox::question(this, "录音完成",
|
||
QString("录音已保存!\n文件: %1\n时长: %2 秒\n\n是否要播放录音?")
|
||
.arg(QDir::current().relativeFilePath(currentRecordingPath))
|
||
.arg(durationSeconds, 0, 'f', 1),
|
||
QMessageBox::Yes | QMessageBox::No);
|
||
|
||
if (ret == QMessageBox::Yes) {
|
||
// 在macOS上使用afplay播放音频
|
||
QProcess::startDetached("afplay", QStringList() << currentRecordingPath);
|
||
}
|
||
|
||
} else {
|
||
recordResultEdit->append("录音保存失败!");
|
||
statusBar()->showMessage("录音保存失败");
|
||
}
|
||
} else {
|
||
recordResultEdit->append("没有录制到音频数据");
|
||
statusBar()->showMessage("录音失败 - 没有数据");
|
||
}
|
||
|
||
qDebug() << "WAV录音已停止";
|
||
}
|
||
|
||
void SpeechTestMainWindow::processRecordingData() {
|
||
if (!recordAudioDevice || !isRecordingWav) {
|
||
return;
|
||
}
|
||
|
||
// 读取音频数据
|
||
QByteArray audioData = recordAudioDevice->readAll();
|
||
|
||
if (!audioData.isEmpty()) {
|
||
// 将数据添加到录音缓冲区
|
||
recordedData.append(audioData);
|
||
|
||
// 更新录音状态显示
|
||
static int updateCount = 0;
|
||
updateCount++;
|
||
if (updateCount % 10 == 0) { // 每秒更新一次
|
||
double durationSeconds = (double)recordedData.size() /
|
||
(recordAudioFormat.sampleRate() *
|
||
recordAudioFormat.channelCount() *
|
||
(recordAudioFormat.sampleFormat() == QAudioFormat::Int16 ? 2 : 4));
|
||
|
||
statusBar()->showMessage(QString("录音中... %1 秒").arg(durationSeconds, 0, 'f', 1));
|
||
}
|
||
}
|
||
}
|
||
|
||
bool SpeechTestMainWindow::saveWavFile(const QString& filePath, const QByteArray& audioData, const QAudioFormat& format) {
|
||
QFile file(filePath);
|
||
if (!file.open(QIODevice::WriteOnly)) {
|
||
qDebug() << "无法创建WAV文件:" << filePath;
|
||
return false;
|
||
}
|
||
|
||
// WAV文件头
|
||
QDataStream stream(&file);
|
||
stream.setByteOrder(QDataStream::LittleEndian);
|
||
|
||
// RIFF头
|
||
stream.writeRawData("RIFF", 4);
|
||
quint32 fileSize = 36 + audioData.size();
|
||
stream << fileSize;
|
||
stream.writeRawData("WAVE", 4);
|
||
|
||
// fmt子块
|
||
stream.writeRawData("fmt ", 4);
|
||
quint32 fmtSize = 16;
|
||
stream << fmtSize;
|
||
|
||
quint16 audioFormat = 1; // PCM
|
||
stream << audioFormat;
|
||
|
||
quint16 numChannels = format.channelCount();
|
||
stream << numChannels;
|
||
|
||
quint32 sampleRate = format.sampleRate();
|
||
stream << sampleRate;
|
||
|
||
quint16 bitsPerSample = (format.sampleFormat() == QAudioFormat::Int16) ? 16 : 32;
|
||
quint32 byteRate = sampleRate * numChannels * (bitsPerSample / 8);
|
||
stream << byteRate;
|
||
|
||
quint16 blockAlign = numChannels * (bitsPerSample / 8);
|
||
stream << blockAlign;
|
||
|
||
stream << bitsPerSample;
|
||
|
||
// data子块
|
||
stream.writeRawData("data", 4);
|
||
quint32 dataSize = audioData.size();
|
||
stream << dataSize;
|
||
|
||
// 写入音频数据
|
||
stream.writeRawData(audioData.constData(), audioData.size());
|
||
|
||
file.close();
|
||
|
||
qDebug() << "WAV文件保存成功:" << filePath;
|
||
qDebug() << "文件大小:" << (fileSize + 8) << "字节";
|
||
qDebug() << "音频格式:" << numChannels << "声道," << sampleRate << "Hz," << bitsPerSample << "位";
|
||
|
||
return true;
|
||
}
|
||
|
||
// Converts interleaved audio between sample formats, channel layouts and
// sample rates.
//
// @param inputData     Interleaved samples described by inputFormat.
// @param inputFormat   Format of inputData; Int16 and Float samples are supported.
// @param outputFormat  Requested format; Int16 and Float samples are supported.
// @return The converted samples. inputData is returned unchanged when both
//         formats already match. An empty array is returned for empty input,
//         unsupported sample formats or non-positive rates/channel counts.
//
// Pipeline: decode to float -> remap channels -> resample -> encode.
//   * Channel remap: any layout -> mono averages all channels of a frame;
//     otherwise output channel c copies input channel (c % inputChannels),
//     i.e. mono is duplicated and surplus input channels are dropped.
//   * Resampling: linear interpolation between neighbouring frames, done per
//     channel so interleaved channels are never blended into each other.
//     Source positions are computed in double precision so accuracy does not
//     degrade on long buffers.
// Bytes that do not form a complete input frame at the end are ignored.
QByteArray SpeechTestMainWindow::convertAudioFormat(const QByteArray& inputData,
                                                    const QAudioFormat& inputFormat,
                                                    const QAudioFormat& outputFormat) {
    if (inputData.isEmpty()) {
        return QByteArray();
    }

    // Identical formats need no work.
    if (inputFormat.sampleRate() == outputFormat.sampleRate() &&
        inputFormat.channelCount() == outputFormat.channelCount() &&
        inputFormat.sampleFormat() == outputFormat.sampleFormat()) {
        return inputData;
    }

    const int inChannels = inputFormat.channelCount();
    const int outChannels = outputFormat.channelCount();
    const int inRate = inputFormat.sampleRate();
    const int outRate = outputFormat.sampleRate();

    // These values are used as divisors below.
    if (inChannels <= 0 || outChannels <= 0 || inRate <= 0 || outRate <= 0) {
        qDebug() << "无效的音频格式参数,无法转换";
        return QByteArray();
    }

    const QAudioFormat::SampleFormat outSampleFormat = outputFormat.sampleFormat();
    if (outSampleFormat != QAudioFormat::Int16 && outSampleFormat != QAudioFormat::Float) {
        qDebug() << "不支持的输出音频格式:" << static_cast<int>(outSampleFormat);
        return QByteArray();
    }

    // Step 1: decode to float samples.
    std::vector<float> samples;
    if (inputFormat.sampleFormat() == QAudioFormat::Int16) {
        const int16_t* pcm = reinterpret_cast<const int16_t*>(inputData.constData());
        const int count = static_cast<int>(inputData.size() / 2);
        samples.resize(count);
        for (int i = 0; i < count; i++) {
            samples[i] = pcm[i] / 32768.0f;
        }
    } else if (inputFormat.sampleFormat() == QAudioFormat::Float) {
        const float* pcm = reinterpret_cast<const float*>(inputData.constData());
        const int count = static_cast<int>(inputData.size() / sizeof(float));
        samples.assign(pcm, pcm + count);
    } else {
        qDebug() << "不支持的输入音频格式:" << static_cast<int>(inputFormat.sampleFormat());
        return QByteArray();
    }

    // Only complete frames take part in the conversion.
    int frameCount = static_cast<int>(samples.size()) / inChannels;

    // Step 2: channel remapping.
    if (inChannels != outChannels) {
        std::vector<float> remapped;
        remapped.reserve(static_cast<size_t>(frameCount) * outChannels);

        for (int frame = 0; frame < frameCount; frame++) {
            const float* src = samples.data() + static_cast<size_t>(frame) * inChannels;
            if (outChannels == 1) {
                // Downmix: average of all input channels.
                float sum = 0.0f;
                for (int ch = 0; ch < inChannels; ch++) {
                    sum += src[ch];
                }
                remapped.push_back(sum / inChannels);
            } else {
                for (int ch = 0; ch < outChannels; ch++) {
                    remapped.push_back(src[ch % inChannels]);
                }
            }
        }
        samples = std::move(remapped);
    } else {
        // Drop a trailing partial frame, if any.
        samples.resize(static_cast<size_t>(frameCount) * outChannels);
    }

    // Step 3: frame-wise linear resampling.
    if (inRate != outRate && frameCount > 0) {
        const double step = static_cast<double>(inRate) / outRate;
        const int newFrameCount =
            static_cast<int>(static_cast<double>(frameCount) * outRate / inRate);

        std::vector<float> resampled;
        resampled.reserve(static_cast<size_t>(newFrameCount) * outChannels);

        for (int i = 0; i < newFrameCount; i++) {
            const double srcPos = i * step;
            const int base = static_cast<int>(srcPos);
            if (base >= frameCount) {
                break;  // positions only grow, nothing further can be in range
            }

            const float* cur = samples.data() + static_cast<size_t>(base) * outChannels;
            if (base < frameCount - 1) {
                const float frac = static_cast<float>(srcPos - base);
                const float* next = cur + outChannels;
                for (int ch = 0; ch < outChannels; ch++) {
                    resampled.push_back(cur[ch] * (1.0f - frac) + next[ch] * frac);
                }
            } else {
                // Last input frame: no successor to interpolate towards.
                for (int ch = 0; ch < outChannels; ch++) {
                    resampled.push_back(cur[ch]);
                }
            }
        }
        samples = std::move(resampled);
    }

    // Step 4: encode to the requested sample format.
    const int outSampleCount = static_cast<int>(samples.size());
    QByteArray outputData;

    if (outSampleFormat == QAudioFormat::Int16) {
        outputData.resize(outSampleCount * 2);
        int16_t* dst = reinterpret_cast<int16_t*>(outputData.data());
        for (int i = 0; i < outSampleCount; i++) {
            // Clamp to [-1, 1] before scaling to the 16-bit range.
            const float clamped = std::max(-1.0f, std::min(1.0f, samples[i]));
            dst[i] = static_cast<int16_t>(clamped * 32767.0f);
        }
    } else {
        outputData.resize(outSampleCount * static_cast<int>(sizeof(float)));
        float* dst = reinterpret_cast<float*>(outputData.data());
        for (int i = 0; i < outSampleCount; i++) {
            dst[i] = samples[i];
        }
    }

    return outputData;
}
|
||
|
||
void SpeechTestMainWindow::startKWS() {
|
||
if (isKWSActive) {
|
||
return;
|
||
}
|
||
|
||
// 检查是否正在进行其他音频操作,如果是则自动停止
|
||
if (isRecording) {
|
||
qDebug() << "KWS启动:自动停止ASR麦克风识别";
|
||
stopMicRecognition();
|
||
kwsResultEdit->append("🔄 自动停止ASR麦克风识别以启动语音唤醒");
|
||
}
|
||
|
||
if (isRecordingWav) {
|
||
qDebug() << "KWS启动:自动停止录音功能";
|
||
stopRecording();
|
||
kwsResultEdit->append("🔄 自动停止录音功能以启动语音唤醒");
|
||
}
|
||
|
||
qDebug() << "开始语音唤醒检测";
|
||
|
||
// 获取默认音频设备
|
||
QAudioDevice defaultDevice = QMediaDevices::defaultAudioInput();
|
||
qDebug() << "语音唤醒设备:" << defaultDevice.description();
|
||
|
||
// 使用默认配置:设备首选格式
|
||
kwsAudioFormat = defaultDevice.preferredFormat();
|
||
qDebug() << "KWS使用默认格式 - 采样率:" << kwsAudioFormat.sampleRate()
|
||
<< "声道:" << kwsAudioFormat.channelCount()
|
||
<< "格式:" << static_cast<int>(kwsAudioFormat.sampleFormat());
|
||
|
||
// 检查KWS管理器是否已初始化
|
||
if (!kwsManager->isInitialized()) {
|
||
QMessageBox::critical(this, "错误", "KWS模型未初始化,请检查模型配置");
|
||
return;
|
||
}
|
||
|
||
// 创建KWS检测器
|
||
kwsSpotter = kwsManager->createKeywordSpotter();
|
||
if (!kwsSpotter) {
|
||
QMessageBox::critical(this, "错误", "无法创建KWS关键词检测器");
|
||
return;
|
||
}
|
||
|
||
// 创建KWS流
|
||
kwsStream = kwsManager->createKeywordStream(kwsSpotter);
|
||
if (!kwsStream) {
|
||
QMessageBox::critical(this, "错误", "无法创建KWS关键词流");
|
||
kwsManager->destroyKeywordSpotter(kwsSpotter);
|
||
kwsSpotter = nullptr;
|
||
return;
|
||
}
|
||
|
||
qDebug() << "KWS检测器和流创建成功";
|
||
|
||
// 创建音频源 - 优化缓冲区设置
|
||
kwsAudioSource = new QAudioSource(defaultDevice, kwsAudioFormat, this);
|
||
kwsAudioSource->setBufferSize(16384); // 增大缓冲区,减少音频丢失
|
||
kwsAudioSource->setVolume(1.0);
|
||
|
||
// 启动音频输入
|
||
kwsAudioDevice = kwsAudioSource->start();
|
||
if (!kwsAudioDevice) {
|
||
QMessageBox::critical(this, "错误", "无法启动语音唤醒音频输入");
|
||
delete kwsAudioSource;
|
||
kwsAudioSource = nullptr;
|
||
return;
|
||
}
|
||
|
||
// 创建定时器处理音频数据 - 优化处理频率
|
||
kwsTimer = new QTimer(this);
|
||
connect(kwsTimer, &QTimer::timeout, this, &SpeechTestMainWindow::processKWSData);
|
||
kwsTimer->start(30); // 30ms处理一次,更频繁的处理提高识别率
|
||
|
||
isKWSActive = true;
|
||
kwsStartBtn->setEnabled(false);
|
||
kwsStopBtn->setEnabled(true);
|
||
kwsStartBtn->setText("唤醒检测中...");
|
||
|
||
kwsResultEdit->clear();
|
||
kwsResultEdit->append("🎤 语音唤醒检测已启动");
|
||
kwsResultEdit->append("⚙️ 音频配置:默认格式 → 16kHz单声道");
|
||
|
||
// 尝试读取关键词文件
|
||
QString keywordsPath = QDir::homePath() + "/.config/QSmartAssistant/Data/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/keywords.txt";
|
||
QFile keywordsFile(keywordsPath);
|
||
|
||
kwsResultEdit->append("📋 支持的关键词:");
|
||
if (keywordsFile.open(QIODevice::ReadOnly | QIODevice::Text)) {
|
||
QTextStream in(&keywordsFile);
|
||
QString line;
|
||
int lineCount = 0;
|
||
while (!in.atEnd() && lineCount < 8) { // 显示前8个关键词
|
||
line = in.readLine().trimmed();
|
||
if (!line.isEmpty() && !line.startsWith("#")) {
|
||
kwsResultEdit->append(QString(" • %1").arg(line));
|
||
lineCount++;
|
||
}
|
||
}
|
||
keywordsFile.close();
|
||
|
||
if (lineCount == 0) {
|
||
kwsResultEdit->append(" • 小米小米");
|
||
kwsResultEdit->append(" • 小爱同学");
|
||
kwsResultEdit->append(" • 你好问问");
|
||
}
|
||
} else {
|
||
kwsResultEdit->append(" • 小米小米");
|
||
kwsResultEdit->append(" • 小爱同学");
|
||
kwsResultEdit->append(" • 你好问问");
|
||
}
|
||
|
||
kwsResultEdit->append("🎯 等待关键词检测...");
|
||
kwsResultEdit->append("⚙️ 优化配置:阈值=0.25 (提高识别率)");
|
||
kwsResultEdit->append("💡 提示:发音要清晰标准,现在更容易检测");
|
||
|
||
statusBar()->showMessage("语音唤醒检测运行中");
|
||
qDebug() << "KWS启动完成";
|
||
}
|
||
|
||
void SpeechTestMainWindow::stopKWS() {
|
||
if (!isKWSActive) {
|
||
return;
|
||
}
|
||
|
||
isKWSActive = false;
|
||
|
||
// 停止音频输入
|
||
if (kwsAudioSource) {
|
||
kwsAudioSource->stop();
|
||
delete kwsAudioSource;
|
||
kwsAudioSource = nullptr;
|
||
}
|
||
|
||
// 停止定时器
|
||
if (kwsTimer) {
|
||
kwsTimer->stop();
|
||
delete kwsTimer;
|
||
kwsTimer = nullptr;
|
||
}
|
||
|
||
// 清理KWS资源
|
||
if (kwsStream) {
|
||
kwsManager->destroyKeywordStream(kwsStream);
|
||
kwsStream = nullptr;
|
||
qDebug() << "KWS关键词流已销毁";
|
||
}
|
||
|
||
if (kwsSpotter) {
|
||
kwsManager->destroyKeywordSpotter(kwsSpotter);
|
||
kwsSpotter = nullptr;
|
||
qDebug() << "KWS关键词检测器已销毁";
|
||
}
|
||
|
||
kwsStartBtn->setEnabled(true);
|
||
kwsStopBtn->setEnabled(false);
|
||
kwsStartBtn->setText("开始语音唤醒");
|
||
|
||
kwsResultEdit->append("🛑 语音唤醒检测已停止");
|
||
kwsResultEdit->append("📊 KWS资源已清理完成");
|
||
statusBar()->showMessage("语音唤醒检测已停止");
|
||
|
||
qDebug() << "语音唤醒检测已停止,资源已清理";
|
||
}
|
||
|
||
void SpeechTestMainWindow::processKWSData() {
|
||
if (!kwsAudioDevice || !isKWSActive || !kwsStream || !kwsSpotter) {
|
||
return;
|
||
}
|
||
|
||
// 读取音频数据
|
||
QByteArray audioData = kwsAudioDevice->readAll();
|
||
if (audioData.isEmpty()) {
|
||
return;
|
||
}
|
||
|
||
// 定义目标格式:16kHz单声道
|
||
QAudioFormat targetFormat;
|
||
targetFormat.setSampleRate(16000);
|
||
targetFormat.setChannelCount(1);
|
||
targetFormat.setSampleFormat(QAudioFormat::Float);
|
||
|
||
// 转换音频格式为16kHz单声道
|
||
QByteArray convertedData = convertAudioFormat(audioData, kwsAudioFormat, targetFormat);
|
||
if (convertedData.isEmpty()) {
|
||
return;
|
||
}
|
||
|
||
// 转换后的数据是16kHz单声道浮点格式
|
||
const float* samples = reinterpret_cast<const float*>(convertedData.data());
|
||
int sampleCount = convertedData.size() / sizeof(float);
|
||
|
||
// 分块发送音频数据,提高处理效果
|
||
const int chunkSize = 1600; // 100ms的音频数据 (16000 * 0.1)
|
||
for (int i = 0; i < sampleCount; i += chunkSize) {
|
||
int currentChunkSize = std::min(chunkSize, sampleCount - i);
|
||
kwsManager->acceptWaveform(kwsStream, samples + i, currentChunkSize);
|
||
|
||
// 每个块都检查是否准备好解码
|
||
while (kwsManager->isReady(kwsStream, kwsSpotter)) {
|
||
kwsManager->decode(kwsStream, kwsSpotter);
|
||
|
||
// 立即检查结果
|
||
QString detectedKeyword = kwsManager->getResult(kwsStream, kwsSpotter);
|
||
if (!detectedKeyword.isEmpty()) {
|
||
static int successCount = 0;
|
||
successCount++;
|
||
|
||
qDebug() << "🎯 KWS检测到关键词:" << detectedKeyword << "(第" << successCount << "次)";
|
||
|
||
kwsResultEdit->append(QString("🎯 检测到关键词: %1 (第%2次)")
|
||
.arg(detectedKeyword).arg(successCount));
|
||
statusBar()->showMessage(QString("🎯 检测到关键词: %1 (总计%2次)")
|
||
.arg(detectedKeyword).arg(successCount));
|
||
|
||
// 重置流以继续检测
|
||
kwsManager->reset(kwsStream, kwsSpotter);
|
||
return; // 检测到关键词后立即返回
|
||
}
|
||
}
|
||
}
|
||
|
||
// 简化的调试信息
|
||
static int callCount = 0;
|
||
callCount++;
|
||
|
||
if (callCount % 100 == 0) { // 减少调试输出频率
|
||
// 计算音频电平
|
||
float maxLevel = 0.0f;
|
||
for (int i = 0; i < std::min(sampleCount, 1000); i++) {
|
||
maxLevel = std::max(maxLevel, std::abs(samples[i]));
|
||
}
|
||
|
||
qDebug() << "KWS处理:" << callCount << "次,样本数:" << sampleCount
|
||
<< "电平:" << maxLevel << "阈值:0.25";
|
||
|
||
if (maxLevel > 0.02f) {
|
||
statusBar()->showMessage(QString("检测中... (电平: %1)")
|
||
.arg(maxLevel, 0, 'f', 3));
|
||
}
|
||
}
|
||
} |