feat: 改造语音的实现逻辑

This commit is contained in:
2026-05-15 12:07:27 +08:00
parent f516e4fe2e
commit f71f432a45
4 changed files with 322 additions and 98 deletions

View File

@@ -1,6 +1,7 @@
// App 端 yao-asdRealSpeech 使用的阿里云 DashScope 实时语音识别配置。 // App 端 yao-asdRealSpeech 使用的阿里云 DashScope 实时语音识别配置。
// 将 apikey 填成实际的 DashScope API Key 后App 端语音识别即可发起连接。 // 将 apikey 填成实际的 DashScope API Key 后App 端语音识别即可发起连接。
export const appSpeechRecognitionOptions = { export const appSpeechRecognitionOptions = {
apikey: "SnoHqdtJ832riRg4", apikey: "sk-2cab1c221b4b47749119d33ab991360a",
language_hints: ["zh"], language_hints: ["zh"],
saveAudioFile: false,
}; };

View File

@@ -1,6 +1,10 @@
<template> <template>
<view class="input-area-wrapper"> <view
<view v-if="!visibleWaveBtn" class="area-input"> class="input-area-wrapper"
@touchend="handleVoiceTouchEndFromContainer"
@touchcancel="handleVoiceTouchEndFromContainer"
>
<view class="area-input">
<!-- 语音/键盘切换 --> <!-- 语音/键盘切换 -->
<view <view
v-if="isSpeechRecognitionSupported" v-if="isSpeechRecognitionSupported"
@@ -51,11 +55,16 @@
<view <view
v-if="isVoiceMode" v-if="isVoiceMode"
class="hold-to-talk-button" class="hold-to-talk-button"
@longpress="handleVoiceTouchStart" @touchstart="handleVoiceTouchStart"
@touchend="handleVoiceTouchEnd" @touchend="handleVoiceTouchEnd"
@touchcancel="handleVoiceTouchEnd" @touchcancel="handleVoiceTouchEnd"
> >
按住 说话 <RecordingWaveBtn
v-if="visibleWaveBtn"
class="recording-wave-inline"
ref="recordingWaveBtnRef"
/>
<text v-else>按住 说话</text>
</view> </view>
</view> </view>
@@ -71,12 +80,10 @@
</view> </view>
</view> </view>
<!-- 录音按钮 -->
<RecordingWaveBtn v-if="visibleWaveBtn" ref="recordingWaveBtnRef" />
<!-- #ifdef APP-PLUS --> <!-- #ifdef APP-PLUS -->
<yao-asdRealSpeech <yao-asdRealSpeech
v-if="isSpeechRecognitionSupported" v-if="isSpeechRecognitionSupported && appSpeechVisible"
:key="appSpeechKey"
ref="appSpeechRef" ref="appSpeechRef"
:options="appSpeechOptions" :options="appSpeechOptions"
@result="handleAppSpeechResult" @result="handleAppSpeechResult"
@@ -98,7 +105,7 @@ import { appSpeechRecognitionOptions } from "@/constant/speech";
let manager = null; let manager = null;
let speechProvider = ""; let speechProvider = "";
const isSpeechRecognitionEnabled = ref(false); const isSpeechRecognitionEnabled = ref(true);
const isSpeechRecognitionSupported = ref(false); const isSpeechRecognitionSupported = ref(false);
const appSpeechOptions = appSpeechRecognitionOptions; const appSpeechOptions = appSpeechRecognitionOptions;
@@ -134,6 +141,8 @@ const emit = defineEmits([
const textareaRef = ref(null); const textareaRef = ref(null);
const recordingWaveBtnRef = ref(null); const recordingWaveBtnRef = ref(null);
const appSpeechRef = ref(null); const appSpeechRef = ref(null);
const appSpeechKey = ref(0);
const appSpeechVisible = ref(true);
const placeholder = computed(() => { const placeholder = computed(() => {
const config = getCurrentConfig(); const config = getCurrentConfig();
return `快告诉${config.name}您在想什么~`; return `快告诉${config.name}您在想什么~`;
@@ -144,7 +153,10 @@ const keyboardHeight = ref(0);
const isVoiceMode = ref(false); const isVoiceMode = ref(false);
const visibleWaveBtn = ref(false); const visibleWaveBtn = ref(false);
const isRecording = ref(false); const isRecording = ref(false);
const isVoicePressing = ref(false);
const appRecognizedText = ref(""); const appRecognizedText = ref("");
const isAppSpeechStarting = ref(false);
const isAppWaitingToSend = ref(false);
let watchDogTimer = null; let watchDogTimer = null;
let appStopFallbackTimer = null; let appStopFallbackTimer = null;
let hasSentAppRecognition = false; let hasSentAppRecognition = false;
@@ -168,7 +180,12 @@ const resetUI = () => {
const startWatchDog = (timeout = 10000) => { const startWatchDog = (timeout = 10000) => {
if (watchDogTimer) clearTimeout(watchDogTimer); if (watchDogTimer) clearTimeout(watchDogTimer);
watchDogTimer = setTimeout(() => { watchDogTimer = setTimeout(() => {
watchDogTimer = null;
console.warn("recording watchdog triggered, forcing UI reset"); console.warn("recording watchdog triggered, forcing UI reset");
if (hasActiveVoiceRecognition()) {
stopActiveVoiceRecognition({ shouldSend: true });
return;
}
resetUI(); resetUI();
}, timeout); }, timeout);
}; };
@@ -180,6 +197,48 @@ const clearAppStopFallback = () => {
} }
}; };
/**
 * Put the input area into its "recording" visual state.
 *
 * Marks recording as active, reveals the wave button, starts the wave
 * animation once the DOM has been updated, and arms the 10 s watchdog.
 */
const showRecordingUI = () => {
  isRecording.value = true;
  visibleWaveBtn.value = true;

  // The wave button is rendered behind `v-if="visibleWaveBtn"`, so its ref is
  // only looked up after the next DOM update.
  nextTick(() => {
    const waveBtn = recordingWaveBtnRef.value;
    if (!waveBtn) {
      return;
    }
    waveBtn.startAnimation();
  });

  startWatchDog(10000);
};
/**
 * Force the App-side speech component to be recreated.
 *
 * Only acts when the current speech provider is "app". The component is
 * hidden immediately; on the next tick its key is incremented and it is
 * shown again. The "starting" flag is cleared as part of the reset.
 */
const resetAppSpeechComponent = () => {
  if (speechProvider === "app") {
    isAppSpeechStarting.value = false;
    appSpeechVisible.value = false;

    nextTick(() => {
      appSpeechKey.value = appSpeechKey.value + 1;
      appSpeechVisible.value = true;
    });
  }
};
/**
 * Return the text that is waiting to be sent for the App speech flow.
 *
 * Uses the recognized text when it is non-empty, otherwise the current input
 * message, otherwise an empty string; the chosen value is trimmed.
 *
 * @returns {string} Trimmed pending text (may be empty).
 */
const getPendingAppSpeechText = () => {
  const candidate = appRecognizedText.value || inputMessage.value || "";
  return candidate.trim();
};
/**
 * Send the recognized App speech text now, or arm a fallback timer to do so.
 *
 * Flags the flow as "waiting to send" and cancels any previous fallback
 * timer. If text is already pending it is sent immediately; otherwise a
 * timer is started that attempts the send after `timeout` milliseconds.
 *
 * @param {number} [timeout=1200] Delay in milliseconds before the fallback send.
 */
const scheduleAppRecognizedTextSend = (timeout = 1200) => {
  isAppWaitingToSend.value = true;
  clearAppStopFallback();

  const hasTextReady = Boolean(getPendingAppSpeechText());
  if (hasTextReady) {
    sendAppRecognizedText();
  } else {
    appStopFallbackTimer = setTimeout(() => {
      // Clear the handle first so the timer is not treated as still pending.
      appStopFallbackTimer = null;
      sendAppRecognizedText();
    }, timeout);
  }
};
// 保持和父组件同步 // 保持和父组件同步
watch( watch(
() => props.modelValue, () => props.modelValue,
@@ -199,17 +258,24 @@ watch(inputMessage, (val) => {
// 切换语音/文本模式 // 切换语音/文本模式
const toggleVoiceMode = () => { const toggleVoiceMode = () => {
if (!isSpeechRecognitionSupported.value) return; if (!isSpeechRecognitionSupported.value) return;
if (isVoiceMode.value) {
stopActiveVoiceRecognition({ shouldSend: false });
}
isVoiceMode.value = !isVoiceMode.value; isVoiceMode.value = !isVoiceMode.value;
}; };
// 处理语音按钮按开始 // 处理语音按钮按开始
const handleVoiceTouchStart = () => { const handleVoiceTouchStart = () => {
if (!isSpeechRecognitionSupported.value) return; if (!isSpeechRecognitionSupported.value) return;
if (isRecording.value || isAppSpeechStarting.value || visibleWaveBtn.value) return;
isVoicePressing.value = true;
try { try {
if (speechProvider === "wechat") { if (speechProvider === "wechat") {
if (!manager) return; if (!manager) return;
manager.start({ lang: "zh_CN" }); manager.start({ lang: "zh_CN" });
showRecordingUI();
} else if (speechProvider === "app") { } else if (speechProvider === "app") {
if (!appSpeechOptions.apikey) { if (!appSpeechOptions.apikey) {
uni.showToast({ uni.showToast({
@@ -220,10 +286,15 @@ const handleVoiceTouchStart = () => {
} }
appRecognizedText.value = ""; appRecognizedText.value = "";
inputMessage.value = "";
isAppWaitingToSend.value = false;
hasSentAppRecognition = false; hasSentAppRecognition = false;
clearAppStopFallback();
isAppSpeechStarting.value = true;
const appSpeech = appSpeechRef.value; const appSpeech = appSpeechRef.value;
if (!appSpeech || typeof appSpeech.start !== "function") { if (!appSpeech || typeof appSpeech.start !== "function") {
isAppSpeechStarting.value = false;
uni.showToast({ uni.showToast({
title: "语音组件未初始化", title: "语音组件未初始化",
icon: "none", icon: "none",
@@ -235,31 +306,65 @@ const handleVoiceTouchStart = () => {
} else { } else {
return; return;
} }
isRecording.value = true;
visibleWaveBtn.value = true;
// 启动音频条动画
nextTick(() => {
if (recordingWaveBtnRef.value) {
recordingWaveBtnRef.value.startAnimation();
}
});
startWatchDog(10000);
} catch (err) { } catch (err) {
console.error("record start error:", err); console.error("record start error:", err);
isAppSpeechStarting.value = false;
// 保底清理 // 保底清理
resetUI(); resetUI();
} }
}; };
// 处理语音按钮长按结束 const hasActiveVoiceRecognition = () => {
const handleVoiceTouchEnd = () => { return (
isVoicePressing.value ||
isRecording.value ||
isAppSpeechStarting.value ||
visibleWaveBtn.value
);
};
const stopActiveVoiceRecognition = ({ shouldSend = true } = {}) => {
isVoicePressing.value = false;
if (!isSpeechRecognitionSupported.value) { if (!isSpeechRecognitionSupported.value) {
resetUI(); resetUI();
return; return;
} }
if (speechProvider === "app") {
const wasStarting = isAppSpeechStarting.value;
const wasRecording = isRecording.value;
if (!wasStarting && !wasRecording) {
if (!shouldSend) {
isAppWaitingToSend.value = false;
clearAppStopFallback();
resetAppSpeechComponent();
}
resetUI();
return false;
}
isAppSpeechStarting.value = false;
try {
appSpeechRef.value?.stop?.();
if (wasRecording && shouldSend) {
scheduleAppRecognizedTextSend();
} else {
isAppWaitingToSend.value = false;
clearAppStopFallback();
resetAppSpeechComponent();
}
} catch (err) {
console.error("record stop error:", err);
resetAppSpeechComponent();
} finally {
resetUI();
}
return true;
}
// 如果本地状态不是录音中,也确保 UI 恢复 // 如果本地状态不是录音中,也确保 UI 恢复
if (!isRecording.value) { if (!isRecording.value) {
if (recordingWaveBtnRef.value) { if (recordingWaveBtnRef.value) {
@@ -270,26 +375,28 @@ const handleVoiceTouchEnd = () => {
clearTimeout(watchDogTimer); clearTimeout(watchDogTimer);
watchDogTimer = null; watchDogTimer = null;
} }
return; return false;
} }
try { try {
if (speechProvider === "wechat") { manager?.stop?.();
manager?.stop?.();
} else if (speechProvider === "app") {
appSpeechRef.value?.stop?.();
clearAppStopFallback();
appStopFallbackTimer = setTimeout(() => {
appStopFallbackTimer = null;
sendAppRecognizedText();
}, 600);
}
} catch (err) { } catch (err) {
console.error("record stop error:", err); console.error("record stop error:", err);
} finally { } finally {
// 无论 stop 是否抛错,都保证 UI 恢复 // 无论 stop 是否抛错,都保证 UI 恢复
resetUI(); resetUI();
} }
return true;
};
// Handle the hold-to-talk button being released (touchend / touchcancel):
// stop whatever recognition is active and ask for the result to be sent.
const handleVoiceTouchEnd = () => {
  const stopOptions = { shouldSend: true };
  stopActiveVoiceRecognition(stopOptions);
};
const handleVoiceTouchEndFromContainer = () => {
if (!hasActiveVoiceRecognition()) return;
stopActiveVoiceRecognition({ shouldSend: true });
}; };
// 处理发送原语音 // 处理发送原语音
@@ -336,46 +443,87 @@ const getAppSpeechText = (res) => {
}; };
const sendAppRecognizedText = () => { const sendAppRecognizedText = () => {
if (hasSentAppRecognition) return; if (hasSentAppRecognition) return false;
const text = appRecognizedText.value.trim(); const text = getPendingAppSpeechText();
if (!text) { if (!text) {
console.log("没有说话"); console.log("没有说话");
return; return false;
} }
hasSentAppRecognition = true; hasSentAppRecognition = true;
isAppWaitingToSend.value = false;
clearAppStopFallback(); clearAppStopFallback();
inputMessage.value = text; inputMessage.value = text;
emit("send", text); emit("send", text);
appRecognizedText.value = ""; appRecognizedText.value = "";
return true;
}; };
const handleAppSpeechResult = (res) => { const handleAppSpeechResult = (res) => {
if (hasSentAppRecognition) return;
const text = getAppSpeechText(res); const text = getAppSpeechText(res);
if (!text) return; if (!text) return;
appRecognizedText.value = text; appRecognizedText.value = text;
inputMessage.value = text; inputMessage.value = text;
if (isAppWaitingToSend.value) {
sendAppRecognizedText();
}
}; };
const handleAppSpeechChange = (msg) => { const handleAppSpeechChange = (msg) => {
if (!msg || !msg.status) return; if (!msg || !msg.status) return;
if (msg.status === "START") { if (msg.status === "START") {
isRecording.value = true; isAppSpeechStarting.value = false;
if (!isVoicePressing.value) {
appSpeechRef.value?.stop?.();
resetAppSpeechComponent();
resetUI();
return;
}
showRecordingUI();
return; return;
} }
if (msg.status === "STOP") { if (msg.status === "STOP") {
isAppSpeechStarting.value = false;
resetUI();
if (!sendAppRecognizedText() && !isAppWaitingToSend.value) {
resetAppSpeechComponent();
}
return;
}
if (msg.status === "FINISH") {
isAppSpeechStarting.value = false;
resetUI(); resetUI();
sendAppRecognizedText(); sendAppRecognizedText();
resetAppSpeechComponent();
return;
}
if (msg.status === "CLOSE") {
isAppSpeechStarting.value = false;
resetUI();
if (isAppWaitingToSend.value) {
sendAppRecognizedText();
}
resetAppSpeechComponent();
return; return;
} }
if (msg.status === "ERROR") { if (msg.status === "ERROR") {
console.error("app speech recognition error", msg.msg); console.error("app speech recognition error", msg.msg);
isAppSpeechStarting.value = false;
isAppWaitingToSend.value = false;
clearAppStopFallback();
resetUI(); resetUI();
resetAppSpeechComponent();
} }
}; };
@@ -413,6 +561,9 @@ onUnmounted(() => {
} }
} }
clearAppStopFallback(); clearAppStopFallback();
isAppSpeechStarting.value = false;
isAppWaitingToSend.value = false;
isVoicePressing.value = false;
resetUI(); resetUI();
}); });

View File

@@ -44,6 +44,13 @@
-webkit-user-select: none; -webkit-user-select: none;
} }
/* Layout for an element carrying the recording-wave-inline class: fill the
   available width, with no outer margin and no drop shadow.
   NOTE(review): presumably overrides the wave button's standalone defaults
   when it is rendered inside the hold-to-talk button; confirm. */
.recording-wave-inline {
width: 100%;
margin: 0;
margin-bottom: 0;
box-shadow: none;
}
.input-container-send { .input-container-send {
display: flex; display: flex;
align-items: center; align-items: center;

View File

@@ -16,13 +16,15 @@
}, },
methods:{ methods:{
start(){ start(){
this.task_id='';
this.webSocekt(); this.webSocekt();
}, },
stop(){ stop(){
this.recorderStatus="stop"; this.recorderStatus={
if(ws){ status:"stop",
ws.close(); saveAudioFile: !this.options || this.options.saveAudioFile !== false,
} time: Date.now()
};
}, },
toShowToast(){ toShowToast(){
uni.showToast({ uni.showToast({
@@ -31,9 +33,53 @@
}); });
this.recorderStatus="stop"; this.recorderStatus="stop";
}, },
startRecorder(){ startRecorder(){
this.$emit('change',{status:"START",msg:"开始录音"}); this.$emit('change',{status:"START",msg:"开始录音"});
}, },
recordStopped(data){
setTimeout(()=>{
this.finishTask();
},80);
if(data && data.saveAudioFile === false){
this.$emit('change',{status:"STOP",msg:{}});
}
},
finishTask(){
if(!ws || !this.task_id){
this.closeSocket();
return;
}
var param={
header: {
action: "finish-task",
task_id: this.task_id,
streaming: "duplex"
},
payload:{
input:{}
}
};
ws.send({
data:JSON.stringify(param),
fail:(err)=>{
this.$emit('change',{status:"ERROR",msg:err});
this.closeSocket();
}
});
},
closeSocket(){
if(ws){
var socket=ws;
ws=null;
try{
socket.close();
}catch(e){
}
}
},
webSocekt(){ webSocekt(){
ws = uni.connectSocket({ ws = uni.connectSocket({
url: 'wss://dashscope.aliyuncs.com/api-ws/v1/inference', url: 'wss://dashscope.aliyuncs.com/api-ws/v1/inference',
@@ -75,12 +121,25 @@
var msg=JSON.parse(data.data) var msg=JSON.parse(data.data)
if(msg.header.event=="task-started"){ if(msg.header.event=="task-started"){
this.task_id=msg.header.task_id; this.task_id=msg.header.task_id;
this.recorderStatus="start"; this.recorderStatus={
status:"start",
time: Date.now()
};
} }
if(msg.header.event=="result-generated"){ if(msg.header.event=="result-generated"){
this.$emit('result',msg.payload.output); this.$emit('result',msg.payload.output);
} }
if(msg.header.event=="task-finished"){
this.$emit('change',{status:"FINISH",msg:msg.payload || {}});
this.closeSocket();
}
if(msg.header.event=="task-failed"){
this.$emit('change',{status:"ERROR",msg:msg});
this.closeSocket();
}
}); });
ws.onError((err)=>{ ws.onError((err)=>{
@@ -88,6 +147,7 @@
}); });
ws.onClose(()=>{ ws.onClose(()=>{
ws=null;
this.$emit('change',{status:"CLOSE",msg:"连接已关闭"}); this.$emit('change',{status:"CLOSE",msg:"连接已关闭"});
}) })
}, },
@@ -103,6 +163,7 @@
return result; return result;
}, },
frameRecorded({isLastFrame,frameBuffer}){ frameRecorded({isLastFrame,frameBuffer}){
if(!ws || !frameBuffer) return;
ws.send({data:uni.base64ToArrayBuffer(frameBuffer)}); ws.send({data:uni.base64ToArrayBuffer(frameBuffer)});
}, },
recordedChunks(base64){ recordedChunks(base64){
@@ -210,13 +271,15 @@
}, },
methods: { methods: {
async startRecord(val){ async startRecord(val){
if(val==null) return; if(val==null) return;
const status = typeof val === 'object' ? val.status : val;
const saveAudioFile = typeof val === 'object' ? val.saveAudioFile !== false : true;
if(val == "start"){ if(status == "start"){
try{ try{
recordedChunks = []; recordedChunks = [];
// 配置参数 // 配置参数
var a, i = 16000, var a, i = 16000,
s = 1280; s = 1280;
@@ -281,9 +344,9 @@
this.$ownerInstance.callMethod('toShowToast'); this.$ownerInstance.callMethod('toShowToast');
} }
}else if(val == "stop"){ }else if(status == "stop"){
this.onStop(); this.onStop(saveAudioFile);
} }
}, },
onFrameRecorded({isLastFrame,frameBuffer}){ onFrameRecorded({isLastFrame,frameBuffer}){
@@ -298,7 +361,7 @@
} }
return window.btoa(binary); return window.btoa(binary);
}, },
async onRecordedChunks(chunks){ async onRecordedChunks(chunks){
var mergedBuffer=this.mergeAudioBuffers(chunks); var mergedBuffer=this.mergeAudioBuffers(chunks);
// 2. 将合并的二进制流转换为WAV格式需要添加WAV文件头 // 2. 将合并的二进制流转换为WAV格式需要添加WAV文件头
@@ -307,13 +370,11 @@
const base64 = await this.blobToBase64(wavBlob); const base64 = await this.blobToBase64(wavBlob);
this.$ownerInstance.callMethod('recordedChunks',base64) this.$ownerInstance.callMethod('recordedChunks',base64)
}, },
onStop(){ onStop(saveAudioFile = true){
this.onFrameRecorded({isLastFrame:true,frameBuffer:''}); const chunks = recordedChunks;
recordedChunks = [];
this.onRecordedChunks(recordedChunks); if (mediaStream) {
recordedChunks = [];
if (mediaStream) {
// 停止所有媒体轨道 // 停止所有媒体轨道
mediaStream.getTracks().forEach(track => track.stop()); mediaStream.getTracks().forEach(track => track.stop());
mediaStream = null; mediaStream = null;
@@ -327,11 +388,15 @@
if (audioContext) { if (audioContext) {
// 关闭音频上下文 // 关闭音频上下文
audioContext.close().then(() => { audioContext.close().then(() => {
audioContext = null; audioContext = null;
}); });
} }
}, this.$ownerInstance.callMethod('recordStopped',{saveAudioFile});
if(saveAudioFile){
this.onRecordedChunks(chunks);
}
},
//合并所有buffer //合并所有buffer
mergeAudioBuffers(buffers) { mergeAudioBuffers(buffers) {
let totalLength = buffers.reduce((acc, buf) => acc + buf.byteLength, 0); let totalLength = buffers.reduce((acc, buf) => acc + buf.byteLength, 0);