godot-psd-training/Communication/voice_communication.gd

117 lines
3.1 KiB
GDScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

extends Node
## Emitted when speech recognition succeeds.
signal speech_recognition_successed
## Recording effect; assigned in _ready() from effect slot 0 of the "Record" bus.
var effect: AudioEffectRecord
## Capture effect used to gauge the recording volume; assigned in _ready()
## from effect slot 1 of the "Record" bus.
var capture: AudioEffectCapture
## Text the recorded speech is to be recognized against.
var targetText: String
## Minimum volume threshold: a captured frame whose channel value exceeds
## this counts as sound input.
const VolumeMin = 0.03
## Silence threshold: once novoiceTime reaches this value the speaker is
## considered to have stopped talking (same unit as $Timer.wait_time,
## presumably seconds).
const LongTimeNoVoice = 1
# True once sound above VolumeMin has been captured and not yet processed.
var hasVoice = false
# Silence time accumulated from $Timer.wait_time since sound was last detected.
var novoiceTime = 0
## Reply sound played when speech recognition succeeds.
var reply_correct = preload("res://Assets/training_speech/correct.mp3")
## Looks up the "Record" audio bus and caches its two effects:
## slot 0 is expected to be the AudioEffectRecord, slot 1 the
## AudioEffectCapture used to measure the input volume.
func _ready():
	# We get the index of the "Record" bus.
	var idx = AudioServer.get_bus_index("Record")
	# get_bus_index() returns -1 for an unknown bus; fail loudly here instead
	# of leaving `effect`/`capture` null and crashing later in an unrelated call.
	if idx == -1 or AudioServer.get_bus_effect_count(idx) < 2:
		push_error("Audio bus \"Record\" is missing or does not have the record and capture effects")
		return
	# And use it to retrieve its first effect, which has been defined
	# as an "AudioEffectRecord" resource.
	effect = AudioServer.get_bus_effect(idx, 0)
	# Audio data capture, used to judge the recording volume and therefore
	# whether there is any sound input.
	capture = AudioServer.get_bus_effect(idx, 1)
## Starts recording; does nothing (besides logging) when a recording is
## already in progress.
func startRecord():
	print("启动录音")
	if effect.is_recording_active():
		return
	effect.set_recording_active(true)
## Stops recording; does nothing when no recording is in progress.
func stopRecord():
	if not effect.is_recording_active():
		return
	effect.set_recording_active(false)
## Restarts recording by switching the record effect off and then on again,
## regardless of its current state.
func restartRecord():
	for active in [false, true]:
		effect.set_recording_active(active)
## Plays a reply sound and waits until playback has finished.
## Recording is stopped before playback starts. A null `reply` is ignored.
## PS: this is a coroutine; callers may `await` it.
func play_reply(reply):
	if reply == null:
		return
	stopRecord()
	assert(reply is AudioStream, "reply不是音频资源")
	# Make sure the stream does not loop, otherwise `finished` would not fire.
	if reply is AudioStreamMP3 or reply is AudioStreamOggVorbis:
		reply.loop = false
	elif reply is AudioStreamWAV:
		reply.loop_mode = AudioStreamWAV.LOOP_DISABLED
	var player = $AudioStreamPlayer
	player.stream = reply
	player.play()
	await player.finished
## Records audio and waits until speech recognition reports success.
## Stores `text` as the recognition target, starts recording and the polling
## timer, then suspends until `speech_recognition_successed` is emitted.
## PS: this is a coroutine; callers that care about completion must `await` it.
func speech_record_check(text: String):
	print("录音采样频率: ", AudioServer.get_mix_rate())
	targetText = text
	var poll_timer = $Timer
	startRecord()
	poll_timer.start()
	await speech_recognition_successed
	print("识别成功,结束")
	# Recognition is done: stop polling and stop recording.
	poll_timer.stop()
	stopRecord()
## Timer callback that drives recording and recognition.
## On every tick while recording is active it drains the capture buffer and
## checks whether any frame is louder than VolumeMin:
## - sound detected: remember that the user has spoken and reset the silence
##   counter;
## - silence: accumulate the timer interval; once the silence lasts
##   LongTimeNoVoice, evaluate the recording (if the user had spoken) and
##   restart the recording.
## PS: this is a coroutine because it awaits the reply playback on success.
func _on_timer_timeout():
	if not effect.is_recording_active():
		return

	var frames = capture.get_buffer(capture.get_frames_available())
	var sound_detected = false
	for frame in frames:
		# Samples are signed, so compare the magnitude; otherwise the negative
		# half of the waveform would never count as sound.
		if absf(frame.x) > VolumeMin or absf(frame.y) > VolumeMin:
			sound_detected = true
			break

	# Sound detected.
	if sound_detected:
		hasVoice = true
		novoiceTime = 0
		return

	# No sound detected: keep counting the silence.
	novoiceTime += $Timer.wait_time
	if novoiceTime < LongTimeNoVoice:
		return

	if hasVoice:
		var rcd = effect.get_recording()
		if rcd == null:
			# Nothing recorded yet; state is kept so the next tick retries.
			return
		print("音频时长: ", rcd.get_length())
		hasVoice = false
		# Placeholder recognition rule: a recording longer than 5 (in
		# get_length() units) counts as recognized.
		if rcd.get_length() > 5:
			restartRecord()
			novoiceTime = 0
			await play_reply(reply_correct)
			speech_recognition_successed.emit()
			# Stop here: listeners of the signal (speech_record_check) have
			# already stopped the recording, so it must not be restarted below.
			return

	# Long time without voice input: restart the recording.
	print("长时间无声音,重启录音")
	restartRecord()
	novoiceTime = 0