godot-psd-training/Communication/voice_communication.gd

117 lines
3.1 KiB
GDScript3
Raw Normal View History

extends Node

## Emitted when a recorded utterance has been accepted; speech_record_check()
## awaits this signal to finish.
signal speech_recognition_successed

## Minimum sample value in a captured frame that counts as "sound".
const VolumeMin = 0.03
## Silence threshold, compared against the accumulated $Timer.wait_time values
## in novoiceTime.
const LongTimeNoVoice = 1

## Record effect taken from slot 0 of the "Record" audio bus.
var effect: AudioEffectRecord
## Capture effect taken from slot 1 of the "Record" audio bus; its frames are
## inspected to decide whether any sound is coming in.
var capture: AudioEffectCapture
## Text the recorded speech is expected to match (stored by speech_record_check()).
var targetText: String
## True once sound has been detected since the last reset.
var hasVoice = false
## Accumulated silent time since sound was last detected.
var novoiceTime = 0
## Sound played back when speech recognition succeeds.
var reply_correct = preload("res://Assets/training_speech/correct.mp3")
## Looks up the "Record" audio bus and caches its record effect (slot 0) and
## capture effect (slot 1). Reports a clear error instead of failing later with
## a null dereference when the bus layout is not what this script expects.
func _ready():
	# get_bus_index() returns -1 when no bus with that name exists.
	var idx = AudioServer.get_bus_index("Record")
	if idx == -1:
		push_error("voice_communication: audio bus \"Record\" not found")
		return
	if AudioServer.get_bus_effect_count(idx) < 2:
		push_error("voice_communication: bus \"Record\" needs an AudioEffectRecord in slot 0 and an AudioEffectCapture in slot 1")
		return
	# `as` yields null on a type mismatch, which is checked below.
	effect = AudioServer.get_bus_effect(idx, 0) as AudioEffectRecord
	# The capture effect exposes raw frames, used to judge the input volume.
	capture = AudioServer.get_bus_effect(idx, 1) as AudioEffectCapture
	if effect == null or capture == null:
		push_error("voice_communication: unexpected effect types on bus \"Record\"")
## Starts recording; does nothing beyond logging if recording is already active.
func startRecord():
	print("启动录音")
	if effect.is_recording_active():
		return
	effect.set_recording_active(true)
## Stops recording; a no-op when recording is not currently active.
func stopRecord():
	if not effect.is_recording_active():
		return
	effect.set_recording_active(false)
## Restarts recording by switching the record effect off and straight back on,
## regardless of its current state.
## NOTE(review): presumably this discards the audio recorded so far — confirm
## against the AudioEffectRecord documentation.
func restartRecord():
	# Unconditional toggle: off first, then on again.
	effect.set_recording_active(false)
	effect.set_recording_active(true)
## Plays a reply sound and completes when playback has finished.
## This is a coroutine: callers may `await` it.
## Recording is stopped before playback and is NOT restarted afterwards.
## [param reply] An AudioStream to play; null is ignored.
func play_reply(reply):
	if reply == null:
		return
	stopRecord()
	assert(reply is AudioStream, "reply不是音频资源")
	# Make sure the stream plays once only; WAV streams use a loop mode,
	# MP3 / Ogg Vorbis streams use a boolean flag.
	if reply is AudioStreamWAV:
		reply.loop_mode = AudioStreamWAV.LOOP_DISABLED
	elif reply is AudioStreamMP3 or reply is AudioStreamOggVorbis:
		reply.loop = false
	var player = $AudioStreamPlayer
	player.stream = reply
	player.play()
	await player.finished
## Records speech and waits until it has been recognised.
## This is a coroutine: callers that care about completion must `await` it.
## [param text] The text the recorded speech is expected to match.
func speech_record_check(text: String):
	print("录音采样频率: ", AudioServer.get_mix_rate())
	targetText = text
	# Start every session from a clean slate: the timer handler does not drain
	# the capture buffer while recording is inactive, so stale frames could
	# otherwise be mistaken for speech on the first tick.
	hasVoice = false
	novoiceTime = 0
	capture.clear_buffer()
	startRecord()
	$Timer.start()
	await speech_recognition_successed
	print("识别成功,结束")
	$Timer.stop()
	stopRecord()
## Timer callback: inspects the captured audio and drives recording/recognition.
## While recording is active, each tick:
## - marks that speech was heard if any captured sample exceeds VolumeMin;
## - otherwise accumulates silent time, and once it reaches LongTimeNoVoice
##   either evaluates the finished utterance (if speech was heard) or simply
##   restarts the recording (if nothing was heard at all).
func _on_timer_timeout():
	if not effect.is_recording_active():
		return

	# Drain everything captured since the previous tick.
	var frames = capture.get_buffer(capture.get_frames_available())
	var sound_detected = false
	for frame in frames:
		# Samples are signed, so compare magnitudes; x/y are the two channels.
		if abs(frame.x) > VolumeMin or abs(frame.y) > VolumeMin:
			sound_detected = true
			break

	if sound_detected:
		hasVoice = true
		novoiceTime = 0
		return

	# Silent tick.
	novoiceTime += $Timer.wait_time
	if novoiceTime < LongTimeNoVoice:
		return

	if not hasVoice:
		# Long silence with no speech at all: restart the recording.
		print("长时间无声音,重启录音")
		restartRecord()
		novoiceTime = 0
		return

	# Speech followed by a long enough silence: the utterance is complete.
	var rcd = effect.get_recording()
	if rcd == null:
		return
	print("音频时长: ", rcd.get_length())
	restartRecord()
	# Reset the state before awaiting, and exactly once. Falling through to the
	# "long silence" handling here would restart recording a second time and,
	# on success, re-arm it after speech_record_check() has stopped it.
	hasVoice = false
	novoiceTime = 0
	if rcd.get_length() > 5:
		await play_reply(reply_correct)
		speech_recognition_successed.emit()