godot-psd-training/Communication/voice_communication.gd

extends Node

## Emitted when speech recognition succeeds.
signal speech_recognition_successed

## Recording effect; assigned in _ready() from effect slot 0 of the "Record" bus.
var effect: AudioEffectRecord
## Capture effect (used to gauge the recording volume); assigned in _ready()
## from effect slot 1 of the "Record" bus.
var capture: AudioEffectCapture

## Text awaiting speech recognition; set by speech_record_check().
var targetText: String
## Minimum volume threshold for treating captured audio as sound.
const VolumeMin = 0.03
## Threshold for "no speech for a long time"; compared against novoiceTime.
const LongTimeNoVoice = 1
## True once sound has been detected; cleared after the recording is processed.
var hasVoice = false
## Accumulated silent time (summed from $Timer.wait_time); reset to 0 when
## sound is detected.
var novoiceTime = 0

## Reply sound played when speech recognition succeeds.
var reply_correct = preload("res://Assets/training_speech/correct.mp3")

func _ready():
	# Look up the "Record" audio bus once and pull both effects from it.
	var record_bus = AudioServer.get_bus_index("Record")
	# Slot 0 holds the AudioEffectRecord resource used for recording.
	effect = AudioServer.get_bus_effect(record_bus, 0)
	# Slot 1 holds the capture effect; its audio data is used to judge the
	# recording volume and thereby whether there is any sound input.
	capture = AudioServer.get_bus_effect(record_bus, 1)

## Start recording (does nothing beyond logging if recording is already active).
func startRecord():
	print("启动录音")
	if effect.is_recording_active():
		return
	effect.set_recording_active(true)


## Stop recording (no-op when recording is not active).
func stopRecord():
	if not effect.is_recording_active():
		return
	effect.set_recording_active(false)


## Restart recording by switching the recorder off and then back on.
func restartRecord():
	for active in [false, true]:
		effect.set_recording_active(active)


## Play a reply sound and wait for playback to finish.
## Note: this is a coroutine, so callers may await it.
## A null reply is ignored; otherwise recording is stopped before playback.
func play_reply(reply):
	if reply == null:
		return
	stopRecord()
	assert(reply is AudioStream, "reply不是音频资源")
	# Make sure the stream does not loop.
	if reply is AudioStreamWAV:
		reply.loop_mode = AudioStreamWAV.LOOP_DISABLED
	elif reply is AudioStreamMP3 or reply is AudioStreamOggVorbis:
		reply.loop = false
	var player = $AudioStreamPlayer
	player.stream = reply
	player.play()
	await player.finished

## Record audio and run the speech-recognition check for `text`.
## Note: this is a coroutine; callers that care about the result must await it.
func speech_record_check(text: String):
	var mix_rate = AudioServer.get_mix_rate()
	print("录音采样频率: ", mix_rate)
	targetText = text
	# Begin recording and start the timer that drives _on_timer_timeout().
	startRecord()
	var poll_timer = $Timer
	poll_timer.start()
	# Suspend until recognition is reported as successful.
	await speech_recognition_successed
	print("识别成功，结束")
	# Recognition finished: stop polling and stop recording.
	poll_timer.stop()
	stopRecord()


## Timer callback: polls the captured audio and drives the record/recognize logic.
##
## On every tick while recording is active:
## - If any captured frame is louder than VolumeMin, mark that speech has been
##   heard and reset the silence counter.
## - Otherwise accumulate silence. Once the silence reaches LongTimeNoVoice:
##   - if speech was heard before, fetch the recording, restart the recorder,
##     and when the recording is longer than 5 (as reported by get_length())
##     play the success reply and emit `speech_recognition_successed`;
##   - if no speech was heard, just restart the recorder.
func _on_timer_timeout():
	if not effect.is_recording_active():
		return

	var frames = capture.get_buffer(capture.get_frames_available())
	var sound_detected = false
	for frame in frames:
		# Samples are signed, so compare the magnitude; otherwise the negative
		# half of the waveform is never counted as sound.
		if absf(frame.x) > VolumeMin or absf(frame.y) > VolumeMin:
			sound_detected = true
			break

	# Sound detected.
	if sound_detected:
		hasVoice = true
		novoiceTime = 0
		return

	# No sound detected: accumulate silence and wait for the threshold.
	novoiceTime += $Timer.wait_time
	if novoiceTime < LongTimeNoVoice:
		return

	if hasVoice:
		# Speech followed by a long silence: treat the utterance as finished.
		var rcd = effect.get_recording()
		if rcd == null:
			return
		print("音频时长: ", rcd.get_length())
		# Reset the detection state before any await so this utterance is
		# handled exactly once and the generic "long silence" path below does
		# not run a second restart for the same tick.
		hasVoice = false
		novoiceTime = 0
		restartRecord()
		if rcd.get_length() > 5:
			# play_reply() stops recording while the reply is playing.
			await play_reply(reply_correct)
			speech_recognition_successed.emit()
			# Do not touch the recorder after the emit: speech_record_check()
			# stops the timer and the recording in response to the signal, and
			# restarting here would leave recording switched on afterwards.
		return

	# Long silence without any speech: restart the recording.
	print("长时间无声音，重启录音")
	restartRecord()
	novoiceTime = 0