godot-psd-training/Communication/voice_communication.gd

extends Node

## Signal emitted when speech recognition is considered successful.
signal speech_recognition_successed

## Recording effect, fetched from slot 0 of the "Record" bus in _ready().
var effect: AudioEffectRecord
## Capture effect (slot 1 of the "Record" bus); its frames are used to gauge the input volume.
var capture: AudioEffectCapture

## Text that should be recognized; assigned by speech_record_check().
var targetText: String
## Minimum sample value for a captured frame to count as sound.
const VolumeMin = 0.03
## Silence threshold compared against novoiceTime (presumably seconds, as it accumulates $Timer.wait_time — confirm).
const LongTimeNoVoice = 1
# True once sound has been detected and the resulting recording has not been processed yet.
var hasVoice = false
# Accumulated silent time; grows by $Timer.wait_time on every timer tick without sound.
var novoiceTime = 0

## Reply sound played when speech recognition succeeds.
var reply_correct = preload("res://Assets/training_speech/correct.mp3")

## Looks up the "Record" audio bus and caches the two effects this node uses.
## Slot 0 is expected to hold the AudioEffectRecord and slot 1 the
## AudioEffectCapture used to measure the input volume.
func _ready():
	var idx = AudioServer.get_bus_index("Record")
	# get_bus_index() yields -1 for an unknown bus; report it explicitly
	# instead of passing an invalid index on to get_bus_effect().
	if idx == -1:
		push_error("Audio bus \"Record\" not found; voice recording is unavailable")
		return
	# First effect on the bus: the recorder itself.
	effect = AudioServer.get_bus_effect(idx, 0)
	# Second effect on the bus: raw frame capture, used to decide whether
	# any sound is currently coming in.
	capture = AudioServer.get_bus_effect(idx, 1)

## Starts recording; does nothing beyond logging when it is already active.
func startRecord():
	print("启动录音")
	if effect.is_recording_active():
		return
	effect.set_recording_active(true)


## Stops recording if it is currently active.
func stopRecord():
	if not effect.is_recording_active():
		return
	effect.set_recording_active(false)


## Restarts recording by switching it off and then on again.
func restartRecord():
	for active in [false, true]:
		effect.set_recording_active(active)


## Plays an audio reply and suspends until playback has finished.
## Coroutine: callers may `await` it.
## Returns immediately when `reply` is null; otherwise recording is stopped
## before the reply is played.
func play_reply(reply):
	if reply == null:
		return
	stopRecord()
	assert(reply is AudioStream, "reply不是音频资源")
	# Disable looping for MP3, Ogg Vorbis and WAV streams.
	if reply is AudioStreamWAV:
		reply.loop_mode = AudioStreamWAV.LOOP_DISABLED
	elif reply is AudioStreamMP3 or reply is AudioStreamOggVorbis:
		reply.loop = false
	var player = $AudioStreamPlayer
	player.stream = reply
	player.play()
	await player.finished

## Starts recording and the polling timer, then waits for
## `speech_recognition_successed` before stopping both again.
## Coroutine: `await` it if the caller cares about completion.
## `text` is stored in `targetText` as the phrase to be recognized.
func speech_record_check(text: String):
	targetText = text
	print("录音采样频率: ", AudioServer.get_mix_rate())
	var timer = $Timer
	startRecord()
	timer.start()
	await speech_recognition_successed
	print("识别成功，结束")
	timer.stop()
	stopRecord()


## Timer callback that polls the capture buffer and drives the
## record -> evaluate -> reply cycle while recording is active.
##
## - If any captured frame exceeds `VolumeMin`, the speaker is considered
##   active and the silence counter is reset.
## - Otherwise the silence counter grows by `$Timer.wait_time`. Once it
##   reaches `LongTimeNoVoice`:
##   - with prior voice input, the recording is fetched and evaluated. A
##     recording longer than 5 (as reported by `get_length()`) counts as
##     a success: the confirmation sound is played and
##     `speech_recognition_successed` is emitted. Shorter recordings are
##     discarded and recording restarts.
##   - without prior voice input, recording is simply restarted.
##
## Coroutine: on the success path it awaits `play_reply()`.
func _on_timer_timeout():
	if not effect.is_recording_active():
		return

	var buf = capture.get_buffer(capture.get_frames_available())
	var soundDetected = false
	for vec in buf:
		# Compare magnitudes so that negative sample values count as well,
		# and stop scanning as soon as one loud frame is found.
		if absf(vec.x) > VolumeMin or absf(vec.y) > VolumeMin:
			soundDetected = true
			break

	# Sound detected: remember it and reset the silence counter.
	if soundDetected:
		hasVoice = true
		novoiceTime = 0
		return

	# No sound detected on this tick.
	novoiceTime += $Timer.wait_time
	if novoiceTime < LongTimeNoVoice:
		return

	if not hasVoice:
		# Long silence without any voice input: restart the recording.
		print("长时间无声音，重启录音")
		restartRecord()
		novoiceTime = 0
		return

	# The speaker talked and has now been silent long enough: evaluate.
	var rcd = effect.get_recording()
	if rcd == null:
		return
	print("音频时长: ", rcd.get_length())
	hasVoice = false
	novoiceTime = 0
	if rcd.get_length() > 5:
		# play_reply() stops the recording before playing. Return right
		# after emitting so the recording is not re-activated once the
		# listener has stopped the timer and the recording.
		await play_reply(reply_correct)
		speech_recognition_successed.emit()
		return
	# Recording too short: discard it and start a fresh one (exactly once).
	restartRecord()
-												调整项目结构，添加根据URL参数加载相应实训场景的scene_loader场景
实验录音及播放
											
										
										
											2024-04-16 10:10:18 +08:00
+								extends Node
-												语音会话场景语音识别-录音处理逻辑完成
											
										
										
											2024-05-14 10:00:22 +08:00
+								## 语音识别成功信号
-												录音逻辑处理

											
										
										
											2024-05-11 20:06:02 +08:00
+								signal speech_recognition_successed
-												添加回复用音频（文本转语音生成的）
语音会话添加play_reply方法实现播放回复
探索录音方式
											
										
										
											2024-05-09 20:05:07 +08:00
+								## 录音效果器
-												语音会话组件构思
											
										
										
											2024-05-08 16:56:02 +08:00
+								var effect: AudioEffectRecord
-												添加回复用音频（文本转语音生成的）
语音会话添加play_reply方法实现播放回复
探索录音方式
											
										
										
											2024-05-09 20:05:07 +08:00
+								## 录音捕获效果器（用于判断录音音量）
 								var capture: AudioEffectCapture
-												调整项目结构，添加根据URL参数加载相应实训场景的scene_loader场景
实验录音及播放
											
										
										
											2024-04-16 10:10:18 +08:00
-												语音会话场景语音识别-录音处理逻辑完成
											
										
										
											2024-05-14 10:00:22 +08:00
+								## 待语音识别的文本
 								var targetText: String
 								## 音量最小阈值
 								const VolumeMin = 0.03
 								## 长时间没有说话阈值
 								const LongTimeNoVoice = 1
 								var hasVoice = false
 								var novoiceTime = 0
 								## 语音识别成功回复音效
 								var reply_correct = preload("res://Assets/training_speech/correct.mp3")
-												调整项目结构，添加根据URL参数加载相应实训场景的scene_loader场景
实验录音及播放
											
										
										
											2024-04-16 10:10:18 +08:00
 								func _ready():
 									# We get the index of the "Record" bus.
 									var idx = AudioServer.get_bus_index("Record")
 									# And use it to retrieve its first effect, which has been defined
 									# as an "AudioEffectRecord" resource.
 									effect = AudioServer.get_bus_effect(idx, 0)
-												语音会话场景语音识别-录音处理逻辑完成
											
										
										
											2024-05-14 10:00:22 +08:00
+									# 音频数据捕获，用于判断录音音量从而判断是否有声音输入
-												添加回复用音频（文本转语音生成的）
语音会话添加play_reply方法实现播放回复
探索录音方式
											
										
										
											2024-05-09 20:05:07 +08:00
+									capture = AudioServer.get_bus_effect(idx, 1)
-												调整项目结构，添加根据URL参数加载相应实训场景的scene_loader场景
实验录音及播放
											
										
										
											2024-04-16 10:10:18 +08:00
-												语音会话组件构思
											
										
										
											2024-05-08 16:56:02 +08:00
+								## 启动录音
 								func startRecord():
-												录音逻辑处理

											
										
										
											2024-05-11 20:06:02 +08:00
+									print("启动录音")
-												语音会话组件构思
											
										
										
											2024-05-08 16:56:02 +08:00
+									if not effect.is_recording_active():
 										effect.set_recording_active(true)
 								## 停止录音
 								func stopRecord():
-												调整项目结构，添加根据URL参数加载相应实训场景的scene_loader场景
实验录音及播放
											
										
										
											2024-04-16 10:10:18 +08:00
+									if effect.is_recording_active():
 										effect.set_recording_active(false)
-												语音会话组件构思
											
										
										
											2024-05-08 16:56:02 +08:00
-												语音会话场景语音识别-录音处理逻辑完成
											
										
										
											2024-05-14 10:00:22 +08:00
 								## 重启录音
 								func restartRecord():
 									effect.set_recording_active(false)
 									effect.set_recording_active(true)
-												添加回复用音频（文本转语音生成的）
语音会话添加play_reply方法实现播放回复
探索录音方式
											
										
										
											2024-05-09 20:05:07 +08:00
 								## 播放回复
 								## PS: 是协程函数，外部可以await
 								func play_reply(reply):
-												录音逻辑处理

											
										
										
											2024-05-11 20:06:02 +08:00
+									if reply == null:
 										return
-												添加回复用音频（文本转语音生成的）
语音会话添加play_reply方法实现播放回复
探索录音方式
											
										
										
											2024-05-09 20:05:07 +08:00
+									stopRecord()
 									assert(reply is AudioStream, "reply不是音频资源")
 									## 确保不循环播放
 									if reply is AudioStreamMP3:
 										reply.loop = false
 									if reply is AudioStreamOggVorbis:
 										reply.loop = false
 									if reply is AudioStreamWAV:
 										reply.loop_mode = AudioStreamWAV.LOOP_DISABLED
 									$AudioStreamPlayer.stream = reply
 									$AudioStreamPlayer.play()
 									await $AudioStreamPlayer.finished
 								## 录音并语音识别检查
 								## PS: 是协程函数，外部如果关心结果需await
-												语音会话场景语音识别-录音处理逻辑完成
											
										
										
											2024-05-14 10:00:22 +08:00
+								func speech_record_check(text: String):
 									print("录音采样频率: ", AudioServer.get_mix_rate())
 									targetText = text
-												录音逻辑处理

											
										
										
											2024-05-11 20:06:02 +08:00
+									startRecord()
 									$Timer.start()
 									await speech_recognition_successed
-												语音会话场景语音识别-录音处理逻辑完成
											
										
										
											2024-05-14 10:00:22 +08:00
+									print("识别成功，结束")
-												录音逻辑处理

											
										
										
											2024-05-11 20:06:02 +08:00
+									$Timer.stop()
 									stopRecord()
-												语音会话组件构思
											
										
										
											2024-05-08 16:56:02 +08:00
-												语音会话场景语音识别-录音处理逻辑完成
											
										
										
											2024-05-14 10:00:22 +08:00
+								## 定时处理录音并识别的逻辑
-												语音会话组件构思
											
										
										
											2024-05-08 16:56:02 +08:00
+								func _on_timer_timeout():
 									if effect.is_recording_active():
-												添加回复用音频（文本转语音生成的）
语音会话添加play_reply方法实现播放回复
探索录音方式
											
										
										
											2024-05-09 20:05:07 +08:00
+										var buf = capture.get_buffer(capture.get_frames_available())
-												语音会话场景语音识别-录音处理逻辑完成
											
										
										
											2024-05-14 10:00:22 +08:00
+										var soundDetected = false
-												添加回复用音频（文本转语音生成的）
语音会话添加play_reply方法实现播放回复
探索录音方式
											
										
										
											2024-05-09 20:05:07 +08:00
+										for vec in buf:
-												录音逻辑处理

											
										
										
											2024-05-11 20:06:02 +08:00
+											if vec.x > VolumeMin or vec.y > VolumeMin:
 												#print("Left channel volume = ", vec.x, ", Right volume = ", vec.y)
-												语音会话场景语音识别-录音处理逻辑完成
											
										
										
											2024-05-14 10:00:22 +08:00
+												soundDetected = true
 										# 检测到声音处理
 										if soundDetected:
 											hasVoice = true
 											novoiceTime = 0
 										# 未检测到声音处理
-												录音逻辑处理

											
										
										
											2024-05-11 20:06:02 +08:00
+										else:
 											novoiceTime += $Timer.wait_time
-												语音会话场景语音识别-录音处理逻辑完成
											
										
										
											2024-05-14 10:00:22 +08:00
+											if hasVoice and novoiceTime >= LongTimeNoVoice:
-												录音逻辑处理

											
										
										
											2024-05-11 20:06:02 +08:00
+												var rcd = effect.get_recording()
-												语音会话场景语音识别-录音处理逻辑完成
											
										
										
											2024-05-14 10:00:22 +08:00
+												if rcd == null:
 													return
 												print("音频时长: ", rcd.get_length())
 												restartRecord()
 												#await play_reply(rcd)
-												录音逻辑处理

											
										
										
											2024-05-11 20:06:02 +08:00
+												if rcd.get_length() > 5:
-												语音会话场景语音识别-录音处理逻辑完成
											
										
										
											2024-05-14 10:00:22 +08:00
+													await play_reply(reply_correct)
-												录音逻辑处理

											
										
										
											2024-05-11 20:06:02 +08:00
+													speech_recognition_successed.emit()
-												语音会话场景语音识别-录音处理逻辑完成
											
										
										
											2024-05-14 10:00:22 +08:00
+												#else:
 													#startRecord()
 												hasVoice = false
 											# 长时间无语音输入，重启录音
 											if novoiceTime >= LongTimeNoVoice:
 												print("长时间无声音，重启录音")
 												restartRecord()
 												novoiceTime = 0