extends Node
## Records microphone input, cuts it into utterances by silence detection and
## posts each utterance to a speech-verification HTTP endpoint until the
## server reports that the recording matches the expected text.

## Emitted once the server reports that a recording matches the target text.
signal speech_recognition_successed

## Recording effect (effect 0 on the "Record" bus).
var effect: AudioEffectRecord
## Capture effect (effect 1 on the "Record" bus), used to measure input volume.
var capture: AudioEffectCapture
## Text the spoken audio is verified against.
var targetText: String
## Minimum absolute sample amplitude that counts as sound.
const VolumeMin = 0.05
## Accumulated silence (sum of Timer.wait_time values) that ends an utterance.
const LongTimeNoVoice = 1
## True once sound has been detected in the current recording segment.
var hasVoice = false
## Silence accumulated since sound was last detected.
var novoiceTime = 0
## Whether an HTTP request is still in flight.
var hasUnfinishedRequest = false
## Recordings waiting to be sent for recognition.
var recordingQueue: Array = []

@onready var http_req = $HTTPRequest
const URL = "https://joylink.club/jlcloud/api/voice/verify?text=%s"
var ConfigParams = preload("res://config_params.gd")
## Sound played when recognition succeeds.
var reply_correct = preload("res://Assets/training_speech/correct.mp3")


func _ready():
	# Look up the "Record" bus; get_bus_index returns -1 when it is missing.
	var idx = AudioServer.get_bus_index("Record")
	if idx == -1:
		push_error("Audio bus \"Record\" not found; speech recording is unavailable.")
		return
	# The first effect on that bus is the AudioEffectRecord resource.
	effect = AudioServer.get_bus_effect(idx, 0)
	# The second effect captures raw frames so that input volume can be
	# inspected to decide whether anything is being said.
	capture = AudioServer.get_bus_effect(idx, 1)


## Starts recording if it is not already active.
func startRecord():
	print("启动录音")
	if not effect.is_recording_active():
		effect.set_recording_active(true)


## Stops recording if it is active.
func stopRecord():
	if effect.is_recording_active():
		effect.set_recording_active(false)


## Restarts recording by toggling the effect off and on again.
func restartRecord():
	effect.set_recording_active(false)
	effect.set_recording_active(true)


## Plays a reply sound and returns once playback has finished.
## Coroutine: callers may await it. Recording is stopped before playback and
## is not restarted here. Does nothing when reply is null.
func play_reply(reply):
	if reply == null:
		return
	stopRecord()
	assert(reply is AudioStream, "reply不是音频资源")
	# Make sure the stream does not loop, otherwise `finished` never fires.
	if reply is AudioStreamMP3:
		reply.loop = false
	if reply is AudioStreamOggVorbis:
		reply.loop = false
	if reply is AudioStreamWAV:
		reply.loop_mode = AudioStreamWAV.LOOP_DISABLED
	var player = $AudioStreamPlayer
	player.stream = reply
	player.play()
	await player.finished


## Records and verifies speech against `text`.
## Coroutine: returns after speech_recognition_successed has been emitted;
## callers that care about completion must await it.
func speech_record_check(text: String):
	assert(text != null and not text.is_empty(), "待识别的结果text不能为空")
	print("录音采样频率: ", AudioServer.get_mix_rate())
	targetText = text
	startRecord()
	$Timer.start()
	await speech_recognition_successed
	print("识别成功,结束")
	_reset_record_state()


## Resets all recording/recognition state and stops the recorder and timer.
func _reset_record_state():
	targetText = ""
	hasVoice = false
	novoiceTime = 0
	# Drop recordings that were queued for the text that has just been cleared.
	recordingQueue.clear()
	stopRecord()
	$Timer.stop()


## Timer callback: detects the end of an utterance and sends it for recognition.
func _on_timer_timeout():
	if not effect.is_recording_active():
		return

	var buf = capture.get_buffer(capture.get_frames_available())
	var soundDetected = false
	for vec in buf:
		# Samples are signed, so compare their magnitude with the threshold.
		if absf(vec.x) > VolumeMin or absf(vec.y) > VolumeMin:
			soundDetected = true
			break

	# Sound detected: remember it and reset the silence counter.
	if soundDetected:
		hasVoice = true
		novoiceTime = 0
		return

	# No sound detected: accumulate silence until the threshold is reached.
	novoiceTime += $Timer.wait_time
	if novoiceTime < LongTimeNoVoice:
		return

	if hasVoice:
		# Speech followed by enough silence: the utterance is complete.
		var rcd = effect.get_recording()
		if rcd == null:
			return
		print("音频时长: ", rcd.get_length())
		restartRecord()
		_request_speech_recognition(rcd)
		hasVoice = false
	else:
		# Nothing but silence for a long time: restart the recording.
		print("长时间无声音,重启录音")
		restartRecord()
	novoiceTime = 0


## Sends a recording to the verification endpoint, or queues it when another
## request is still in flight.
func _request_speech_recognition(recording: AudioStreamWAV):
	if hasUnfinishedRequest:
		recordingQueue.append(recording)
		return
	# The text goes into the query string and therefore must be percent-encoded.
	var url = URL % targetText.uri_encode()
	var headers = PackedStringArray([
		"Content-Type: audio/wav",
		"X-Token: %s" % ConfigParams.Token,
	])
	var body = _build_wav(recording)
	hasUnfinishedRequest = true
	var error = http_req.request_raw(url, headers, HTTPClient.METHOD_POST, body)
	if error != OK:
		# No completion signal follows a request that failed to start, so the
		# flag has to be released here or later recordings would queue forever.
		hasUnfinishedRequest = false
		push_error("在HTTP请求语音识别时发生了一个错误。", error)


## HTTPRequest.request_completed callback for the verification endpoint.
func _on_http_request_request_completed(result, response_code, headers, body):
	hasUnfinishedRequest = false

	var json = null
	if result == HTTPRequest.RESULT_SUCCESS:
		json = JSON.parse_string(body.get_string_from_utf8())
	else:
		push_error("Speech recognition request failed, result=%s" % result)
	print("语音识别结果: ", json)

	# The response is only trusted when it is a JSON object with a "data" object.
	var matched = false
	if json is Dictionary:
		var data = json.get("data")
		matched = data is Dictionary and data.get("match") == true

	# Verified: discard pending recordings, play the reply and finish.
	if matched:
		recordingQueue.clear()
		await play_reply(reply_correct)
		speech_recognition_successed.emit()
		return

	# Not verified: continue with the most recent queued recording, if any.
	if recordingQueue.size() > 0:
		var next = recordingQueue.pop_back()
		_request_speech_recognition(next)


## Builds the bytes of a WAV file (RIFF header + sample data) for a recording.
## Only 8-bit and 16-bit PCM streams are supported; for any other format an
## error is reported and only the header is returned.
func _build_wav(recording: AudioStreamWAV) -> PackedByteArray:
	var stream_data = recording.get_data()
	var data_bytes = stream_data.size()
	# Subchunk2Size = size of the sample data in bytes.
	var sub_chunk_2_size = data_bytes

	# Format code
	# 1: PCM format (for 8 or 16 bit)
	# 3: IEEE float format
	var format_code = 3 if (recording.format == AudioStreamWAV.FORMAT_IMA_ADPCM) else 1
	var n_channels = 2 if recording.stereo else 1
	# get_mix_rate() returns a float; the header field is an integer.
	var sample_rate = int(AudioServer.get_mix_rate())

	var byte_pr_sample = 0
	match recording.format:
		AudioStreamWAV.FORMAT_8_BITS:
			byte_pr_sample = 1
		AudioStreamWAV.FORMAT_16_BITS:
			byte_pr_sample = 2
		AudioStreamWAV.FORMAT_IMA_ADPCM:
			byte_pr_sample = 4

	var wav := PackedByteArray()
	# WAV header
	store_string(wav, "RIFF") # ChunkID
	# ChunkSize = 36 + Subchunk2Size (file size minus the 8 bytes taken by
	# ChunkID and ChunkSize themselves).
	store_32(wav, sub_chunk_2_size + 36)
	store_string(wav, "WAVE") # Format
	store_string(wav, "fmt ") # Subchunk1ID
	store_32(wav, 16) # Subchunk1Size = 16
	store_16(wav, format_code) # AudioFormat
	store_16(wav, n_channels) # Number of channels
	store_32(wav, sample_rate) # SampleRate
	store_32(wav, sample_rate * n_channels * byte_pr_sample) # ByteRate
	store_16(wav, n_channels * byte_pr_sample) # BlockAlign = NumChannels * BytePrSample
	store_16(wav, byte_pr_sample * 8) # BitsPerSample
	store_string(wav, "data") # Subchunk2ID
	store_32(wav, sub_chunk_2_size) # Subchunk2Size

	# Sample data
	match recording.format:
		AudioStreamWAV.FORMAT_8_BITS:
			# Each byte is offset by 128 and wrapped back into the byte range.
			for i in data_bytes:
				wav.append((stream_data[i] + 128) & 0xFF)
		AudioStreamWAV.FORMAT_16_BITS:
			# The stream bytes are already in the low-byte-first order that
			# decode_uint16 + store_16 would reproduce, so copy them directly.
			wav.append_array(stream_data)
		_:
			push_error("构建wav错误,不支持的音频格式")
	return wav


## Appends the UTF-8 bytes of `s` to `buffer`.
func store_string(buffer: PackedByteArray, s: String):
	buffer.append_array(s.to_utf8_buffer())


## Appends the low 16 bits of `p_dest` to `buffer`, low byte first unless
## `big_endian` is true.
func store_16(buffer: PackedByteArray, p_dest: int, big_endian: bool = false):
	var low = p_dest & 0xFF
	var high = (p_dest >> 8) & 0xFF
	if big_endian:
		buffer.append(high)
		buffer.append(low)
	else:
		buffer.append(low)
		buffer.append(high)


## Appends the low 32 bits of `p_dest` to `buffer` as two 16-bit halves, low
## half first unless `big_endian` is true.
func store_32(buffer: PackedByteArray, p_dest: int, big_endian: bool = false):
	var low = p_dest & 0xFFFF
	var high = p_dest >> 16
	if big_endian:
		store_16(buffer, high, big_endian)
		store_16(buffer, low, big_endian)
	else:
		store_16(buffer, low, big_endian)
		store_16(buffer, high, big_endian)


## Combines a low byte `v1` and a high byte `v2` into one unsigned 16-bit value.
func decode_uint16(v1, v2) -> int:
	return v1 | (v2 << 8)