commit 8d96dc2307a7ab9b83c614ed87749a2cbcc5ee51 Author: sairate Date: Thu May 15 17:34:34 2025 +0800 Signed-off-by: sairate diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..359bb53 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,3 @@ +# 默认忽略的文件 +/shelf/ +/workspace.xml diff --git a/.idea/ai_api.iml b/.idea/ai_api.iml new file mode 100644 index 0000000..18053de --- /dev/null +++ b/.idea/ai_api.iml @@ -0,0 +1,10 @@ + + + + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..25bde2c --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,14 @@ + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/material_theme_project_new.xml b/.idea/material_theme_project_new.xml new file mode 100644 index 0000000..ef4c87e --- /dev/null +++ b/.idea/material_theme_project_new.xml @@ -0,0 +1,12 @@ + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..73ebcc9 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..3113677 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/app.py b/app.py new file mode 100644 index 0000000..0bd2c70 --- /dev/null +++ b/app.py @@ -0,0 +1,207 @@ +import requests +import base64 +import time +import wave +import pyaudio +import pygame +import webrtcvad +from openai import OpenAI + +# --------------------- 配置参数 --------------------- +# 百度智能云 API 配置(请替换为您的 API Key 和 
# Secret Key)
#
# SECURITY: the API keys that used to be hard-coded at this point were
# committed to version control and must be treated as leaked -- revoke and
# re-issue them. Credentials are now read from environment variables with the
# same names as the constants below.
import os

BAIDU_API_KEY = os.environ.get("BAIDU_API_KEY", "")
BAIDU_SECRET_KEY = os.environ.get("BAIDU_SECRET_KEY", "")

# DeepSeek API configuration
DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY", "")

# Recording parameters
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000

# Device ID sent to Baidu as "cuid" (any stable string works)
DEVICE_ID = "raspberry_pi"

# Seconds to wait for any HTTP response before giving up.
_REQUEST_TIMEOUT = 10

# Samples per VAD frame: 320 samples at 16 kHz is a 20 ms frame.
_VAD_FRAME_SAMPLES = 320

# Spellings the recogniser commonly returns for the wake word.
_WAKE_WORDS = ("小智", "小志", "小知", "晓智")

# Spoken when the language model cannot be reached or returns nothing.
_FALLBACK_ANSWER = "抱歉,我无法获取答案。"


# --------------------- Helper functions ---------------------
def get_baidu_token():
    """Fetch an OAuth access token for the Baidu AI Cloud APIs.

    Returns:
        The access token string, or None when the credentials are missing,
        the request fails, or the response carries no token.
    """
    if not (BAIDU_API_KEY and BAIDU_SECRET_KEY):
        print("获取百度 Token 失败: 未设置环境变量 BAIDU_API_KEY / BAIDU_SECRET_KEY")
        return None

    url = "https://aip.baidubce.com/oauth/2.0/token"
    params = {
        "grant_type": "client_credentials",
        "client_id": BAIDU_API_KEY,
        "client_secret": BAIDU_SECRET_KEY,
    }
    try:
        response = requests.post(url, data=params, timeout=_REQUEST_TIMEOUT)
        response.raise_for_status()
        token = response.json().get("access_token")
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers a body that is not valid JSON.
        print("获取百度 Token 失败:", exc)
        return None

    if not token:
        print("获取百度 Token 失败")
        return None
    return token


def record_audio_vad(filename, max_duration=10, max_silence_seconds=3.0):
    """Record from the default microphone until silence or a time limit.

    Audio is captured as 16-bit mono PCM at RATE Hz in 20 ms frames and each
    frame is classified by WebRTC VAD. Recording stops once more than
    ``max_silence_seconds`` of consecutive non-speech frames have been seen,
    or once more than ``max_duration`` seconds have been captured. The result
    is written to ``filename`` as a WAV file.

    Args:
        filename: Path of the WAV file to write.
        max_duration: Upper bound on the recording length, in seconds.
        max_silence_seconds: Length of continuous silence that ends the
            recording. The default (3 s) equals the previous fixed limit of
            150 frames.
    """
    vad = webrtcvad.Vad(1)  # aggressiveness 0-3; higher filters more non-speech

    frames_per_second = RATE / _VAD_FRAME_SAMPLES
    max_frames = int(frames_per_second * max_duration)
    max_silent_frames = int(frames_per_second * max_silence_seconds)
    # Queried up front so nothing depends on PortAudio after terminate().
    sample_width = pyaudio.get_sample_size(FORMAT)

    frames = []
    silent_frames = 0
    audio = pyaudio.PyAudio()
    try:
        stream = audio.open(
            format=FORMAT,
            channels=CHANNELS,
            rate=RATE,
            input=True,
            frames_per_buffer=_VAD_FRAME_SAMPLES,
        )
        try:
            print("开始录音(自动检测静音停止)...")
            while True:
                data = stream.read(_VAD_FRAME_SAMPLES, exception_on_overflow=False)
                frames.append(data)

                if vad.is_speech(data, RATE):
                    silent_frames = 0
                else:
                    silent_frames += 1

                if silent_frames > max_silent_frames:
                    print("检测到静音,录音结束。")
                    break

                if len(frames) > max_frames:
                    print("达到最大录音时长,录音结束。")
                    break
        finally:
            # Release the input device even if reading or the VAD raised.
            stream.stop_stream()
            stream.close()
    finally:
        audio.terminate()

    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))


def speech_recognition(audio_file, token):
    """Transcribe a WAV file with the Baidu speech recognition API.

    Args:
        audio_file: Path of the WAV file to upload.
        token: Baidu access token as returned by get_baidu_token().

    Returns:
        The first recognition candidate (possibly an empty string), or None
        when the request fails or the API reports an error.
    """
    with open(audio_file, "rb") as f:
        speech_data = f.read()

    payload = {
        "format": "wav",
        "rate": RATE,
        "channel": 1,
        "token": token,
        "cuid": DEVICE_ID,
        "len": len(speech_data),  # size of the raw audio, not of the base64 text
        "speech": base64.b64encode(speech_data).decode('utf-8'),
        "word_list": ["小智", "小志", "小至"],  # hot words for the wake word
    }
    # HTTPS so the access token is not sent in clear text.
    url = "https://vop.baidu.com/server_api"
    headers = {'Content-Type': 'application/json'}
    try:
        response = requests.post(
            url, json=payload, headers=headers, timeout=_REQUEST_TIMEOUT
        )
        result = response.json()
    except (requests.RequestException, ValueError) as exc:
        print("语音识别错误:", exc)
        return None

    if result.get("err_no") == 0:
        # "result" may be absent or an empty list; fall back to "".
        candidates = result.get("result") or [""]
        return candidates[0]
    print("语音识别错误:", result.get("err_msg"))
    return None


def wake_word_detected(text):
    """Return True when ``text`` contains any known wake-word spelling.

    ``None`` or an empty string (no recognition result) yields False.
    """
    if not text:
        return False
    return any(word in text for word in _WAKE_WORDS)


def deepseek_conversation(user_text):
    """Ask the DeepSeek chat model to answer ``user_text``.

    Returns:
        The model's reply as plain text, or a fixed apology string when the
        call fails or the reply is empty.
    """
    try:
        client = OpenAI(api_key=DEEPSEEK_API_KEY, base_url="https://api.deepseek.com")
        response = client.chat.completions.create(
            model="deepseek-chat",
            messages=[
                {"role": "system", "content": "你是一名叫小智的助手,回复不需要使用markdown格式,请直接以文本形式回复。"},
                {"role": "user", "content": user_text},
            ],
            stream=False,
        )
        # The content can be None; the caller always needs text to speak.
        return response.choices[0].message.content or _FALLBACK_ANSWER
    except Exception as e:
        # Boundary with a remote service: report and degrade to the fallback.
        print("DeepSeek API 调用异常:", e)
        return _FALLBACK_ANSWER


def text_to_speech(text, token, output_file="answer.mp3"):
    """Synthesise ``text`` to an MP3 file with the Baidu text-to-speech API.

    Args:
        text: Text to speak; only the first 1024 characters are sent.
        token: Baidu access token as returned by get_baidu_token().
        output_file: Path of the MP3 file to write.

    Returns:
        ``output_file`` on success, or None when ``text`` is empty, the
        request fails, or the API answers with an error document.
    """
    MAX_CHAR = 1024
    text = (text or "")[:MAX_CHAR]
    if not text:
        print("语音合成错误:", "empty text")
        return None

    params = {
        "tex": text,
        "tok": token,
        "cuid": DEVICE_ID,
        "ctp": 1,
        "lan": "zh",
    }
    # HTTPS so the access token is not sent in clear text.
    url = "https://tsn.baidu.com/text2audio"
    try:
        response = requests.post(url, data=params, timeout=_REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print("语音合成错误:", exc)
        return None

    # Success is signalled by an audio content type; errors come back as
    # JSON. Prefix match tolerates parameters such as "; charset=...".
    if response.headers.get('Content-Type', '').startswith("audio/"):
        with open(output_file, "wb") as f:
            f.write(response.content)
        return output_file
    print("语音合成错误:", response.text)
    return None


def play_audio(file_path):
    """Play an audio file through pygame and block until it finishes."""
    pygame.mixer.init()
    try:
        pygame.mixer.music.load(file_path)
        pygame.mixer.music.play()
        while pygame.mixer.music.get_busy():
            time.sleep(0.1)
    finally:
        # Shut the mixer down so the audio device and the file handle are
        # released; the same file name is overwritten by the next answer.
        pygame.mixer.quit()


#
# --------------------- Main program ---------------------
def _answer_one_query(token):
    """Acknowledge the wake word, then record, answer and speak one query."""
    print("唤醒成功,小智回应:好的,小智在。")

    # Spoken acknowledgement so the user knows when to start talking.
    response_audio = text_to_speech(
        "好的,小智在。请说出你的问题。", token, output_file="wakeup_response.mp3"
    )
    if response_audio:
        play_audio(response_audio)

    print("请说出您的问题:")
    record_audio_vad(filename="query.wav")
    user_query = speech_recognition("query.wav", token)
    if not user_query:
        print("未能识别您的问题,请重试。")
        return

    print("用户说:", user_query)
    # Answer comes from the DeepSeek chat model.
    answer = deepseek_conversation(user_query)
    print("小智回答:", answer)
    # Convert the answer to speech with Baidu TTS and play it.
    audio_file = text_to_speech(answer, token, output_file="answer.mp3")
    if audio_file:
        play_audio(audio_file)


def main():
    """Run the assistant: wait for the wake word, then answer one question.

    Loops until interrupted. Returns immediately when no Baidu access token
    can be obtained, since both recognition and synthesis depend on it.
    """
    print("启动智能助手小智...")
    token = get_baidu_token()
    if not token:
        return

    try:
        while True:
            print("等待唤醒词 '小智' ...")
            record_audio_vad("wake.wav", max_duration=3)
            wake_text = speech_recognition("wake.wav", token)

            # Use the shared matcher so common mis-transcriptions of the
            # wake word are accepted too.
            if wake_text and wake_word_detected(wake_text):
                _answer_one_query(token)

            time.sleep(1)
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop; exit without a traceback.
        print("小智已退出。")


if __name__ == '__main__':
    main()


# ---------------------------------------------------------------------------
# Remainder of the original patch (other files), retained as comments.
# SECURITY: test.py also contained committed API keys; they are redacted
# here and must be revoked.
#
# diff --git a/requirements.txt b/requirements.txt
# new file mode 100644
# index 0000000..84e8cf1
# Binary files /dev/null and b/requirements.txt differ
# diff --git a/test.py b/test.py
# new file mode 100644
# index 0000000..4db0975
# --- /dev/null
# +++ b/test.py
# @@ -0,0 +1,23 @@
# +
# +BAIDU_API_KEY = "<redacted>"
# +BAIDU_SECRET_KEY = "<redacted>"
# +# Unique device identifier (e.g. the board's device ID or MAC address)
# +DEVICE_ID = "your_device_id"
# +
# +# DeepSeek API configuration (replace with the real endpoint and parameters)
# +DEEPSEEK_API_URL = "https://api.deepseek.com"
# +DEEPSEEK_API_KEY = "<redacted>"
# +
# +"""
# +curl https://api.deepseek.com/chat/completions \
# +  -H "Content-Type: application/json" \
# +  -H "Authorization: Bearer " \
# +  -d '{
# +        "model": "deepseek-chat",
# +        "messages": [
# +          {"role": "system", "content": "You are a helpful assistant."},
# +          {"role": "user", "content": "Hello!"}
# +        ],
# +        "stream": false
# +      }'
# +"""
# \ No newline at end of file
# diff --git a/流程图.md b/流程图.md new file mode 100644 index
0000000..72f6246 --- /dev/null +++ b/流程图.md @@ -0,0 +1,12 @@ +```mermaid +graph TD + A[用户说话] --> B[语音录音模块pyaudio和webrtcvad] + B --> C[语音识别模块百度语音识别API] + C --> D[语义理解模块DeepSeek大模型] + D --> E[语音合成模块百度语音合成API] + E --> F[音频播放模块pygame] + F --> G[返回语音回答] + G --> H{是否继续交互} + H -- 是 --> B + H -- 否 --> I[结束程序] +``` \ No newline at end of file diff --git a/流程图.png b/流程图.png new file mode 100644 index 0000000..fc4af58 Binary files /dev/null and b/流程图.png differ diff --git a/附件2:作品说明文档.doc b/附件2:作品说明文档.doc new file mode 100644 index 0000000..eb084dc Binary files /dev/null and b/附件2:作品说明文档.doc differ diff --git a/附件4:参赛选手承诺书.docx b/附件4:参赛选手承诺书.docx new file mode 100644 index 0000000..60469f1 Binary files /dev/null and b/附件4:参赛选手承诺书.docx differ