Signed-off-by: sairate <sairate@sina.cn>

This commit is contained in:
sairate 2025-05-15 17:34:34 +08:00
commit 8d96dc2307
14 changed files with 302 additions and 0 deletions

3
.idea/.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
# 默认忽略的文件
/shelf/
/workspace.xml

10
.idea/ai_api.iml Normal file
View File

@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$">
<excludeFolder url="file://$MODULE_DIR$/.venv" />
</content>
<orderEntry type="jdk" jdkName="Python 3.10 (ai_api)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

View File

@ -0,0 +1,14 @@
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredPackages">
<value>
<list size="1">
<item index="0" class="java.lang.String" itemvalue="numpy" />
</list>
</value>
</option>
</inspection_tool>
</profile>
</component>

View File

@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

View File

@ -0,0 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="MaterialThemeProjectNewConfig">
<option name="metadata">
<MTProjectMetadataState>
<option name="migrated" value="true" />
<option name="pristineConfig" value="false" />
<option name="userId" value="21c1c7ee:193388d497c:-7ff9" />
</MTProjectMetadataState>
</option>
</component>
</project>

7
.idea/misc.xml Normal file
View File

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Black">
<option name="sdkName" value="Python 3.13" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (ai_api)" project-jdk-type="Python SDK" />
</project>

8
.idea/modules.xml Normal file
View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/ai_api.iml" filepath="$PROJECT_DIR$/.idea/ai_api.iml" />
</modules>
</component>
</project>

207
app.py Normal file
View File

@ -0,0 +1,207 @@
import base64
import os
import time
import wave

import pyaudio
import pygame
import requests
import webrtcvad
from openai import OpenAI
# --------------------- Configuration ---------------------
# Credentials are read from the environment so that secrets never live in
# version control. Export BAIDU_API_KEY, BAIDU_SECRET_KEY and
# DEEPSEEK_API_KEY before starting the assistant; a variable that is not set
# yields an empty string.
# NOTE(review): the keys that used to be hard-coded here were committed to the
# repository and should be treated as leaked -- rotate them.
# Baidu AI Cloud credentials.
BAIDU_API_KEY = os.environ.get("BAIDU_API_KEY", "")
BAIDU_SECRET_KEY = os.environ.get("BAIDU_SECRET_KEY", "")
# DeepSeek API key.
DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY", "")
# Recording parameters.
# NOTE(review): CHUNK, FORMAT and CHANNELS are not referenced by the functions
# visible in this file (record_audio_vad uses its own literal values) --
# confirm whether they are still needed.
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
# Sample rate in Hz; sent as "rate" in the speech-recognition payload.
RATE = 16000
# Device ID (any value may be chosen); sent as "cuid" to the Baidu APIs.
DEVICE_ID = "raspberry_pi"
# --------------------- Utility functions ---------------------
def get_baidu_token():
    """Fetch an OAuth access token for the Baidu AI Cloud APIs.

    Returns:
        The ``access_token`` value from the token endpoint, or ``None`` when
        the request fails or times out, the status is not 200, or the body is
        not valid JSON.
    """
    url = "https://aip.baidubce.com/oauth/2.0/token"
    params = {
        "grant_type": "client_credentials",
        "client_id": BAIDU_API_KEY,
        "client_secret": BAIDU_SECRET_KEY,
    }
    try:
        # Without a timeout a dead network would hang startup indefinitely.
        response = requests.post(url, data=params, timeout=10)
    except requests.RequestException as exc:
        print("获取百度 Token 失败", exc)
        return None
    if response.status_code == 200:
        try:
            return response.json().get("access_token")
        except ValueError as exc:
            # The body was not JSON (e.g. an HTML error page from a proxy).
            print("获取百度 Token 失败", exc)
            return None
    print("获取百度 Token 失败")
    return None
def record_audio_vad(filename, max_duration=10):
    """Record from the default input device until silence, then save a WAV file.

    Audio is captured as 16 kHz, mono, 16-bit PCM in 320-sample frames, and
    every frame is classified by WebRTC VAD. Recording stops once more than
    ``max_silence`` consecutive frames contain no speech, or once more than
    ``max_duration`` seconds' worth of frames have been captured.

    Args:
        filename: Path of the WAV file to write.
        max_duration: Upper bound on the recording length, in seconds.
    """
    sample_rate = 16000
    # 320 samples at 16 kHz is a 20 ms frame (WebRTC VAD accepts 10/20/30 ms).
    frame_samples = 320
    # Consecutive non-speech frames tolerated: 150 frames * 20 ms = 3 seconds.
    max_silence = 150
    max_frames = int((sample_rate / frame_samples) * max_duration)

    # Aggressiveness 0-3; higher values filter out non-speech more strictly.
    vad = webrtcvad.Vad(1)
    p = pyaudio.PyAudio()
    frames = []
    try:
        # Query the sample width before terminate() rather than on a
        # terminated PyAudio instance.
        sample_width = p.get_sample_size(pyaudio.paInt16)
        stream = p.open(
            format=pyaudio.paInt16,
            channels=1,
            rate=sample_rate,
            input=True,
            frames_per_buffer=frame_samples,
        )
        try:
            print("开始录音(自动检测静音停止)...")
            silence_count = 0
            while True:
                data = stream.read(frame_samples, exception_on_overflow=False)
                frames.append(data)
                # Reset the silence counter whenever speech is detected.
                is_speech = vad.is_speech(data, sample_rate)
                silence_count = 0 if is_speech else silence_count + 1
                if silence_count > max_silence:
                    print("检测到静音,录音结束。")
                    break
                # Hard cap on the recording length.
                if len(frames) > max_frames:
                    print("达到最大录音时长,录音结束。")
                    break
        finally:
            # Release the input stream even if reading or VAD raised.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    # Save the captured frames as a WAV file; the context manager closes it
    # even if writing fails.
    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(1)
        wf.setsampwidth(sample_width)
        wf.setframerate(sample_rate)
        wf.writeframes(b''.join(frames))
def speech_recognition(audio_file, token):
    """Transcribe an audio file with the Baidu speech-recognition API.

    Args:
        audio_file: Path of the recording to upload.
        token: Baidu access token to authenticate the request.

    Returns:
        The first recognition candidate (an empty string if the API reports
        success without any candidate), or ``None`` when the request fails,
        the response is not JSON, or the API reports an error.
    """
    with open(audio_file, "rb") as f:
        speech_data = f.read()
    payload = {
        "format": "wav",
        "rate": RATE,
        "channel": 1,
        "token": token,
        "cuid": DEVICE_ID,
        # Length of the raw bytes, not of the base64 text.
        "len": len(speech_data),
        "speech": base64.b64encode(speech_data).decode('utf-8'),
        # Hot words intended to improve recognition accuracy.
        "word_list": ["小智", "小志", "小至"],
    }
    # NOTE(review): plain HTTP sends the access token and the audio
    # unencrypted -- confirm the endpoint's HTTPS support and switch.
    url = "http://vop.baidu.com/server_api"
    headers = {'Content-Type': 'application/json'}
    try:
        response = requests.post(url, json=payload, headers=headers, timeout=15)
        result = response.json()
    except (requests.RequestException, ValueError) as exc:
        # Network failure, timeout, or a non-JSON body.
        print("语音识别错误:", exc)
        return None
    if result.get("err_no") == 0:
        # Guard against a missing or empty candidate list.
        candidates = result.get("result") or [""]
        return candidates[0]
    print("语音识别错误:", result.get("err_msg"))
    return None
def wake_word_detected(text):
    """Return whether *text* contains any accepted wake-word variant.

    Several homophone spellings are accepted because speech recognition may
    transcribe the wake word differently.

    Args:
        text: Recognised text; may be ``None`` or empty when recognition
            produced nothing.

    Returns:
        ``True`` if any wake word occurs in *text*, otherwise ``False``.
    """
    if not text:
        # Nothing was recognised, so there is nothing to match against.
        return False
    wake_words = ("小智", "小志", "小知", "晓智")
    return any(word in text for word in wake_words)
def deepseek_conversation(user_text):
    """Send *user_text* to the DeepSeek chat model and return its reply.

    The reply is the message content of the first choice. If any step
    raises, the exception is printed and a fixed apology string is returned
    instead.
    """
    system_prompt = "你是一名叫小智的助手回复不需要使用markdown格式请直接以文本形式回复。"
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_text},
    ]
    try:
        api = OpenAI(api_key=DEEPSEEK_API_KEY, base_url="https://api.deepseek.com")
        completion = api.chat.completions.create(
            model="deepseek-chat",
            messages=conversation,
            stream=False,
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as err:
        print("DeepSeek API 调用异常:", err)
        return "抱歉,我无法获取答案。"
def text_to_speech(text, token, output_file="answer.mp3"):
    """Synthesise *text* with the Baidu text-to-speech API and save the audio.

    Args:
        text: Text to speak; only the first ``MAX_CHAR`` characters are sent.
        token: Baidu access token to authenticate the request.
        output_file: Path the returned audio is written to.

    Returns:
        ``output_file`` on success, or ``None`` when the request fails or the
        response is not MP3 audio.
    """
    MAX_CHAR = 1024
    params = {
        # Slicing is a no-op for shorter text, so no length check is needed.
        "tex": text[:MAX_CHAR],
        "tok": token,
        "cuid": DEVICE_ID,
        "ctp": 1,
        "lan": "zh",
    }
    # NOTE(review): plain HTTP sends the access token unencrypted -- confirm
    # the endpoint's HTTPS support and switch.
    url = "http://tsn.baidu.com/text2audio"
    try:
        response = requests.post(url, data=params, timeout=30)
    except requests.RequestException as exc:
        print("语音合成错误:", exc)
        return None
    # Compare only the media type so a parameter suffix such as "; charset=..."
    # does not turn a successful response into a failure.
    content_type = response.headers.get('Content-Type') or ""
    if content_type.split(";")[0].strip() == "audio/mp3":
        with open(output_file, "wb") as f:
            f.write(response.content)
        return output_file
    print("语音合成错误:", response.text)
    return None
def play_audio(file_path):
    """Play an audio file with pygame's mixer, blocking until it finishes."""
    mixer = pygame.mixer
    mixer.init()
    music = mixer.music
    music.load(file_path)
    music.play()
    # Poll every 0.1 s until the mixer reports playback is no longer busy.
    while True:
        if not music.get_busy():
            break
        time.sleep(0.1)
# --------------------- Main program ---------------------
def main():
    """Run the voice-assistant loop: wait for the wake word, then answer.

    A Baidu access token is fetched once at startup; the function returns
    immediately if that fails. Each loop iteration records a short clip,
    transcribes it, and, when a wake word is detected, plays an
    acknowledgement, records the user's question, asks DeepSeek for an answer
    and plays the synthesised reply.
    """
    print("启动智能助手小智...")
    token = get_baidu_token()
    if not token:
        return
    while True:
        print("等待唤醒词 '小智' ...")
        record_audio_vad("wake.wav", max_duration=3)
        wake_text = speech_recognition("wake.wav", token)
        # Use the shared helper so every accepted wake-word variant triggers
        # the assistant; the truthiness check covers a failed recognition.
        if wake_text and wake_word_detected(wake_text):
            print("唤醒成功,小智回应:好的,小智在。")
            # Speak the acknowledgement before listening for the question.
            response_audio = text_to_speech(
                "好的,小智在。请说出你的问题。", token, output_file="wakeup_response.mp3"
            )
            if response_audio:
                play_audio(response_audio)
            print("请说出您的问题:")
            record_audio_vad(filename="query.wav")
            user_query = speech_recognition("query.wav", token)
            if user_query:
                print("用户说:", user_query)
                # Ask the DeepSeek chat model for an answer.
                answer = deepseek_conversation(user_query)
                print("小智回答:", answer)
                # Convert the answer to speech with Baidu TTS and play it.
                audio_file = text_to_speech(answer, token, output_file="answer.mp3")
                if audio_file:
                    play_audio(audio_file)
            else:
                print("未能识别您的问题,请重试。")
        # Short pause before listening for the wake word again.
        time.sleep(1)


if __name__ == '__main__':
    main()

BIN
requirements.txt Normal file

Binary file not shown.

23
test.py Normal file
View File

@ -0,0 +1,23 @@
import os

# Credentials are read from the environment so that secrets never live in
# version control; a variable that is not set yields an empty string.
# NOTE(review): the keys that used to be hard-coded here were committed to the
# repository and should be treated as leaked -- rotate them.
BAIDU_API_KEY = os.environ.get("BAIDU_API_KEY", "")
BAIDU_SECRET_KEY = os.environ.get("BAIDU_SECRET_KEY", "")
# Unique device identifier (e.g. the board's device ID or MAC address).
DEVICE_ID = "your_device_id"
# DeepSeek API configuration.
DEEPSEEK_API_URL = "https://api.deepseek.com"
DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY", "")

# Example request (the key is taken from the environment, never pasted inline):
#
#   curl https://api.deepseek.com/chat/completions \
#     -H "Content-Type: application/json" \
#     -H "Authorization: Bearer $DEEPSEEK_API_KEY" \
#     -d '{
#           "model": "deepseek-chat",
#           "messages": [
#             {"role": "system", "content": "You are a helpful assistant."},
#             {"role": "user", "content": "Hello!"}
#           ],
#           "stream": false
#         }'

12
流程图.md Normal file
View File

@ -0,0 +1,12 @@
```mermaid
graph TD
A[用户说话] --> B["语音录音模块(pyaudio + webrtcvad)"]
B --> C["语音识别模块(百度语音识别 API)"]
C --> D["语义理解模块(DeepSeek 大模型)"]
D --> E["语音合成模块(百度语音合成 API)"]
E --> F["音频播放模块(pygame)"]
F --> G[返回语音回答]
G --> H{是否继续交互}
H -- 是 --> B
H -- 否 --> I[结束程序]
```

BIN
流程图.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 103 KiB

Binary file not shown.

Binary file not shown.