# xiaozhiAI/app.py

import requests
import base64
import time
import wave
import pyaudio
import pygame
import webrtcvad
from openai import OpenAI
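# Overview: a wake-word voice assistant ("小智" / XiaoZhi).
# Pipeline: record audio with WebRTC VAD -> Baidu speech recognition -> check for the
# wake word -> send the user's question to DeepSeek -> Baidu text-to-speech -> play the
# answer with pygame. Requires a working microphone and speaker plus the packages
# imported above (requests, pyaudio, pygame, webrtcvad, openai).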
# --------------------- Configuration ---------------------
# Baidu AI Cloud API credentials (replace with your own API Key and Secret Key)
BAIDU_API_KEY = "4icZSO1OlMCU2ZiRMhgGCXFu"
BAIDU_SECRET_KEY = "6wJldJ08m1jIX9hb0ULcJrIJ9D1OJW3c"
# DeepSeek API credentials (replace with your own DeepSeek API Key)
DEEPSEEK_API_KEY = "sk-f15b44b6b3344cdd820e59acebce9d2c"
# Recording parameters
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
# Device ID (can be any string)
DEVICE_ID = "raspberry_pi"
# --------------------- Utility functions ---------------------
def get_baidu_token():
"""获取百度智能云 API 访问令牌"""
url = "https://aip.baidubce.com/oauth/2.0/token"
params = {
"grant_type": "client_credentials",
"client_id": BAIDU_API_KEY,
"client_secret": BAIDU_SECRET_KEY,
}
response = requests.post(url, data=params)
if response.status_code == 200:
return response.json().get("access_token")
print("获取百度 Token 失败")
return None
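# Note on the recorder below: webrtcvad.Vad.is_speech() only accepts 16-bit mono PCM
# at 8/16/32/48 kHz, delivered in frames of 10, 20, or 30 ms. The stream is therefore
# opened at 16 kHz and read in 320-sample chunks, i.e. one 20 ms frame per VAD call.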
def record_audio_vad(filename, max_duration=10):
"""使用 WebRTC VAD 语音活动检测实现自动录音"""
vad = webrtcvad.Vad(1) # 设置 VAD 灵敏度0-3越大越严格
p = pyaudio.PyAudio()
# 🛠 **确保录音格式符合 WebRTC VAD 要求**
stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=320)
print("开始录音(自动检测静音停止)...")
frames = []
silence_count = 0
max_silence = 150 # 允许最多 1 秒静音30 帧)
while True:
data = stream.read(320, exception_on_overflow=False) # **🛠 WebRTC VAD 需要 10ms 帧大小**
frames.append(data)
# 检查是否有语音
is_speech = vad.is_speech(data, 16000)
silence_count = 0 if is_speech else silence_count + 1
if silence_count > max_silence: # 如果连续 1 秒静音,则停止录音
print("检测到静音,录音结束。")
break
# 限制最大录音时长
if len(frames) > int((16000 / 320) * max_duration):
print("达到最大录音时长,录音结束。")
break
stream.stop_stream()
stream.close()
p.terminate()
# 保存录音为 WAV 文件
wf = wave.open(filename, 'wb')
wf.setnchannels(1)
wf.setsampwidth(p.get_sample_size(pyaudio.paInt16))
wf.setframerate(16000)
wf.writeframes(b''.join(frames))
wf.close()
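# Baidu's short-speech recognition endpoint expects the audio base64-encoded in "speech"
# together with "len", the byte length of the raw (pre-encoding) audio. A response with
# err_no == 0 carries the transcript candidates in "result".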
def speech_recognition(audio_file, token):
"""调用百度语音识别 API 将音频转文本"""
with open(audio_file, "rb") as f:
speech_data = f.read()
speech_base64 = base64.b64encode(speech_data).decode('utf-8')
payload = {
"format": "wav",
"rate": RATE,
"channel": 1,
"token": token,
"cuid": DEVICE_ID,
"len": len(speech_data),
"speech": speech_base64,
"word_list": ["小智","小志","小至"] # 添加热词,提高识别准确率
}
url = "http://vop.baidu.com/server_api"
headers = {'Content-Type': 'application/json'}
response = requests.post(url, json=payload, headers=headers)
result = response.json()
if result.get("err_no") == 0:
return result.get("result", [""])[0]
print("语音识别错误:", result.get("err_msg"))
return None
def wake_word_detected(text):
"""检查文本是否包含唤醒词"""
wake_words = ["小智", "小志", "小知", "晓智"]
return any(word in text for word in wake_words)
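# DeepSeek exposes an OpenAI-compatible chat-completions API, which is why the openai
# client below is simply pointed at https://api.deepseek.com; "deepseek-chat" selects the
# general chat model. Note that wake_word_detected() above is a helper; main() below
# currently checks for "小智" in the transcript directly.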
def deepseek_conversation(user_text):
"""调用 DeepSeek API 获取 AI 回答"""
try:
client = OpenAI(api_key=DEEPSEEK_API_KEY, base_url="https://api.deepseek.com")
response = client.chat.completions.create(
model="deepseek-chat",
messages=[{"role": "system", "content": "你是一名叫小智的助手回复不需要使用markdown格式请直接以文本形式回复。"},
{"role": "user", "content": user_text}],
stream=False
)
return response.choices[0].message.content
except Exception as e:
print("DeepSeek API 调用异常:", e)
return "抱歉,我无法获取答案。"
def text_to_speech(text, token, output_file="answer.mp3"):
"""调用百度语音合成 API将文本转换为语音"""
MAX_CHAR = 1024
text = text[:MAX_CHAR] if len(text) > MAX_CHAR else text
params = {
"tex": text,
"tok": token,
"cuid": DEVICE_ID,
"ctp": 1,
"lan": "zh"
}
url = "http://tsn.baidu.com/text2audio"
response = requests.post(url, data=params)
if response.headers.get('Content-Type') == "audio/mp3":
with open(output_file, "wb") as f:
f.write(response.content)
return output_file
print("语音合成错误:", response.text)
return None
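# Optional sketch (not used by main() below): text_to_speech() simply truncates answers
# longer than MAX_CHAR, which silently drops the tail of long replies. A hypothetical
# workaround is to split the text into chunks and synthesize each one separately:
def text_to_speech_chunked(text, token, chunk_size=1024, prefix="answer_part"):
    """Synthesize long text in chunks; returns the list of generated MP3 files (sketch only)."""
    files = []
    for i in range(0, len(text), chunk_size):
        part = text_to_speech(text[i:i + chunk_size], token, output_file=f"{prefix}_{i // chunk_size}.mp3")
        if part:
            files.append(part)
    return files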
def play_audio(file_path):
"""播放音频文件"""
pygame.mixer.init()
pygame.mixer.music.load(file_path)
pygame.mixer.music.play()
while pygame.mixer.music.get_busy():
time.sleep(0.1)
# --------------------- Main program ---------------------
def main():
print("启动智能助手小智...")
token = get_baidu_token()
if not token:
return
while True:
print("等待唤醒词 '小智' ...")
record_audio_vad("wake.wav", max_duration=3)
wake_text = speech_recognition("wake.wav", token)
if wake_text and "小智" in wake_text:
print("唤醒成功,小智回应:好的,小智在。")
# 语音合成回应 "好的,小智在。"
response_audio = text_to_speech("好的,小智在。请说出你的问题。", token, output_file="wakeup_response.mp3")
if response_audio:
play_audio(response_audio) # 播放唤醒成功音频
print("请说出您的问题:")
record_audio_vad(filename="query.wav")
user_query = speech_recognition("query.wav", token)
if user_query:
print("用户说:", user_query)
# 使用 DeepSeek-R1 模型获取回答
answer = deepseek_conversation(user_query)
print("小智回答:", answer)
# 使用百度语音合成将回答转换为语音
audio_file = text_to_speech(answer, token, output_file="answer.mp3")
if audio_file:
play_audio(audio_file)
else:
print("未能识别您的问题,请重试。")
time.sleep(1)
if __name__ == '__main__':
    main()