Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Google voice Support #385

Merged
merged 8 commits into from
Mar 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ venv*
config.json
QR.png
nohup.out
tmp
14 changes: 13 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,14 @@ cd chatgpt-on-wechat/
```bash
pip3 install itchat-uos==1.5.0.dev0
pip3 install --upgrade openai

如果使用百度的语音识别,需要安装百度的pythonSDK
pip3 install baidu-aip chardet
如果使用google的语音识别,需要安装speech_recognition和依赖的ffmpeg和espeak
pip3 install SpeechRecognition
--在MacOS中安装ffmpeg,brew install ffmpeg espeak
--在Windows中安装ffmpeg,下载ffmpeg.exe
--在Linux中安装ffmpeg,apt-get install ffmpeg espeak
```
注:`itchat-uos`使用指定版本1.5.0.dev0,`openai`使用最新版本,需高于0.27.0。

Expand Down Expand Up @@ -112,7 +120,11 @@ cp config-template.json config.json
+ 默认只要被人 @ 就会触发机器人自动回复;另外群聊天中只要检测到以 "@bot" 开头的内容,同样会自动回复(方便自己触发),这对应配置项 `group_chat_prefix`
+ 可选配置: `group_name_keyword_white_list`配置项支持模糊匹配群名称,`group_chat_keyword`配置项则支持模糊匹配群消息内容,用法与上述两个配置项相同。(Contributed by [evolay](https://github.com/evolay))

**3.其他配置**
**3.语音识别**
+ 配置`speech_recognition=true`开启语音识别,默认使用openai的whisper模型
+ 配置`voice_reply_voice=true`语音回复语音,但是需要配置对应语音合成平台的key,由于itchat协议的限制,只能发送语音mp3文件。使用wechaty则回复的是微信语音。

**4.其他配置**

+ `proxy`:由于目前 `openai` 接口国内无法访问,需配置代理客户端的地址,详情参考 [#351](https://github.com/zhayujie/chatgpt-on-wechat/issues/351)
+ 对于图像生成,在满足个人或群组触发条件外,还需要额外的关键词前缀来触发,对应配置 `image_create_prefix`
Expand Down
7 changes: 7 additions & 0 deletions bridge/bridge.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from bot import bot_factory
from voice import voice_factory


class Bridge(object):
Expand All @@ -7,3 +8,9 @@ def __init__(self):

def fetch_reply_content(self, query, context):
    """Return the reply of the "chatGPT" bot for ``query``.

    A bot instance is obtained from ``bot_factory`` on every call and
    ``context`` is forwarded to it untouched.
    """
    chat_bot = bot_factory.create_bot("chatGPT")
    return chat_bot.reply(query, context)

def fetch_voice_to_text(self, voiceFile):
    """Return the text recognised in ``voiceFile`` by the "openai" voice backend.

    The backend instance is obtained from ``voice_factory`` on every call.
    """
    recognizer = voice_factory.create_voice("openai")
    return recognizer.voiceToText(voiceFile)

def fetch_text_to_voice(self, text):
    """Return whatever the "baidu" voice backend produces for ``text``.

    The backend instance is obtained from ``voice_factory`` on every call.
    """
    synthesizer = voice_factory.create_voice("baidu")
    return synthesizer.textToVoice(text)
8 changes: 7 additions & 1 deletion channel/channel.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def startup(self):
"""
raise NotImplementedError

def handle(self, msg):
def handle_text(self, msg):
"""
process received msg
:param msg: message object
Expand All @@ -29,3 +29,9 @@ def send(self, msg, receiver):

def build_reply_content(self, query, context=None):
    """Ask a new ``Bridge`` for the reply to ``query``.

    :param query: text to answer
    :param context: forwarded to the bridge unchanged (defaults to None)
    :return: the value returned by ``Bridge.fetch_reply_content``
    """
    bridge = Bridge()
    return bridge.fetch_reply_content(query, context)

def build_voice_to_text(self, voice_file):
    """Ask a new ``Bridge`` to convert ``voice_file`` to text.

    :param voice_file: forwarded to ``Bridge.fetch_voice_to_text``
    :return: the value returned by the bridge
    """
    bridge = Bridge()
    return bridge.fetch_voice_to_text(voice_file)

def build_text_to_voice(self, text):
    """Ask a new ``Bridge`` to convert ``text`` to voice.

    :param text: forwarded to ``Bridge.fetch_text_to_voice``
    :return: the value returned by the bridge
    """
    bridge = Bridge()
    return bridge.fetch_text_to_voice(text)
62 changes: 51 additions & 11 deletions channel/wechat/wechat_channel.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
"""
wechat channel
"""

import itchat
import json
from itchat.content import *
from channel.channel import Channel
from concurrent.futures import ThreadPoolExecutor
from common.log import logger
from common.tmp_dir import TmpDir
from config import conf
import requests
import io
Expand All @@ -18,7 +20,7 @@

@itchat.msg_register(TEXT)
def handler_single_msg(msg):
    """itchat callback registered for TEXT messages.

    Hands the message to a new ``WechatChannel`` exactly once, through
    ``handle_text``; calling the channel under a second method name as well
    would process (or fail on) the same message twice.

    :param msg: the message object delivered by itchat
    :return: always None
    """
    WechatChannel().handle_text(msg)
    return None


Expand All @@ -28,6 +30,12 @@ def handler_group_msg(msg):
return None


@itchat.msg_register(VOICE)
def handler_single_voice(msg):
    """itchat callback registered for VOICE messages.

    :param msg: the message object delivered by itchat
    :return: always None
    """
    channel = WechatChannel()
    channel.handle_voice(msg)
    return None


class WechatChannel(Channel):
def __init__(self):
pass
Expand All @@ -39,12 +47,27 @@ def startup(self):
# start message listener
itchat.run()

def handle(self, msg):
logger.debug("[WX]receive msg: " + json.dumps(msg, ensure_ascii=False))
def handle_voice(self, msg):
    """Queue a received voice message for processing on the thread pool.

    Nothing happens unless the ``speech_recognition`` config value equals
    True.

    :param msg: voice message object; its ``FileName`` entry is logged
    """
    speech_enabled = conf().get('speech_recognition') == True
    if not speech_enabled:
        return
    voice_name = msg['FileName']
    logger.debug("[WX]receive voice msg: " + voice_name)
    thread_pool.submit(self._do_handle_voice, msg)

def _do_handle_voice(self, msg):
    """Download a voice message, convert it to text and handle the text.

    The file is stored under the ``TmpDir`` path using the message's
    ``FileName``; the recognised text is passed to ``_handle_single_msg``
    together with the ``voice_reply_voice`` config value.

    :param msg: voice message object providing ``FileName`` and ``download``
    """
    # This method is submitted to the thread pool by handle_voice. An
    # exception raised here would only be stored on the unobserved Future,
    # so it is logged explicitly, like the _do_send_* tasks do.
    try:
        fileName = TmpDir().path() + msg['FileName']
        msg.download(fileName)
        content = super().build_voice_to_text(fileName)
        self._handle_single_msg(msg, content, conf().get('voice_reply_voice'))
    except Exception as e:
        logger.exception(e)

def handle_text(self, msg):
    """Log a received text message and dispatch its text, voice reply disabled.

    :param msg: message object; it is dumped as JSON for the debug log and
        its ``Text`` entry is used as the content
    """
    serialized = json.dumps(msg, ensure_ascii=False)
    logger.debug("[WX]receive text msg: " + serialized)
    self._handle_single_msg(msg, msg['Text'], False)

def _handle_single_msg(self, msg, content, reply_voice=False):
from_user_id = msg['FromUserName']
to_user_id = msg['ToUserName'] # 接收人id
other_user_id = msg['User']['UserName'] # 对手方id
content = msg['Text']
match_prefix = self.check_prefix(content, conf().get('single_chat_prefix'))
if "」\n- - - - - - - - - - - - - - -" in content:
logger.debug("[WX]reference query skipped")
Expand All @@ -60,9 +83,10 @@ def handle(self, msg):
if img_match_prefix:
content = content.split(img_match_prefix, 1)[1].strip()
thread_pool.submit(self._do_send_img, content, from_user_id)
else:
thread_pool.submit(self._do_send, content, from_user_id)

elif reply_voice:
thread_pool.submit(self._do_send_voice, content, from_user_id)
else :
thread_pool.submit(self._do_send_text, content, from_user_id)
elif to_user_id == other_user_id and match_prefix:
# 自己给好友发送消息
str_list = content.split(match_prefix, 1)
Expand All @@ -72,8 +96,10 @@ def handle(self, msg):
if img_match_prefix:
content = content.split(img_match_prefix, 1)[1].strip()
thread_pool.submit(self._do_send_img, content, to_user_id)
elif reply_voice:
thread_pool.submit(self._do_send_voice, content, to_user_id)
else:
thread_pool.submit(self._do_send, content, to_user_id)
thread_pool.submit(self._do_send_text, content, to_user_id)


def handle_group(self, msg):
Expand Down Expand Up @@ -105,10 +131,24 @@ def handle_group(self, msg):
thread_pool.submit(self._do_send_group, content, msg)

def send(self, msg, receiver):
    """Send ``msg`` to ``receiver`` through itchat and log the send once.

    The log line is written after the send call, so it is only emitted
    when ``itchat.send`` returned without raising.

    :param msg: content handed to ``itchat.send``
    :param receiver: passed as ``toUserName``
    """
    itchat.send(msg, toUserName=receiver)
    logger.info('[WX] sendMsg={}, receiver={}'.format(msg, receiver))

def _do_send(self, query, reply_user_id):
def _do_send_voice(self, query, reply_user_id):
    """Build a reply for ``query`` and send it to the user as a voice file.

    An empty ``query`` or an empty reply sends nothing. When no voice file
    could be produced, the reply is sent as plain text instead. Any
    exception is logged and not propagated.

    :param query: text to answer
    :param reply_user_id: user the reply is sent to; also stored in the
        context as ``from_user_id``
    """
    try:
        if not query:
            return
        context = dict()
        context['from_user_id'] = reply_user_id
        reply_text = super().build_reply_content(query, context)
        if not reply_text:
            return
        replyFile = super().build_text_to_voice(reply_text)
        if not replyFile:
            # The text-to-voice backend may return None when synthesis
            # fails; passing that to send_file would only raise. Fall back
            # to the text reply so the user still gets an answer.
            logger.error('[WX] text to voice failed, fallback to text, receiver={}'.format(reply_user_id))
            self.send(reply_text, reply_user_id)
            return
        itchat.send_file(replyFile, toUserName=reply_user_id)
        logger.info('[WX] sendFile={}, receiver={}'.format(replyFile, reply_user_id))
    except Exception as e:
        logger.exception(e)

def _do_send_text(self, query, reply_user_id):
try:
if not query:
return
Expand Down Expand Up @@ -138,8 +178,8 @@ def _do_send_img(self, query, reply_user_id):
image_storage.seek(0)

# 图片发送
logger.info('[WX] sendImage, receiver={}'.format(reply_user_id))
itchat.send_image(image_storage, reply_user_id)
logger.info('[WX] sendImage, receiver={}'.format(reply_user_id))
except Exception as e:
logger.exception(e)

Expand Down
20 changes: 20 additions & 0 deletions common/tmp_dir.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@

import os
import pathlib
from config import conf


class TmpDir(object):
    """Helper around the ``./tmp/`` working directory for temporary files.

    Creating an instance makes sure the directory exists, but only when the
    ``speech_recognition`` config value equals True. This class never
    removes the directory or its contents.
    """

    # Shared location of the temporary directory, relative to the working dir.
    tmpFilePath = pathlib.Path('./tmp/')

    def __init__(self):
        pathExists = os.path.exists(self.tmpFilePath)
        if not pathExists and conf().get('speech_recognition') == True:
            # exist_ok guards against another thread creating the directory
            # between the existence check above and this call.
            os.makedirs(self.tmpFilePath, exist_ok=True)

    def path(self):
        """Return the directory as a string ending with ``'/'``."""
        return str(self.tmpFilePath) + '/'

5 changes: 5 additions & 0 deletions config-template.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@
"group_name_white_list": ["ChatGPT测试群", "ChatGPT测试群2"],
"image_create_prefix": ["画", "看", "找"],
"conversation_max_tokens": 1000,
"speech_recognition": false,
"voice_reply_voice": false,
"baidu_app_id": "YOUR BAIDU APP ID",
"baidu_api_key": "YOUR BAIDU API KEY",
"baidu_secret_key": "YOUR BAIDU SERVICE KEY",
"character_desc": "你是ChatGPT, 一个由OpenAI训练的大型语言模型, 你旨在回答并解决人们的任何问题,并且可以使用多种语言与人交流。",
"expires_in_seconds": 3600
}
36 changes: 36 additions & 0 deletions voice/baidu/baidu_voice.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@

"""
baidu voice service
"""
import time
from aip import AipSpeech
from common.log import logger
from common.tmp_dir import TmpDir
from voice.voice import Voice
from config import conf

class BaiduVoice(Voice):
    """Voice backend using Baidu's ``AipSpeech`` client.

    Only text-to-voice is implemented; ``voiceToText`` is an empty stub.
    """

    # Credentials are read from the configuration when the class body runs,
    # and a single client is shared by all instances.
    APP_ID = conf().get('baidu_app_id')
    API_KEY = conf().get('baidu_api_key')
    SECRET_KEY = conf().get('baidu_secret_key')
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)

    def __init__(self):
        pass

    def voiceToText(self, voice_file):
        """Stub: does nothing and returns None."""
        pass

    def textToVoice(self, text):
        """Synthesize ``text`` and store the audio as an mp3 in the tmp dir.

        :param text: text handed to the client's ``synthesis`` call
        :return: path of the written file, or None when the client returned
            a dict (which is logged as an error)
        """
        synthesis_options = {'spd': 5, 'pit': 5, 'vol': 5, 'per': 111}
        result = self.client.synthesis(text, 'zh', 1, synthesis_options)

        # A dict result is treated as an error report, anything else as audio data.
        if isinstance(result, dict):
            logger.error('[Baidu] textToVoice error={}'.format(result))
            return None

        tmp_prefix = TmpDir().path()
        fileName = tmp_prefix + '语音回复_' + str(int(time.time())) + '.mp3'
        with open(fileName, 'wb') as audio_out:
            audio_out.write(result)
        logger.info('[Baidu] textToVoice text={} voice file name={}'.format(text, fileName))
        return fileName
51 changes: 51 additions & 0 deletions voice/google/google_voice.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@

"""
google voice service
"""

import pathlib
import subprocess
import time
import speech_recognition
import pyttsx3
from common.log import logger
from common.tmp_dir import TmpDir
from voice.voice import Voice


class GoogleVoice(Voice):
    """Voice backend: Google speech recognition plus pyttsx3 synthesis.

    The recognizer and the pyttsx3 engine are created once, when the class
    body runs, and are shared by all instances.
    """

    recognizer = speech_recognition.Recognizer()
    engine = pyttsx3.init()

    def __init__(self):
        # speech rate
        self.engine.setProperty('rate', 125)
        # volume
        self.engine.setProperty('volume', 1.0)
        # Per the original author's note: index 0 is a male voice, 1 a female one.
        voices = self.engine.getProperty('voices')
        # Only switch voices when a second one is installed; otherwise keep
        # the engine's current voice instead of failing with IndexError.
        if len(voices) > 1:
            self.engine.setProperty('voice', voices[1].id)

    def voiceToText(self, voice_file):
        """Convert ``voice_file`` to wav with ffmpeg and recognise its speech.

        :param voice_file: path of the audio file; ``.mp3`` in the path is
            replaced by ``.wav`` to name the converted file
        :return: the recognised text, or an apology message when the audio
            could not be understood or the service request failed
        """
        new_file = voice_file.replace('.mp3', '.wav')
        # Argument list and no shell: the file name comes from a received
        # message, so it must not be interpreted by a shell, and names
        # containing spaces keep working.
        subprocess.call(['ffmpeg', '-i', voice_file,
                         '-acodec', 'pcm_s16le', '-ac', '1', '-ar', '16000',
                         new_file])
        with speech_recognition.AudioFile(new_file) as source:
            audio = self.recognizer.record(source)
        try:
            text = self.recognizer.recognize_google(audio, language='zh-CN')
            logger.info(
                '[Google] voiceToText text={} voice file name={}'.format(text, voice_file))
            return text
        except speech_recognition.UnknownValueError:
            return "抱歉,我听不懂。"
        except speech_recognition.RequestError as e:
            return "抱歉,无法连接到 Google 语音识别服务;{0}".format(e)

    def textToVoice(self, text):
        """Synthesize ``text`` with pyttsx3 into a file in the tmp dir.

        :param text: text to speak
        :return: path of the file the engine was asked to write
        """
        textFile = TmpDir().path() + '语音回复_' + str(int(time.time())) + '.mp3'
        self.engine.save_to_file(text, textFile)
        self.engine.runAndWait()
        logger.info(
            '[Google] textToVoice text={} voice file name={}'.format(text, textFile))
        return textFile
27 changes: 27 additions & 0 deletions voice/openai/openai_voice.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@

"""
openai voice service
"""
import json
import openai
from config import conf
from common.log import logger
from voice.voice import Voice


class OpenaiVoice(Voice):
    """Voice backend using OpenAI's ``whisper-1`` transcription.

    Only voice-to-text is implemented; ``textToVoice`` is an empty stub.
    """

    def __init__(self):
        # The key is set on the openai module itself, from the configuration.
        openai.api_key = conf().get('open_ai_api_key')

    def voiceToText(self, voice_file):
        """Transcribe the audio file at ``voice_file``.

        :param voice_file: path of the audio file to upload
        :return: the ``"text"`` entry of the transcription reply
        """
        logger.debug(
            '[Openai] voice file name={}'.format(voice_file))
        # The context manager closes the file handle even when the
        # transcription request raises.
        with open(voice_file, "rb") as file:
            reply = openai.Audio.transcribe("whisper-1", file)
        text = reply["text"]
        logger.info(
            '[Openai] voiceToText text={} voice file name={}'.format(text, voice_file))
        return text

    def textToVoice(self, text):
        """Stub: does nothing and returns None."""
        pass
16 changes: 16 additions & 0 deletions voice/voice.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
"""
Voice service abstract class
"""

class Voice(object):
    """Interface of a voice service; both operations must be overridden."""

    def voiceToText(self, voice_file):
        """Turn the audio in ``voice_file`` into text.

        The base implementation always raises ``NotImplementedError``.
        """
        raise NotImplementedError()

    def textToVoice(self, text):
        """Turn ``text`` into voice.

        The base implementation always raises ``NotImplementedError``.
        """
        raise NotImplementedError()
20 changes: 20 additions & 0 deletions voice/voice_factory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""
voice factory
"""

def create_voice(voice_type):
    """
    create a voice instance
    :param voice_type: voice type code, one of 'baidu', 'google', 'openai'
    :return: voice instance
    :raises RuntimeError: if voice_type is not one of the known codes
    """
    # Each backend module is imported inside its branch, so it is only
    # loaded when that backend is actually requested.
    if voice_type == 'baidu':
        from voice.baidu.baidu_voice import BaiduVoice
        return BaiduVoice()
    elif voice_type == 'google':
        from voice.google.google_voice import GoogleVoice
        return GoogleVoice()
    elif voice_type == 'openai':
        from voice.openai.openai_voice import OpenaiVoice
        return OpenaiVoice()
    # Same exception type as before, now naming the rejected code.
    raise RuntimeError('unknown voice type: {!r}'.format(voice_type))