diff --git a/bot/bot_factory.py b/bot/bot_factory.py
index a6ef2415b..951d13922 100644
--- a/bot/bot_factory.py
+++ b/bot/bot_factory.py
@@ -67,6 +67,10 @@ def create_bot(bot_type):
     elif bot_type == const.MiniMax:
         from bot.minimax.minimax_bot import MinimaxBot
         return MinimaxBot()
+
+    elif bot_type == const.OLLAMA:
+        from bot.ollama.ollama_bot import OllamaBot
+        return OllamaBot()
 
 
     raise RuntimeError
diff --git a/bot/ollama/ollama_bot.py b/bot/ollama/ollama_bot.py
new file mode 100644
index 000000000..a63d2cd98
--- /dev/null
+++ b/bot/ollama/ollama_bot.py
@@ -0,0 +1,133 @@
+# encoding:utf-8
+
+from bot.bot import Bot
+from bot.ollama.ollama_session import OllamaSession
+from bot.session_manager import SessionManager
+from bridge.context import ContextType
+from bridge.reply import Reply, ReplyType
+from common.log import logger
+from common.token_bucket import TokenBucket
+from config import conf, load_config
+from bot.ollama.ollama_session import num_tokens_from_messages
+
+import ollama
+
+# Entries of OllamaBot.args that are request settings rather than sampling
+# parameters; they must not be forwarded inside the Ollama "options" payload.
+_NON_OPTION_KEYS = ("model", "request_timeout", "timeout")
+
+
+class OllamaBot(Bot):
+    """Chat bot that answers text messages through an Ollama server."""
+
+    def __init__(self):
+        super().__init__()
+        if conf().get("rate_limit_chatgpt"):
+            self.tb4chatgpt = TokenBucket(conf().get("rate_limit_chatgpt", 20))
+        # NOTE(review): the "gpt-3.5-turbo" fallback is not an Ollama model name;
+        # "model" in the config should name a model available on the Ollama server.
+        conf_model = conf().get("model") or "gpt-3.5-turbo"
+        self.sessions = SessionManager(OllamaSession, model=conf_model)
+
+        self.client = ollama.Client(
+            host=conf().get("ollama_host", "http://localhost:11434")
+        )
+
+        self.args = {
+            "model": conf_model,  # name of the chat model
+            "temperature": conf().get("temperature", 0.9),  # in [0,1]; larger values make replies less deterministic
+            # "max_tokens":4096,  # maximum length of the reply
+            "top_p": conf().get("top_p", 1),
+            "frequency_penalty": conf().get("frequency_penalty", 0.0),  # in [-2,2]; larger values favour different content
+            "presence_penalty": conf().get("presence_penalty", 0.0),  # in [-2,2]; larger values favour different content
+            "request_timeout": conf().get("request_timeout", None),  # request timeout setting; not sent to Ollama as an option
+            "timeout": conf().get("request_timeout", None),  # retry timeout setting; not sent to Ollama as an option
+        }
+
+    def reply(self, query, context=None):
+        """
+        Handle one incoming message.
+
+        :param query: text of the message
+        :param context: message context providing the type and "session_id"
+        :return: a Reply; ReplyType.ERROR for every non-text context type
+        """
+        if context.type == ContextType.TEXT:
+            logger.info("[OLLAMA] query={}".format(query))
+
+            session_id = context["session_id"]
+            reply = None
+            clear_memory_commands = conf().get("clear_memory_commands", ["#清除记忆"])
+            if query in clear_memory_commands:
+                self.sessions.clear_session(session_id)
+                reply = Reply(ReplyType.INFO, "记忆已清除")
+            elif query == "#清除所有":
+                self.sessions.clear_all_session()
+                reply = Reply(ReplyType.INFO, "所有人记忆已清除")
+            elif query == "#更新配置":
+                load_config()
+                reply = Reply(ReplyType.INFO, "配置已更新")
+            if reply:
+                return reply
+            session = self.sessions.session_query(query, session_id)
+            logger.debug("[OLLAMA] session query={}".format(session.messages))
+
+            model = context.get("gpt_model")
+            new_args = None
+            if model:
+                new_args = self.args.copy()
+                new_args["model"] = model
+
+            reply_content = self.reply_text(session, args=new_args)
+            logger.debug(
+                "[OLLAMA] new_query={}, session_id={}, reply_cont={}, completion_tokens={}".format(
+                    session.messages,
+                    session_id,
+                    reply_content["content"],
+                    reply_content["completion_tokens"],
+                )
+            )
+            if reply_content["completion_tokens"] == 0 and len(reply_content["content"]) > 0:
+                reply = Reply(ReplyType.ERROR, reply_content["content"])
+            elif reply_content["completion_tokens"] > 0:
+                self.sessions.session_reply(reply_content["content"], session_id, reply_content["total_tokens"])
+                reply = Reply(ReplyType.TEXT, reply_content["content"])
+            else:
+                reply = Reply(ReplyType.ERROR, reply_content["content"])
+                logger.debug("[OLLAMA] reply {} used 0 tokens.".format(reply_content))
+            return reply
+
+        # No image generation here: this class defines no create_img, so
+        # IMAGE_CREATE is reported as unsupported like any other non-text type.
+        reply = Reply(ReplyType.ERROR, "Bot不支持处理{}类型的消息".format(context.type))
+        return reply
+
+    def reply_text(self, session: OllamaSession, args=None) -> dict:
+        """
+        Call Ollama's chat endpoint to get the answer.
+
+        :param session: a conversation session
+        :param args: request arguments; self.args is used when None
+        :return: dict with "content" and "completion_tokens", plus "total_tokens" on success
+        """
+        try:
+            if args is None:
+                args = self.args
+            # The model is passed on its own and the timeout entries are not Ollama options.
+            options = {k: v for k, v in args.items() if k not in _NON_OPTION_KEYS}
+            response = self.client.chat(messages=session.messages, model=args["model"], options=options)
+            content = response["message"]["content"]
+            # token estimate for the prompt (session history)
+            total_tokens = num_tokens_from_messages(session.messages, args["model"])
+            # token estimate for the reply
+            completion_tokens = num_tokens_from_messages([{"role": "assistant", "content": content}])
+            return {
+                "total_tokens": total_tokens + completion_tokens,
+                "completion_tokens": completion_tokens,
+                "content": content,
+            }
+        except Exception as e:
+            result = {"completion_tokens": 0, "content": "我现在有点累了,等会再来吧"}
+            logger.exception("[OLLAMA] Exception: {}".format(e))
+            self.sessions.clear_session(session.session_id)
+            return result
diff --git a/bot/ollama/ollama_session.py b/bot/ollama/ollama_session.py
new file mode 100644
index 000000000..7338cde83
--- /dev/null
+++ b/bot/ollama/ollama_session.py
@@ -0,0 +1,99 @@
+from bot.session_manager import Session
+from common.log import logger
+
+"""
+    e.g.
+    [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": "Who won the world series in 2020?"},
+        {"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."},
+        {"role": "user", "content": "Where was it played?"}
+    ]
+"""
+
+
+class OllamaSession(Session):
+    """Conversation history for one Ollama chat session."""
+
+    def __init__(self, session_id, system_prompt=None, model="gpt-3.5-turbo"):
+        super().__init__(session_id, system_prompt)
+        self.model = model
+        self.reset()
+
+    def discard_exceeding(self, max_tokens, cur_tokens=None):
+        """
+        Drop the oldest messages after index 0 until the history fits max_tokens.
+
+        :param max_tokens: token budget for the session history
+        :param cur_tokens: fallback token count, used only if precise counting fails
+        :return: the token count after trimming
+        """
+        precise = True
+        try:
+            cur_tokens = self.calc_tokens()
+        except Exception as e:
+            precise = False
+            if cur_tokens is None:
+                raise e
+            logger.debug("Exception when counting tokens precisely for query: {}".format(e))
+        while cur_tokens > max_tokens:
+            if len(self.messages) > 2:
+                self.messages.pop(1)
+            elif len(self.messages) == 2 and self.messages[1]["role"] == "assistant":
+                self.messages.pop(1)
+                if precise:
+                    cur_tokens = self.calc_tokens()
+                else:
+                    cur_tokens = cur_tokens - max_tokens
+                break
+            elif len(self.messages) == 2 and self.messages[1]["role"] == "user":
+                logger.warning("user message exceed max_tokens. total_tokens={}".format(cur_tokens))
+                break
+            else:
+                logger.debug("max_tokens={}, total_tokens={}, len(messages)={}".format(max_tokens, cur_tokens, len(self.messages)))
+                break
+            if precise:
+                cur_tokens = self.calc_tokens()
+            else:
+                cur_tokens = cur_tokens - max_tokens
+        return cur_tokens
+
+    def calc_tokens(self):
+        """Return the estimated token count of the current history."""
+        return num_tokens_from_messages(self.messages, self.model)
+
+
+# refer to https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
+def num_tokens_from_messages(messages, model="gpt-3.5-turbo"):
+    """
+    Estimate the number of tokens used by a list of messages.
+
+    tiktoken only maps OpenAI model names to encodings, so `model` is accepted
+    for interface compatibility but ignored and cl100k_base is always used.
+    """
+    import tiktoken
+
+    encoding = tiktoken.get_encoding("cl100k_base")
+
+    tokens_per_message = 4  # every message follows <|start|>{role/name}\n{content}<|end|>\n
+    tokens_per_name = -1  # if there's a name, the role is omitted
+
+    num_tokens = 0
+    for message in messages:
+        num_tokens += tokens_per_message
+        for key, value in message.items():
+            # disallowed_special=() makes text such as "<|endoftext|>" count as
+            # ordinary text instead of raising ValueError.
+            num_tokens += len(encoding.encode(value, disallowed_special=()))
+            if key == "name":
+                num_tokens += tokens_per_name
+    num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
+    return num_tokens
+
+
+def num_tokens_by_character(messages):
+    """Returns the number of tokens used by a list of messages."""
+    tokens = 0
+    for msg in messages:
+        tokens += len(msg["content"])
+    return tokens
diff --git a/common/const.py b/common/const.py
index 9eb8795da..acf6fc9d7 100644
--- a/common/const.py
+++ b/common/const.py
@@ -9,6 +9,7 @@ CLAUDEAPI= "claudeAPI" # 通过Claude api调用模型
 QWEN = "qwen" # 旧版通义模型
 
 QWEN_DASHSCOPE = "dashscope" # 通义新版sdk和api key
+OLLAMA = "ollama"
 
 
 GEMINI = "gemini" # gemini-1.0-pro
diff --git a/config-template.json b/config-template.json
index d0268d3b1..a3a32cbf5 100644
--- a/config-template.json
+++ b/config-template.json
@@ -1,5 +1,7 @@
 {
   "channel_type": "wx",
+  "bot_type": "ollama",
+  "ollama_host": "http://localhost:11434",
   "model": "",
   "open_ai_api_key": "YOUR API KEY",
   "claude_api_key": "YOUR API KEY",
diff --git a/config.py b/config.py
index a28c5b959..34197e51b 100644
--- a/config.py
+++ b/config.py
@@ -179,6 +179,8 @@
     "Minimax_api_key": "",
     "Minimax_group_id": "",
     "Minimax_base_url": "",
+
+    "ollama_host": "http://localhost:11434", # ollama host
 }
 
 
diff --git a/plugins/godcmd/godcmd.py b/plugins/godcmd/godcmd.py
index 3e9d3eabd..61b1e833a 100644
--- a/plugins/godcmd/godcmd.py
+++ b/plugins/godcmd/godcmd.py
@@ -313,7 +313,7 @@ def on_handle_context(self, e_context: EventContext):
                     except Exception as e:
                         ok, result = False, "你没有设置私有GPT模型"
                 elif cmd == "reset":
-                    if bottype in [const.OPEN_AI, const.CHATGPT, const.CHATGPTONAZURE, const.LINKAI, const.BAIDU, const.XUNFEI, const.QWEN, const.GEMINI, const.ZHIPU_AI, const.CLAUDEAPI]:
+                    if bottype in [const.OPEN_AI, const.CHATGPT, const.CHATGPTONAZURE, const.LINKAI, const.BAIDU, const.XUNFEI, const.QWEN, const.GEMINI, const.ZHIPU_AI, const.CLAUDEAPI, const.OLLAMA]:
                         bot.sessions.clear_session(session_id)
                         if Bridge().chat_bots.get(bottype):
                             Bridge().chat_bots.get(bottype).sessions.clear_session(session_id)
@@ -339,7 +339,7 @@ def on_handle_context(self, e_context: EventContext):
                         ok, result = True, "配置已重载"
                     elif cmd == "resetall":
                         if bottype in [const.OPEN_AI, const.CHATGPT, const.CHATGPTONAZURE, const.LINKAI,
-                                       const.BAIDU, const.XUNFEI, const.QWEN, const.GEMINI, const.ZHIPU_AI, const.MOONSHOT]:
+                                       const.BAIDU, const.XUNFEI, const.QWEN, const.GEMINI, const.ZHIPU_AI, const.MOONSHOT, const.OLLAMA]:
                             channel.cancel_all_session()
                             bot.sessions.clear_all_session()
                             ok, result = True, "重置所有会话成功"