feat(project): support cpu-only mode (#359)
* feat(primitive/llm_reranker.py): support siliconcloud api

* docs(project): support cpu-only RAG

* unittest(service): unit tests passed
tpoisonooo authored Aug 15, 2024
1 parent e9021d9 commit b9cc552
Showing 25 changed files with 553 additions and 114 deletions.
28 changes: 26 additions & 2 deletions README.md
@@ -143,9 +143,10 @@ Our Web version has been released to [OpenXLab](https://openxlab.org.cn/apps/det

The following are the GPU memory requirements for different features; the difference lies only in whether the **options are turned on**.

| Configuration Example | GPU mem Requirements | Description | Verified Devices on Linux System |
| Configuration Example | GPU mem Requirements | Description | Verified on Linux |
| :----------------------------------------------: | :------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------: |
| [config-2G.ini](./config-2G.ini) | 2GB | Use openai API (such as [kimi](https://kimi.moonshot.cn), [deepseek](https://platform.deepseek.com/usage), [stepfun](https://platform.stepfun.com/) and [siliconcloud](https://siliconflow.cn/)) to search for text only | ![](https://img.shields.io/badge/1660ti%206G-passed-blue?style=for-the-badge) |
| [config-cpu.ini](./config-cpu.ini) | - | Use [siliconcloud](https://siliconflow.cn/) API <br/> for text only | ![](https://img.shields.io/badge/x86-passed-blue?style=for-the-badge) |
| [config-2G.ini](./config-2G.ini) | 2GB | Use openai API (such as [kimi](https://kimi.moonshot.cn), [deepseek](https://platform.deepseek.com/usage) and [stepfun](https://platform.stepfun.com/) to search for text only | ![](https://img.shields.io/badge/1660ti%206G-passed-blue?style=for-the-badge) |
| [config-multimodal.ini](./config-multimodal.ini) | 10GB | Use openai API for LLM, image and text retrieval | ![](https://img.shields.io/badge/3090%2024G-passed-blue?style=for-the-badge) |
| \[Standard Edition\] [config.ini](./config.ini) | 19GB | Local deployment of LLM, single modality | ![](https://img.shields.io/badge/3090%2024G-passed-blue?style=for-the-badge) |
| [config-advanced.ini](./config-advanced.ini) | 80GB | local LLM, anaphora resolution, single modality, practical for WeChat group | ![](https://img.shields.io/badge/A100%2080G-passed-blue?style=for-the-badge) |
@@ -258,6 +259,29 @@ Same as [OpenXlab APP](https://openxlab.org.cn/apps/detail/tpoisonooo/huixiangdo

# 🍴 Other Configurations

## **CPU-only Edition**

If there is no GPU available, model inference can be completed using the [siliconcloud](https://siliconflow.cn/) API.

Taking docker miniconda+Python3.11 as an example, install CPU dependencies and run:

```bash
# Start container
docker run -v /path/to/huixiangdou:/huixiangdou -p 7860:7860 -p 23333:23333 -it continuumio/miniconda3 /bin/bash
# Install dependencies
apt update
apt install python-dev libxml2-dev libxslt1-dev antiword unrtf poppler-utils pstotext tesseract-ocr flac ffmpeg lame libmad0 libsox-fmt-mp3 sox libjpeg-dev swig libpulse-dev
python3 -m pip install -r requirements-cpu.txt
# Establish knowledge base
python3 -m huixiangdou.service.feature_store --config_path config-cpu.ini
# Q&A test
python3 -m huixiangdou.main --standalone --config_path config-cpu.ini
# gradio UI
python3 -m huixiangdou.gradio --config_path config-cpu.ini
```
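
The knowledge-base and Q&A steps above call the siliconcloud API, so fill in your token first. A minimal sketch of the two fields to set in config-cpu.ini (`sk-YOUR-TOKEN` is a placeholder):

```bash
# Write your siliconcloud token into config-cpu.ini:
# `api_token` serves embedding/reranking, `remote_api_key` serves the remote LLM
sed -i 's/^api_token = ""/api_token = "sk-YOUR-TOKEN"/' config-cpu.ini
sed -i 's/^remote_api_key = ""/remote_api_key = "sk-YOUR-TOKEN"/' config-cpu.ini
```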

If you find the installation too slow, a pre-installed image is provided on [Docker Hub](https://hub.docker.com/repository/docker/tpoisonooo/huixiangdou/tags). Simply use it in place of `continuumio/miniconda3` when starting the container.
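
For example, a sketch of the swap (the image tag is an assumption; pick an actual tag from the Docker Hub page above):

```bash
# Same as the first command above, with the pre-installed image replacing miniconda3;
# `tpoisonooo/huixiangdou:latest` is a placeholder tag, check Docker Hub for real ones
docker run -v /path/to/huixiangdou:/huixiangdou -p 7860:7860 -p 23333:23333 \
    -it tpoisonooo/huixiangdou:latest /bin/bash
```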

## **2G Cost-effective Edition**

If your GPU mem exceeds 1.8G or you pursue cost-effectiveness, this configuration discards the local LLM and uses a remote LLM instead; everything else is the same as the standard edition.
34 changes: 29 additions & 5 deletions README_zh.md
@@ -143,7 +143,8 @@ For the Web version video tutorial, see [BiliBili](https://www.bilibili.com/video/BV1S2421N7mn)

| Configuration Example | GPU mem Requirements | Description | Verified Devices on Linux |
| :----------------------------------------------: | :------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------: |
| [config-2G.ini](./config-2G.ini) | 2GB | Use openai API (such as [kimi](https://kimi.moonshot.cn), [deepseek](https://platform.deepseek.com/usage), [stepfun](https://platform.stepfun.com/) and [siliconcloud](https://siliconflow.cn/))<br/>to search for text only | ![](https://img.shields.io/badge/1660ti%206G-passed-blue?style=for-the-badge) |
| [config-cpu.ini](./config-cpu.ini) | - | Use [siliconcloud](https://siliconflow.cn/) API <br/>for text only | ![](https://img.shields.io/badge/x86-passed-blue?style=for-the-badge) |
| [config-2G.ini](./config-2G.ini) | 2GB | Use openai API (such as [kimi](https://kimi.moonshot.cn), [deepseek](https://platform.deepseek.com/usage) and [stepfun](https://platform.stepfun.com/))<br/>to search for text only | ![](https://img.shields.io/badge/1660ti%206G-passed-blue?style=for-the-badge) |
| [config-multimodal.ini](./config-multimodal.ini) | 10GB | Use openai API for LLM, image and text retrieval | ![](https://img.shields.io/badge/3090%2024G-passed-blue?style=for-the-badge) |
| \[Standard Edition\] [config.ini](./config.ini) | 19GB | Local deployment of LLM, single modality | ![](https://img.shields.io/badge/3090%2024G-passed-blue?style=for-the-badge) |
| [config-advanced.ini](./config-advanced.ini) | 80GB | Local LLM, anaphora resolution, single modality, practical for WeChat group | ![](https://img.shields.io/badge/A100%2080G-passed-blue?style=for-the-badge) |
@@ -249,7 +250,30 @@ curl -X POST http://127.0.0.1:23333/huixiangdou_inference -H "Content-Type: app

# 🍴 Other Configurations

## 2G Cost-effective Edition
## **CPU-only Edition**

If no GPU is available, model inference can be completed using the [siliconcloud](https://siliconflow.cn/) API.

Taking docker miniconda+Python3.11 as an example, install the CPU dependencies and run:

```bash
# Start the container
docker run -v /path/to/huixiangdou:/huixiangdou -p 7860:7860 -p 23333:23333 -it continuumio/miniconda3 /bin/bash
# Install dependencies
apt update
apt install python-dev libxml2-dev libxslt1-dev antiword unrtf poppler-utils pstotext tesseract-ocr flac ffmpeg lame libmad0 libsox-fmt-mp3 sox libjpeg-dev swig libpulse-dev
python3 -m pip install -r requirements-cpu.txt
# Build the knowledge base
python3 -m huixiangdou.service.feature_store --config_path config-cpu.ini
# Q&A test
python3 -m huixiangdou.main --standalone --config_path config-cpu.ini
# gradio UI
python3 -m huixiangdou.gradio --config_path config-cpu.ini
```

If installing dependencies is too slow, [Docker Hub](https://hub.docker.com/repository/docker/tpoisonooo/huixiangdou/tags) provides an image with dependencies pre-installed; simply substitute it when starting docker.

## **2G Cost-effective Edition**

If your GPU mem exceeds 1.8G or you pursue cost-effectiveness, this configuration discards the local LLM and uses a remote LLM instead; everything else is the same as the standard edition.

@@ -277,7 +301,7 @@ remote_llm_model = "alibaba/Qwen1.5-110B-Chat"
python3 -m huixiangdou.main --standalone --config_path config-2G.ini # start all services at once
```

## 10G Multimodal Edition
## **10G Multimodal Edition**

If you have 10GB GPU mem, you can further support image and text retrieval. Just modify the models used in config.ini.

@@ -300,7 +324,7 @@ reranker_model_path = "BAAI/bge-reranker-v2-minicpm-layerwise"
python3 tests/test_query_gradio.py
```

## 80G Complete Edition
## **80G Complete Edition**

The "HuixiangDou" in the WeChat experience group has all features enabled:

@@ -318,7 +342,7 @@
- [Use wkteam WeChat integration, with image and official-account parsing and coreference resolution](./docs/add_wechat_commercial_zh.md)
- [Use rag.py to annotate SFT training data](./docs/rag_annotate_sft_data_zh.md)

## Mobile
## **Mobile**

Contributors provide an [android tool](./android) for WeChat integration. The approach is based on system-level APIs and can in principle control any UI (not limited to communication software).

212 changes: 212 additions & 0 deletions config-cpu.ini
@@ -0,0 +1,212 @@
[feature_store]
# `feature_store.py` uses this throttle to distinguish `good_questions` from `bad_questions`
reject_throttle = -1.0
# text2vec model, support local relative path, huggingface repo and URL.
# for example:
# "maidalun1020/bce-embedding-base_v1"
# "BAAI/bge-m3"
# "https://api.siliconflow.cn/v1/embeddings"
embedding_model_path = "https://api.siliconflow.cn/v1/embeddings"

# reranker model, support list:
# "maidalun1020/bce-reranker-base_v1"
# "BAAI/bge-reranker-v2-minicpm-layerwise"
# "https://api.siliconflow.cn/v1/rerank"
reranker_model_path = "https://api.siliconflow.cn/v1/rerank"
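
# a quick connectivity check for the two endpoints above, as a sketch;
# the exact model names are assumptions, see the siliconcloud docs:
#   curl -s https://api.siliconflow.cn/v1/embeddings \
#     -H "Authorization: Bearer $API_TOKEN" -H "Content-Type: application/json" \
#     -d '{"model": "BAAI/bge-m3", "input": "hello"}'
#   curl -s https://api.siliconflow.cn/v1/rerank \
#     -H "Authorization: Bearer $API_TOKEN" -H "Content-Type: application/json" \
#     -d '{"model": "BAAI/bge-reranker-v2-m3", "query": "hello", "documents": ["hi", "bye"]}'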

# if using `siliconcloud` API as `embedding_model_path` or `reranker_model_path`, give the token
api_token = ""
api_rpm = 800
work_dir = "workdir"

[web_search]
engine = "serper"
# the web search engine supports ddgs and serper
# For ddgs, see https://pypi.org/project/duckduckgo-search
# For serper, check https://serper.dev/api-key to get a free API key
serper_x_api_key = "YOUR-API-KEY-HERE"
domain_partial_order = ["openai.com", "pytorch.org", "readthedocs.io", "nvidia.com", "stackoverflow.com", "juejin.cn", "zhuanlan.zhihu.com", "www.cnblogs.com"]
save_dir = "logs/web_search_result"
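
# a serper request sketch (endpoint and header per https://serper.dev):
#   curl -s https://google.serper.dev/search \
#     -H "X-API-KEY: $SERPER_X_API_KEY" -H "Content-Type: application/json" \
#     -d '{"q": "huixiangdou"}'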

[llm]
enable_local = 0
enable_remote = 1
# hybrid llm service address
client_url = "http://127.0.0.1:8888/inference"

[llm.server]
# local LLM configuration
# support "internlm/internlm2-chat-7b", "internlm2_5-7b-chat" and "qwen/qwen-7b-chat-int8"
# support local path, for example
# local_llm_path = "/path/to/your/internlm2_5"

local_llm_path = "internlm/internlm2_5-7b-chat"
local_llm_max_text_length = 3000
# llm server listen port
local_llm_bind_port = 8888

# remote LLM service configuration
# support "gpt", "kimi", "deepseek", "zhipuai", "step", "internlm", "xi-api" and "alles-apin"
# support "siliconcloud", see https://siliconflow.cn/zh-cn/siliconcloud
# xi-api and alles-apin are Chinese GPT proxies
# for internlm, see https://internlm.intern-ai.org.cn/api/document

remote_type = "siliconcloud"
remote_api_key = ""
# max text length for remote LLM.
# use 128000 for kimi, 192000 for gpt/xi-api, 16000 for deepseek, 128000 for zhipuai, 40000 for internlm2
remote_llm_max_text_length = 40000
# openai API model type, support model list:
# "auto" for kimi. To save money, we auto select model name by prompt length.
# "auto" for step to save money, see https://platform.stepfun.com/
# "gpt-4-0613" for gpt/xi-api,
# "deepseek-chat" for deepseek,
# "glm-4" for zhipuai,
# "gpt-4-1106-preview" for alles-apin or OpenAOE
# "internlm2-latest" for internlm
# for example "alibaba/Qwen1.5-110B-Chat", see https://siliconflow.readme.io/reference/chat-completions-1
remote_llm_model = "alibaba/Qwen1.5-110B-Chat"
# request per minute
rpm = 500
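
# the remote endpoint is OpenAI-compatible chat completions; a request sketch,
# following the doc link above (https://siliconflow.readme.io/reference/chat-completions-1):
#   curl -s https://api.siliconflow.cn/v1/chat/completions \
#     -H "Authorization: Bearer $REMOTE_API_KEY" -H "Content-Type: application/json" \
#     -d '{"model": "alibaba/Qwen1.5-110B-Chat", "messages": [{"role": "user", "content": "hello"}]}'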

[coreference_resolution]
base_url = 'http://127.0.0.1:9999/v1'
api_key = 'token-abc123'

[worker]
# enable web search or not
enable_web_search = 1
# enable search enhancement or not
enable_sg_search = 0
# enable coreference resolution in `PreprocNode`
enable_cr = 0
save_path = "logs/work.txt"

[worker.time]
enable = 0
start = "00:00:00"
end = "23:59:59"
has_weekday = 1

[sg_search]
# download `src` from https://github.com/sourcegraph/src-cli#installation
binary_src_path = "/usr/local/bin/src"
src_access_token = "YOUR-SRC-ACCESS-TOKEN"

# add your repo here, we just take opencompass and lmdeploy as example
[sg_search.opencompass]
github_repo_id = "open-compass/opencompass"
introduction = "用于评测大型语言模型(LLM). 它提供了完整的开源可复现的评测框架,支持大语言模型、多模态模型的一站式评测,基于分布式技术,对大参数量模型亦能实现高效评测。评测方向汇总为知识、语言、理解、推理、考试五大能力维度,整合集纳了超过70个评测数据集,合计提供了超过40万个模型评测问题,并提供长文本、安全、代码3类大模型特色技术能力评测。"
# introduction = "For evaluating Large Language Models (LLMs). It provides a fully open-source, reproducible evaluation framework, supporting one-stop evaluation for large language models and multimodal models. Based on distributed technology, it can efficiently evaluate models with a large number of parameters. The evaluation directions are summarized in five capability dimensions: knowledge, language, understanding, reasoning, and examination. It integrates and collects more than 70 evaluation datasets, providing in total over 400,000 model evaluation questions. Additionally, it offers evaluations for three types of capabilities specific to large models: long text, security, and coding."

[sg_search.lmdeploy]
github_repo_id = "internlm/lmdeploy"
introduction = "lmdeploy 是一个用于压缩、部署和服务 LLM(Large Language Model)的工具包。是一个服务端场景下,transformer 结构 LLM 部署工具,支持 GPU 服务端部署,速度有保障,支持 Tensor Parallel,多并发优化,功能全面,包括模型转换、缓存历史会话的 cache feature 等. 它还提供了 WebUI、命令行和 gRPC 客户端接入。"
# introduction = "lmdeploy is a toolkit for compressing, deploying, and servicing Large Language Models (LLMs). It is a deployment tool for transformer-structured LLMs in server-side scenarios, supporting GPU server-side deployment, ensuring speed, and supporting Tensor Parallel along with optimizations for multiple concurrent processes. It offers comprehensive features including model conversion, cache features for caching historical sessions and more. Additionally, it provides access via WebUI, command line, and gRPC clients."
# add your repo here, we just take opencompass and lmdeploy as example

[sg_search.mmpose]
github_repo_id = "open-mmlab/mmpose"
introduction = "MMPose is an open-source toolbox for pose estimation based on PyTorch"

[sg_search.mmdetection]
github_repo_id = "open-mmlab/mmdetection"
introduction = "MMDetection is an open source object detection toolbox based on PyTorch."

[sg_search.huixiangdou]
github_repo_id = "internlm/huixiangdou"
introduction = "茴香豆是一个基于 LLM 的群聊知识助手。设计拒答、响应两阶段 pipeline 应对群聊场景,解答问题同时不会消息泛滥。"

[sg_search.xtuner]
github_repo_id = "internlm/xtuner"
introduction = "XTuner is an efficient, flexible and full-featured toolkit for fine-tuning large models."

[sg_search.mmyolo]
github_repo_id = "open-mmlab/mmyolo"
introduction = "OpenMMLab YOLO series toolbox and benchmark. Implemented RTMDet, RTMDet-Rotated,YOLOv5, YOLOv6, YOLOv7, YOLOv8,YOLOX, PPYOLOE, etc."

[sg_search.Amphion]
github_repo_id = "open-mmlab/Amphion"
introduction = "Amphion is a toolkit for Audio, Music, and Speech Generation. Its purpose is to support reproducible research and help junior researchers and engineers get started in the field of audio, music, and speech generation research and development."

[sg_search.mmcv]
github_repo_id = "open-mmlab/mmcv"
introduction = "MMCV is a foundational library for computer vision research and it provides image/video processing, image and annotation visualization, image transformation, various CNN architectures and high-quality implementation of common CPU and CUDA ops"

[frontend]
# chat group assistant type, support "lark_group", "wechat_personal", "wechat_wkteam" and "none"
# for "lark_group", open https://open.feishu.cn/document/home/introduction-to-custom-app-development/self-built-application-development-process to create one
# for "wechat_personal", read ./docs/add_wechat_group_zh.md to setup gateway
# for "wkteam", see https://wkteam.cn/
type = "none"

# for "lark", it is chat group webhook url, send reply to group, for example "https://open.feishu.cn/open-apis/bot/v2/hook/xxxxxxxxxxxxxxx"
# for "lark_group", it is the url to fetch chat group message, for example "http://101.133.161.20:6666/fetch", `101.133.161.20` is your own public IPv4 addr
# for "wechat_personal", it is useless
webhook_url = "https://open.feishu.cn/open-apis/bot/v2/hook/xxxxxxxxxxxxxxx"

# when a new group chat message is received, should it be processed immediately or wait for 18 seconds in case the user hasn't finished speaking?
# support "immediate"
message_process_policy = "immediate"

[frontend.lark_group]
# "lark_group" configuration examples, use your own app_id and secret !!!
app_id = "cli_a53a34dcb778500e"
app_secret = "2ajhg1ixSvlNm1bJkH4tJhPfTCsGGHT1"
encrypt_key = "abc"
verification_token = "def"

[frontend.wechat_personal]
# "wechat_personal" listen port
bind_port = 9527

[frontend.wechat_wkteam]
# wechat message callback server ip
callback_ip = "101.133.161.11"
callback_port = 9528

# public redis config
redis_host = "101.133.161.11"
redis_port = "6380"
redis_passwd = "hxd123"

# wkteam
account = ""
password = ""
# !!! `proxy` is a very important parameter, it is your account's location
# 1: Beijing    2: Tianjin    3: Shanghai   4: Chongqing  5: Hebei
# 6: Shanxi     7: Jiangsu    8: Zhejiang   9: Anhui      10: Fujian
# 11: Jiangxi   12: Shandong  13: Henan     14: Hubei     15: Hunan
# 16: Guangdong 17: Hainan    18: Sichuan   20: Shaanxi
# a wrong proxy would cause account deactivation !!!
proxy = -1

# save dir
dir = "wkteam"

# group IDs and introductions
# HuixiangDou related
[frontend.wechat_wkteam.43925126702]
name = "茴香豆群(大暑)"
introduction = "github https://github.com/InternLM/HuixiangDou 用户体验群"

[frontend.wechat_wkteam.44546611710]
name = "茴香豆群(立夏)"
introduction = "github https://github.com/InternLM/HuixiangDou 用户体验群"

[frontend.wechat_wkteam.38720590618]
name = "茴香豆群(惊蛰)"
introduction = "github https://github.com/InternLM/HuixiangDou 用户体验群"

[frontend.wechat_wkteam.48437885473]
name = "茴香豆群(谷雨)"
introduction = "github https://github.com/InternLM/HuixiangDou 用户体验群"

[frontend.wechat_wkteam.34744063953]
name = "茴香豆群(雨水)"
introduction = "github https://github.com/InternLM/HuixiangDou 用户体验群"

# github.com/tencent/ncnn contributors
[frontend.wechat_wkteam.18356748488]
name = "卷卷群"
introduction = "ncnn contributors group"