diff --git a/apps/cel/__init__.py b/apps/cel/__init__.py index 7403d07..61d3dcd 100644 --- a/apps/cel/__init__.py +++ b/apps/cel/__init__.py @@ -12,10 +12,4 @@ app.autodiscover_tasks() # Schedule Tasks -app.conf.beat_schedule = { - "check_usage_limit": { - "task": "apps.chat.tasks.check_usage_limit", - "schedule": crontab(minute="*"), - "args": (), - }, -} +app.conf.beat_schedule = {} diff --git a/apps/chat/admin.py b/apps/chat/admin.py index 79e07eb..9b4779e 100644 --- a/apps/chat/admin.py +++ b/apps/chat/admin.py @@ -74,6 +74,7 @@ class AIModelAdmin(admin.ModelAdmin): "is_enabled", "prompt_price", "completion_price", + "vision_price", ] list_filter = ["provider", "is_enabled"] diff --git a/apps/chat/client/__init__.py b/apps/chat/client/__init__.py index 5e9347f..5401f3d 100644 --- a/apps/chat/client/__init__.py +++ b/apps/chat/client/__init__.py @@ -1,19 +1,7 @@ -from apps.chat.client.claude import ClaudeClient -from apps.chat.client.gemini import GeminiClient -from apps.chat.client.hunyuan import HunYuanClient, HunYuanVisionClient -from apps.chat.client.kimi import KimiClient from apps.chat.client.midjourney import MidjourneyClient -from apps.chat.client.openai import OpenAIClient, OpenAIVisionClient -from apps.chat.client.zhipu import ZhipuClient +from apps.chat.client.openai import OpenAIClient __all__ = ( - "GeminiClient", "OpenAIClient", - "OpenAIVisionClient", - "HunYuanClient", - "HunYuanVisionClient", "MidjourneyClient", - "KimiClient", - "ClaudeClient", - "ZhipuClient", ) diff --git a/apps/chat/client/base.py b/apps/chat/client/base.py index ac1f882..1eca1ea 100644 --- a/apps/chat/client/base.py +++ b/apps/chat/client/base.py @@ -1,16 +1,23 @@ import abc +import base64 import datetime from channels.db import database_sync_to_async +from django.conf import settings from django.contrib.auth import get_user_model from django.shortcuts import get_object_or_404 from django.utils import timezone +from django.utils.translation import gettext +from httpx import Client +from openai import OpenAI from opentelemetry import trace from opentelemetry.sdk.trace import Span from opentelemetry.trace import SpanKind +from ovinc_client.core.logger import logger -from apps.chat.constants import OpenAIRole, SpanType -from apps.chat.models import AIModel, ChatLog, Message +from apps.chat.constants import MessageContentType, OpenAIRole, SpanType +from apps.chat.exceptions import FileExtractFailed, GenerateFailed +from apps.chat.models import AIModel, ChatLog, Message, MessageContent USER_MODEL = get_user_model() @@ -62,11 +69,14 @@ async def _chat(self, *args, **kwargs) -> any: raise NotImplementedError() - async def record(self, prompt_tokens: int = 0, completion_tokens: int = 0) -> None: + async def record(self, prompt_tokens: int = 0, completion_tokens: int = 0, vision_count: int = 0) -> None: if not self.log: return # calculate tokens - self.log.prompt_tokens = max(prompt_tokens, self.log.prompt_tokens) + vision_tokens = 0 + if self.model_inst.prompt_price and self.model_inst.vision_price and vision_count: + vision_tokens = vision_count * self.model_inst.vision_price / self.model_inst.prompt_price + self.log.prompt_tokens = max(prompt_tokens, self.log.prompt_tokens) + vision_tokens self.log.completion_tokens = max(completion_tokens, self.log.completion_tokens) # calculate price self.log.prompt_token_unit_price = self.model_inst.prompt_price @@ -74,7 +84,90 @@ async def record(self, prompt_tokens: int = 0, completion_tokens: int = 0) -> No # save self.log.finished_at = 
int(timezone.now().timestamp() * 1000) await database_sync_to_async(self.log.save)() + # calculate usage + from apps.chat.tasks import calculate_usage_limit # pylint: disable=C0415 + + await database_sync_to_async(calculate_usage_limit)(log_id=self.log.id) # pylint: disable=E1120 def start_span(self, name: str, kind: SpanKind, **kwargs) -> Span: span: Span = self.tracer.start_as_current_span(name=name, kind=kind, **kwargs) return span + + +class OpenAIBaseClient(BaseClient, abc.ABC): + """ + OpenAI Client + """ + + @property + @abc.abstractmethod + def api_key(self) -> str: + raise NotImplementedError() + + @property + @abc.abstractmethod + def base_url(self) -> str: + raise NotImplementedError() + + @property + def http_client(self) -> Client | None: + return None + + @property + def timeout(self) -> int: + return settings.OPENAI_CHAT_TIMEOUT + + @property + def api_model(self) -> str: + return self.model + + async def _chat(self, *args, **kwargs) -> any: + image_count = self.format_message() + client = OpenAI(api_key=self.api_key, base_url=self.base_url, http_client=self.http_client) + try: + with self.start_span(SpanType.API, SpanKind.CLIENT): + response = client.chat.completions.create( + model=self.api_model, + messages=[message.model_dump(exclude_none=True) for message in self.messages], + temperature=self.temperature, + top_p=self.top_p, + stream=True, + timeout=self.timeout, + stream_options={"include_usage": True}, + extra_headers={"HTTP-Referer": settings.PROJECT_URL, "X-Title": settings.PROJECT_NAME}, + ) + except Exception as err: # pylint: disable=W0718 + logger.exception("[GenerateContentFailed] %s", err) + yield str(GenerateFailed()) + response = [] + prompt_tokens = 0 + completion_tokens = 0 + with self.start_span(SpanType.CHUNK, SpanKind.SERVER): + for chunk in response: + if chunk.choices: + yield chunk.choices[0].delta.content or "" + if chunk.usage: + prompt_tokens = chunk.usage.prompt_tokens + completion_tokens = chunk.usage.completion_tokens + await self.record(prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, vision_count=image_count) + + def format_message(self) -> int: + image_count = 0 + for message in self.messages: + message: Message + if not isinstance(message.content, list): + continue + for content in message.content: + content: MessageContent + if content.type != MessageContentType.IMAGE_URL or not content.image_url: + continue + content.image_url.url = self.convert_url_to_base64(content.image_url.url) + image_count += 1 + return image_count + + def convert_url_to_base64(self, url: str) -> str: + with Client(http2=True) as client: + response = client.get(url) + if response.status_code == 200: + return f"data:image/webp;base64,{base64.b64encode(response.content).decode()}" + raise FileExtractFailed(gettext("Parse Image To Base64 Failed")) diff --git a/apps/chat/client/claude.py b/apps/chat/client/claude.py deleted file mode 100644 index badf83e..0000000 --- a/apps/chat/client/claude.py +++ /dev/null @@ -1,100 +0,0 @@ -import base64 - -from anthropic import Anthropic -from anthropic.types import ( - RawContentBlockDeltaEvent, - RawMessageDeltaEvent, - RawMessageStartEvent, -) -from django.conf import settings -from django.utils.translation import gettext -from httpx import Client -from opentelemetry.trace import SpanKind -from ovinc_client.core.logger import logger - -from apps.chat.client.base import BaseClient -from apps.chat.constants import ( - ClaudeMessageType, - MessageContentType, - OpenAIRole, - SpanType, -) -from apps.chat.exceptions 
import FileExtractFailed, GenerateFailed -from apps.chat.models import Message, MessageContent, MessageContentSource - - -class ClaudeClient(BaseClient): - """ - Claude Client - """ - - async def _chat(self, *args, **kwargs) -> any: - client = Anthropic( - api_key=settings.ANTHROPIC_API_KEY, - base_url=settings.ANTHROPIC_BASE_URL, - http_client=Client(proxy=settings.OPENAI_HTTP_PROXY_URL) if settings.OPENAI_HTTP_PROXY_URL else None, - ) - system, messages = self.parse_messages() - try: - with self.start_span(SpanType.API, SpanKind.CLIENT): - response = client.messages.create( - max_tokens=settings.ANTHROPIC_MAX_TOKENS, - system=system, - messages=[message.model_dump(exclude_none=True) for message in messages], - model=self.model, - temperature=self.temperature, - top_p=self.top_p, - stream=True, - timeout=settings.ANTHROPIC_TIMEOUT, - ) - except Exception as err: # pylint: disable=W0718 - logger.exception("[GenerateContentFailed] %s", err) - yield str(GenerateFailed()) - response = [] - prompt_tokens = 0 - completion_tokens = 0 - with self.start_span(SpanType.CHUNK, SpanKind.SERVER): - # pylint: disable=E1133 - for chunk in response: - match chunk.type: - case ClaudeMessageType.MESSAGE_START: - chunk: RawMessageStartEvent - prompt_tokens = chunk.message.usage.input_tokens - self.log.chat_id = chunk.message.id - case ClaudeMessageType.MESSAGE_DELTA: - chunk: RawMessageDeltaEvent - completion_tokens = chunk.usage.output_tokens - case ClaudeMessageType.CONTENT_BLOCK_DELTA: - chunk: RawContentBlockDeltaEvent - yield chunk.delta.text - await self.record(prompt_tokens=prompt_tokens, completion_tokens=completion_tokens) - - def parse_messages(self) -> (str, list[Message]): - # parse image - for message in self.messages: - if not isinstance(message.content, list): - continue - for content in message.content: - content: MessageContent - if content.type != MessageContentType.IMAGE_URL or not content.image_url: - continue - content.type = MessageContentType.IMAGE - content.source = MessageContentSource( - type="base64", - media_type="image/webp", - data=self.convert_url_to_base64(content.image_url.url), - ) - content.image_url = None - # parse system - system = "" - if self.messages[0].role == OpenAIRole.SYSTEM: - system = self.messages[0].content - return system, self.messages[1:] - return system, self.messages - - def convert_url_to_base64(self, url: str) -> str: - with Client(http2=True) as client: - response = client.get(url) - if response.status_code == 200: - return base64.b64encode(response.content).decode() - raise FileExtractFailed(gettext("Parse Image To Base64 Failed")) diff --git a/apps/chat/client/gemini.py b/apps/chat/client/gemini.py deleted file mode 100644 index 35e69a8..0000000 --- a/apps/chat/client/gemini.py +++ /dev/null @@ -1,73 +0,0 @@ -# pylint: disable=R0801 - -import base64 - -from django.conf import settings -from django.utils.translation import gettext -from httpx import Client -from openai import OpenAI -from opentelemetry.trace import SpanKind -from ovinc_client.core.logger import logger - -from apps.chat.client.openai import BaseClient -from apps.chat.constants import MessageContentType, SpanType -from apps.chat.exceptions import FileExtractFailed, GenerateFailed -from apps.chat.models import Message, MessageContent - - -class GeminiClient(BaseClient): - """ - Gemini Client - """ - - def format_message(self) -> None: - for message in self.messages: - message: Message - if not isinstance(message.content, list): - continue - for content in message.content: - content: 
MessageContent - if content.type != MessageContentType.IMAGE_URL or not content.image_url: - continue - content.image_url.url = self.convert_url_to_base64(content.image_url.url) - - async def _chat(self, *args, **kwargs) -> any: - self.format_message() - client = OpenAI( - api_key=settings.GEMINI_API_KEY, - base_url=settings.GEMINI_API_URL, - http_client=Client(proxy=settings.OPENAI_HTTP_PROXY_URL) if settings.OPENAI_HTTP_PROXY_URL else None, - ) - try: - with self.start_span(SpanType.API, SpanKind.CLIENT): - response = client.chat.completions.create( - model=self.model, - messages=[message.model_dump(exclude_none=True) for message in self.messages], - temperature=self.temperature, - top_p=self.top_p, - stream=True, - timeout=settings.GEMINI_CHAT_TIMEOUT, - stream_options={"include_usage": True}, - ) - except Exception as err: # pylint: disable=W0718 - logger.exception("[GenerateContentFailed] %s", err) - yield str(GenerateFailed()) - response = [] - prompt_tokens = 0 - completion_tokens = 0 - with self.start_span(SpanType.CHUNK, SpanKind.SERVER): - # pylint: disable=E1133 - for chunk in response: - if chunk.choices: - yield chunk.choices[0].delta.content or "" - if chunk.usage: - prompt_tokens = chunk.usage.prompt_tokens - completion_tokens = chunk.usage.completion_tokens - await self.record(prompt_tokens=prompt_tokens, completion_tokens=completion_tokens) - - def convert_url_to_base64(self, url: str) -> str: - with Client(http2=True) as client: - response = client.get(url) - if response.status_code == 200: - return f"data:image/webp;base64,{base64.b64encode(response.content).decode()}" - raise FileExtractFailed(gettext("Parse Image To Base64 Failed")) diff --git a/apps/chat/client/hunyuan.py b/apps/chat/client/hunyuan.py deleted file mode 100644 index c8c6655..0000000 --- a/apps/chat/client/hunyuan.py +++ /dev/null @@ -1,177 +0,0 @@ -# pylint: disable=R0801 - -import asyncio -import json -import time -import uuid - -import httpx -from django.conf import settings -from opentelemetry.trace import SpanKind -from ovinc_client.core.logger import logger -from rest_framework import status -from tencentcloud.common import credential -from tencentcloud.common.exception import TencentCloudSDKException -from tencentcloud.hunyuan.v20230901 import hunyuan_client, models - -from apps.chat.client.base import BaseClient -from apps.chat.constants import ( - HUNYUAN_SUCCESS_DETAIL, - HunyuanJobStatusCode, - HunyuanLogoControl, - HunyuanReviseControl, - MessageContentType, - SpanType, -) -from apps.chat.exceptions import GenerateFailed, LoadImageFailed -from apps.chat.models import HunYuanChuck, MessageContent -from apps.cos.client import COSClient -from apps.cos.utils import TCloudUrlParser - - -class HunYuanClient(BaseClient): - """ - Hun Yuan - """ - - async def _chat(self, *args, **kwargs) -> any: - # call hunyuan api - try: - with self.start_span(SpanType.API, SpanKind.CLIENT): - response = self.call_api() - except Exception as err: # pylint: disable=W0718 - logger.exception("[GenerateContentFailed] %s", err) - yield str(GenerateFailed()) - response = [] - # init - prompt_tokens = 0 - completion_tokens = 0 - # explain completion - with self.start_span(SpanType.CHUNK, SpanKind.SERVER): - for chunk in response: - chunk = HunYuanChuck(**json.loads(chunk["data"])) - self.log.chat_id = chunk.Id - prompt_tokens = chunk.Usage.PromptTokens - completion_tokens = chunk.Usage.CompletionTokens - if chunk.Choices: - yield chunk.Choices[0].Delta.Content - await self.record(prompt_tokens=prompt_tokens, 
completion_tokens=completion_tokens) - - def call_api(self) -> models.ChatCompletionsResponse: - client = hunyuan_client.HunyuanClient( - credential.Credential(settings.QCLOUD_SECRET_ID, settings.QCLOUD_SECRET_KEY), "" - ) - req = models.ChatCompletionsRequest() - params = { - "Model": self.model, - "Messages": [{"Role": message.role, **self.parse_content(message.content)} for message in self.messages], - "TopP": self.top_p, - "Temperature": self.temperature, - "Stream": True, - "EnableEnhancement": False, - } - req.from_json_string(json.dumps(params)) - return client.ChatCompletions(req) - - def parse_content(self, content: str | list[MessageContent]) -> dict: - if isinstance(content, list): - new_content = [] - for content_item in content: - if content_item.type == MessageContentType.TEXT: - new_content.append( - { - "Type": MessageContentType.TEXT, - "Text": content_item.text, - } - ) - elif content_item.type == MessageContentType.IMAGE_URL: - new_content.append( - { - "Type": MessageContentType.IMAGE_URL, - "ImageUrl": { - "Url": content_item.image_url.url, - }, - } - ) - return {"Contents": new_content} - return {"Content": content} - - -class HunYuanVisionClient(BaseClient): - """ - Hunyuan Vision Client - """ - - async def _chat(self, *args, **kwargs) -> any: - # init client - client = hunyuan_client.HunyuanClient( - credential.Credential(settings.QCLOUD_SECRET_ID, settings.QCLOUD_SECRET_KEY), - settings.HUNYUAN_IMAGE_API_REGION, - ) - # call hunyuan api - try: - # submit job - with self.start_span(SpanType.API, SpanKind.CLIENT): - response = self.call_api(client) - # wait for result - start_time = time.time() - while time.time() - start_time < settings.HUNYUAN_IMAGE_JOB_TIMEOUT: - with self.start_span(SpanType.FETCH, SpanKind.CLIENT): - result = self.call_result_api(client, response.JobId) - # if not finished, continue loop - if result.JobStatusCode in [HunyuanJobStatusCode.RUNNING, HunyuanJobStatusCode.WAITING]: - yield "" - await asyncio.sleep(settings.HUNYUAN_IMAGE_JOB_INTERVAL) - continue - # if finished, check result - if result.JobStatusCode == HunyuanJobStatusCode.FINISHED: - await self.record(completion_tokens=len(result.ResultImage)) - # all failed - if all(i != HUNYUAN_SUCCESS_DETAIL for i in result.ResultDetails): - yield str(GenerateFailed()) - break - with self.start_span(SpanType.CHUNK, SpanKind.SERVER): - # record - self.log.chat_id = response.JobId - # use first success picture - message_index = min( - index - for (index, detail) in enumerate(result.ResultDetails) - if detail == HUNYUAN_SUCCESS_DETAIL - ) - message_url = result.ResultImage[message_index] - httpx_client = httpx.AsyncClient(http2=True) - image_resp = await httpx_client.get(message_url) - await httpx_client.aclose() - if image_resp.status_code != status.HTTP_200_OK: - raise LoadImageFailed() - url = await COSClient().put_object( - file=image_resp.content, - file_name=f"{uuid.uuid4().hex}.{image_resp.headers['content-type'].split('/')[-1]}", - ) - yield f"![output]({TCloudUrlParser(url).url})" - else: - yield f"{result.JobErrorMsg}({result.JobErrorCode})" - await self.record() - break - except TencentCloudSDKException as err: - logger.exception("[GenerateContentFailed] %s", err) - yield str(err.message) - except Exception as err: # pylint: disable=W0718 - logger.exception("[GenerateContentFailed] %s", err) - yield str(GenerateFailed()) - - def call_api(self, client: hunyuan_client) -> models.SubmitHunyuanImageJobResponse: - req = models.SubmitHunyuanImageJobRequest() - params = { - "Prompt": 
self.messages[-1].content, - "LogoAdd": HunyuanLogoControl.REMOVE, - "Revise": HunyuanReviseControl.ENABLED, - } - req.from_json_string(json.dumps(params)) - return client.SubmitHunyuanImageJob(req) - - def call_result_api(self, client: hunyuan_client, job_id: str) -> models.QueryHunyuanImageJobResponse: - req = models.QueryHunyuanImageJobRequest() - req.from_json_string(json.dumps({"JobId": job_id})) - return client.QueryHunyuanImageJob(req) diff --git a/apps/chat/client/kimi.py b/apps/chat/client/kimi.py deleted file mode 100644 index 72e08fb..0000000 --- a/apps/chat/client/kimi.py +++ /dev/null @@ -1,46 +0,0 @@ -# pylint: disable=R0801 - - -from django.conf import settings -from openai import OpenAI -from opentelemetry.trace import SpanKind -from ovinc_client.core.logger import logger - -from apps.chat.client.base import BaseClient -from apps.chat.constants import SpanType -from apps.chat.exceptions import GenerateFailed - - -class KimiClient(BaseClient): - """ - Kimi Client - """ - - async def _chat(self, *args, **kwargs) -> any: - client = OpenAI(api_key=settings.KIMI_API_KEY, base_url=settings.KIMI_API_BASE_URL) - try: - with self.start_span(SpanType.API, SpanKind.CLIENT): - response = client.chat.completions.create( - model=self.model, - messages=[message.model_dump(exclude_none=True) for message in self.messages], - temperature=self.temperature, - top_p=self.top_p, - stream=True, - timeout=settings.KIMI_CHAT_TIMEOUT, - ) - except Exception as err: # pylint: disable=W0718 - logger.exception("[GenerateContentFailed] %s", err) - yield str(GenerateFailed()) - response = [] - prompt_tokens = 0 - completion_tokens = 0 - with self.start_span(SpanType.CHUNK, SpanKind.SERVER): - # pylint: disable=E1133 - for chunk in response: - self.log.chat_id = chunk.id - usage = chunk.choices[0].model_extra.get("usage") or {} - if usage: - prompt_tokens = usage.get("prompt_tokens", prompt_tokens) - completion_tokens = usage.get("completion_tokens", completion_tokens) - yield chunk.choices[0].delta.content or "" - await self.record(prompt_tokens=prompt_tokens, completion_tokens=completion_tokens) diff --git a/apps/chat/client/midjourney.py b/apps/chat/client/midjourney.py index a4e91a0..ed633f1 100644 --- a/apps/chat/client/midjourney.py +++ b/apps/chat/client/midjourney.py @@ -68,5 +68,6 @@ async def _chat(self, *args, **kwargs) -> any: except Exception as err: # pylint: disable=W0718 logger.exception("[GenerateContentFailed] %s", err) yield str(GenerateFailed()) + await self.record() finally: await client.aclose() diff --git a/apps/chat/client/openai.py b/apps/chat/client/openai.py index 3ea54d2..94e16b5 100644 --- a/apps/chat/client/openai.py +++ b/apps/chat/client/openai.py @@ -1,111 +1,32 @@ -# pylint: disable=R0801 +from httpx import Client -import abc -import uuid -from urllib.parse import urlparse +from apps.chat.client.base import OpenAIBaseClient -from django.conf import settings -from httpx import AsyncClient, Client -from openai import OpenAI -from opentelemetry.trace import SpanKind -from ovinc_client.core.logger import logger -from rest_framework import status -from apps.chat.client.base import BaseClient -from apps.chat.constants import SpanType -from apps.chat.exceptions import GenerateFailed, LoadImageFailed -from apps.cos.client import COSClient -from apps.cos.utils import TCloudUrlParser - - -class OpenAIMixin(abc.ABC): - """ - OpenAI Mixin - """ - - model_settings: dict | None - - # pylint: disable=R0913,R0917 - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) 
- self.client = OpenAI( - api_key=self.model_settings.get("api_key", settings.OPENAI_API_KEY), - base_url=self.model_settings.get("base_url", settings.OPENAI_API_BASE), - http_client=Client(proxy=settings.OPENAI_HTTP_PROXY_URL) if settings.OPENAI_HTTP_PROXY_URL else None, - ) - - -class OpenAIClient(OpenAIMixin, BaseClient): +class OpenAIClient(OpenAIBaseClient): """ OpenAI Client """ - async def _chat(self, *args, **kwargs) -> any: - try: - with self.start_span(SpanType.API, SpanKind.CLIENT): - response = self.client.chat.completions.create( - model=self.model.replace(".", ""), - messages=[message.model_dump(exclude_none=True) for message in self.messages], - temperature=self.temperature, - top_p=self.top_p, - stream=True, - timeout=settings.OPENAI_CHAT_TIMEOUT, - stream_options={"include_usage": True}, - ) - except Exception as err: # pylint: disable=W0718 - logger.exception("[GenerateContentFailed] %s", err) - yield str(GenerateFailed()) - response = [] - content = "" - prompt_tokens = 0 - completion_tokens = 0 - with self.start_span(SpanType.CHUNK, SpanKind.SERVER): - # pylint: disable=E1133 - for chunk in response: - self.log.chat_id = chunk.id - if chunk.choices: - content += chunk.choices[0].delta.content or "" - yield chunk.choices[0].delta.content or "" - if chunk.usage: - prompt_tokens = chunk.usage.prompt_tokens - completion_tokens = chunk.usage.completion_tokens - await self.record(prompt_tokens=prompt_tokens, completion_tokens=completion_tokens) + @property + def api_key(self) -> str: + return self.model_settings.get("api_key") + @property + def base_url(self) -> str: + return self.model_settings.get("base_url") -class OpenAIVisionClient(OpenAIMixin, BaseClient): - """ - OpenAI Vision Client - """ + @property + def http_client(self) -> Client | None: + proxy = self.model_settings.get("proxy") + if proxy: + return Client(proxy=proxy) + return None + + @property + def timeout(self) -> int: + return self.model_settings.get("timeout", super().timeout) - async def _chat(self, *args, **kwargs) -> any: - try: - with self.start_span(SpanType.API, SpanKind.CLIENT): - # noinspection PyTypeChecker - response = self.client.images.generate( - model=self.model.replace(".", ""), - prompt=self.messages[-1].content, - n=1, - size=self.model_inst.vision_size, - quality=self.model_inst.vision_quality, - style=self.model_inst.vision_style, - ) - except Exception as err: # pylint: disable=W0718 - logger.exception("[GenerateContentFailed] %s", err) - yield str(GenerateFailed()) - return - with self.start_span(SpanType.CHUNK, SpanKind.SERVER): - # record - await self.record(completion_tokens=1) - # image - if not settings.ENABLE_IMAGE_PROXY: - yield f"![{self.messages[-1].content}]({response.data[0].url})" - httpx_client = AsyncClient(http2=True, proxy=settings.OPENAI_HTTP_PROXY_URL) - image_resp = await httpx_client.get(response.data[0].url) - await httpx_client.aclose() - if image_resp.status_code != status.HTTP_200_OK: - raise LoadImageFailed() - url = await COSClient().put_object( - file=image_resp.content, - file_name=f"{uuid.uuid4().hex}.{urlparse(response.data[0].url).path.split('.')[-1]}", - ) - yield f"![output]({TCloudUrlParser(url).url})" + @property + def api_model(self) -> str: + return self.model_settings.get("api_model", super().api_model) diff --git a/apps/chat/client/zhipu.py b/apps/chat/client/zhipu.py deleted file mode 100644 index 5eaebb4..0000000 --- a/apps/chat/client/zhipu.py +++ /dev/null @@ -1,49 +0,0 @@ -# pylint: disable=R0801 - - -from django.conf import settings -from 
openai import OpenAI -from opentelemetry.trace import SpanKind -from ovinc_client.core.logger import logger - -from apps.chat.client.base import BaseClient -from apps.chat.constants import SpanType -from apps.chat.exceptions import GenerateFailed - - -class ZhipuClient(BaseClient): - """ - Zhipu Client - """ - - async def _chat(self, *args, **kwargs) -> any: - client = OpenAI(api_key=settings.ZHIPU_API_KEY, base_url=settings.ZHIPU_API_URL) - try: - with self.start_span(SpanType.API, SpanKind.CLIENT): - response = client.chat.completions.create( - model=self.model, - messages=[message.model_dump(exclude_none=True) for message in self.messages], - temperature=self.temperature, - top_p=self.top_p, - stream=True, - timeout=settings.ZHIPU_API_TIMEOUT, - stream_options={"include_usage": True}, - ) - except Exception as err: # pylint: disable=W0718 - logger.exception("[GenerateContentFailed] %s", err) - yield str(GenerateFailed()) - response = [] - content = "" - prompt_tokens = 0 - completion_tokens = 0 - with self.start_span(SpanType.CHUNK, SpanKind.SERVER): - # pylint: disable=E1133 - for chunk in response: - self.log.chat_id = chunk.id - if chunk.choices: - content += chunk.choices[0].delta.content or "" - yield chunk.choices[0].delta.content or "" - if chunk.usage: - prompt_tokens = chunk.usage.prompt_tokens - completion_tokens = chunk.usage.completion_tokens - await self.record(prompt_tokens=prompt_tokens, completion_tokens=completion_tokens) diff --git a/apps/chat/constants.py b/apps/chat/constants.py index 10f8335..9e05ea0 100644 --- a/apps/chat/constants.py +++ b/apps/chat/constants.py @@ -43,12 +43,7 @@ class AIModelProvider(TextChoices): """ OPENAI = "openai", gettext_lazy("Open AI") - GOOGLE = "google", gettext_lazy("Google") - TENCENT = "tencent", gettext_lazy("Tencent") MIDJOURNEY = "midjourney", gettext_lazy("Midjourney") - MOONSHOT = "moonshot", gettext_lazy("Moonshot") - CLAUDE = "claude", gettext_lazy("Claude") - ZHIPU = "zhipu", gettext_lazy("Zhipu") class VisionSize(TextChoices): diff --git a/apps/chat/consumers_async.py b/apps/chat/consumers_async.py index c27c097..b5ad0a4 100644 --- a/apps/chat/consumers_async.py +++ b/apps/chat/consumers_async.py @@ -12,17 +12,7 @@ from django.shortcuts import get_object_or_404 from ovinc_client.core.logger import logger -from apps.chat.client import ( - ClaudeClient, - GeminiClient, - HunYuanClient, - HunYuanVisionClient, - KimiClient, - MidjourneyClient, - OpenAIClient, - OpenAIVisionClient, - ZhipuClient, -) +from apps.chat.client import MidjourneyClient, OpenAIClient from apps.chat.client.base import BaseClient from apps.chat.constants import WS_CLOSED_KEY, AIModelProvider from apps.chat.exceptions import UnexpectedProvider, VerifyFailed @@ -128,24 +118,10 @@ def get_model_inst(self, model: str) -> AIModel: # pylint: disable=R0911 def get_model_client(self, model: AIModel) -> Type[BaseClient]: match model.provider: - case AIModelProvider.TENCENT: - if model.is_vision: - return HunYuanVisionClient - return HunYuanClient - case AIModelProvider.GOOGLE: - return GeminiClient case AIModelProvider.OPENAI: - if model.is_vision: - return OpenAIVisionClient return OpenAIClient case AIModelProvider.MIDJOURNEY: return MidjourneyClient - case AIModelProvider.MOONSHOT: - return KimiClient - case AIModelProvider.CLAUDE: - return ClaudeClient - case AIModelProvider.ZHIPU: - return ZhipuClient case _: raise UnexpectedProvider() diff --git a/apps/chat/migrations/0021_aimodel_vision_price_alter_aimodel_provider.py 
b/apps/chat/migrations/0021_aimodel_vision_price_alter_aimodel_provider.py
new file mode 100644
index 0000000..5d3b295
--- /dev/null
+++ b/apps/chat/migrations/0021_aimodel_vision_price_alter_aimodel_provider.py
@@ -0,0 +1,32 @@
+# pylint: disable=R0801,C0103
+# Generated by Django 4.2.17 on 2024-12-18 12:09
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("chat", "0020_aimodel_support_vision_alter_aimodel_provider"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="aimodel",
+            name="vision_price",
+            field=models.DecimalField(decimal_places=10, default=0, max_digits=20, verbose_name="Vision Price"),
+        ),
+        migrations.AlterField(
+            model_name="aimodel",
+            name="provider",
+            field=models.CharField(
+                choices=[
+                    ("openai", "Open AI"),
+                    ("midjourney", "Midjourney"),
+                ],
+                db_index=True,
+                max_length=64,
+                verbose_name="Provider",
+            ),
+        ),
+    ]
diff --git a/apps/chat/models.py b/apps/chat/models.py
index cb16cf5..1266edd 100644
--- a/apps/chat/models.py
+++ b/apps/chat/models.py
@@ -142,6 +142,9 @@ class AIModel(BaseModel):
     completion_price = models.DecimalField(
         gettext_lazy("Completion Price"), max_digits=PRICE_DIGIT_NUMS, decimal_places=PRICE_DECIMAL_NUMS
     )
+    vision_price = models.DecimalField(
+        gettext_lazy("Vision Price"), max_digits=PRICE_DIGIT_NUMS, decimal_places=PRICE_DECIMAL_NUMS, default=0
+    )
     support_system_define = models.BooleanField(gettext_lazy("Support System Define"), default=True)
     support_vision = models.BooleanField(gettext_lazy("Support Vision"), default=False)
     is_vision = models.BooleanField(gettext_lazy("Is Vision"), default=False)
diff --git a/apps/chat/tasks.py b/apps/chat/tasks.py
index ccf7745..9f57383 100644
--- a/apps/chat/tasks.py
+++ b/apps/chat/tasks.py
@@ -38,8 +38,6 @@ def calculate_usage_limit(self, log_id: str):
     Calculate Model Usage Limit
     """
 
-    celery_logger.info("[CalculateUsageLimit] Start %s", self.request.id)
-
     log = get_object_or_404(ChatLog, id=log_id)
     usage = log.prompt_tokens + log.completion_tokens
     celery_logger.info(
@@ -57,8 +55,6 @@ def calculate_usage_limit(self, log_id: str):
         - (log.completion_tokens * log.completion_token_unit_price / 1000)
     )
 
-    celery_logger.info("[CalculateUsageLimit] End %s", self.request.id)
-
 
 @app.task(bind=True)
 def async_reply(self, channel_name: str, key: str):
diff --git a/entry/settings.py b/entry/settings.py
index 228075f..d2215d2 100644
--- a/entry/settings.py
+++ b/entry/settings.py
@@ -23,6 +23,10 @@
 APP_SECRET = getenv_or_raise("APP_SECRET")
 SECRET_KEY = getenv_or_raise("APP_SECRET")
 
+# Project
+PROJECT_NAME = os.getenv("PROJECT_NAME", "OVINC Chat")
+PROJECT_URL = os.getenv("PROJECT_URL", "https://github.com/OVINC-CN/ChatGPTWeb")
+
 # Hosts
 BACKEND_URL = getenv_or_raise("BACKEND_URL")
 ALLOWED_HOSTS = [getenv_or_raise("BACKEND_HOST")]
@@ -217,26 +221,13 @@
 OVINC_API_RECORD_LOG = strtobool(os.getenv("OVINC_API_RECORD_LOG", "True"))
 
 # OpenAI
-OPENAI_HTTP_PROXY_URL = os.getenv("OPENAI_HTTP_PROXY_URL")
-OPENAI_API_KEY = os.getenv("DEFAULT_OPENAI_API_KEY")
-OPENAI_API_BASE = os.getenv("DEFAULT_OPENAI_API_BASE")
 OPENAI_CHAT_TIMEOUT = int(os.getenv("OPENAI_CHAT_TIMEOUT", "60"))
 OPENAI_PRE_CHECK_TIMEOUT = int(os.getenv("OPENAI_PRE_CHECK_TIMEOUT", "600"))
 
-# Gemini
-GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "")
-GEMINI_API_URL = os.getenv("GEMINI_API_URL", "")
-GEMINI_CHAT_TIMEOUT = int(os.getenv("GEMINI_CHAT_TIMEOUT", "60"))
-
 # QCLOUD
QCLOUD_SECRET_ID = os.getenv("QCLOUD_SECRET_ID") QCLOUD_SECRET_KEY = os.getenv("QCLOUD_SECRET_KEY") -# Hunyuan -HUNYUAN_IMAGE_API_REGION = os.getenv("HUNYUAN_IMAGE_API_REGION", "ap-guangzhou") -HUNYUAN_IMAGE_JOB_INTERVAL = int(os.getenv("HUNYUAN_IMAGE_JOB_INTERVAL", "5")) -HUNYUAN_IMAGE_JOB_TIMEOUT = int(os.getenv("HUNYUAN_IMAGE_JOB_TIMEOUT", "600")) - # Captcha CAPTCHA_TCLOUD_ID = os.getenv("CAPTCHA_TCLOUD_ID", QCLOUD_SECRET_ID) CAPTCHA_TCLOUD_KEY = os.getenv("CAPTCHA_TCLOUD_KEY", QCLOUD_SECRET_KEY) @@ -268,18 +259,11 @@ QCLOUD_STS_EXPIRE_TIME = int(os.getenv("QCLOUD_STS_EXPIRE_TIME", str(60 * 10))) # Log -# this feature is removed and cannot be opened -RECORD_CHAT_CONTENT = False CHATLOG_QUERY_DAYS = int(os.getenv("CHATLOG_QUERY_DAYS", "7")) # IMAGE ENABLE_IMAGE_PROXY = strtobool(os.getenv("ENABLE_IMAGE_PROXY", "False")) -# Kimi -KIMI_API_KEY = os.getenv("KIMI_API_KEY") -KIMI_API_BASE_URL = os.getenv("KIMI_API_BASE_URL") -KIMI_CHAT_TIMEOUT = int(os.getenv("KIMI_CHAT_TIMEOUT", "60")) - # File ENABLE_FILE_UPLOAD = strtobool(os.getenv("ENABLE_FILE_UPLOAD", "False")) LOAD_FILE_TIMEOUT = int(os.getenv("LOAD_FILE_TIMEOUT", "60")) @@ -309,14 +293,3 @@ MIDJOURNEY_IMAGE_JOB_INTERVAL = int(os.getenv("MIDJOURNEY_IMAGE_JOB_INTERVAL", "5")) MIDJOURNEY_IMAGE_JOB_TIMEOUT = int(os.getenv("MIDJOURNEY_IMAGE_JOB_TIMEOUT", "600")) MIDJOURNEY_API_TIMEOUT = int(os.getenv("MIDJOURNEY_API_TIMEOUT", "60")) - -# Claude -ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "") -ANTHROPIC_BASE_URL = os.getenv("ANTHROPIC_BASE_URL", "") -ANTHROPIC_MAX_TOKENS = int(os.getenv("ANTHROPIC_MAX_TOKENS", "4096")) -ANTHROPIC_TIMEOUT = int(os.getenv("ANTHROPIC_TIMEOUT", "60")) - -# Zhipu -ZHIPU_API_KEY = os.getenv("ZHIPU_API_KEY", "") -ZHIPU_API_URL = os.getenv("ZHIPU_API_URL", "https://open.bigmodel.cn/api/paas/v4/") -ZHIPU_API_TIMEOUT = int(os.getenv("ZHIPU_API_TIMEOUT", "60")) diff --git a/requirements.txt b/requirements.txt index e1090c3..ac7cd7e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -34,9 +34,6 @@ qcloud-python-sts==3.1.6 qrcode==8.0.0 pillow==11.0.0 -# claude -anthropic==0.40.0 - # websocket websockets==14.1
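
The patch consolidates every OpenAI-compatible provider onto the new OpenAIBaseClient: a subclass only declares endpoint configuration, while streaming, usage accounting, and image-to-base64 conversion are inherited from apps/chat/client/base.py. A minimal sketch of a new provider, assuming a hypothetical ExampleClient and made-up EXAMPLE_* settings (neither is part of this patch):

    from django.conf import settings
    from httpx import Client

    from apps.chat.client.base import OpenAIBaseClient


    class ExampleClient(OpenAIBaseClient):
        """Hypothetical OpenAI-compatible provider; all names here are illustrative."""

        @property
        def api_key(self) -> str:
            return settings.EXAMPLE_API_KEY  # assumed setting, not in this patch

        @property
        def base_url(self) -> str:
            return settings.EXAMPLE_API_BASE  # assumed setting, not in this patch

        @property
        def http_client(self) -> Client | None:
            # Optional proxy, analogous to the removed OPENAI_HTTP_PROXY_URL handling
            proxy = getattr(settings, "EXAMPLE_HTTP_PROXY_URL", None)
            return Client(proxy=proxy) if proxy else None

Vision billing in record() folds image inputs into prompt-token units at the ratio vision_price / prompt_price:

    vision_tokens = vision_count * vision_price / prompt_price

so a single prompt_token_unit_price covers both text and images in one ChatLog row.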