From 304796ec592348cdc0ccad2e034e770e36858167 Mon Sep 17 00:00:00 2001 From: hdelossantos Date: Thu, 21 Nov 2024 14:29:44 -0500 Subject: [PATCH] feat: support setting maxConcurrentChunks for Generic OpenAI embedder (#2655) * exposes `maxConcurrentChunks` parameter for the generic openai embedder through configuration. This allows setting a batch size for endpoints which don't support the default of 500 * Update the new field in the new UI; coerce the setting when read to ensure the proper type and format --------- Co-authored-by: timothycarambat --- docker/.env.example | 3 +- .../GenericOpenAiOptions/index.jsx | 44 +++++++++++++++++++ server/.env.example | 1 + server/models/systemSettings.js | 2 + .../EmbeddingEngines/genericOpenAi/index.js | 19 ++++++-- server/utils/helpers/updateENV.js | 4 ++ 6 files changed, 69 insertions(+), 4 deletions(-) diff --git a/docker/.env.example b/docker/.env.example index 0580465965..2b3d10629d 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -164,6 +164,7 @@ GID='1000' # EMBEDDING_MODEL_MAX_CHUNK_LENGTH=8192 # EMBEDDING_BASE_PATH='http://127.0.0.1:4000' # GENERIC_OPEN_AI_EMBEDDING_API_KEY='sk-123abc' +# GENERIC_OPEN_AI_EMBEDDING_MAX_CONCURRENT_CHUNKS=500 ########################################### ######## Vector Database Selection ######## @@ -299,4 +300,4 @@ GID='1000' # Enable simple SSO passthrough to pre-authenticate users from a third party service. # See https://docs.anythingllm.com/configuration#simple-sso-passthrough for more information. 
-# SIMPLE_SSO_ENABLED=1 \ No newline at end of file +# SIMPLE_SSO_ENABLED=1 diff --git a/frontend/src/components/EmbeddingSelection/GenericOpenAiOptions/index.jsx b/frontend/src/components/EmbeddingSelection/GenericOpenAiOptions/index.jsx index e524a263e4..84ae4ab8b7 100644 --- a/frontend/src/components/EmbeddingSelection/GenericOpenAiOptions/index.jsx +++ b/frontend/src/components/EmbeddingSelection/GenericOpenAiOptions/index.jsx @@ -1,4 +1,8 @@ +import React, { useState } from "react"; +import { CaretDown, CaretUp } from "@phosphor-icons/react"; + export default function GenericOpenAiEmbeddingOptions({ settings }) { + const [showAdvancedControls, setShowAdvancedControls] = useState(false); return (
@@ -69,6 +73,46 @@ export default function GenericOpenAiEmbeddingOptions({ settings }) { />
+
+ +
+ ); } diff --git a/server/.env.example b/server/.env.example index 723b3a644c..ba56517a8f 100644 --- a/server/.env.example +++ b/server/.env.example @@ -154,6 +154,7 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long. # EMBEDDING_MODEL_MAX_CHUNK_LENGTH=8192 # EMBEDDING_BASE_PATH='http://127.0.0.1:4000' # GENERIC_OPEN_AI_EMBEDDING_API_KEY='sk-123abc' +# GENERIC_OPEN_AI_EMBEDDING_MAX_CONCURRENT_CHUNKS=500 ########################################### ######## Vector Database Selection ######## diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js index 9bd4a7bf1e..dd54b8e362 100644 --- a/server/models/systemSettings.js +++ b/server/models/systemSettings.js @@ -193,6 +193,8 @@ const SystemSettings = { process.env.EMBEDDING_MODEL_MAX_CHUNK_LENGTH, GenericOpenAiEmbeddingApiKey: !!process.env.GENERIC_OPEN_AI_EMBEDDING_API_KEY, + GenericOpenAiEmbeddingMaxConcurrentChunks: + process.env.GENERIC_OPEN_AI_EMBEDDING_MAX_CONCURRENT_CHUNKS || 500, // -------------------------------------------------------- // VectorDB Provider Selection Settings & Configs diff --git a/server/utils/EmbeddingEngines/genericOpenAi/index.js b/server/utils/EmbeddingEngines/genericOpenAi/index.js index d3ec307217..f26a070cc6 100644 --- a/server/utils/EmbeddingEngines/genericOpenAi/index.js +++ b/server/utils/EmbeddingEngines/genericOpenAi/index.js @@ -14,13 +14,26 @@ class GenericOpenAiEmbedder { }); this.model = process.env.EMBEDDING_MODEL_PREF ?? null; - // Limit of how many strings we can process in a single pass to stay with resource or network limits - this.maxConcurrentChunks = 500; - + // this.maxConcurrentChunks is delegated to the getter below. // Refer to your specific model and provider you use this class with to determine a valid maxChunkLength this.embeddingMaxChunkLength = 8_191; } + /** + * returns the `GENERIC_OPEN_AI_EMBEDDING_MAX_CONCURRENT_CHUNKS` env variable as a number + * or 500 if the env variable is not set or is not a number. 
+ * @returns {number} + */ + get maxConcurrentChunks() { + if (!process.env.GENERIC_OPEN_AI_EMBEDDING_MAX_CONCURRENT_CHUNKS) + return 500; + if ( + isNaN(Number(process.env.GENERIC_OPEN_AI_EMBEDDING_MAX_CONCURRENT_CHUNKS)) + ) + return 500; + return Number(process.env.GENERIC_OPEN_AI_EMBEDDING_MAX_CONCURRENT_CHUNKS); + } + async embedTextInput(textInput) { const result = await this.embedChunks( Array.isArray(textInput) ? textInput : [textInput] diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js index d547930a52..2c43846210 100644 --- a/server/utils/helpers/updateENV.js +++ b/server/utils/helpers/updateENV.js @@ -267,6 +267,10 @@ const KEY_MAPPING = { envKey: "GENERIC_OPEN_AI_EMBEDDING_API_KEY", checks: [], }, + GenericOpenAiEmbeddingMaxConcurrentChunks: { + envKey: "GENERIC_OPEN_AI_EMBEDDING_MAX_CONCURRENT_CHUNKS", + checks: [nonZero], + }, // Vector Database Selection Settings VectorDB: {