diff --git a/README.md b/README.md index 4d423ac0a..23fe26cb4 100644 --- a/README.md +++ b/README.md @@ -37,12 +37,12 @@ await inference.textToImage({ This is a collection of JS libraries to interact with the Hugging Face API, with TS types included. -- [@huggingface/inference](packages/inference/README.md): Use the Inference API to make calls to 100,000+ Machine Learning models, or your own [inference endpoints](https://hf.co/docs/inference-endpoints/)! +- [@huggingface/inference](packages/inference/README.md): Use Inference Endpoints to make calls to 100,000+ Machine Learning models - [@huggingface/hub](packages/hub/README.md): Interact with huggingface.co to create or delete repos and commit / download files - [@huggingface/agents](packages/agents/README.md): Interact with HF models through a natural language interface -With more to come, like `@huggingface/endpoints` to manage your HF Endpoints! +With more to come, like `@huggingface/endpoints` to manage your dedicated Inference Endpoints! We use modern features to avoid polyfills and dependencies, so the libraries will only work on modern browsers / Node.js >= 18 / Bun / Deno. 
@@ -128,7 +128,7 @@ await inference.imageToText({ model: 'nlpconnect/vit-gpt2-image-captioning', }) -// Using your own inference endpoint: https://hf.co/docs/inference-endpoints/ +// Using your own dedicated inference endpoint: https://hf.co/docs/inference-endpoints/ const gpt2 = inference.endpoint('https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/gpt2'); const { generated_text } = await gpt2.textGeneration({inputs: 'The answer to the universe is'}); ``` diff --git a/docs/_toctree.yml b/docs/_toctree.yml index 9eca25a75..e051e87d6 100644 --- a/docs/_toctree.yml +++ b/docs/_toctree.yml @@ -4,9 +4,9 @@ isExpanded: true sections: - local: inference/README - title: Use the Inference API + title: Use Inference Endpoints - local: inference/modules - title: API Reference + title: API reference - title: "@huggingface/hub" isExpanded: true sections: diff --git a/packages/agents/README.md b/packages/agents/README.md index 2b7aa0c8f..64bf02729 100644 --- a/packages/agents/README.md +++ b/packages/agents/README.md @@ -1,6 +1,6 @@ # 🤗 Hugging Face Agents.js -A way to call Hugging Face models and inference APIs from natural language, using an LLM. +A way to call Hugging Face models and Inference Endpoints from natural language, using an LLM. ## Install @@ -25,7 +25,7 @@ Check out the [full documentation](https://huggingface.co/docs/huggingface.js/ag ## Usage -Agents.js leverages LLMs hosted as Inference APIs on HF, so you need to create an account and generate an [access token](https://huggingface.co/settings/tokens). +Agents.js leverages LLMs hosted as Inference Endpoints on HF, so you need to create an account and generate an [access token](https://huggingface.co/settings/tokens). 
```ts import { HfAgent } from "@huggingface/agents"; diff --git a/packages/inference/README.md b/packages/inference/README.md index 352c80b99..c16b992ca 100644 --- a/packages/inference/README.md +++ b/packages/inference/README.md @@ -1,10 +1,11 @@ -# 🤗 Hugging Face Inference API +# 🤗 Hugging Face Inference Endpoints -A Typescript powered wrapper for the Hugging Face Inference API. Learn more about the Inference API at [Hugging Face](https://huggingface.co/docs/api-inference/index). It also works with [Inference Endpoints](https://huggingface.co/docs/inference-endpoints/index). +A Typescript powered wrapper for the Hugging Face Inference Endpoints API. Learn more about Inference Endpoints at [Hugging Face](https://huggingface.co/inference-endpoints). +It works with both [serverless](https://huggingface.co/docs/api-inference/index) and [dedicated](https://huggingface.co/docs/inference-endpoints/index) Endpoints. Check out the [full documentation](https://huggingface.co/docs/huggingface.js/inference/README). -You can also try out a live [interactive notebook](https://observablehq.com/@huggingface/hello-huggingface-js-inference), see some demos on [hf.co/huggingfacejs](https://huggingface.co/huggingfacejs), or watch a [Scrimba tutorial that explains how the Inference API works](https://scrimba.com/scrim/cod8248f5adfd6e129582c523). +You can also try out a live [interactive notebook](https://observablehq.com/@huggingface/hello-huggingface-js-inference), see some demos on [hf.co/huggingfacejs](https://huggingface.co/huggingfacejs), or watch a [Scrimba tutorial that explains how Inference Endpoints works](https://scrimba.com/scrim/cod8248f5adfd6e129582c523). 
## Getting Started diff --git a/packages/inference/package.json b/packages/inference/package.json index 30d958300..c0915eda9 100644 --- a/packages/inference/package.json +++ b/packages/inference/package.json @@ -4,7 +4,7 @@ "packageManager": "pnpm@8.10.5", "license": "MIT", "author": "Tim Mikeladze ", - "description": "Typescript wrapper for the Hugging Face Inference API", + "description": "Typescript wrapper for the Hugging Face Inference Endpoints API", "repository": { "type": "git", "url": "https://github.com/huggingface/huggingface.js.git" @@ -55,4 +55,4 @@ "@types/node": "18.13.0" }, "resolutions": {} -} +} \ No newline at end of file diff --git a/packages/inference/src/lib/getDefaultTask.ts b/packages/inference/src/lib/getDefaultTask.ts index acd7bda33..3149998d6 100644 --- a/packages/inference/src/lib/getDefaultTask.ts +++ b/packages/inference/src/lib/getDefaultTask.ts @@ -2,7 +2,7 @@ import { isUrl } from "./isUrl"; /** * We want to make calls to the huggingface hub the least possible, eg if - * someone is calling the inference API 1000 times per second, we don't want + * someone is calling Inference Endpoints 1000 times per second, we don't want * to make 1000 calls to the hub to get the task name. 
*/ const taskCache = new Map(); diff --git a/packages/inference/src/tasks/custom/request.ts b/packages/inference/src/tasks/custom/request.ts index eef997b7b..c794ee290 100644 --- a/packages/inference/src/tasks/custom/request.ts +++ b/packages/inference/src/tasks/custom/request.ts @@ -2,7 +2,7 @@ import type { InferenceTask, Options, RequestArgs } from "../../types"; import { makeRequestOptions } from "../../lib/makeRequestOptions"; /** - * Primitive to make custom calls to the inference API + * Primitive to make custom calls to Inference Endpoints */ export async function request( args: RequestArgs, diff --git a/packages/inference/src/types.ts b/packages/inference/src/types.ts index 28ad52fb5..377736ae6 100644 --- a/packages/inference/src/types.ts +++ b/packages/inference/src/types.ts @@ -6,7 +6,7 @@ export interface Options { */ retry_on_error?: boolean; /** - * (Default: true). Boolean. There is a cache layer on the inference API to speedup requests we have already seen. Most models can use those results as is as models are deterministic (meaning the results will be the same anyway). However if you use a non deterministic model, you can set this parameter to prevent the caching mechanism from being used resulting in a real new query. + * (Default: true). Boolean. There is a cache layer on Serverless Inference Endpoints to speedup requests we have already seen. Most models can use those results as is as models are deterministic (meaning the results will be the same anyway). However if you use a non deterministic model, you can set this parameter to prevent the caching mechanism from being used resulting in a real new query. */ use_cache?: boolean; /** @@ -47,7 +47,7 @@ export interface BaseArgs { */ accessToken?: string; /** - * The model to use. Can be a full URL for HF inference endpoints. + * The model to use. Can be a full URL for a dedicated inference endpoint. * * If not specified, will call huggingface.co/api/tasks to get the default model for the task. 
*/ diff --git a/packages/tasks/src/library-to-tasks.ts b/packages/tasks/src/library-to-tasks.ts index 5110eab6f..98e7c7f55 100644 --- a/packages/tasks/src/library-to-tasks.ts +++ b/packages/tasks/src/library-to-tasks.ts @@ -3,7 +3,7 @@ import type { PipelineType } from "./pipelines"; /** * Mapping from library name (excluding Transformers) to its supported tasks. - * Inference API should be disabled for all other (library, task) pairs beyond this mapping. + * Serverless Inference Endpoints should be disabled for all other (library, task) pairs beyond this mapping. * As an exception, we assume Transformers supports all inference tasks. * This mapping is generated automatically by "python-api-export-tasks" action in huggingface/api-inference-community repo upon merge. * Ref: https://github.com/huggingface/api-inference-community/pull/158 diff --git a/packages/tasks/src/model-data.ts b/packages/tasks/src/model-data.ts index c0ff45d92..9b3f76d72 100644 --- a/packages/tasks/src/model-data.ts +++ b/packages/tasks/src/model-data.ts @@ -78,7 +78,7 @@ export interface ModelData { */ widgetData?: WidgetExample[] | undefined; /** - * Parameters that will be used by the widget when calling Inference API + * Parameters that will be used by the widget when calling Inference Endpoints (serverless) * https://huggingface.co/docs/api-inference/detailed_parameters * * can be set in the model card metadata (under `inference/parameters`) @@ -89,10 +89,10 @@ export interface ModelData { */ cardData?: { inference?: - | boolean - | { - parameters?: Record; - }; + | boolean + | { + parameters?: Record; + }; base_model?: string | string[]; }; /** diff --git a/packages/tasks/src/pipelines.ts b/packages/tasks/src/pipelines.ts index 91748fe2e..f11a8a8db 100644 --- a/packages/tasks/src/pipelines.ts +++ b/packages/tasks/src/pipelines.ts @@ -62,7 +62,7 @@ export interface PipelineData { /// This type is used in multiple places in the Hugging Face /// ecosystem: /// - To determine which widget to 
show. -/// - To determine which endpoint of Inference API to use. +/// - To determine which endpoint of Inference Endpoints to use. /// - As filters at the left of models and datasets page. /// /// Note that this is sensitive to order. diff --git a/packages/tasks/src/tasks/audio-classification/about.md b/packages/tasks/src/tasks/audio-classification/about.md index f135ea5cf..7772acd60 100644 --- a/packages/tasks/src/tasks/audio-classification/about.md +++ b/packages/tasks/src/tasks/audio-classification/about.md @@ -26,7 +26,7 @@ Datasets such as VoxLingua107 allow anyone to train language identification mode ### Emotion recognition -Emotion recognition is self explanatory. In addition to trying the widgets, you can use the Inference API to perform audio classification. Here is a simple example that uses a [HuBERT](https://huggingface.co/superb/hubert-large-superb-er) model fine-tuned for this task. +Emotion recognition is self explanatory. In addition to trying the widgets, you can use Inference Endpoints to perform audio classification. Here is a simple example that uses a [HuBERT](https://huggingface.co/superb/hubert-large-superb-er) model fine-tuned for this task. 
```python import json diff --git a/packages/tasks/src/tasks/audio-to-audio/about.md b/packages/tasks/src/tasks/audio-to-audio/about.md index d63de3a18..2822a7499 100644 --- a/packages/tasks/src/tasks/audio-to-audio/about.md +++ b/packages/tasks/src/tasks/audio-to-audio/about.md @@ -12,7 +12,7 @@ model = SpectralMaskEnhancement.from_hparams( model.enhance_file("file.wav") ``` -Alternatively, you can use the [Inference API](https://huggingface.co/inference-api) to solve this task +Alternatively, you can use [Inference Endpoints](https://huggingface.co/inference-endpoints) to solve this task ```python import json diff --git a/packages/tasks/src/tasks/automatic-speech-recognition/about.md b/packages/tasks/src/tasks/automatic-speech-recognition/about.md index 7873d9c6d..4a92f3b51 100644 --- a/packages/tasks/src/tasks/automatic-speech-recognition/about.md +++ b/packages/tasks/src/tasks/automatic-speech-recognition/about.md @@ -18,7 +18,7 @@ The use of Multilingual ASR has become popular, the idea of maintaining just a s ## Inference -The Hub contains over [~9,000 ASR models](https://huggingface.co/models?pipeline_tag=automatic-speech-recognition&sort=downloads) that you can use right away by trying out the widgets directly in the browser or calling the models as a service using the Inference API. Here is a simple code snippet to do exactly this: +The Hub contains over [~9,000 ASR models](https://huggingface.co/models?pipeline_tag=automatic-speech-recognition&sort=downloads) that you can use right away by trying out the widgets directly in the browser or calling the models as a service using Inference Endpoints. 
Here is a simple code snippet to do exactly this: ```python import json diff --git a/packages/tasks/src/tasks/sentence-similarity/about.md b/packages/tasks/src/tasks/sentence-similarity/about.md index ee536235d..c8cd91213 100644 --- a/packages/tasks/src/tasks/sentence-similarity/about.md +++ b/packages/tasks/src/tasks/sentence-similarity/about.md @@ -8,7 +8,7 @@ You can extract information from documents using Sentence Similarity models. The The [Sentence Transformers](https://www.sbert.net/) library is very powerful for calculating embeddings of sentences, paragraphs, and entire documents. An embedding is just a vector representation of a text and is useful for finding how similar two texts are. -You can find and use [hundreds of Sentence Transformers](https://huggingface.co/models?library=sentence-transformers&sort=downloads) models from the Hub by directly using the library, playing with the widgets in the browser or using the Inference API. +You can find and use [hundreds of Sentence Transformers](https://huggingface.co/models?library=sentence-transformers&sort=downloads) models from the Hub by directly using the library, playing with the widgets in the browser or using Inference Endpoints. ## Task Variants @@ -16,7 +16,7 @@ You can find and use [hundreds of Sentence Transformers](https://huggingface.co/ Passage Ranking is the task of ranking documents based on their relevance to a given query. The task is evaluated on Mean Reciprocal Rank. These models take one query and multiple documents and return ranked documents according to the relevancy to the query. 📄 -You can infer with Passage Ranking models using the [Inference API](https://huggingface.co/inference-api). The Passage Ranking model inputs are a query for which we look for relevancy in the documents and the documents we want to search. The model will return scores according to the relevancy of these documents for the query. 
+You can infer with Passage Ranking models using [Inference Endpoints](https://huggingface.co/inference-endpoints). The Passage Ranking model inputs are a query for which we look for relevancy in the documents and the documents we want to search. The model will return scores according to the relevancy of these documents for the query. ```python import json diff --git a/packages/tasks/src/tasks/tabular-classification/about.md b/packages/tasks/src/tasks/tabular-classification/about.md index 9af38bcee..d46a48976 100644 --- a/packages/tasks/src/tasks/tabular-classification/about.md +++ b/packages/tasks/src/tasks/tabular-classification/about.md @@ -19,7 +19,7 @@ Tabular classification models can be used in predicting customer churn in teleco You can use [skops](https://skops.readthedocs.io/) for model hosting and inference on the Hugging Face Hub. This library is built to improve production workflows of various libraries that are used to train tabular models, including [sklearn](https://scikit-learn.org/stable/) and [xgboost](https://xgboost.readthedocs.io/en/stable/). Using `skops` you can: -- Easily use inference API, +- Easily use Inference Endpoints, - Build neat UIs with one line of code, - Programmatically create model cards, - Securely serialize your scikit-learn model. (See limitations of using pickle [here](https://huggingface.co/docs/hub/security-pickle).) diff --git a/packages/tasks/src/tasks/tabular-regression/about.md b/packages/tasks/src/tasks/tabular-regression/about.md index eb8861779..53c7b6599 100644 --- a/packages/tasks/src/tasks/tabular-regression/about.md +++ b/packages/tasks/src/tasks/tabular-regression/about.md @@ -30,7 +30,7 @@ model.fit(X, y) You can use [skops](https://skops.readthedocs.io/) for model hosting and inference on the Hugging Face Hub. 
This library is built to improve production workflows of various libraries that are used to train tabular models, including [sklearn](https://scikit-learn.org/stable/) and [xgboost](https://xgboost.readthedocs.io/en/stable/). Using `skops` you can: -- Easily use inference API, +- Easily use Inference Endpoints, - Build neat UIs with one line of code, - Programmatically create model cards, - Securely serialize your models. (See limitations of using pickle [here](https://huggingface.co/docs/hub/security-pickle).) diff --git a/packages/tasks/src/tasks/text-to-speech/about.md b/packages/tasks/src/tasks/text-to-speech/about.md index 22638b0d6..9fa52cf86 100644 --- a/packages/tasks/src/tasks/text-to-speech/about.md +++ b/packages/tasks/src/tasks/text-to-speech/about.md @@ -10,9 +10,9 @@ TTS models are used to create voice assistants on smart devices. These models ar TTS models are widely used in airport and public transportation announcement systems to convert the announcement of a given text into speech. -## Inference API +## Inference Endpoints -The Hub contains over [1500 TTS models](https://huggingface.co/models?pipeline_tag=text-to-speech&sort=downloads) that you can use right away by trying out the widgets directly in the browser or calling the models as a service using the Inference API. Here is a simple code snippet to get you started: +The Hub contains over [1500 TTS models](https://huggingface.co/models?pipeline_tag=text-to-speech&sort=downloads) that you can use right away by trying out the widgets directly in the browser or calling the models as a service using Inference Endpoints. 
Here is a simple code snippet to get you started: ```python import json diff --git a/packages/widgets/src/hooks.server.ts b/packages/widgets/src/hooks.server.ts index 48d8f51ab..55569ce6c 100644 --- a/packages/widgets/src/hooks.server.ts +++ b/packages/widgets/src/hooks.server.ts @@ -7,65 +7,65 @@ import { sequence } from "@sveltejs/kit/hooks"; const handleSSO = env.OAUTH_CLIENT_ID && env.OAUTH_CLIENT_SECRET ? SvelteKitAuth({ - // Should be fine as long as your reverse proxy is configured to only accept traffic with the correct host header - trustHost: true, - /** - * SvelteKit has built-in CSRF protection, so we can skip the check - */ - skipCSRFCheck: skipCSRFCheck, - cookies: { - sessionToken: { - name: "session_token", - options: { - httpOnly: true, - sameSite: "lax", - secure: true, - path: "/", - maxAge: 3600, // The OAuth token's lifetime is 3600 seconds - }, + // Should be fine as long as your reverse proxy is configured to only accept traffic with the correct host header + trustHost: true, + /** + * SvelteKit has built-in CSRF protection, so we can skip the check + */ + skipCSRFCheck: skipCSRFCheck, + cookies: { + sessionToken: { + name: "session_token", + options: { + httpOnly: true, + sameSite: "lax", + secure: true, + path: "/", + maxAge: 3600, // The OAuth token's lifetime is 3600 seconds }, }, - providers: [ - { - name: "Hugging Face", - id: "huggingface", - type: "oidc", - clientId: env.OAUTH_CLIENT_ID, - clientSecret: env.OAUTH_CLIENT_SECRET, - issuer: "https://huggingface.co", - wellKnown: "https://huggingface.co/.well-known/openid-configuration", - /** Add "inference-api" scope and remove "email" scope */ - authorization: { params: { scope: "openid profile inference-api" } }, - checks: ["state" as never, "pkce" as never], - }, - ], - secret: env.OAUTH_CLIENT_SECRET, - /** - * Get the access_token without an account in DB, to make calls to the inference API - */ - callbacks: { - jwt({ token, account, profile }) { - return { - ...token, - /** - * 
account & profile are undefined beyond the first login, in those - * cases `token.access_token` and `token.username` are defined - */ - ...(account && { access_token: account.access_token }), - ...(profile && { username: profile.preferred_username }), - }; - }, - session({ session, token }) { - return { - ...session, - access_token: token.access_token, - user: Object.assign({}, session.user, { - username: token.username, - }), - }; - }, + }, + providers: [ + { + name: "Hugging Face", + id: "huggingface", + type: "oidc", + clientId: env.OAUTH_CLIENT_ID, + clientSecret: env.OAUTH_CLIENT_SECRET, + issuer: "https://huggingface.co", + wellKnown: "https://huggingface.co/.well-known/openid-configuration", + /** Add "inference-api" scope and remove "email" scope */ + authorization: { params: { scope: "openid profile inference-api" } }, + checks: ["state" as never, "pkce" as never], + }, + ], + secret: env.OAUTH_CLIENT_SECRET, + /** + * Get the access_token without an account in DB, to make calls to Inference Endpoints + */ + callbacks: { + jwt({ token, account, profile }) { + return { + ...token, + /** + * account & profile are undefined beyond the first login, in those + * cases `token.access_token` and `token.username` are defined + */ + ...(account && { access_token: account.access_token }), + ...(profile && { username: profile.preferred_username }), + }; + }, + session({ session, token }) { + return { + ...session, + access_token: token.access_token, + user: Object.assign({}, session.user, { + username: token.username, + }), + }; }, - }) + }, + }) : null; const handleGlobal: Handle = async ({ event, resolve }) => { diff --git a/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetHeader/WidgetHeader.svelte b/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetHeader/WidgetHeader.svelte index f85364dc1..2d9e18376 100644 --- a/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetHeader/WidgetHeader.svelte +++ 
b/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetHeader/WidgetHeader.svelte @@ -53,7 +53,7 @@
{#if !isDisabled} - Inference API + Inference Endpoints (serverless) {:else} Inference Examples {/if} diff --git a/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetInfo/WidgetInfo.svelte b/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetInfo/WidgetInfo.svelte index fe7b379ea..e83f3682a 100644 --- a/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetInfo/WidgetInfo.svelte +++ b/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetInfo/WidgetInfo.svelte @@ -17,18 +17,18 @@ $: modelTooBig = $modelLoadStates[model.id]?.state === "TooBig"; const state = { - [LoadState.Loadable]: "This model can be loaded on the Inference API on-demand.", - [LoadState.Loaded]: "This model is currently loaded and running on the Inference API.", + [LoadState.Loadable]: "This model can be loaded on Inference Endpoints (serverless).", + [LoadState.Loaded]: "This model is currently loaded and running on Inference Endpoints (serverless).", [LoadState.TooBig]: - "Model is too large to load onto the free Inference API. To try the model, launch it on Inference Endpoints instead.", - [LoadState.Error]: "⚠️ This model could not be loaded by the inference API. ⚠️", + "Model is too large to load onto Inference Endpoints (serverless). To try the model, launch it on Inference Endpoints (dedicated) instead.", + [LoadState.Error]: "⚠️ This model could not be loaded on Inference Endpoints (serverless). ⚠️", } as const; const azureState = { [LoadState.Loadable]: "This model can be loaded loaded on AzureML Managed Endpoint", [LoadState.Loaded]: "This model is loaded and running on AzureML Managed Endpoint", [LoadState.TooBig]: - "Model is too large to load onto the free Inference API. To try the model, launch it on Inference Endpoints instead.", + "Model is too large to load onto Inference Endpoints (serverless). 
To try the model, launch it on Inference Endpoints (dedicated) instead.", [LoadState.Error]: "⚠️ This model could not be loaded.", } as const; @@ -62,9 +62,10 @@ {:else if (model.inference === InferenceDisplayability.Yes || model.pipeline_tag === "reinforcement-learning") && !modelTooBig} {@html getStatusReport($modelLoadStates[model.id], state)} {:else if model.inference === InferenceDisplayability.ExplicitOptOut} - Inference API has been turned off for this model. + Inference Endpoints (serverless) has been turned off for this model. {:else if model.inference === InferenceDisplayability.CustomCode} - Inference API does not yet support model repos that contain custom code.Inference Endpoints (serverless) does not yet support model repos that contain custom code. {:else if model.inference === InferenceDisplayability.LibraryNotDetected} @@ -82,21 +83,21 @@ {:else if model.inference === InferenceDisplayability.PipelineLibraryPairNotSupported} - Inference API does not yet support {model.library_name} models for this pipeline type. + Inference Endpoints (serverless) does not yet support {model.library_name} models for this pipeline type. {:else if modelTooBig} - Model is too large to load onto the free Inference API. To try the model, launch it on Inference EndpointsInference Endpoints (dedicated) instead. {:else} - Inference API is disabled for an unknown reason. Please open a + Inference Endpoints (serverless) is disabled for an unknown reason. Please open a Discussion in the Community tab. {/if} diff --git a/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetState/WidgetState.svelte b/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetState/WidgetState.svelte index 94be11f7b..f50a37914 100644 --- a/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetState/WidgetState.svelte +++ b/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetState/WidgetState.svelte @@ -5,13 +5,13 @@
- This model is currently loaded and running on the Inference API. + This model is currently loaded and running on Inference Endpoints (serverless).
- ⚠️ This model could not be loaded by the inference API. ⚠️ + ⚠️ This model could not be loaded in Inference Endpoints (serverless). ⚠️
- This model can be loaded on the Inference API on-demand. + This model can be loaded in Inference Endpoints (serverless).
diff --git a/packages/widgets/src/lib/components/InferenceWidget/shared/helpers.ts b/packages/widgets/src/lib/components/InferenceWidget/shared/helpers.ts index 3a92e55e0..76b1acd73 100644 --- a/packages/widgets/src/lib/components/InferenceWidget/shared/helpers.ts +++ b/packages/widgets/src/lib/components/InferenceWidget/shared/helpers.ts @@ -84,7 +84,7 @@ export async function callInferenceApi( requestBody: Record, apiToken = "", outputParsingFn: (x: unknown) => T, - waitForModel = false, // If true, the server will only respond once the model has been loaded on the inference API, + waitForModel = false, // If true, the server will only respond once the model has been loaded on Inference Endpoints (serverless) includeCredentials = false, isOnLoadCall = false, // If true, the server will try to answer from cache and not do anything if not useCache = true @@ -184,7 +184,7 @@ export async function getModelLoadInfo( } } -// Extend Inference API requestBody with user supplied Inference API parameters +// Extend requestBody with user supplied parameters for Inference Endpoints (serverless) export function addInferenceParameters(requestBody: Record, model: ModelData): void { const inference = model?.cardData?.inference; if (typeof inference === "object") {