From c83cc3e889eda1c670af5b2b47f615c057013e8c Mon Sep 17 00:00:00 2001
From: Julien Chaumond
Date: Fri, 17 Jan 2025 18:01:42 +0100
Subject: [PATCH] Putting the `provider` arg more front'n'center (and other tweaks) (#1114)

---
 README.md                    | 31 ++++++++++++++-----------------
 packages/inference/README.md | 17 +++++++++++------
 2 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/README.md b/README.md
index 4874c33a1..910c6b008 100644
--- a/README.md
+++ b/README.md
@@ -27,7 +27,7 @@ await uploadFile({
 	}
 });
 
-// Use HF Inference API
+// Use HF Inference API, or external Inference Providers!
 
 await inference.chatCompletion({
 	model: "meta-llama/Llama-3.1-8B-Instruct",
 	messages: [
 		{ role: "user", content: "Hello, nice to meet you!" }
 	],
 	max_tokens: 512,
 	temperature: 0.5,
+	provider: "sambanova", // or together, fal-ai, replicate, …
 });
 
 await inference.textToImage({
@@ -146,16 +147,16 @@ for await (const chunk of inference.chatCompletionStream({
 
 /// Using a third-party provider:
 await inference.chatCompletion({
-  model: "meta-llama/Llama-3.1-8B-Instruct",
-  messages: [{ role: "user", content: "Hello, nice to meet you!" }],
-  max_tokens: 512,
-  provider: "sambanova"
+	model: "meta-llama/Llama-3.1-8B-Instruct",
+	messages: [{ role: "user", content: "Hello, nice to meet you!" }],
+	max_tokens: 512,
+	provider: "sambanova", // or together, fal-ai, replicate, …
 })
 
 await inference.textToImage({
-  model: "black-forest-labs/FLUX.1-dev",
-  inputs: "a picture of a green bird",
-  provider: "together"
+	model: "black-forest-labs/FLUX.1-dev",
+	inputs: "a picture of a green bird",
+	provider: "fal-ai",
 })
 
@@ -169,14 +170,10 @@ await inference.translation({
 	},
 });
 
-await inference.textToImage({
-	model: 'black-forest-labs/FLUX.1-dev',
-	inputs: 'a picture of a green bird',
-})
-
+// pass multimodal files or URLs as inputs
 await inference.imageToText({
+	model: 'nlpconnect/vit-gpt2-image-captioning',
 	data: await (await fetch('https://picsum.photos/300/300')).blob(),
-	model: 'nlpconnect/vit-gpt2-image-captioning',
 })
 
 // Using your own dedicated inference endpoint: https://hf.co/docs/inference-endpoints/
@@ -188,9 +185,9 @@ const llamaEndpoint = inference.endpoint(
 	"https://api-inference.huggingface.co/models/meta-llama/Llama-3.1-8B-Instruct"
 );
 const out = await llamaEndpoint.chatCompletion({
-  model: "meta-llama/Llama-3.1-8B-Instruct",
-  messages: [{ role: "user", content: "Hello, nice to meet you!" }],
-  max_tokens: 512,
+	model: "meta-llama/Llama-3.1-8B-Instruct",
+	messages: [{ role: "user", content: "Hello, nice to meet you!" }],
+	max_tokens: 512,
 });
 console.log(out.choices[0].message);
 ```
diff --git a/packages/inference/README.md b/packages/inference/README.md
index 90ff49893..4cc2e6881 100644
--- a/packages/inference/README.md
+++ b/packages/inference/README.md
@@ -42,15 +42,15 @@ const hf = new HfInference('your access token')
 Your access token should be kept private. If you need to protect it in front-end applications, we suggest setting up a proxy server that stores the access token.
 
-### Requesting third-party inference providers
+### Third-party inference providers
 
-You can request inference from third-party providers with the inference client.
+You can send inference requests to third-party providers with the inference client.
 
 Currently, we support the following providers: [Fal.ai](https://fal.ai), [Replicate](https://replicate.com), [Together](https://together.xyz) and [Sambanova](https://sambanova.ai).
 
-To make request to a third-party provider, you have to pass the `provider` parameter to the inference function. Make sure your request is authenticated with an access token.
+To send requests to a third-party provider, you have to pass the `provider` parameter to the inference function. Make sure your request is authenticated with an access token.
 
 ```ts
-const accessToken = "hf_..."; // Either a HF access token, or an API key from the 3rd party provider (Replicate in this example)
+const accessToken = "hf_..."; // Either a HF access token, or an API key from the third-party provider (Replicate in this example)
 
 const client = new HfInference(accessToken);
 await client.textToImage({
@@ -63,14 +63,19 @@ await client.textToImage({
 
 When authenticated with a Hugging Face access token, the request is routed through https://huggingface.co. When authenticated with a third-party provider key, the request is made directly against that provider's inference API.
 
-Only a subset of models are supported when requesting 3rd party providers. You can check the list of supported models per pipeline tasks here:
+Only a subset of models are supported when requesting third-party providers. You can check the list of supported models per pipeline task here:
 - [Fal.ai supported models](./src/providers/fal-ai.ts)
 - [Replicate supported models](./src/providers/replicate.ts)
 - [Sambanova supported models](./src/providers/sambanova.ts)
 - [Together supported models](./src/providers/together.ts)
 - [HF Inference API (serverless)](https://huggingface.co/models?inference=warm&sort=trending)
 
-#### Tree-shaking
+❗**Important note:** To be compatible, the third-party API must adhere to the "standard" API shape we expect on HF model pages for each pipeline task.
+This is not an issue for LLMs, as everyone has converged on the OpenAI API anyway, but it can be trickier for other tasks like "text-to-image" or "automatic-speech-recognition", where no standard API exists. Let us know if any help is needed or if we can make things easier for you!
+
+👋**Want to add another provider?** Get in touch if you'd like to add support for another Inference provider, and/or request it on https://huggingface.co/spaces/huggingface/HuggingDiscussions/discussions/49
+
+### Tree-shaking
 
 You can import the functions you need directly from the module instead of using the `HfInference` class.
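For context, the tree-shaking section retitled in the last hunk boils down to importing the task functions directly from the package root. A minimal sketch of that import style, assuming the standalone exports accept the same arguments (including `accessToken` and the new `provider`) as the `HfInference` methods shown above; the model and provider names are illustrative:

```ts
import { chatCompletion } from "@huggingface/inference";

// Importing the task function directly, instead of constructing an
// `HfInference` client, lets bundlers tree-shake the tasks you don't use.
const out = await chatCompletion({
	accessToken: "hf_...", // HF token, or a third-party provider key
	model: "meta-llama/Llama-3.1-8B-Instruct",
	messages: [{ role: "user", content: "Hello, nice to meet you!" }],
	max_tokens: 512,
	provider: "sambanova", // assumption: the `provider` arg is accepted here as well
});

console.log(out.choices[0].message);
```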