From 88b8af195ee8d28c4bf038a13083e1cc44e0c374 Mon Sep 17 00:00:00 2001 From: osanseviero Date: Mon, 19 Aug 2024 12:20:23 +0200 Subject: [PATCH 01/38] Add draft of docs structure --- docs/api-inference/_toctree.yml | 15 +++++++++++++++ docs/api-inference/getting-started.md | 1 + docs/api-inference/index.md | 1 + docs/api-inference/overview.md | 7 +++++++ docs/api-inference/task_parameters.md | 16 ++++++++++++++++ docs/api-inference/tasks/fill-mask.md | 1 + 6 files changed, 41 insertions(+) create mode 100644 docs/api-inference/_toctree.yml create mode 100644 docs/api-inference/getting-started.md create mode 100644 docs/api-inference/index.md create mode 100644 docs/api-inference/overview.md create mode 100644 docs/api-inference/task_parameters.md create mode 100644 docs/api-inference/tasks/fill-mask.md diff --git a/docs/api-inference/_toctree.yml b/docs/api-inference/_toctree.yml new file mode 100644 index 000000000..0d32376dd --- /dev/null +++ b/docs/api-inference/_toctree.yml @@ -0,0 +1,15 @@ +- sections: + - local: index + title: Serverless Inference API + - local: overview + title: Overview + - local: getting-started + title: Get started + title: Get Started +- sections: + - local: task_parameters + title: Task Parameters + - sections: + - local: tasks/fill-mask + title: Fill Mask + title: Parameters \ No newline at end of file diff --git a/docs/api-inference/getting-started.md b/docs/api-inference/getting-started.md new file mode 100644 index 000000000..bad55622f --- /dev/null +++ b/docs/api-inference/getting-started.md @@ -0,0 +1 @@ +# Getting Started diff --git a/docs/api-inference/index.md b/docs/api-inference/index.md new file mode 100644 index 000000000..2d3ac96f8 --- /dev/null +++ b/docs/api-inference/index.md @@ -0,0 +1 @@ +# Serverless Inference API diff --git a/docs/api-inference/overview.md b/docs/api-inference/overview.md new file mode 100644 index 000000000..d35789b21 --- /dev/null +++ b/docs/api-inference/overview.md @@ -0,0 +1,7 @@ +# Overview + +## Main Features + +## Warm vs Cold vs Frozen models + +## Security and Compliance diff --git a/docs/api-inference/task_parameters.md b/docs/api-inference/task_parameters.md new file mode 100644 index 000000000..4e97e656e --- /dev/null +++ b/docs/api-inference/task_parameters.md @@ -0,0 +1,16 @@ +# Detailed Parameters + +Table with +- Domain +- Task +- Whether it's supported in Inference API +- Supported libraries (not sure) +- Recommended model +- Link to model specific page + + + +## Additional parameters (different page?) + +- Controling cache +- Modifying the task used by a model (Which task is used by this model?) 
\ No newline at end of file diff --git a/docs/api-inference/tasks/fill-mask.md b/docs/api-inference/tasks/fill-mask.md new file mode 100644 index 000000000..bba61811b --- /dev/null +++ b/docs/api-inference/tasks/fill-mask.md @@ -0,0 +1 @@ +## Fill Mask \ No newline at end of file From f558bdd15208d92b9443f6bb9093ea4f0b6079dc Mon Sep 17 00:00:00 2001 From: osanseviero Date: Tue, 20 Aug 2024 16:12:19 +0200 Subject: [PATCH 02/38] Add index page --- docs/api-inference/index.md | 49 +++++++++++++++++++++++++++ docs/api-inference/tasks/fill-mask.md | 7 +++- 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/docs/api-inference/index.md b/docs/api-inference/index.md index 2d3ac96f8..7d03f2ae6 100644 --- a/docs/api-inference/index.md +++ b/docs/api-inference/index.md @@ -1 +1,50 @@ # Serverless Inference API + +**Instant Access to 800,000+ ML Models for Fast Prototyping** + +Explore the most popular models for text, image, speech, and more — all with a simple API request. Build, test, and experiment without worrying about infrastructure or setup. + +--- + +## Why use the Inference API? + +The Serverless Inference API offers a fast and free way to explore thousands of models for a variety of tasks. Whether you're prototyping a new application or experimenting with ML capabilities, this API gives you instant access to high-performing models across multiple domains: + +* **Text Generation:** Including large language models and tool-calling prompts, generate and experiment with high-quality responses. +* **Image Generation:** Easily create customized images, including LoRAs for your own styles. +* **Document Embeddings:** Build search and retrieval systems with SOTA embeddings. +* **Classical AI Tasks:** Ready-to-use models for text classification, image classification, speech recognition, and more. + +TODO: add some flow chart image + +⚡ **Fast and Free to Get Started**: The Inference API is free with rate limits. For production needs, explore [Inference Endpoints](https://huggingface.co/docs/inference-endpoints/index) for dedicated resources, autoscaling, advanced security features, and more. + +--- + +## Key Benefits + +- 🚀 **Instant Prototyping:** Access powerful models without setup. +- 🎯 **Diverse Use Cases:** One API for text, image, and beyond. +- 🔧 **Developer-Friendly:** Simple requests, fast responses. + +--- + +## Contents + +The documentation is organized into two sections: + +* **Quick Overview** Learn the basics of how to use the Inference API. +* **Detailed Parameters** Dive deep into task-specific settings and parameters. + +--- + +## Looking for custom support from the Hugging Face team? + + + HuggingFace Expert Acceleration Program +
+ +## Hugging Face is trusted in production by over 10,000 companies + + + \ No newline at end of file diff --git a/docs/api-inference/tasks/fill-mask.md b/docs/api-inference/tasks/fill-mask.md index bba61811b..64260ae39 100644 --- a/docs/api-inference/tasks/fill-mask.md +++ b/docs/api-inference/tasks/fill-mask.md @@ -1 +1,6 @@ -## Fill Mask \ No newline at end of file +## Fill Mask + +Mask filling is the task of predicting the right word (token to be precise) in the middle of a sequence. + +Automated docs below + From 8b6230f5fbf5cec1a6154feaf039838010996642 Mon Sep 17 00:00:00 2001 From: osanseviero Date: Wed, 21 Aug 2024 15:09:27 +0200 Subject: [PATCH 03/38] Prepare overview and rate limits --- docs/api-inference/_toctree.yml | 7 ++-- ...{getting-started.md => getting_started.md} | 0 docs/api-inference/index.md | 2 +- docs/api-inference/overview.md | 34 +++++++++++++++++-- docs/api-inference/rate_limits.md | 11 ++++++ .../tasks/{fill-mask.md => fill_mask.md} | 0 6 files changed, 49 insertions(+), 5 deletions(-) rename docs/api-inference/{getting-started.md => getting_started.md} (100%) create mode 100644 docs/api-inference/rate_limits.md rename docs/api-inference/tasks/{fill-mask.md => fill_mask.md} (100%) diff --git a/docs/api-inference/_toctree.yml b/docs/api-inference/_toctree.yml index 0d32376dd..99c74cc49 100644 --- a/docs/api-inference/_toctree.yml +++ b/docs/api-inference/_toctree.yml @@ -3,13 +3,16 @@ title: Serverless Inference API - local: overview title: Overview - - local: getting-started + - local: getting_started + - local: rate_limits + title: Rate Limits title: Get started + title: Get Started - sections: - local: task_parameters title: Task Parameters - sections: - - local: tasks/fill-mask + - local: tasks/fill_mask title: Fill Mask title: Parameters \ No newline at end of file diff --git a/docs/api-inference/getting-started.md b/docs/api-inference/getting_started.md similarity index 100% rename from docs/api-inference/getting-started.md rename to docs/api-inference/getting_started.md diff --git a/docs/api-inference/index.md b/docs/api-inference/index.md index 7d03f2ae6..8ae6ce7a1 100644 --- a/docs/api-inference/index.md +++ b/docs/api-inference/index.md @@ -33,7 +33,7 @@ TODO: add some flow chart image The documentation is organized into two sections: -* **Quick Overview** Learn the basics of how to use the Inference API. +* **Getting Started** Learn the basics of how to use the Inference API. * **Detailed Parameters** Dive deep into task-specific settings and parameters. --- diff --git a/docs/api-inference/overview.md b/docs/api-inference/overview.md index d35789b21..cdc86ac70 100644 --- a/docs/api-inference/overview.md +++ b/docs/api-inference/overview.md @@ -2,6 +2,36 @@ ## Main Features -## Warm vs Cold vs Frozen models +* Leverage over 800,000+ models from different open-source libraries (transformers, sentence transformers, adapter transformers, diffusers, timm, etc.). +* Use models for a variety of tasks, including text generation, image generation, document embeddings, NER, summarization, image classification, and more. +* Accelerate your prototyping by using GPU-powered models. +* Run very large models that are challenging to deploy in production. +* Benefit from the built-in automatic scaling, load balancing and caching. 
-## Security and Compliance +## Eligibility + +Given the fast-paced nature of the open ML ecosystem, the Inference API allows using models that have large community interest and are actively being used(based on recent likes, downloads, and usage). Because of this, deployed models can be swapped without prior notice. + +You can find: + +* **[Warm models](https://huggingface.co/models?inference=warm&sort=trending):** models ready to be used. +* **[Cold models](https://huggingface.co/models?inference=cold&sort=trending):** models that are not loaded but can be used. +* **[Frozen models](https://huggingface.co/models?inference=frozen&sort=trending):** models that currently can't be run with the API. + +TODO: add screenshot + +## GPU vs CPU + +By default, the Inference API uses GPUs to run large models. For small models that can run well on CPU, such as small text classification and text embeddings, the API will automatically switch to CPU to save costs. + +## Inference for PRO + +In addition to thousands of public models available in the Hub, PRO and Enteprise users get free access and higher rate limits to the following models: + + +| Model | Size | Context Length | Use | +|--------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------|--------------------------------------------------------------| +| Meta Llama 3.1Instruct | [8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct), [70B](https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct) | 128k tokens | High quality multilingual chat model with large context length | +| Meta Llama 3 Instruct | [8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct), [70B](https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct) | 8k tokens | One of the best chat models | +| Llama 2 Chat | [7B](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf), [13B](https://huggingface.co/meta-llama/Llama-2-13b-chat-hf), [70B](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf) | 4k tokens | One of the best conversational models | +| Bark | [0.9B](https://huggingface.co/suno/bark) | - | Text to audio generation | diff --git a/docs/api-inference/rate_limits.md b/docs/api-inference/rate_limits.md new file mode 100644 index 000000000..c3ed7a6a9 --- /dev/null +++ b/docs/api-inference/rate_limits.md @@ -0,0 +1,11 @@ +# Rate Limits + +The Inference API has temporary rate limits based on the number of requests. These rate limits are subject to change in the future to be compute-based or token-based. + +Serverless API is not meant to be used for heavy production applications. If you need higher rate limits, using [Inference Endpoints](https://huggingface.co/docs/inference/endpoints) to have dedicated resources. 
+ +| User Tier | Rate Limit | +|---------------------|---------------------------| +| Unregistered Users | 1 request per hour | +| Signed-up Users | 300 requests per hour | +| PRO and Enterprise Users | 1000 requests per hour | \ No newline at end of file diff --git a/docs/api-inference/tasks/fill-mask.md b/docs/api-inference/tasks/fill_mask.md similarity index 100% rename from docs/api-inference/tasks/fill-mask.md rename to docs/api-inference/tasks/fill_mask.md From 6380dfe332c8b1ade8cdedf649145b7f75081056 Mon Sep 17 00:00:00 2001 From: osanseviero Date: Wed, 21 Aug 2024 15:15:24 +0200 Subject: [PATCH 04/38] Manage redirects --- docs/api-inference/_redirects.yml | 5 +++++ docs/api-inference/getting_started.md | 2 ++ docs/api-inference/index.md | 2 +- docs/api-inference/overview.md | 12 ++++++++++++ .../{task_parameters.md => parameters.md} | 2 +- 5 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 docs/api-inference/_redirects.yml rename docs/api-inference/{task_parameters.md => parameters.md} (92%) diff --git a/docs/api-inference/_redirects.yml b/docs/api-inference/_redirects.yml new file mode 100644 index 000000000..3548bcb8f --- /dev/null +++ b/docs/api-inference/_redirects.yml @@ -0,0 +1,5 @@ +quicktour: overview +detailed_parameters: parameters +parallelism: TODO +usage: getting_started +faq: overview \ No newline at end of file diff --git a/docs/api-inference/getting_started.md b/docs/api-inference/getting_started.md index bad55622f..6d668cf10 100644 --- a/docs/api-inference/getting_started.md +++ b/docs/api-inference/getting_started.md @@ -1 +1,3 @@ # Getting Started + +TODO: \ No newline at end of file diff --git a/docs/api-inference/index.md b/docs/api-inference/index.md index 8ae6ce7a1..6b5b7a744 100644 --- a/docs/api-inference/index.md +++ b/docs/api-inference/index.md @@ -34,7 +34,7 @@ TODO: add some flow chart image The documentation is organized into two sections: * **Getting Started** Learn the basics of how to use the Inference API. -* **Detailed Parameters** Dive deep into task-specific settings and parameters. +* **Parameters** Dive into task-specific settings and parameters. --- diff --git a/docs/api-inference/overview.md b/docs/api-inference/overview.md index cdc86ac70..26964c13b 100644 --- a/docs/api-inference/overview.md +++ b/docs/api-inference/overview.md @@ -35,3 +35,15 @@ In addition to thousands of public models available in the Hub, PRO and Entepris | Meta Llama 3 Instruct | [8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct), [70B](https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct) | 8k tokens | One of the best chat models | | Llama 2 Chat | [7B](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf), [13B](https://huggingface.co/meta-llama/Llama-2-13b-chat-hf), [70B](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf) | 4k tokens | One of the best conversational models | | Bark | [0.9B](https://huggingface.co/suno/bark) | - | Text to audio generation | + + +## FAQ + +### Running Private Models + +The free Serverless API is designed to run popular public models. If you have a private model, you can use the [Inference Endpoints](https://huggingface.co/docs/inference/endpoints) to deploy your model. + +### Fine-tuning Models + +To automatically finetune a model on your data, please try [AutoTrain](https://huggingface.co/autotrain). It’s a no-code solution for automatically training and deploying a model; all you have to do is upload your data! 
+ diff --git a/docs/api-inference/task_parameters.md b/docs/api-inference/parameters.md similarity index 92% rename from docs/api-inference/task_parameters.md rename to docs/api-inference/parameters.md index 4e97e656e..f4c21782d 100644 --- a/docs/api-inference/task_parameters.md +++ b/docs/api-inference/parameters.md @@ -1,4 +1,4 @@ -# Detailed Parameters +# Parameters Table with - Domain From 9df929a05cc42e20a88ddc95198e4bd41881eab5 Mon Sep 17 00:00:00 2001 From: osanseviero Date: Wed, 21 Aug 2024 15:17:33 +0200 Subject: [PATCH 05/38] Clean up --- docs/api-inference/_toctree.yml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/docs/api-inference/_toctree.yml b/docs/api-inference/_toctree.yml index 99c74cc49..cb2d54791 100644 --- a/docs/api-inference/_toctree.yml +++ b/docs/api-inference/_toctree.yml @@ -4,14 +4,13 @@ - local: overview title: Overview - local: getting_started + title: Getting Started - local: rate_limits title: Rate Limits - title: Get started - - title: Get Started + title: title - sections: - - local: task_parameters - title: Task Parameters + - local: parameters + title: Parameters - sections: - local: tasks/fill_mask title: Fill Mask From 60ad476e2e408155a43c09d6d68aaab79a5820cc Mon Sep 17 00:00:00 2001 From: Omar Sanseviero Date: Wed, 21 Aug 2024 16:49:29 +0200 Subject: [PATCH 06/38] Apply suggestions from code review Co-authored-by: Pedro Cuenca --- docs/api-inference/overview.md | 8 ++++---- docs/api-inference/rate_limits.md | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/api-inference/overview.md b/docs/api-inference/overview.md index 26964c13b..8e4aeb75a 100644 --- a/docs/api-inference/overview.md +++ b/docs/api-inference/overview.md @@ -6,11 +6,11 @@ * Use models for a variety of tasks, including text generation, image generation, document embeddings, NER, summarization, image classification, and more. * Accelerate your prototyping by using GPU-powered models. * Run very large models that are challenging to deploy in production. -* Benefit from the built-in automatic scaling, load balancing and caching. +* Production-grade platform without the hassle: built-in automatic scaling, load balancing and caching. ## Eligibility -Given the fast-paced nature of the open ML ecosystem, the Inference API allows using models that have large community interest and are actively being used(based on recent likes, downloads, and usage). Because of this, deployed models can be swapped without prior notice. +Given the fast-paced nature of the open ML ecosystem, the Inference API exposes models that have large community interest and are in active use (based on recent likes, downloads, and usage). Because of this, deployed models can be swapped without prior notice. You can find: @@ -26,7 +26,7 @@ By default, the Inference API uses GPUs to run large models. For small models th ## Inference for PRO -In addition to thousands of public models available in the Hub, PRO and Enteprise users get free access and higher rate limits to the following models: +In addition to thousands of public models available in the Hub, PRO and Enteprise users get higher rate limits and free access to the following models: | Model | Size | Context Length | Use | @@ -41,7 +41,7 @@ In addition to thousands of public models available in the Hub, PRO and Entepris ### Running Private Models -The free Serverless API is designed to run popular public models. 
If you have a private model, you can use the [Inference Endpoints](https://huggingface.co/docs/inference/endpoints) to deploy your model. +The free Serverless API is designed to run popular public models. If you have a private model, you can use [Inference Endpoints](https://huggingface.co/docs/inference/endpoints) to deploy your model. ### Fine-tuning Models diff --git a/docs/api-inference/rate_limits.md b/docs/api-inference/rate_limits.md index c3ed7a6a9..3077b2884 100644 --- a/docs/api-inference/rate_limits.md +++ b/docs/api-inference/rate_limits.md @@ -1,8 +1,8 @@ # Rate Limits -The Inference API has temporary rate limits based on the number of requests. These rate limits are subject to change in the future to be compute-based or token-based. +The Inference API has rate limits based on the number of requests. These rate limits are subject to change in the future to be compute-based or token-based. -Serverless API is not meant to be used for heavy production applications. If you need higher rate limits, using [Inference Endpoints](https://huggingface.co/docs/inference/endpoints) to have dedicated resources. +Serverless API is not meant to be used for heavy production applications. If you need higher rate limits, consider [Inference Endpoints](https://huggingface.co/docs/inference/endpoints) to have dedicated resources. | User Tier | Rate Limit | |---------------------|---------------------------| From a93f0dcd70ab720544065a1d11ec3721d776090a Mon Sep 17 00:00:00 2001 From: osanseviero Date: Wed, 21 Aug 2024 16:49:41 +0200 Subject: [PATCH 07/38] Apply suggestions from review --- docs/api-inference/_toctree.yml | 2 +- docs/api-inference/index.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/api-inference/_toctree.yml b/docs/api-inference/_toctree.yml index cb2d54791..fe580d367 100644 --- a/docs/api-inference/_toctree.yml +++ b/docs/api-inference/_toctree.yml @@ -7,7 +7,7 @@ title: Getting Started - local: rate_limits title: Rate Limits - title: title + title: Getting Started - sections: - local: parameters title: Parameters diff --git a/docs/api-inference/index.md b/docs/api-inference/index.md index 6b5b7a744..3b7839ff3 100644 --- a/docs/api-inference/index.md +++ b/docs/api-inference/index.md @@ -17,7 +17,7 @@ The Serverless Inference API offers a fast and free way to explore thousands of TODO: add some flow chart image -⚡ **Fast and Free to Get Started**: The Inference API is free with rate limits. For production needs, explore [Inference Endpoints](https://huggingface.co/docs/inference-endpoints/index) for dedicated resources, autoscaling, advanced security features, and more. +⚡ **Fast and Free to Get Started**: The Inference API is free with higher rate limits for PRO users. For production needs, explore [Inference Endpoints](https://ui.endpoints.huggingface.co/) for dedicated resources, autoscaling, advanced security features, and more. --- From f2610b7731cf1a83f6bc214a40a2c2acf2ed8c81 Mon Sep 17 00:00:00 2001 From: osanseviero Date: Fri, 23 Aug 2024 15:35:03 +0200 Subject: [PATCH 08/38] Add additional headers --- docs/api-inference/overview.md | 2 +- docs/api-inference/parameters.md | 144 ++++++++++++++++++++++++++++++- 2 files changed, 142 insertions(+), 4 deletions(-) diff --git a/docs/api-inference/overview.md b/docs/api-inference/overview.md index 8e4aeb75a..496a0215a 100644 --- a/docs/api-inference/overview.md +++ b/docs/api-inference/overview.md @@ -45,5 +45,5 @@ The free Serverless API is designed to run popular public models. 
If you have a ### Fine-tuning Models -To automatically finetune a model on your data, please try [AutoTrain](https://huggingface.co/autotrain). It’s a no-code solution for automatically training and deploying a model; all you have to do is upload your data! +To automatically finetune a model on your data, please try [AutoTrain](https://huggingface.co/autotrain). It’s a no-code solution for automatically training a model; all you have to do is upload your data. diff --git a/docs/api-inference/parameters.md b/docs/api-inference/parameters.md index f4c21782d..905420fbe 100644 --- a/docs/api-inference/parameters.md +++ b/docs/api-inference/parameters.md @@ -10,7 +10,145 @@ Table with -## Additional parameters (different page?) +## Additional Options -- Controling cache -- Modifying the task used by a model (Which task is used by this model?) \ No newline at end of file +### Caching + +There is a cache layer on the inference API to speed up requests when the inputs are exactly the same. Many models, such as classifiers and embedding models, can use those results as is if they are deterministic, meaning the results will be the same. Howevr, if you use a nondeterministic model, you can disable the cache mechanism from being used, resulting in a real new query. + +To do this, you can add `x-use-cache:false` to the request headers. For example + + + + +```bash +curl https://api-inference.huggingface.co/models/MODEL_ID \ + -X POST \ + -d '{"inputs": "Can you please let us know more details about your "}' \ + -H "Authorization: Bearer hf_***" \ + -H "Content-Type: application/json" \ + -H "x-use-cache: false" +``` + + + +```python +import requests + +API_URL = "https://api-inference.huggingface.co/models/MODEL_ID" +headers = { + "Authorization": "Bearer hf_***", + "Content-Type": "application/json", + "x-use-cache": "false" +} +data = { + "inputs": "Can you please let us know more details about your " +} +response = requests.post(API_URL, headers=headers, json=data) +print(response.json()) +``` + + + + +```js +import fetch from "node-fetch"; + +async function query(data) { + const response = await fetch( + "https://api-inference.huggingface.co/models/MODEL_ID", + { + method: "POST", + headers: { + Authorization: `Bearer hf_***`, + "Content-Type": "application/json", + "x-use-cache": "false" + }, + body: JSON.stringify(data), + } + ); + const result = await response.json(); + return result; +} + +query({ + inputs: "Can you please let us know more details about your " +}).then((response) => { + console.log(JSON.stringify(response, null, 2)); +}); + +``` + + + + + +### Wait for the model + +When a model is warm, it is ready to be used and you will get a response relatively quickly. However, some models are cold and need to be loaded before they can be used. In that case, you will get a 503 error. Rather than doing many requests until it's loaded, you can wait for the model to be loaded by adding `x-wait-for-model:true` to the request headers. We suggest to only use this flag to wait for the model to be loaded when you are sure that the model is cold. That means, first try the request without this flag and only if you get a 503 error, try again with this flag. 
+ + + + + +```bash +curl https://api-inference.huggingface.co/models/MODEL_ID \ + -X POST \ + -d '{"inputs": "Can you please let us know more details about your "}' \ + -H "Authorization: Bearer hf_***" \ + -H "Content-Type: application/json" \ + -H "x-wait-for-model: true" +``` + + + +```python +import requests + +API_URL = "https://api-inference.huggingface.co/models/MODEL_ID" +headers = { + "Authorization": "Bearer hf_***", + "Content-Type": "application/json", + "x-wait-for-model": "true" +} +data = { + "inputs": "Can you please let us know more details about your " +} +response = requests.post(API_URL, headers=headers, json=data) +print(response.json()) +``` + + + + +```js +import fetch from "node-fetch"; + +async function query(data) { + const response = await fetch( + "https://api-inference.huggingface.co/models/MODEL_ID", + { + method: "POST", + headers: { + Authorization: `Bearer hf_***`, + "Content-Type": "application/json", + "x-wait-for-model": "true" + }, + body: JSON.stringify(data), + } + ); + const result = await response.json(); + return result; +} + +query({ + inputs: "Can you please let us know more details about your " +}).then((response) => { + console.log(JSON.stringify(response, null, 2)); +}); + +``` + + + + \ No newline at end of file From c0bee69576bbefdbc3f6e2b06c7ef985ba5e00d4 Mon Sep 17 00:00:00 2001 From: Omar Sanseviero Date: Mon, 26 Aug 2024 16:23:32 +0200 Subject: [PATCH 09/38] Apply suggestions from code review Co-authored-by: Lucain --- docs/api-inference/_toctree.yml | 2 +- docs/api-inference/index.md | 2 +- docs/api-inference/overview.md | 4 ++-- docs/api-inference/parameters.md | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/api-inference/_toctree.yml b/docs/api-inference/_toctree.yml index fe580d367..a9e8f593c 100644 --- a/docs/api-inference/_toctree.yml +++ b/docs/api-inference/_toctree.yml @@ -14,4 +14,4 @@ - sections: - local: tasks/fill_mask title: Fill Mask - title: Parameters \ No newline at end of file + title: API Reference \ No newline at end of file diff --git a/docs/api-inference/index.md b/docs/api-inference/index.md index 3b7839ff3..eb1adf845 100644 --- a/docs/api-inference/index.md +++ b/docs/api-inference/index.md @@ -34,7 +34,7 @@ TODO: add some flow chart image The documentation is organized into two sections: * **Getting Started** Learn the basics of how to use the Inference API. -* **Parameters** Dive into task-specific settings and parameters. +* **API Reference** Dive into task-specific settings and parameters. --- diff --git a/docs/api-inference/overview.md b/docs/api-inference/overview.md index 496a0215a..f7d0301d8 100644 --- a/docs/api-inference/overview.md +++ b/docs/api-inference/overview.md @@ -26,12 +26,12 @@ By default, the Inference API uses GPUs to run large models. 
For small models th ## Inference for PRO -In addition to thousands of public models available in the Hub, PRO and Enteprise users get higher rate limits and free access to the following models: +In addition to thousands of public models available in the Hub, PRO and Enterprise users get higher rate limits and free access to the following models: | Model | Size | Context Length | Use | |--------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------|--------------------------------------------------------------| -| Meta Llama 3.1Instruct | [8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct), [70B](https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct) | 128k tokens | High quality multilingual chat model with large context length | +| Meta Llama 3.1 Instruct | [8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct), [70B](https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct) | 128k tokens | High quality multilingual chat model with large context length | | Meta Llama 3 Instruct | [8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct), [70B](https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct) | 8k tokens | One of the best chat models | | Llama 2 Chat | [7B](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf), [13B](https://huggingface.co/meta-llama/Llama-2-13b-chat-hf), [70B](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf) | 4k tokens | One of the best conversational models | | Bark | [0.9B](https://huggingface.co/suno/bark) | - | Text to audio generation | diff --git a/docs/api-inference/parameters.md b/docs/api-inference/parameters.md index 905420fbe..a89413c29 100644 --- a/docs/api-inference/parameters.md +++ b/docs/api-inference/parameters.md @@ -14,7 +14,7 @@ Table with ### Caching -There is a cache layer on the inference API to speed up requests when the inputs are exactly the same. Many models, such as classifiers and embedding models, can use those results as is if they are deterministic, meaning the results will be the same. Howevr, if you use a nondeterministic model, you can disable the cache mechanism from being used, resulting in a real new query. +There is a cache layer on the inference API to speed up requests when the inputs are exactly the same. For many models, such as classifiers and embedding models, results are deterministic meaning you can safely use the cached results. However, if you use a nondeterministic model, you might want to disable the cache mechanism resulting in a real new query. To do this, you can add `x-use-cache:false` to the request headers. 
For example From 6294514d0006012a9eadca2de00cebe5c83c9702 Mon Sep 17 00:00:00 2001 From: osanseviero Date: Mon, 26 Aug 2024 18:58:56 +0200 Subject: [PATCH 10/38] Incorporate reviewer's feedback --- docs/api-inference/_redirects.yml | 4 +-- docs/api-inference/_toctree.yml | 5 +-- docs/api-inference/index.md | 10 ++++++ .../{overview.md => supported_models.md} | 31 +++---------------- 4 files changed, 20 insertions(+), 30 deletions(-) rename docs/api-inference/{overview.md => supported_models.md} (69%) diff --git a/docs/api-inference/_redirects.yml b/docs/api-inference/_redirects.yml index 3548bcb8f..f26e94330 100644 --- a/docs/api-inference/_redirects.yml +++ b/docs/api-inference/_redirects.yml @@ -1,5 +1,5 @@ -quicktour: overview +quicktour: index detailed_parameters: parameters parallelism: TODO usage: getting_started -faq: overview \ No newline at end of file +faq: index \ No newline at end of file diff --git a/docs/api-inference/_toctree.yml b/docs/api-inference/_toctree.yml index a9e8f593c..defee864c 100644 --- a/docs/api-inference/_toctree.yml +++ b/docs/api-inference/_toctree.yml @@ -1,8 +1,8 @@ - sections: - local: index title: Serverless Inference API - - local: overview - title: Overview + - local: supported_models + title: Supported Models - local: getting_started title: Getting Started - local: rate_limits @@ -14,4 +14,5 @@ - sections: - local: tasks/fill_mask title: Fill Mask + title: Detailed Task Parameters title: API Reference \ No newline at end of file diff --git a/docs/api-inference/index.md b/docs/api-inference/index.md index eb1adf845..8b67979e3 100644 --- a/docs/api-inference/index.md +++ b/docs/api-inference/index.md @@ -29,6 +29,16 @@ TODO: add some flow chart image --- +## Main Features + +* Leverage over 800,000+ models from different open-source libraries (transformers, sentence transformers, adapter transformers, diffusers, timm, etc.). +* Use models for a variety of tasks, including text generation, image generation, document embeddings, NER, summarization, image classification, and more. +* Accelerate your prototyping by using GPU-powered models. +* Run very large models that are challenging to deploy in production. +* Production-grade platform without the hassle: built-in automatic scaling, load balancing and caching. + +--- + ## Contents The documentation is organized into two sections: diff --git a/docs/api-inference/overview.md b/docs/api-inference/supported_models.md similarity index 69% rename from docs/api-inference/overview.md rename to docs/api-inference/supported_models.md index f7d0301d8..866531a06 100644 --- a/docs/api-inference/overview.md +++ b/docs/api-inference/supported_models.md @@ -1,16 +1,6 @@ -# Overview +# Supported Models -## Main Features - -* Leverage over 800,000+ models from different open-source libraries (transformers, sentence transformers, adapter transformers, diffusers, timm, etc.). -* Use models for a variety of tasks, including text generation, image generation, document embeddings, NER, summarization, image classification, and more. -* Accelerate your prototyping by using GPU-powered models. -* Run very large models that are challenging to deploy in production. -* Production-grade platform without the hassle: built-in automatic scaling, load balancing and caching. - -## Eligibility - -Given the fast-paced nature of the open ML ecosystem, the Inference API exposes models that have large community interest and are in active use (based on recent likes, downloads, and usage). 
Because of this, deployed models can be swapped without prior notice. +Given the fast-paced nature of the open ML ecosystem, the Inference API exposes models that have large community interest and are in active use (based on recent likes, downloads, and usage). Because of this, deployed models can be swapped without prior notice. The Hugging Face stack aims to keep all the latest popular models warm and ready to use. You can find: @@ -20,13 +10,9 @@ You can find: TODO: add screenshot -## GPU vs CPU - -By default, the Inference API uses GPUs to run large models. For small models that can run well on CPU, such as small text classification and text embeddings, the API will automatically switch to CPU to save costs. - -## Inference for PRO +## What do I get with a PRO subscription? -In addition to thousands of public models available in the Hub, PRO and Enterprise users get higher rate limits and free access to the following models: +In addition to thousands of public models available in the Hub, PRO and Enterprise users get higher [rate limits](./rate_limits) and free access to the following models: | Model | Size | Context Length | Use | @@ -37,13 +23,6 @@ In addition to thousands of public models available in the Hub, PRO and Enterpri | Bark | [0.9B](https://huggingface.co/suno/bark) | - | Text to audio generation | -## FAQ - -### Running Private Models +## Running Private Models The free Serverless API is designed to run popular public models. If you have a private model, you can use [Inference Endpoints](https://huggingface.co/docs/inference/endpoints) to deploy your model. - -### Fine-tuning Models - -To automatically finetune a model on your data, please try [AutoTrain](https://huggingface.co/autotrain). It’s a no-code solution for automatically training a model; all you have to do is upload your data. - From 12ba289ae645600aa017f3da56b5c353349bcc3b Mon Sep 17 00:00:00 2001 From: Lucain Date: Tue, 27 Aug 2024 15:35:19 +0200 Subject: [PATCH 11/38] First draft for text-to-image, image-to-image + generate script (#1384) * First draft for text-to-image * add correct code snippets * Update docs/api-inference/tasks/text-to-image.md Co-authored-by: Omar Sanseviero * better table? * Generate tasks pages from script (#1386) * init project * first script to generate task pages * commit generated content * generate payload table as well * so undecisive * hey * better ? 
* Add image-to-image page * template for snippets section + few things * few things * Update scripts/api-inference/templates/specs_headers.handlebars Co-authored-by: Omar Sanseviero * Update scripts/api-inference/templates/specs_headers.handlebars Co-authored-by: Omar Sanseviero * generate * fetch inference status --------- Co-authored-by: Omar Sanseviero --- docs/api-inference/_toctree.yml | 4 + docs/api-inference/tasks/image_to_image.md | 63 ++ docs/api-inference/tasks/text_to_image.md | 116 ++++ scripts/api-inference/.gitignore | 1 + scripts/api-inference/.prettierignore | 5 + scripts/api-inference/README.md | 11 + scripts/api-inference/package.json | 26 + scripts/api-inference/pnpm-lock.yaml | 541 ++++++++++++++++++ scripts/api-inference/scripts/.gitignore | 1 + scripts/api-inference/scripts/generate.ts | 321 +++++++++++ .../templates/image_to_image.handlebars | 36 ++ .../templates/snippets_template.handlebars | 42 ++ .../templates/specs_headers.handlebars | 5 + .../templates/specs_output.handlebars | 5 + .../templates/specs_payload.handlebars | 5 + .../templates/text_to_image.handlebars | 29 + scripts/api-inference/tsconfig.json | 20 + 17 files changed, 1231 insertions(+) create mode 100644 docs/api-inference/tasks/image_to_image.md create mode 100644 docs/api-inference/tasks/text_to_image.md create mode 100644 scripts/api-inference/.gitignore create mode 100644 scripts/api-inference/.prettierignore create mode 100644 scripts/api-inference/README.md create mode 100644 scripts/api-inference/package.json create mode 100644 scripts/api-inference/pnpm-lock.yaml create mode 100644 scripts/api-inference/scripts/.gitignore create mode 100644 scripts/api-inference/scripts/generate.ts create mode 100644 scripts/api-inference/templates/image_to_image.handlebars create mode 100644 scripts/api-inference/templates/snippets_template.handlebars create mode 100644 scripts/api-inference/templates/specs_headers.handlebars create mode 100644 scripts/api-inference/templates/specs_output.handlebars create mode 100644 scripts/api-inference/templates/specs_payload.handlebars create mode 100644 scripts/api-inference/templates/text_to_image.handlebars create mode 100644 scripts/api-inference/tsconfig.json diff --git a/docs/api-inference/_toctree.yml b/docs/api-inference/_toctree.yml index defee864c..a68f3abfb 100644 --- a/docs/api-inference/_toctree.yml +++ b/docs/api-inference/_toctree.yml @@ -14,5 +14,9 @@ - sections: - local: tasks/fill_mask title: Fill Mask + - local: tasks/image_to_image + title: Image-to-image + - local: tasks/text_to_image + title: Text-to-image title: Detailed Task Parameters title: API Reference \ No newline at end of file diff --git a/docs/api-inference/tasks/image_to_image.md b/docs/api-inference/tasks/image_to_image.md new file mode 100644 index 000000000..1b5e2241e --- /dev/null +++ b/docs/api-inference/tasks/image_to_image.md @@ -0,0 +1,63 @@ +## Image-to-image + +Image-to-image is the task of transforming a source image to match the characteristics of a target image or a target image domain. +Any image manipulation and enhancement is possible with image to image models. + +Use cases heavily depend on the model and the dataset it was trained on, but some common use cases include: +- Style transfer +- Image colorization +- Image super-resolution +- Image inpainting + + + +For more details about the `image-to-image` task, check out its [dedicated page](https://huggingface.co/tasks/image-to-image)! You will find examples and related materials. 
+ + + +### Recommended models + +- [fal/AuraSR-v2](https://huggingface.co/fal/AuraSR-v2): An image-to-image model to improve image resolution. +- [keras-io/super-resolution](https://huggingface.co/keras-io/super-resolution): A model that increases the resolution of an image. +- [lambdalabs/sd-image-variations-diffusers](https://huggingface.co/lambdalabs/sd-image-variations-diffusers): A model that creates a set of variations of the input image in the style of DALL-E using Stable Diffusion. +- [mfidabel/controlnet-segment-anything](https://huggingface.co/mfidabel/controlnet-segment-anything): A model that generates images based on segments in the input image and the text prompt. +- [timbrooks/instruct-pix2pix](https://huggingface.co/timbrooks/instruct-pix2pix): A model that takes an image and an instruction to edit the image. + +This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=image-to-image&sort=trending). + +### API specification + +#### Request + +| Payload | | | +| :--- | :--- | :--- | +| **inputs** | _object, required_ | The input image data | +| **parameters** | _object, optional_ | Additional inference parameters for Image To Image | +| **        guidance_scale** | _number, optional_ | For diffusion models. A higher guidance scale value encourages the model to generate images closely linked to the text prompt at the expense of lower image quality. | +| **        negative_prompt** | _array, optional_ | One or several prompt to guide what NOT to include in image generation. | +| **        num_inference_steps** | _integer, optional_ | For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference. | +| **        target_size** | _object, optional_ | The size in pixel of the output image | +| **                width** | _integer, required_ | | +| **                height** | _integer, required_ | | + + +| Headers | | | +| :--- | :--- | :--- | +| **authorization** | _string, optional_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, optional, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, optional, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | + + +#### Response + +| Body | | +| :--- | :--- | +| **image** | The output image | + + +### Using the API + + +No snippet available for this task. 
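While no automated snippet is available for this task yet, a minimal sketch using the `huggingface_hub` Python client could look like the following (the `InferenceClient.image_to_image` helper is assumed here; the token, input file, and model choice are placeholders):

```python
from huggingface_hub import InferenceClient

client = InferenceClient(token="hf_***")  # placeholder token

# Transform a local image according to a text prompt; the result is a PIL.Image.
output = client.image_to_image(
    "input.png",
    prompt="Turn the sky into a sunset",
    model="timbrooks/instruct-pix2pix",  # placeholder pick from the models above
)
output.save("output.png")
```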
+ + diff --git a/docs/api-inference/tasks/text_to_image.md b/docs/api-inference/tasks/text_to_image.md new file mode 100644 index 000000000..810c8f68e --- /dev/null +++ b/docs/api-inference/tasks/text_to_image.md @@ -0,0 +1,116 @@ +## Text-to-image + +Generate an image based on a given text prompt. + + + +For more details about the `text-to-image` task, check out its [dedicated page](https://huggingface.co/tasks/text-to-image)! You will find examples and related materials. + + + +### Recommended models + +- [black-forest-labs/FLUX.1-dev](https://huggingface.co/black-forest-labs/FLUX.1-dev): One of the most powerful image generation models that can generate realistic outputs. +- [latent-consistency/lcm-lora-sdxl](https://huggingface.co/latent-consistency/lcm-lora-sdxl): A powerful yet fast image generation model. +- [Kwai-Kolors/Kolors](https://huggingface.co/Kwai-Kolors/Kolors): Text-to-image model for photorealistic generation. +- [stabilityai/stable-diffusion-3-medium-diffusers](https://huggingface.co/stabilityai/stable-diffusion-3-medium-diffusers): A powerful text-to-image model. + +This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=text-to-image&sort=trending). + +### API specification + +#### Request + +| Payload | | | +| :--- | :--- | :--- | +| **inputs** | _string, required_ | The input text data (sometimes called "prompt" | +| **parameters** | _object, optional_ | Additional inference parameters for Text To Image | +| **        guidance_scale** | _number, optional_ | For diffusion models. A higher guidance scale value encourages the model to generate images closely linked to the text prompt at the expense of lower image quality. | +| **        negative_prompt** | _array, optional_ | One or several prompt to guide what NOT to include in image generation. | +| **        num_inference_steps** | _integer, optional_ | For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference. | +| **        target_size** | _object, optional_ | The size in pixel of the output image | +| **                width** | _integer, required_ | | +| **                height** | _integer, required_ | | +| **        scheduler** | _string, optional_ | For diffusion models. Override the scheduler with a compatible one | + + +| Headers | | | +| :--- | :--- | :--- | +| **authorization** | _string, optional_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, optional, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, optional, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. 
Read more about model availability [here](../overview#eligibility]). | + + +#### Response + +| Body | | +| :--- | :--- | +| **image** | The generated image | + + +### Using the API + + + + + +```bash +curl https://api-inference.huggingface.co/models/black-forest-labs/FLUX.1-dev \ + -X POST \ + -d '{"inputs": "Astronaut riding a horse"}' \ + -H 'Content-Type: application/json' \ + -H "Authorization: Bearer hf_***" + +``` + + + +```py +import requests + +API_URL = "https://api-inference.huggingface.co/models/black-forest-labs/FLUX.1-dev" +headers = {"Authorization": "Bearer hf_***"} + +def query(payload): + response = requests.post(API_URL, headers=headers, json=payload) + return response.content +image_bytes = query({ + "inputs": "Astronaut riding a horse", +}) +# You can access the image with PIL.Image for example +import io +from PIL import Image +image = Image.open(io.BytesIO(image_bytes)) +``` + +To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.text_to-image). + + + +```js +async function query(data) { + const response = await fetch( + "https://api-inference.huggingface.co/models/black-forest-labs/FLUX.1-dev", + { + headers: { + Authorization: "Bearer hf_***" + "Content-Type": "application/json", + }, + method: "POST", + body: JSON.stringify(data), + } + ); + const result = await response.blob(); + return result; +} +query({"inputs": "Astronaut riding a horse"}).then((response) => { + // Use image +}); +``` + +To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#textto-image). + + + + + diff --git a/scripts/api-inference/.gitignore b/scripts/api-inference/.gitignore new file mode 100644 index 000000000..53c37a166 --- /dev/null +++ b/scripts/api-inference/.gitignore @@ -0,0 +1 @@ +dist \ No newline at end of file diff --git a/scripts/api-inference/.prettierignore b/scripts/api-inference/.prettierignore new file mode 100644 index 000000000..d4b43ae6c --- /dev/null +++ b/scripts/api-inference/.prettierignore @@ -0,0 +1,5 @@ +pnpm-lock.yaml +# In order to avoid code samples to have tabs, they don't display well on npm +README.md +dist +*.handlebars \ No newline at end of file diff --git a/scripts/api-inference/README.md b/scripts/api-inference/README.md new file mode 100644 index 000000000..67d9c79e6 --- /dev/null +++ b/scripts/api-inference/README.md @@ -0,0 +1,11 @@ +Install dependencies. + +```sh +pnpm install +``` + +Generate documentation. 
+ +```sh +pnpm run generate +``` \ No newline at end of file diff --git a/scripts/api-inference/package.json b/scripts/api-inference/package.json new file mode 100644 index 000000000..13f84e881 --- /dev/null +++ b/scripts/api-inference/package.json @@ -0,0 +1,26 @@ +{ + "name": "api-inference-generator", + "version": "1.0.0", + "description": "", + "main": "index.js", + "type": "module", + "scripts": { + "format": "prettier --write .", + "format:check": "prettier --check .", + "generate": "tsx scripts/generate.ts" + }, + "keywords": [], + "author": "", + "license": "ISC", + "dependencies": { + "@huggingface/tasks": "^0.11.11", + "@types/node": "^22.5.0", + "handlebars": "^4.7.8", + "node": "^20.17.0", + "prettier": "^3.3.3", + "ts-node": "^10.9.2", + "tsx": "^4.17.0", + "type-fest": "^4.25.0", + "typescript": "^5.5.4" + } +} diff --git a/scripts/api-inference/pnpm-lock.yaml b/scripts/api-inference/pnpm-lock.yaml new file mode 100644 index 000000000..58267667d --- /dev/null +++ b/scripts/api-inference/pnpm-lock.yaml @@ -0,0 +1,541 @@ +lockfileVersion: '9.0' + +settings: + autoInstallPeers: true + excludeLinksFromLockfile: false + +importers: + + .: + dependencies: + '@huggingface/tasks': + specifier: ^0.11.11 + version: 0.11.11 + '@types/node': + specifier: ^22.5.0 + version: 22.5.0 + handlebars: + specifier: ^4.7.8 + version: 4.7.8 + node: + specifier: ^20.17.0 + version: 20.17.0 + prettier: + specifier: ^3.3.3 + version: 3.3.3 + ts-node: + specifier: ^10.9.2 + version: 10.9.2(@types/node@22.5.0)(typescript@5.5.4) + tsx: + specifier: ^4.17.0 + version: 4.17.0 + type-fest: + specifier: ^4.25.0 + version: 4.25.0 + typescript: + specifier: ^5.5.4 + version: 5.5.4 + +packages: + + '@cspotcode/source-map-support@0.8.1': + resolution: {integrity: sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==} + engines: {node: '>=12'} + + '@esbuild/aix-ppc64@0.23.1': + resolution: {integrity: sha512-6VhYk1diRqrhBAqpJEdjASR/+WVRtfjpqKuNw11cLiaWpAT/Uu+nokB+UJnevzy/P9C/ty6AOe0dwueMrGh/iQ==} + engines: {node: '>=18'} + cpu: [ppc64] + os: [aix] + + '@esbuild/android-arm64@0.23.1': + resolution: {integrity: sha512-xw50ipykXcLstLeWH7WRdQuysJqejuAGPd30vd1i5zSyKK3WE+ijzHmLKxdiCMtH1pHz78rOg0BKSYOSB/2Khw==} + engines: {node: '>=18'} + cpu: [arm64] + os: [android] + + '@esbuild/android-arm@0.23.1': + resolution: {integrity: sha512-uz6/tEy2IFm9RYOyvKl88zdzZfwEfKZmnX9Cj1BHjeSGNuGLuMD1kR8y5bteYmwqKm1tj8m4cb/aKEorr6fHWQ==} + engines: {node: '>=18'} + cpu: [arm] + os: [android] + + '@esbuild/android-x64@0.23.1': + resolution: {integrity: sha512-nlN9B69St9BwUoB+jkyU090bru8L0NA3yFvAd7k8dNsVH8bi9a8cUAUSEcEEgTp2z3dbEDGJGfP6VUnkQnlReg==} + engines: {node: '>=18'} + cpu: [x64] + os: [android] + + '@esbuild/darwin-arm64@0.23.1': + resolution: {integrity: sha512-YsS2e3Wtgnw7Wq53XXBLcV6JhRsEq8hkfg91ESVadIrzr9wO6jJDMZnCQbHm1Guc5t/CdDiFSSfWP58FNuvT3Q==} + engines: {node: '>=18'} + cpu: [arm64] + os: [darwin] + + '@esbuild/darwin-x64@0.23.1': + resolution: {integrity: sha512-aClqdgTDVPSEGgoCS8QDG37Gu8yc9lTHNAQlsztQ6ENetKEO//b8y31MMu2ZaPbn4kVsIABzVLXYLhCGekGDqw==} + engines: {node: '>=18'} + cpu: [x64] + os: [darwin] + + '@esbuild/freebsd-arm64@0.23.1': + resolution: {integrity: sha512-h1k6yS8/pN/NHlMl5+v4XPfikhJulk4G+tKGFIOwURBSFzE8bixw1ebjluLOjfwtLqY0kewfjLSrO6tN2MgIhA==} + engines: {node: '>=18'} + cpu: [arm64] + os: [freebsd] + + '@esbuild/freebsd-x64@0.23.1': + resolution: {integrity: sha512-lK1eJeyk1ZX8UklqFd/3A60UuZ/6UVfGT2LuGo3Wp4/z7eRTRYY+0xOu2kpClP+vMTi9wKOfXi2vjUpO1Ro76g==} + 
engines: {node: '>=18'} + cpu: [x64] + os: [freebsd] + + '@esbuild/linux-arm64@0.23.1': + resolution: {integrity: sha512-/93bf2yxencYDnItMYV/v116zff6UyTjo4EtEQjUBeGiVpMmffDNUyD9UN2zV+V3LRV3/on4xdZ26NKzn6754g==} + engines: {node: '>=18'} + cpu: [arm64] + os: [linux] + + '@esbuild/linux-arm@0.23.1': + resolution: {integrity: sha512-CXXkzgn+dXAPs3WBwE+Kvnrf4WECwBdfjfeYHpMeVxWE0EceB6vhWGShs6wi0IYEqMSIzdOF1XjQ/Mkm5d7ZdQ==} + engines: {node: '>=18'} + cpu: [arm] + os: [linux] + + '@esbuild/linux-ia32@0.23.1': + resolution: {integrity: sha512-VTN4EuOHwXEkXzX5nTvVY4s7E/Krz7COC8xkftbbKRYAl96vPiUssGkeMELQMOnLOJ8k3BY1+ZY52tttZnHcXQ==} + engines: {node: '>=18'} + cpu: [ia32] + os: [linux] + + '@esbuild/linux-loong64@0.23.1': + resolution: {integrity: sha512-Vx09LzEoBa5zDnieH8LSMRToj7ir/Jeq0Gu6qJ/1GcBq9GkfoEAoXvLiW1U9J1qE/Y/Oyaq33w5p2ZWrNNHNEw==} + engines: {node: '>=18'} + cpu: [loong64] + os: [linux] + + '@esbuild/linux-mips64el@0.23.1': + resolution: {integrity: sha512-nrFzzMQ7W4WRLNUOU5dlWAqa6yVeI0P78WKGUo7lg2HShq/yx+UYkeNSE0SSfSure0SqgnsxPvmAUu/vu0E+3Q==} + engines: {node: '>=18'} + cpu: [mips64el] + os: [linux] + + '@esbuild/linux-ppc64@0.23.1': + resolution: {integrity: sha512-dKN8fgVqd0vUIjxuJI6P/9SSSe/mB9rvA98CSH2sJnlZ/OCZWO1DJvxj8jvKTfYUdGfcq2dDxoKaC6bHuTlgcw==} + engines: {node: '>=18'} + cpu: [ppc64] + os: [linux] + + '@esbuild/linux-riscv64@0.23.1': + resolution: {integrity: sha512-5AV4Pzp80fhHL83JM6LoA6pTQVWgB1HovMBsLQ9OZWLDqVY8MVobBXNSmAJi//Csh6tcY7e7Lny2Hg1tElMjIA==} + engines: {node: '>=18'} + cpu: [riscv64] + os: [linux] + + '@esbuild/linux-s390x@0.23.1': + resolution: {integrity: sha512-9ygs73tuFCe6f6m/Tb+9LtYxWR4c9yg7zjt2cYkjDbDpV/xVn+68cQxMXCjUpYwEkze2RcU/rMnfIXNRFmSoDw==} + engines: {node: '>=18'} + cpu: [s390x] + os: [linux] + + '@esbuild/linux-x64@0.23.1': + resolution: {integrity: sha512-EV6+ovTsEXCPAp58g2dD68LxoP/wK5pRvgy0J/HxPGB009omFPv3Yet0HiaqvrIrgPTBuC6wCH1LTOY91EO5hQ==} + engines: {node: '>=18'} + cpu: [x64] + os: [linux] + + '@esbuild/netbsd-x64@0.23.1': + resolution: {integrity: sha512-aevEkCNu7KlPRpYLjwmdcuNz6bDFiE7Z8XC4CPqExjTvrHugh28QzUXVOZtiYghciKUacNktqxdpymplil1beA==} + engines: {node: '>=18'} + cpu: [x64] + os: [netbsd] + + '@esbuild/openbsd-arm64@0.23.1': + resolution: {integrity: sha512-3x37szhLexNA4bXhLrCC/LImN/YtWis6WXr1VESlfVtVeoFJBRINPJ3f0a/6LV8zpikqoUg4hyXw0sFBt5Cr+Q==} + engines: {node: '>=18'} + cpu: [arm64] + os: [openbsd] + + '@esbuild/openbsd-x64@0.23.1': + resolution: {integrity: sha512-aY2gMmKmPhxfU+0EdnN+XNtGbjfQgwZj43k8G3fyrDM/UdZww6xrWxmDkuz2eCZchqVeABjV5BpildOrUbBTqA==} + engines: {node: '>=18'} + cpu: [x64] + os: [openbsd] + + '@esbuild/sunos-x64@0.23.1': + resolution: {integrity: sha512-RBRT2gqEl0IKQABT4XTj78tpk9v7ehp+mazn2HbUeZl1YMdaGAQqhapjGTCe7uw7y0frDi4gS0uHzhvpFuI1sA==} + engines: {node: '>=18'} + cpu: [x64] + os: [sunos] + + '@esbuild/win32-arm64@0.23.1': + resolution: {integrity: sha512-4O+gPR5rEBe2FpKOVyiJ7wNDPA8nGzDuJ6gN4okSA1gEOYZ67N8JPk58tkWtdtPeLz7lBnY6I5L3jdsr3S+A6A==} + engines: {node: '>=18'} + cpu: [arm64] + os: [win32] + + '@esbuild/win32-ia32@0.23.1': + resolution: {integrity: sha512-BcaL0Vn6QwCwre3Y717nVHZbAa4UBEigzFm6VdsVdT/MbZ38xoj1X9HPkZhbmaBGUD1W8vxAfffbDe8bA6AKnQ==} + engines: {node: '>=18'} + cpu: [ia32] + os: [win32] + + '@esbuild/win32-x64@0.23.1': + resolution: {integrity: sha512-BHpFFeslkWrXWyUPnbKm+xYYVYruCinGcftSBaa8zoF9hZO4BcSCFUvHVTtzpIY6YzUnYtuEhZ+C9iEXjxnasg==} + engines: {node: '>=18'} + cpu: [x64] + os: [win32] + + '@huggingface/tasks@0.11.11': + resolution: {integrity: 
sha512-YRleUv67oSqDOkcYm4pFdBeaw8I8Dh6/DYlXo02fxXj5iC/WiDi8PE1wBhAhTdASwkl/n1V4xbL69uKXwDNDGw==} + + '@jridgewell/resolve-uri@3.1.2': + resolution: {integrity: sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==} + engines: {node: '>=6.0.0'} + + '@jridgewell/sourcemap-codec@1.5.0': + resolution: {integrity: sha512-gv3ZRaISU3fjPAgNsriBRqGWQL6quFx04YMPW/zD8XMLsU32mhCCbfbO6KZFLjvYpCZ8zyDEgqsgf+PwPaM7GQ==} + + '@jridgewell/trace-mapping@0.3.9': + resolution: {integrity: sha512-3Belt6tdc8bPgAtbcmdtNJlirVoTmEb5e2gC94PnkwEW9jI6CAHUeoG85tjWP5WquqfavoMtMwiG4P926ZKKuQ==} + + '@tsconfig/node10@1.0.11': + resolution: {integrity: sha512-DcRjDCujK/kCk/cUe8Xz8ZSpm8mS3mNNpta+jGCA6USEDfktlNvm1+IuZ9eTcDbNk41BHwpHHeW+N1lKCz4zOw==} + + '@tsconfig/node12@1.0.11': + resolution: {integrity: sha512-cqefuRsh12pWyGsIoBKJA9luFu3mRxCA+ORZvA4ktLSzIuCUtWVxGIuXigEwO5/ywWFMZ2QEGKWvkZG1zDMTag==} + + '@tsconfig/node14@1.0.3': + resolution: {integrity: sha512-ysT8mhdixWK6Hw3i1V2AeRqZ5WfXg1G43mqoYlM2nc6388Fq5jcXyr5mRsqViLx/GJYdoL0bfXD8nmF+Zn/Iow==} + + '@tsconfig/node16@1.0.4': + resolution: {integrity: sha512-vxhUy4J8lyeyinH7Azl1pdd43GJhZH/tP2weN8TntQblOY+A0XbT8DJk1/oCPuOOyg/Ja757rG0CgHcWC8OfMA==} + + '@types/node@22.5.0': + resolution: {integrity: sha512-DkFrJOe+rfdHTqqMg0bSNlGlQ85hSoh2TPzZyhHsXnMtligRWpxUySiyw8FY14ITt24HVCiQPWxS3KO/QlGmWg==} + + acorn-walk@8.3.3: + resolution: {integrity: sha512-MxXdReSRhGO7VlFe1bRG/oI7/mdLV9B9JJT0N8vZOhF7gFRR5l3M8W9G8JxmKV+JC5mGqJ0QvqfSOLsCPa4nUw==} + engines: {node: '>=0.4.0'} + + acorn@8.12.1: + resolution: {integrity: sha512-tcpGyI9zbizT9JbV6oYE477V6mTlXvvi0T0G3SNIYE2apm/G5huBa1+K89VGeovbg+jycCrfhl3ADxErOuO6Jg==} + engines: {node: '>=0.4.0'} + hasBin: true + + arg@4.1.3: + resolution: {integrity: sha512-58S9QDqG0Xx27YwPSt9fJxivjYl432YCwfDMfZ+71RAqUrZef7LrKQZ3LHLOwCS4FLNBplP533Zx895SeOCHvA==} + + create-require@1.1.1: + resolution: {integrity: sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ==} + + diff@4.0.2: + resolution: {integrity: sha512-58lmxKSA4BNyLz+HHMUzlOEpg09FV+ev6ZMe3vJihgdxzgcwZ8VoEEPmALCZG9LmqfVoNMMKpttIYTVG6uDY7A==} + engines: {node: '>=0.3.1'} + + esbuild@0.23.1: + resolution: {integrity: sha512-VVNz/9Sa0bs5SELtn3f7qhJCDPCF5oMEl5cO9/SSinpE9hbPVvxbd572HH5AKiP7WD8INO53GgfDDhRjkylHEg==} + engines: {node: '>=18'} + hasBin: true + + fsevents@2.3.3: + resolution: {integrity: sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==} + engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0} + os: [darwin] + + get-tsconfig@4.7.6: + resolution: {integrity: sha512-ZAqrLlu18NbDdRaHq+AKXzAmqIUPswPWKUchfytdAjiRFnCe5ojG2bstg6mRiZabkKfCoL/e98pbBELIV/YCeA==} + + handlebars@4.7.8: + resolution: {integrity: sha512-vafaFqs8MZkRrSX7sFVUdo3ap/eNiLnb4IakshzvP56X5Nr1iGKAIqdX6tMlm6HcNRIkr6AxO5jFEoJzzpT8aQ==} + engines: {node: '>=0.4.7'} + hasBin: true + + make-error@1.3.6: + resolution: {integrity: sha512-s8UhlNe7vPKomQhC1qFelMokr/Sc3AgNbso3n74mVPA5LTZwkB9NlXf4XPamLxJE8h0gh73rM94xvwRT2CVInw==} + + minimist@1.2.8: + resolution: {integrity: sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==} + + neo-async@2.6.2: + resolution: {integrity: sha512-Yd3UES5mWCSqR+qNT93S3UoYUkqAZ9lLg8a7g9rimsWmYGK8cVToA4/sF3RrshdyV3sAGMXVUmpMYOw+dLpOuw==} + + node-bin-setup@1.1.3: + resolution: {integrity: sha512-opgw9iSCAzT2+6wJOETCpeRYAQxSopqQ2z+N6BXwIMsQQ7Zj5M8MaafQY8JMlolRR6R1UXg2WmhKp0p9lSOivg==} + + node@20.17.0: + resolution: {integrity: 
sha512-zjgqs6fjta3bWGrwCmtT42gIkupAmvdq5QerbnCgNiQHE+3HrYSXuNrTw5sxQAHG2sZGgMVCxsXQ5OXLV+dkjw==} + engines: {npm: '>=5.0.0'} + hasBin: true + + prettier@3.3.3: + resolution: {integrity: sha512-i2tDNA0O5IrMO757lfrdQZCc2jPNDVntV0m/+4whiDfWaTKfMNgR7Qz0NAeGz/nRqF4m5/6CLzbP4/liHt12Ew==} + engines: {node: '>=14'} + hasBin: true + + resolve-pkg-maps@1.0.0: + resolution: {integrity: sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==} + + source-map@0.6.1: + resolution: {integrity: sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==} + engines: {node: '>=0.10.0'} + + ts-node@10.9.2: + resolution: {integrity: sha512-f0FFpIdcHgn8zcPSbf1dRevwt047YMnaiJM3u2w2RewrB+fob/zePZcrOyQoLMMO7aBIddLcQIEK5dYjkLnGrQ==} + hasBin: true + peerDependencies: + '@swc/core': '>=1.2.50' + '@swc/wasm': '>=1.2.50' + '@types/node': '*' + typescript: '>=2.7' + peerDependenciesMeta: + '@swc/core': + optional: true + '@swc/wasm': + optional: true + + tsx@4.17.0: + resolution: {integrity: sha512-eN4mnDA5UMKDt4YZixo9tBioibaMBpoxBkD+rIPAjVmYERSG0/dWEY1CEFuV89CgASlKL499q8AhmkMnnjtOJg==} + engines: {node: '>=18.0.0'} + hasBin: true + + type-fest@4.25.0: + resolution: {integrity: sha512-bRkIGlXsnGBRBQRAY56UXBm//9qH4bmJfFvq83gSz41N282df+fjy8ofcEgc1sM8geNt5cl6mC2g9Fht1cs8Aw==} + engines: {node: '>=16'} + + typescript@5.5.4: + resolution: {integrity: sha512-Mtq29sKDAEYP7aljRgtPOpTvOfbwRWlS6dPRzwjdE+C0R4brX/GUyhHSecbHMFLNBLcJIPt9nl9yG5TZ1weH+Q==} + engines: {node: '>=14.17'} + hasBin: true + + uglify-js@3.19.2: + resolution: {integrity: sha512-S8KA6DDI47nQXJSi2ctQ629YzwOVs+bQML6DAtvy0wgNdpi+0ySpQK0g2pxBq2xfF2z3YCscu7NNA8nXT9PlIQ==} + engines: {node: '>=0.8.0'} + hasBin: true + + undici-types@6.19.8: + resolution: {integrity: sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==} + + v8-compile-cache-lib@3.0.1: + resolution: {integrity: sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg==} + + wordwrap@1.0.0: + resolution: {integrity: sha512-gvVzJFlPycKc5dZN4yPkP8w7Dc37BtP1yczEneOb4uq34pXZcvrtRTmWV8W+Ume+XCxKgbjM+nevkyFPMybd4Q==} + + yn@3.1.1: + resolution: {integrity: sha512-Ux4ygGWsu2c7isFWe8Yu1YluJmqVhxqK2cLXNQA5AcC3QfbGNpM7fu0Y8b/z16pXLnFxZYvWhd3fhBY9DLmC6Q==} + engines: {node: '>=6'} + +snapshots: + + '@cspotcode/source-map-support@0.8.1': + dependencies: + '@jridgewell/trace-mapping': 0.3.9 + + '@esbuild/aix-ppc64@0.23.1': + optional: true + + '@esbuild/android-arm64@0.23.1': + optional: true + + '@esbuild/android-arm@0.23.1': + optional: true + + '@esbuild/android-x64@0.23.1': + optional: true + + '@esbuild/darwin-arm64@0.23.1': + optional: true + + '@esbuild/darwin-x64@0.23.1': + optional: true + + '@esbuild/freebsd-arm64@0.23.1': + optional: true + + '@esbuild/freebsd-x64@0.23.1': + optional: true + + '@esbuild/linux-arm64@0.23.1': + optional: true + + '@esbuild/linux-arm@0.23.1': + optional: true + + '@esbuild/linux-ia32@0.23.1': + optional: true + + '@esbuild/linux-loong64@0.23.1': + optional: true + + '@esbuild/linux-mips64el@0.23.1': + optional: true + + '@esbuild/linux-ppc64@0.23.1': + optional: true + + '@esbuild/linux-riscv64@0.23.1': + optional: true + + '@esbuild/linux-s390x@0.23.1': + optional: true + + '@esbuild/linux-x64@0.23.1': + optional: true + + '@esbuild/netbsd-x64@0.23.1': + optional: true + + '@esbuild/openbsd-arm64@0.23.1': + optional: true + + '@esbuild/openbsd-x64@0.23.1': + optional: true + + '@esbuild/sunos-x64@0.23.1': + optional: 
true + + '@esbuild/win32-arm64@0.23.1': + optional: true + + '@esbuild/win32-ia32@0.23.1': + optional: true + + '@esbuild/win32-x64@0.23.1': + optional: true + + '@huggingface/tasks@0.11.11': {} + + '@jridgewell/resolve-uri@3.1.2': {} + + '@jridgewell/sourcemap-codec@1.5.0': {} + + '@jridgewell/trace-mapping@0.3.9': + dependencies: + '@jridgewell/resolve-uri': 3.1.2 + '@jridgewell/sourcemap-codec': 1.5.0 + + '@tsconfig/node10@1.0.11': {} + + '@tsconfig/node12@1.0.11': {} + + '@tsconfig/node14@1.0.3': {} + + '@tsconfig/node16@1.0.4': {} + + '@types/node@22.5.0': + dependencies: + undici-types: 6.19.8 + + acorn-walk@8.3.3: + dependencies: + acorn: 8.12.1 + + acorn@8.12.1: {} + + arg@4.1.3: {} + + create-require@1.1.1: {} + + diff@4.0.2: {} + + esbuild@0.23.1: + optionalDependencies: + '@esbuild/aix-ppc64': 0.23.1 + '@esbuild/android-arm': 0.23.1 + '@esbuild/android-arm64': 0.23.1 + '@esbuild/android-x64': 0.23.1 + '@esbuild/darwin-arm64': 0.23.1 + '@esbuild/darwin-x64': 0.23.1 + '@esbuild/freebsd-arm64': 0.23.1 + '@esbuild/freebsd-x64': 0.23.1 + '@esbuild/linux-arm': 0.23.1 + '@esbuild/linux-arm64': 0.23.1 + '@esbuild/linux-ia32': 0.23.1 + '@esbuild/linux-loong64': 0.23.1 + '@esbuild/linux-mips64el': 0.23.1 + '@esbuild/linux-ppc64': 0.23.1 + '@esbuild/linux-riscv64': 0.23.1 + '@esbuild/linux-s390x': 0.23.1 + '@esbuild/linux-x64': 0.23.1 + '@esbuild/netbsd-x64': 0.23.1 + '@esbuild/openbsd-arm64': 0.23.1 + '@esbuild/openbsd-x64': 0.23.1 + '@esbuild/sunos-x64': 0.23.1 + '@esbuild/win32-arm64': 0.23.1 + '@esbuild/win32-ia32': 0.23.1 + '@esbuild/win32-x64': 0.23.1 + + fsevents@2.3.3: + optional: true + + get-tsconfig@4.7.6: + dependencies: + resolve-pkg-maps: 1.0.0 + + handlebars@4.7.8: + dependencies: + minimist: 1.2.8 + neo-async: 2.6.2 + source-map: 0.6.1 + wordwrap: 1.0.0 + optionalDependencies: + uglify-js: 3.19.2 + + make-error@1.3.6: {} + + minimist@1.2.8: {} + + neo-async@2.6.2: {} + + node-bin-setup@1.1.3: {} + + node@20.17.0: + dependencies: + node-bin-setup: 1.1.3 + + prettier@3.3.3: {} + + resolve-pkg-maps@1.0.0: {} + + source-map@0.6.1: {} + + ts-node@10.9.2(@types/node@22.5.0)(typescript@5.5.4): + dependencies: + '@cspotcode/source-map-support': 0.8.1 + '@tsconfig/node10': 1.0.11 + '@tsconfig/node12': 1.0.11 + '@tsconfig/node14': 1.0.3 + '@tsconfig/node16': 1.0.4 + '@types/node': 22.5.0 + acorn: 8.12.1 + acorn-walk: 8.3.3 + arg: 4.1.3 + create-require: 1.1.1 + diff: 4.0.2 + make-error: 1.3.6 + typescript: 5.5.4 + v8-compile-cache-lib: 3.0.1 + yn: 3.1.1 + + tsx@4.17.0: + dependencies: + esbuild: 0.23.1 + get-tsconfig: 4.7.6 + optionalDependencies: + fsevents: 2.3.3 + + type-fest@4.25.0: {} + + typescript@5.5.4: {} + + uglify-js@3.19.2: + optional: true + + undici-types@6.19.8: {} + + v8-compile-cache-lib@3.0.1: {} + + wordwrap@1.0.0: {} + + yn@3.1.1: {} diff --git a/scripts/api-inference/scripts/.gitignore b/scripts/api-inference/scripts/.gitignore new file mode 100644 index 000000000..4c43fe68f --- /dev/null +++ b/scripts/api-inference/scripts/.gitignore @@ -0,0 +1 @@ +*.js \ No newline at end of file diff --git a/scripts/api-inference/scripts/generate.ts b/scripts/api-inference/scripts/generate.ts new file mode 100644 index 000000000..73144662a --- /dev/null +++ b/scripts/api-inference/scripts/generate.ts @@ -0,0 +1,321 @@ +import { snippets, PipelineType } from "@huggingface/tasks"; +import Handlebars from "handlebars"; +import * as fs from "node:fs/promises"; +import * as path from "node:path/posix"; +import type { JsonObject } from "type-fest"; + +const inferenceSnippetLanguages 
= ["python", "js", "curl"] as const; +type InferenceSnippetLanguage = (typeof inferenceSnippetLanguages)[number]; + +// Taken from https://stackoverflow.com/a/31632215 +Handlebars.registerHelper({ + eq: (v1, v2) => v1 === v2, + ne: (v1, v2) => v1 !== v2, + lt: (v1, v2) => v1 < v2, + gt: (v1, v2) => v1 > v2, + lte: (v1, v2) => v1 <= v2, + gte: (v1, v2) => v1 >= v2, + and() { + return Array.prototype.every.call(arguments, Boolean); + }, + or() { + return Array.prototype.slice.call(arguments, 0, -1).some(Boolean); + }, +}); + +console.log("🛠️ Preparing..."); + +//////////////////////// +//// Filepath utils //// +//////////////////////// + +const ROOT_DIR = path + .join(path.normalize(import.meta.url), "..", "..") + .replace(/^(file:)/, ""); +const TEMPLATE_DIR = path.join(ROOT_DIR, "templates"); +const DOCS_DIR = path.join(ROOT_DIR, "..", "..", "docs"); +const TASKS_DOCS_DIR = path.join(DOCS_DIR, "api-inference", "tasks"); + +function readTemplate(templateName: string): Promise { + const templateNameSnakeCase = templateName.replace(/-/g, "_"); + const templatePath = path.join( + TEMPLATE_DIR, + `${templateNameSnakeCase}.handlebars`, + ); + console.log(` 🔍 Reading ${templateNameSnakeCase}.handlebars`); + return fs.readFile(templatePath, { encoding: "utf-8" }); +} + +function writeTaskDoc(templateName: string, content: string): Promise { + const templateNameSnakeCase = templateName.replace(/-/g, "_"); + const taskDocPath = path.join(TASKS_DOCS_DIR, `${templateNameSnakeCase}.md`); + console.log(` 💾 Saving to ${taskDocPath}`); + return fs + .mkdir(TASKS_DOCS_DIR, { recursive: true }) + .then(() => fs.writeFile(taskDocPath, content, { encoding: "utf-8" })); +} + +///////////////////////// +//// Task page utils //// +///////////////////////// + +const TASKS_API_URL = "https://huggingface.co/api/tasks"; +console.log(` 🕸️ Fetching ${TASKS_API_URL}`); +const response = await fetch(TASKS_API_URL); +// eslint-disable-next-line @typescript-eslint/no-explicit-any +const TASKS_DATA = (await response.json()) as any; + +/////////////////////// +//// Snippet utils //// +/////////////////////// + +const GET_SNIPPET_FN = { + curl: snippets.curl.getCurlInferenceSnippet, + js: snippets.js.getJsInferenceSnippet, + python: snippets.python.getPythonInferenceSnippet, +} as const; + +const HAS_SNIPPET_FN = { + curl: snippets.curl.hasCurlInferenceSnippet, + js: snippets.js.hasJsInferenceSnippet, + python: snippets.python.hasPythonInferenceSnippet, +} as const; + +export function getInferenceSnippet( + id: string, + pipeline_tag: PipelineType, + language: InferenceSnippetLanguage, +): string | undefined { + const modelData = { + id, + pipeline_tag, + mask_token: "", + library_name: "", + config: {}, + }; + if (HAS_SNIPPET_FN[language](modelData)) { + return GET_SNIPPET_FN[language](modelData, "hf_***"); + } +} + +///////////////////// +//// Specs utils //// +///////////////////// + +type SpecNameType = "input" | "output" | "stream_output"; + +const SPECS_URL_TEMPLATE = Handlebars.compile( + `https://raw.githubusercontent.com/huggingface/huggingface.js/main/packages/tasks/src/tasks/{{task}}/spec/{{name}}.json`, +); + +async function fetchOneSpec( + task: PipelineType, + name: SpecNameType, +): Promise { + const url = SPECS_URL_TEMPLATE({ task, name }); + console.log(` 🕸️ Fetching ${task} ${name} specs`); + return fetch(url) + .then((res) => res.json()) + .catch(() => undefined); +} + +async function fetchSpecs( + task: PipelineType, +): Promise< + Record<"input" | "output" | "stream_output", JsonObject | undefined> +> { 
+ return { + input: await fetchOneSpec(task, "input"), + output: await fetchOneSpec(task, "output"), + stream_output: await fetchOneSpec(task, "stream_output"), + }; +} + +function processPayloadSchema(schema: any, prefix: string = ""): JsonObject[] { + let rows: JsonObject[] = []; + + Object.entries(schema.properties || {}).forEach( + ([key, value]: [string, any]) => { + const isRequired = schema.required?.includes(key); + let type = value.type || "object"; + + if (value.$ref) { + // Handle references + const refSchemaKey = value.$ref.split("/").pop(); + value = schema.$defs?.[refSchemaKey!]; + } + + const description = value.description || ""; + const isObject = type === "object" && value.properties; + const row = { + name: `${prefix}${key}`, + type: type, + description: description, + required: isRequired ? "required" : "optional", + }; + rows.push(row); + + if (isObject) { + // Recursively process nested objects + rows = rows.concat( + processPayloadSchema( + value, + prefix + "        ", + ), + ); + } + }, + ); + + return rows; +} + +////////////////////////// +//// Inline templates //// +////////////////////////// + +const TIP_LINK_TO_TASK_PAGE_TEMPLATE = Handlebars.compile(` + +For more details about the \`{{task}}\` task, check out its [dedicated page](https://huggingface.co/tasks/{{task}})! You will find examples and related materials. + +`); + +const TIP_LIST_MODELS_LINK_TEMPLATE = Handlebars.compile( + `This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag={{task}}&sort=trending).`, +); + +const SPECS_HEADERS = await readTemplate("specs-headers"); +const SNIPPETS_TEMPLATE = Handlebars.compile( + await readTemplate("snippets-template"), +); +const SPECS_PAYLOAD_TEMPLATE = Handlebars.compile( + await readTemplate("specs-payload"), +); +const SPECS_OUTPUT_TEMPLATE = Handlebars.compile( + await readTemplate("specs-output"), +); + +//////////////////// +//// Data utils //// +//////////////////// + +const TASKS: PipelineType[] = ["image-to-image", "text-to-image"]; + +const DATA: { + constants: { + specsHeaders: string; + }; + models: Record; + snippets: Record; + specs: Record< + string, + { + input: string | undefined; + output: string | undefined; + stream_output: string | undefined; + } + >; + tips: { + linksToTaskPage: Record; + listModelsLink: Record; + }; +} = { + constants: { + specsHeaders: SPECS_HEADERS, + }, + models: {}, + snippets: {}, + specs: {}, + tips: { linksToTaskPage: {}, listModelsLink: {} }, +}; + +// Check for each model if inference status is "warm" +await Promise.all( + TASKS.map(async (task) => { + await Promise.all( + TASKS_DATA[task].models.map( + async (model: { + id: string; + description: string; + inference: string | undefined; + }) => { + console.log(` ⚡ Checking inference status ${model.id}`); + const modelData = await fetch( + `https://huggingface.co/api/models/${model.id}?expand[]=inference`, + ).then((res) => res.json()); + model.inference = modelData.inference; + }, + ), + ); + }), +); + +// Fetch recommended models +TASKS.forEach((task) => { + DATA.models[task] = TASKS_DATA[task].models; +}); + +// Fetch snippets +// TODO: render snippets only if they are available +TASKS.forEach((task) => { + const mainModel = TASKS_DATA[task].models[0].id; + const taskSnippets = { + curl: getInferenceSnippet(mainModel, task, "curl"), + python: getInferenceSnippet(mainModel, task, "python"), + javascript: getInferenceSnippet(mainModel, task, "js"), + }; + 
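+  // Render the cURL / Python / JavaScript snippets section for this task from the Handlebars template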
DATA.snippets[task] = SNIPPETS_TEMPLATE({ + taskSnippets, + taskSnakeCase: task.replace("-", "_"), + taskAttached: task.replace("-", ""), + }); +}); + +// Render specs +await Promise.all( + TASKS.map(async (task) => { + const specs = await fetchSpecs(task); + DATA.specs[task] = { + input: specs.input + ? SPECS_PAYLOAD_TEMPLATE({ schema: processPayloadSchema(specs.input) }) + : undefined, + output: specs.output + ? SPECS_OUTPUT_TEMPLATE({ schema: processPayloadSchema(specs.output) }) + : undefined, + stream_output: specs.stream_output + ? SPECS_OUTPUT_TEMPLATE({ + schema: processPayloadSchema(specs.stream_output), + }) + : undefined, + }; + }), +); + +// Render tips +TASKS.forEach((task) => { + DATA.tips.linksToTaskPage[task] = TIP_LINK_TO_TASK_PAGE_TEMPLATE({ task }); + DATA.tips.listModelsLink[task] = TIP_LIST_MODELS_LINK_TEMPLATE({ task }); +}); + +///////////////////////// +//// Rendering utils //// +///////////////////////// + +async function renderTemplate( + templateName: string, + data: JsonObject, +): Promise { + console.log(`🎨 Rendering ${templateName}`); + const template = Handlebars.compile(await readTemplate(templateName)); + return template(data); +} + +await Promise.all( + TASKS.map(async (task) => { + // @ts-ignore + const rendered = await renderTemplate(task, DATA); + await writeTaskDoc(task, rendered); + }), +); + +console.log("✅ All done!"); diff --git a/scripts/api-inference/templates/image_to_image.handlebars b/scripts/api-inference/templates/image_to_image.handlebars new file mode 100644 index 000000000..b432eab19 --- /dev/null +++ b/scripts/api-inference/templates/image_to_image.handlebars @@ -0,0 +1,36 @@ +## Image-to-image + +Image-to-image is the task of transforming a source image to match the characteristics of a target image or a target image domain. +Any image manipulation and enhancement is possible with image to image models. + +Use cases heavily depend on the model and the dataset it was trained on, but some common use cases include: +- Style transfer +- Image colorization +- Image super-resolution +- Image inpainting + +{{{tips.linksToTaskPage.image-to-image}}} + +### Recommended models + +{{#each models.image-to-image}} +- [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}} +{{/each}} + +{{{tips.listModelsLink.image-to-image}}} + +### API specification + +#### Request + +{{{specs.image-to-image.input}}} + +{{{constants.specsHeaders}}} + +#### Response + +{{{specs.image-to-image.output}}} + +### Using the API + +{{{snippets.image-to-image}}} diff --git a/scripts/api-inference/templates/snippets_template.handlebars b/scripts/api-inference/templates/snippets_template.handlebars new file mode 100644 index 000000000..2d0f099e2 --- /dev/null +++ b/scripts/api-inference/templates/snippets_template.handlebars @@ -0,0 +1,42 @@ +{{#if (or taskSnippets.curl taskSnippets.python taskSnippets.javascript)}} + + + +{{!-- cURL snippet (if exists) --}} +{{#if taskSnippets.curl}} + +```bash +{{{taskSnippets.curl}}} +``` + +{{/if}} + +{{!-- Python snippet (if exists) --}} +{{#if taskSnippets.python}} + +```py +{{{taskSnippets.python}}} +``` + +To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.{{taskSnakeCase}}). 
+ +{{/if}} + +{{!-- JavaScript snippet (if exists) --}} +{{#if taskSnippets.javascript}} + +```js +{{{taskSnippets.javascript}}} +``` + +To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#{{taskAttached}}). + +{{/if}} + + + +{{else}} + +No snippet available for this task. + +{{/if}} \ No newline at end of file diff --git a/scripts/api-inference/templates/specs_headers.handlebars b/scripts/api-inference/templates/specs_headers.handlebars new file mode 100644 index 000000000..44b28ecc8 --- /dev/null +++ b/scripts/api-inference/templates/specs_headers.handlebars @@ -0,0 +1,5 @@ +| Headers | | | +| :--- | :--- | :--- | +| **authorization** | _string, optional_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, optional, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, optional, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | diff --git a/scripts/api-inference/templates/specs_output.handlebars b/scripts/api-inference/templates/specs_output.handlebars new file mode 100644 index 000000000..7f3391b98 --- /dev/null +++ b/scripts/api-inference/templates/specs_output.handlebars @@ -0,0 +1,5 @@ +| Body | | +| :--- | :--- | +{{#each schema}} +| **{{{name}}}** | {{{description}}} | +{{/each}} \ No newline at end of file diff --git a/scripts/api-inference/templates/specs_payload.handlebars b/scripts/api-inference/templates/specs_payload.handlebars new file mode 100644 index 000000000..70460b184 --- /dev/null +++ b/scripts/api-inference/templates/specs_payload.handlebars @@ -0,0 +1,5 @@ +| Payload | | | +| :--- | :--- | :--- | +{{#each schema}} +| **{{{name}}}** | _{{type}}, {{required}}_ | {{{description}}} | +{{/each}} \ No newline at end of file diff --git a/scripts/api-inference/templates/text_to_image.handlebars b/scripts/api-inference/templates/text_to_image.handlebars new file mode 100644 index 000000000..6c9c568d1 --- /dev/null +++ b/scripts/api-inference/templates/text_to_image.handlebars @@ -0,0 +1,29 @@ +## Text-to-image + +Generate an image based on a given text prompt. 
+ +{{{tips.linksToTaskPage.text-to-image}}} + +### Recommended models + +{{#each models.text-to-image}} +- [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}} +{{/each}} + +{{{tips.listModelsLink.text-to-image}}} + +### API specification + +#### Request + +{{{specs.text-to-image.input}}} + +{{{constants.specsHeaders}}} + +#### Response + +{{{specs.text-to-image.output}}} + +### Using the API + +{{{snippets.text-to-image}}} diff --git a/scripts/api-inference/tsconfig.json b/scripts/api-inference/tsconfig.json new file mode 100644 index 000000000..20b47e4ab --- /dev/null +++ b/scripts/api-inference/tsconfig.json @@ -0,0 +1,20 @@ +{ + "compilerOptions": { + "allowSyntheticDefaultImports": true, + "lib": ["ES2022", "DOM"], + "module": "ESNext", + "target": "ESNext", + "moduleResolution": "node", + "forceConsistentCasingInFileNames": true, + "strict": true, + "noImplicitAny": true, + "strictNullChecks": true, + "skipLibCheck": true, + "noImplicitOverride": true, + "outDir": "./dist", + "declaration": true, + "declarationMap": true + }, + "include": ["scripts"], + "exclude": ["dist"] +} From 9b1e7353c3021a9dc194d0e2c4254bb4674bf61f Mon Sep 17 00:00:00 2001 From: osanseviero Date: Tue, 27 Aug 2024 15:45:31 +0200 Subject: [PATCH 12/38] Add getting started --- docs/api-inference/getting_started.md | 76 ++++++++++++++++++++++++++- docs/api-inference/parameters.md | 2 +- 2 files changed, 76 insertions(+), 2 deletions(-) diff --git a/docs/api-inference/getting_started.md b/docs/api-inference/getting_started.md index 6d668cf10..ad21542ec 100644 --- a/docs/api-inference/getting_started.md +++ b/docs/api-inference/getting_started.md @@ -1,3 +1,77 @@ # Getting Started -TODO: \ No newline at end of file +The Serverless Inference API allows you to easily do inference on a wide range of models and tasks. You can do requests with your favorite tools (Python, cURL, etc). We also provide a Python SDK (`huggingface_hub`) to make it even easier. + +We'll do a minimal example using a [sentiment classification model](https://huggingface.co/cardiffnlp/twitter-roberta-base-sentiment-latest). Please visit task-specific parameters and further documentation in our [API Reference](./parameters.md). + +## Getting a Token + +Using the Serverless Inference API requires passing a user token in the request headers. You can get a token by signing up on the Hugging Face website and then going to the [tokens page](https://huggingface.co/settings/tokens). We recommend creating a `Fine-grained` token with the scope to `Make calls to the serverless Inference API`. + +TODO: add screenshot + +## cURL + +```bash +curl https://api-inference.huggingface.co/models/cardiffnlp/twitter-roberta-base-sentiment-latest \ + -X POST \ + -d '{"inputs": "Today is a nice day"}' \ + -H "Authorization: Bearer hf_***" \ + -H "Content-Type: application/json" +``` + +## Python + +You can use the `requests` library to make a request to the Inference API. + +```python +import requests + +API_URL = "https://api-inference.huggingface.co/models/cardiffnlp/twitter-roberta-base-sentiment-latest" +headers = {"Authorization": "Bearer hf_***"} + +payload = {"inputs": "Today is a nice day"} +response = requests.post(API_URL, headers=headers, json=payload) +response.json() +``` + +Hugging Face also provides a [`InferenceClient`](https://huggingface.co/docs/huggingface_hub/guides/inference) that handles inference, caching, async, and more. 
Make sure to install it with `pip install huggingface_hub` first.
+
+```python
+from huggingface_hub import InferenceClient
+
+client = InferenceClient(model="cardiffnlp/twitter-roberta-base-sentiment-latest", token="hf_***")
+client.text_classification("Today is a nice day")
+```
+
+## JavaScript
+
+```js
+import fetch from "node-fetch";
+
+async function query(data) {
+    const response = await fetch(
+        "https://api-inference.huggingface.co/models/cardiffnlp/twitter-roberta-base-sentiment-latest",
+        {
+            method: "POST",
+            headers: {
+                Authorization: `Bearer hf_***`,
+                "Content-Type": "application/json",
+            },
+            body: JSON.stringify(data),
+        }
+    );
+    const result = await response.json();
+    return result;
+}
+
+query({
+    inputs: "Today is a nice day"
+}).then((response) => {
+    console.log(JSON.stringify(response, null, 2));
+});
+```
+
+## Next Steps
+
+Now that you know the basics, you can explore the [API Reference](./parameters.md) to learn more about task-specific settings and parameters.
\ No newline at end of file
diff --git a/docs/api-inference/parameters.md b/docs/api-inference/parameters.md
index a89413c29..905420fbe 100644
--- a/docs/api-inference/parameters.md
+++ b/docs/api-inference/parameters.md
@@ -14,7 +14,7 @@ Table with
 
 ### Caching
 
-There is a cache layer on the inference API to speed up requests when the inputs are exactly the same. For many models, such as classifiers and embedding models, results are deterministic meaning you can safely use the cached results. However, if you use a nondeterministic model, you might want to disable the cache mechanism resulting in a real new query.
+There is a cache layer on the inference API to speed up requests when the inputs are exactly the same. Many models, such as classifiers and embedding models, can use those results as is if they are deterministic, meaning the results will be the same. Howevr, if you use a nondeterministic model, you can disable the cache mechanism from being used, resulting in a real new query.
 
 To do this, you can add `x-use-cache:false` to the request headers.
For example From fb57a2db3cd8bb935dfb19eee16c4e6b965fa52c Mon Sep 17 00:00:00 2001 From: osanseviero Date: Mon, 19 Aug 2024 12:20:23 +0200 Subject: [PATCH 13/38] Add draft of docs structure --- docs/api-inference/_toctree.yml | 15 +++++++++++++++ docs/api-inference/getting-started.md | 1 + docs/api-inference/index.md | 1 + docs/api-inference/overview.md | 7 +++++++ docs/api-inference/task_parameters.md | 16 ++++++++++++++++ docs/api-inference/tasks/fill-mask.md | 1 + 6 files changed, 41 insertions(+) create mode 100644 docs/api-inference/_toctree.yml create mode 100644 docs/api-inference/getting-started.md create mode 100644 docs/api-inference/index.md create mode 100644 docs/api-inference/overview.md create mode 100644 docs/api-inference/task_parameters.md create mode 100644 docs/api-inference/tasks/fill-mask.md diff --git a/docs/api-inference/_toctree.yml b/docs/api-inference/_toctree.yml new file mode 100644 index 000000000..0d32376dd --- /dev/null +++ b/docs/api-inference/_toctree.yml @@ -0,0 +1,15 @@ +- sections: + - local: index + title: Serverless Inference API + - local: overview + title: Overview + - local: getting-started + title: Get started + title: Get Started +- sections: + - local: task_parameters + title: Task Parameters + - sections: + - local: tasks/fill-mask + title: Fill Mask + title: Parameters \ No newline at end of file diff --git a/docs/api-inference/getting-started.md b/docs/api-inference/getting-started.md new file mode 100644 index 000000000..bad55622f --- /dev/null +++ b/docs/api-inference/getting-started.md @@ -0,0 +1 @@ +# Getting Started diff --git a/docs/api-inference/index.md b/docs/api-inference/index.md new file mode 100644 index 000000000..2d3ac96f8 --- /dev/null +++ b/docs/api-inference/index.md @@ -0,0 +1 @@ +# Serverless Inference API diff --git a/docs/api-inference/overview.md b/docs/api-inference/overview.md new file mode 100644 index 000000000..d35789b21 --- /dev/null +++ b/docs/api-inference/overview.md @@ -0,0 +1,7 @@ +# Overview + +## Main Features + +## Warm vs Cold vs Frozen models + +## Security and Compliance diff --git a/docs/api-inference/task_parameters.md b/docs/api-inference/task_parameters.md new file mode 100644 index 000000000..4e97e656e --- /dev/null +++ b/docs/api-inference/task_parameters.md @@ -0,0 +1,16 @@ +# Detailed Parameters + +Table with +- Domain +- Task +- Whether it's supported in Inference API +- Supported libraries (not sure) +- Recommended model +- Link to model specific page + + + +## Additional parameters (different page?) + +- Controling cache +- Modifying the task used by a model (Which task is used by this model?) 
\ No newline at end of file diff --git a/docs/api-inference/tasks/fill-mask.md b/docs/api-inference/tasks/fill-mask.md new file mode 100644 index 000000000..bba61811b --- /dev/null +++ b/docs/api-inference/tasks/fill-mask.md @@ -0,0 +1 @@ +## Fill Mask \ No newline at end of file From bad42b07854a181e7baa6c8f8fc3d2c2b910830c Mon Sep 17 00:00:00 2001 From: osanseviero Date: Tue, 20 Aug 2024 16:12:19 +0200 Subject: [PATCH 14/38] Add index page --- docs/api-inference/index.md | 49 +++++++++++++++++++++++++++ docs/api-inference/tasks/fill-mask.md | 7 +++- 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/docs/api-inference/index.md b/docs/api-inference/index.md index 2d3ac96f8..7d03f2ae6 100644 --- a/docs/api-inference/index.md +++ b/docs/api-inference/index.md @@ -1 +1,50 @@ # Serverless Inference API + +**Instant Access to 800,000+ ML Models for Fast Prototyping** + +Explore the most popular models for text, image, speech, and more — all with a simple API request. Build, test, and experiment without worrying about infrastructure or setup. + +--- + +## Why use the Inference API? + +The Serverless Inference API offers a fast and free way to explore thousands of models for a variety of tasks. Whether you're prototyping a new application or experimenting with ML capabilities, this API gives you instant access to high-performing models across multiple domains: + +* **Text Generation:** Including large language models and tool-calling prompts, generate and experiment with high-quality responses. +* **Image Generation:** Easily create customized images, including LoRAs for your own styles. +* **Document Embeddings:** Build search and retrieval systems with SOTA embeddings. +* **Classical AI Tasks:** Ready-to-use models for text classification, image classification, speech recognition, and more. + +TODO: add some flow chart image + +⚡ **Fast and Free to Get Started**: The Inference API is free with rate limits. For production needs, explore [Inference Endpoints](https://huggingface.co/docs/inference-endpoints/index) for dedicated resources, autoscaling, advanced security features, and more. + +--- + +## Key Benefits + +- 🚀 **Instant Prototyping:** Access powerful models without setup. +- 🎯 **Diverse Use Cases:** One API for text, image, and beyond. +- 🔧 **Developer-Friendly:** Simple requests, fast responses. + +--- + +## Contents + +The documentation is organized into two sections: + +* **Quick Overview** Learn the basics of how to use the Inference API. +* **Detailed Parameters** Dive deep into task-specific settings and parameters. + +--- + +## Looking for custom support from the Hugging Face team? + + + HuggingFace Expert Acceleration Program +
+ +## Hugging Face is trusted in production by over 10,000 companies + + + \ No newline at end of file diff --git a/docs/api-inference/tasks/fill-mask.md b/docs/api-inference/tasks/fill-mask.md index bba61811b..64260ae39 100644 --- a/docs/api-inference/tasks/fill-mask.md +++ b/docs/api-inference/tasks/fill-mask.md @@ -1 +1,6 @@ -## Fill Mask \ No newline at end of file +## Fill Mask + +Mask filling is the task of predicting the right word (token to be precise) in the middle of a sequence. + +Automated docs below + From d656272638dd5bc35cdd2a762d6af12f2e3d9e65 Mon Sep 17 00:00:00 2001 From: osanseviero Date: Wed, 21 Aug 2024 15:09:27 +0200 Subject: [PATCH 15/38] Prepare overview and rate limits --- docs/api-inference/_toctree.yml | 7 ++-- ...{getting-started.md => getting_started.md} | 0 docs/api-inference/index.md | 2 +- docs/api-inference/overview.md | 34 +++++++++++++++++-- docs/api-inference/rate_limits.md | 11 ++++++ .../tasks/{fill-mask.md => fill_mask.md} | 0 6 files changed, 49 insertions(+), 5 deletions(-) rename docs/api-inference/{getting-started.md => getting_started.md} (100%) create mode 100644 docs/api-inference/rate_limits.md rename docs/api-inference/tasks/{fill-mask.md => fill_mask.md} (100%) diff --git a/docs/api-inference/_toctree.yml b/docs/api-inference/_toctree.yml index 0d32376dd..99c74cc49 100644 --- a/docs/api-inference/_toctree.yml +++ b/docs/api-inference/_toctree.yml @@ -3,13 +3,16 @@ title: Serverless Inference API - local: overview title: Overview - - local: getting-started + - local: getting_started + - local: rate_limits + title: Rate Limits title: Get started + title: Get Started - sections: - local: task_parameters title: Task Parameters - sections: - - local: tasks/fill-mask + - local: tasks/fill_mask title: Fill Mask title: Parameters \ No newline at end of file diff --git a/docs/api-inference/getting-started.md b/docs/api-inference/getting_started.md similarity index 100% rename from docs/api-inference/getting-started.md rename to docs/api-inference/getting_started.md diff --git a/docs/api-inference/index.md b/docs/api-inference/index.md index 7d03f2ae6..8ae6ce7a1 100644 --- a/docs/api-inference/index.md +++ b/docs/api-inference/index.md @@ -33,7 +33,7 @@ TODO: add some flow chart image The documentation is organized into two sections: -* **Quick Overview** Learn the basics of how to use the Inference API. +* **Getting Started** Learn the basics of how to use the Inference API. * **Detailed Parameters** Dive deep into task-specific settings and parameters. --- diff --git a/docs/api-inference/overview.md b/docs/api-inference/overview.md index d35789b21..cdc86ac70 100644 --- a/docs/api-inference/overview.md +++ b/docs/api-inference/overview.md @@ -2,6 +2,36 @@ ## Main Features -## Warm vs Cold vs Frozen models +* Leverage over 800,000+ models from different open-source libraries (transformers, sentence transformers, adapter transformers, diffusers, timm, etc.). +* Use models for a variety of tasks, including text generation, image generation, document embeddings, NER, summarization, image classification, and more. +* Accelerate your prototyping by using GPU-powered models. +* Run very large models that are challenging to deploy in production. +* Benefit from the built-in automatic scaling, load balancing and caching. 
-## Security and Compliance +## Eligibility + +Given the fast-paced nature of the open ML ecosystem, the Inference API allows using models that have large community interest and are actively being used(based on recent likes, downloads, and usage). Because of this, deployed models can be swapped without prior notice. + +You can find: + +* **[Warm models](https://huggingface.co/models?inference=warm&sort=trending):** models ready to be used. +* **[Cold models](https://huggingface.co/models?inference=cold&sort=trending):** models that are not loaded but can be used. +* **[Frozen models](https://huggingface.co/models?inference=frozen&sort=trending):** models that currently can't be run with the API. + +TODO: add screenshot + +## GPU vs CPU + +By default, the Inference API uses GPUs to run large models. For small models that can run well on CPU, such as small text classification and text embeddings, the API will automatically switch to CPU to save costs. + +## Inference for PRO + +In addition to thousands of public models available in the Hub, PRO and Enteprise users get free access and higher rate limits to the following models: + + +| Model | Size | Context Length | Use | +|--------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------|--------------------------------------------------------------| +| Meta Llama 3.1Instruct | [8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct), [70B](https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct) | 128k tokens | High quality multilingual chat model with large context length | +| Meta Llama 3 Instruct | [8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct), [70B](https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct) | 8k tokens | One of the best chat models | +| Llama 2 Chat | [7B](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf), [13B](https://huggingface.co/meta-llama/Llama-2-13b-chat-hf), [70B](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf) | 4k tokens | One of the best conversational models | +| Bark | [0.9B](https://huggingface.co/suno/bark) | - | Text to audio generation | diff --git a/docs/api-inference/rate_limits.md b/docs/api-inference/rate_limits.md new file mode 100644 index 000000000..c3ed7a6a9 --- /dev/null +++ b/docs/api-inference/rate_limits.md @@ -0,0 +1,11 @@ +# Rate Limits + +The Inference API has temporary rate limits based on the number of requests. These rate limits are subject to change in the future to be compute-based or token-based. + +Serverless API is not meant to be used for heavy production applications. If you need higher rate limits, using [Inference Endpoints](https://huggingface.co/docs/inference/endpoints) to have dedicated resources. 
+ +| User Tier | Rate Limit | +|---------------------|---------------------------| +| Unregistered Users | 1 request per hour | +| Signed-up Users | 300 requests per hour | +| PRO and Enterprise Users | 1000 requests per hour | \ No newline at end of file diff --git a/docs/api-inference/tasks/fill-mask.md b/docs/api-inference/tasks/fill_mask.md similarity index 100% rename from docs/api-inference/tasks/fill-mask.md rename to docs/api-inference/tasks/fill_mask.md From 01983fcb29c1236b3f4643b69ac3ef47f4c1c56b Mon Sep 17 00:00:00 2001 From: osanseviero Date: Wed, 21 Aug 2024 15:15:24 +0200 Subject: [PATCH 16/38] Manage redirects --- docs/api-inference/_redirects.yml | 5 +++++ docs/api-inference/getting_started.md | 2 ++ docs/api-inference/index.md | 2 +- docs/api-inference/overview.md | 12 ++++++++++++ .../{task_parameters.md => parameters.md} | 2 +- 5 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 docs/api-inference/_redirects.yml rename docs/api-inference/{task_parameters.md => parameters.md} (92%) diff --git a/docs/api-inference/_redirects.yml b/docs/api-inference/_redirects.yml new file mode 100644 index 000000000..3548bcb8f --- /dev/null +++ b/docs/api-inference/_redirects.yml @@ -0,0 +1,5 @@ +quicktour: overview +detailed_parameters: parameters +parallelism: TODO +usage: getting_started +faq: overview \ No newline at end of file diff --git a/docs/api-inference/getting_started.md b/docs/api-inference/getting_started.md index bad55622f..6d668cf10 100644 --- a/docs/api-inference/getting_started.md +++ b/docs/api-inference/getting_started.md @@ -1 +1,3 @@ # Getting Started + +TODO: \ No newline at end of file diff --git a/docs/api-inference/index.md b/docs/api-inference/index.md index 8ae6ce7a1..6b5b7a744 100644 --- a/docs/api-inference/index.md +++ b/docs/api-inference/index.md @@ -34,7 +34,7 @@ TODO: add some flow chart image The documentation is organized into two sections: * **Getting Started** Learn the basics of how to use the Inference API. -* **Detailed Parameters** Dive deep into task-specific settings and parameters. +* **Parameters** Dive into task-specific settings and parameters. --- diff --git a/docs/api-inference/overview.md b/docs/api-inference/overview.md index cdc86ac70..26964c13b 100644 --- a/docs/api-inference/overview.md +++ b/docs/api-inference/overview.md @@ -35,3 +35,15 @@ In addition to thousands of public models available in the Hub, PRO and Entepris | Meta Llama 3 Instruct | [8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct), [70B](https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct) | 8k tokens | One of the best chat models | | Llama 2 Chat | [7B](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf), [13B](https://huggingface.co/meta-llama/Llama-2-13b-chat-hf), [70B](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf) | 4k tokens | One of the best conversational models | | Bark | [0.9B](https://huggingface.co/suno/bark) | - | Text to audio generation | + + +## FAQ + +### Running Private Models + +The free Serverless API is designed to run popular public models. If you have a private model, you can use the [Inference Endpoints](https://huggingface.co/docs/inference/endpoints) to deploy your model. + +### Fine-tuning Models + +To automatically finetune a model on your data, please try [AutoTrain](https://huggingface.co/autotrain). It’s a no-code solution for automatically training and deploying a model; all you have to do is upload your data! 
+ diff --git a/docs/api-inference/task_parameters.md b/docs/api-inference/parameters.md similarity index 92% rename from docs/api-inference/task_parameters.md rename to docs/api-inference/parameters.md index 4e97e656e..f4c21782d 100644 --- a/docs/api-inference/task_parameters.md +++ b/docs/api-inference/parameters.md @@ -1,4 +1,4 @@ -# Detailed Parameters +# Parameters Table with - Domain From dfdc02d431ee0ec36964f3c1e0c88d83bac772c7 Mon Sep 17 00:00:00 2001 From: osanseviero Date: Wed, 21 Aug 2024 15:17:33 +0200 Subject: [PATCH 17/38] Clean up --- docs/api-inference/_toctree.yml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/docs/api-inference/_toctree.yml b/docs/api-inference/_toctree.yml index 99c74cc49..cb2d54791 100644 --- a/docs/api-inference/_toctree.yml +++ b/docs/api-inference/_toctree.yml @@ -4,14 +4,13 @@ - local: overview title: Overview - local: getting_started + title: Getting Started - local: rate_limits title: Rate Limits - title: Get started - - title: Get Started + title: title - sections: - - local: task_parameters - title: Task Parameters + - local: parameters + title: Parameters - sections: - local: tasks/fill_mask title: Fill Mask From abe2d4f80f15ac97281cc6546ff6263526ab1549 Mon Sep 17 00:00:00 2001 From: osanseviero Date: Wed, 21 Aug 2024 16:49:41 +0200 Subject: [PATCH 18/38] Apply suggestions from review --- docs/api-inference/_toctree.yml | 2 +- docs/api-inference/index.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/api-inference/_toctree.yml b/docs/api-inference/_toctree.yml index cb2d54791..fe580d367 100644 --- a/docs/api-inference/_toctree.yml +++ b/docs/api-inference/_toctree.yml @@ -7,7 +7,7 @@ title: Getting Started - local: rate_limits title: Rate Limits - title: title + title: Getting Started - sections: - local: parameters title: Parameters diff --git a/docs/api-inference/index.md b/docs/api-inference/index.md index 6b5b7a744..3b7839ff3 100644 --- a/docs/api-inference/index.md +++ b/docs/api-inference/index.md @@ -17,7 +17,7 @@ The Serverless Inference API offers a fast and free way to explore thousands of TODO: add some flow chart image -⚡ **Fast and Free to Get Started**: The Inference API is free with rate limits. For production needs, explore [Inference Endpoints](https://huggingface.co/docs/inference-endpoints/index) for dedicated resources, autoscaling, advanced security features, and more. +⚡ **Fast and Free to Get Started**: The Inference API is free with higher rate limits for PRO users. For production needs, explore [Inference Endpoints](https://ui.endpoints.huggingface.co/) for dedicated resources, autoscaling, advanced security features, and more. --- From 042a0e4240336c2164f1e9602ab82299a669a318 Mon Sep 17 00:00:00 2001 From: Omar Sanseviero Date: Wed, 21 Aug 2024 16:49:29 +0200 Subject: [PATCH 19/38] Apply suggestions from code review Co-authored-by: Pedro Cuenca --- docs/api-inference/overview.md | 8 ++++---- docs/api-inference/rate_limits.md | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/api-inference/overview.md b/docs/api-inference/overview.md index 26964c13b..8e4aeb75a 100644 --- a/docs/api-inference/overview.md +++ b/docs/api-inference/overview.md @@ -6,11 +6,11 @@ * Use models for a variety of tasks, including text generation, image generation, document embeddings, NER, summarization, image classification, and more. * Accelerate your prototyping by using GPU-powered models. * Run very large models that are challenging to deploy in production. 
-* Benefit from the built-in automatic scaling, load balancing and caching. +* Production-grade platform without the hassle: built-in automatic scaling, load balancing and caching. ## Eligibility -Given the fast-paced nature of the open ML ecosystem, the Inference API allows using models that have large community interest and are actively being used(based on recent likes, downloads, and usage). Because of this, deployed models can be swapped without prior notice. +Given the fast-paced nature of the open ML ecosystem, the Inference API exposes models that have large community interest and are in active use (based on recent likes, downloads, and usage). Because of this, deployed models can be swapped without prior notice. You can find: @@ -26,7 +26,7 @@ By default, the Inference API uses GPUs to run large models. For small models th ## Inference for PRO -In addition to thousands of public models available in the Hub, PRO and Enteprise users get free access and higher rate limits to the following models: +In addition to thousands of public models available in the Hub, PRO and Enteprise users get higher rate limits and free access to the following models: | Model | Size | Context Length | Use | @@ -41,7 +41,7 @@ In addition to thousands of public models available in the Hub, PRO and Entepris ### Running Private Models -The free Serverless API is designed to run popular public models. If you have a private model, you can use the [Inference Endpoints](https://huggingface.co/docs/inference/endpoints) to deploy your model. +The free Serverless API is designed to run popular public models. If you have a private model, you can use [Inference Endpoints](https://huggingface.co/docs/inference/endpoints) to deploy your model. ### Fine-tuning Models diff --git a/docs/api-inference/rate_limits.md b/docs/api-inference/rate_limits.md index c3ed7a6a9..3077b2884 100644 --- a/docs/api-inference/rate_limits.md +++ b/docs/api-inference/rate_limits.md @@ -1,8 +1,8 @@ # Rate Limits -The Inference API has temporary rate limits based on the number of requests. These rate limits are subject to change in the future to be compute-based or token-based. +The Inference API has rate limits based on the number of requests. These rate limits are subject to change in the future to be compute-based or token-based. -Serverless API is not meant to be used for heavy production applications. If you need higher rate limits, using [Inference Endpoints](https://huggingface.co/docs/inference/endpoints) to have dedicated resources. +Serverless API is not meant to be used for heavy production applications. If you need higher rate limits, consider [Inference Endpoints](https://huggingface.co/docs/inference/endpoints) to have dedicated resources. | User Tier | Rate Limit | |---------------------|---------------------------| From d774816e7435089c7113102bb2865d97cace6f61 Mon Sep 17 00:00:00 2001 From: osanseviero Date: Fri, 23 Aug 2024 15:35:03 +0200 Subject: [PATCH 20/38] Add additional headers --- docs/api-inference/overview.md | 2 +- docs/api-inference/parameters.md | 144 ++++++++++++++++++++++++++++++- 2 files changed, 142 insertions(+), 4 deletions(-) diff --git a/docs/api-inference/overview.md b/docs/api-inference/overview.md index 8e4aeb75a..496a0215a 100644 --- a/docs/api-inference/overview.md +++ b/docs/api-inference/overview.md @@ -45,5 +45,5 @@ The free Serverless API is designed to run popular public models. 
If you have a ### Fine-tuning Models -To automatically finetune a model on your data, please try [AutoTrain](https://huggingface.co/autotrain). It’s a no-code solution for automatically training and deploying a model; all you have to do is upload your data! +To automatically finetune a model on your data, please try [AutoTrain](https://huggingface.co/autotrain). It’s a no-code solution for automatically training a model; all you have to do is upload your data. diff --git a/docs/api-inference/parameters.md b/docs/api-inference/parameters.md index f4c21782d..905420fbe 100644 --- a/docs/api-inference/parameters.md +++ b/docs/api-inference/parameters.md @@ -10,7 +10,145 @@ Table with -## Additional parameters (different page?) +## Additional Options -- Controling cache -- Modifying the task used by a model (Which task is used by this model?) \ No newline at end of file +### Caching + +There is a cache layer on the inference API to speed up requests when the inputs are exactly the same. Many models, such as classifiers and embedding models, can use those results as is if they are deterministic, meaning the results will be the same. Howevr, if you use a nondeterministic model, you can disable the cache mechanism from being used, resulting in a real new query. + +To do this, you can add `x-use-cache:false` to the request headers. For example + + + + +```bash +curl https://api-inference.huggingface.co/models/MODEL_ID \ + -X POST \ + -d '{"inputs": "Can you please let us know more details about your "}' \ + -H "Authorization: Bearer hf_***" \ + -H "Content-Type: application/json" \ + -H "x-use-cache: false" +``` + + + +```python +import requests + +API_URL = "https://api-inference.huggingface.co/models/MODEL_ID" +headers = { + "Authorization": "Bearer hf_***", + "Content-Type": "application/json", + "x-use-cache": "false" +} +data = { + "inputs": "Can you please let us know more details about your " +} +response = requests.post(API_URL, headers=headers, json=data) +print(response.json()) +``` + + + + +```js +import fetch from "node-fetch"; + +async function query(data) { + const response = await fetch( + "https://api-inference.huggingface.co/models/MODEL_ID", + { + method: "POST", + headers: { + Authorization: `Bearer hf_***`, + "Content-Type": "application/json", + "x-use-cache": "false" + }, + body: JSON.stringify(data), + } + ); + const result = await response.json(); + return result; +} + +query({ + inputs: "Can you please let us know more details about your " +}).then((response) => { + console.log(JSON.stringify(response, null, 2)); +}); + +``` + + + + + +### Wait for the model + +When a model is warm, it is ready to be used and you will get a response relatively quickly. However, some models are cold and need to be loaded before they can be used. In that case, you will get a 503 error. Rather than doing many requests until it's loaded, you can wait for the model to be loaded by adding `x-wait-for-model:true` to the request headers. We suggest to only use this flag to wait for the model to be loaded when you are sure that the model is cold. That means, first try the request without this flag and only if you get a 503 error, try again with this flag. 
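+
+As a minimal sketch of that retry pattern in Python (reusing the `MODEL_ID` and `hf_***` placeholders from the examples above):
+
+```python
+import requests
+
+API_URL = "https://api-inference.huggingface.co/models/MODEL_ID"
+headers = {"Authorization": "Bearer hf_***", "Content-Type": "application/json"}
+payload = {"inputs": "Can you please let us know more details about your "}
+
+# First attempt without the flag: a cold model returns a 503 error
+response = requests.post(API_URL, headers=headers, json=payload)
+if response.status_code == 503:
+    # Retry once, this time waiting for the model to load
+    headers["x-wait-for-model"] = "true"
+    response = requests.post(API_URL, headers=headers, json=payload)
+print(response.json())
+```
+
+The snippets below show the same request with the header set from the start.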
+ + + + + +```bash +curl https://api-inference.huggingface.co/models/MODEL_ID \ + -X POST \ + -d '{"inputs": "Can you please let us know more details about your "}' \ + -H "Authorization: Bearer hf_***" \ + -H "Content-Type: application/json" \ + -H "x-wait-for-model: true" +``` + + + +```python +import requests + +API_URL = "https://api-inference.huggingface.co/models/MODEL_ID" +headers = { + "Authorization": "Bearer hf_***", + "Content-Type": "application/json", + "x-wait-for-model": "true" +} +data = { + "inputs": "Can you please let us know more details about your " +} +response = requests.post(API_URL, headers=headers, json=data) +print(response.json()) +``` + + + + +```js +import fetch from "node-fetch"; + +async function query(data) { + const response = await fetch( + "https://api-inference.huggingface.co/models/MODEL_ID", + { + method: "POST", + headers: { + Authorization: `Bearer hf_***`, + "Content-Type": "application/json", + "x-wait-for-model": "true" + }, + body: JSON.stringify(data), + } + ); + const result = await response.json(); + return result; +} + +query({ + inputs: "Can you please let us know more details about your " +}).then((response) => { + console.log(JSON.stringify(response, null, 2)); +}); + +``` + + + + \ No newline at end of file From a097022bcee49c08b24a0c62a338816d565becbe Mon Sep 17 00:00:00 2001 From: Omar Sanseviero Date: Mon, 26 Aug 2024 16:23:32 +0200 Subject: [PATCH 21/38] Apply suggestions from code review Co-authored-by: Lucain --- docs/api-inference/_toctree.yml | 2 +- docs/api-inference/index.md | 2 +- docs/api-inference/overview.md | 4 ++-- docs/api-inference/parameters.md | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/api-inference/_toctree.yml b/docs/api-inference/_toctree.yml index fe580d367..a9e8f593c 100644 --- a/docs/api-inference/_toctree.yml +++ b/docs/api-inference/_toctree.yml @@ -14,4 +14,4 @@ - sections: - local: tasks/fill_mask title: Fill Mask - title: Parameters \ No newline at end of file + title: API Reference \ No newline at end of file diff --git a/docs/api-inference/index.md b/docs/api-inference/index.md index 3b7839ff3..eb1adf845 100644 --- a/docs/api-inference/index.md +++ b/docs/api-inference/index.md @@ -34,7 +34,7 @@ TODO: add some flow chart image The documentation is organized into two sections: * **Getting Started** Learn the basics of how to use the Inference API. -* **Parameters** Dive into task-specific settings and parameters. +* **API Reference** Dive into task-specific settings and parameters. --- diff --git a/docs/api-inference/overview.md b/docs/api-inference/overview.md index 496a0215a..f7d0301d8 100644 --- a/docs/api-inference/overview.md +++ b/docs/api-inference/overview.md @@ -26,12 +26,12 @@ By default, the Inference API uses GPUs to run large models. 
For small models th ## Inference for PRO -In addition to thousands of public models available in the Hub, PRO and Enteprise users get higher rate limits and free access to the following models: +In addition to thousands of public models available in the Hub, PRO and Enterprise users get higher rate limits and free access to the following models: | Model | Size | Context Length | Use | |--------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------|--------------------------------------------------------------| -| Meta Llama 3.1Instruct | [8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct), [70B](https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct) | 128k tokens | High quality multilingual chat model with large context length | +| Meta Llama 3.1 Instruct | [8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct), [70B](https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct) | 128k tokens | High quality multilingual chat model with large context length | | Meta Llama 3 Instruct | [8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct), [70B](https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct) | 8k tokens | One of the best chat models | | Llama 2 Chat | [7B](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf), [13B](https://huggingface.co/meta-llama/Llama-2-13b-chat-hf), [70B](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf) | 4k tokens | One of the best conversational models | | Bark | [0.9B](https://huggingface.co/suno/bark) | - | Text to audio generation | diff --git a/docs/api-inference/parameters.md b/docs/api-inference/parameters.md index 905420fbe..a89413c29 100644 --- a/docs/api-inference/parameters.md +++ b/docs/api-inference/parameters.md @@ -14,7 +14,7 @@ Table with ### Caching -There is a cache layer on the inference API to speed up requests when the inputs are exactly the same. Many models, such as classifiers and embedding models, can use those results as is if they are deterministic, meaning the results will be the same. Howevr, if you use a nondeterministic model, you can disable the cache mechanism from being used, resulting in a real new query. +There is a cache layer on the inference API to speed up requests when the inputs are exactly the same. For many models, such as classifiers and embedding models, results are deterministic meaning you can safely use the cached results. However, if you use a nondeterministic model, you might want to disable the cache mechanism resulting in a real new query. To do this, you can add `x-use-cache:false` to the request headers. 
For example From 9bf223e118d2f2fca4dcc07e9e58beeab1db3aba Mon Sep 17 00:00:00 2001 From: osanseviero Date: Mon, 26 Aug 2024 18:58:56 +0200 Subject: [PATCH 22/38] Incorporate reviewer's feedback --- docs/api-inference/_redirects.yml | 4 +-- docs/api-inference/_toctree.yml | 5 +-- docs/api-inference/index.md | 10 ++++++ .../{overview.md => supported_models.md} | 31 +++---------------- 4 files changed, 20 insertions(+), 30 deletions(-) rename docs/api-inference/{overview.md => supported_models.md} (69%) diff --git a/docs/api-inference/_redirects.yml b/docs/api-inference/_redirects.yml index 3548bcb8f..f26e94330 100644 --- a/docs/api-inference/_redirects.yml +++ b/docs/api-inference/_redirects.yml @@ -1,5 +1,5 @@ -quicktour: overview +quicktour: index detailed_parameters: parameters parallelism: TODO usage: getting_started -faq: overview \ No newline at end of file +faq: index \ No newline at end of file diff --git a/docs/api-inference/_toctree.yml b/docs/api-inference/_toctree.yml index a9e8f593c..defee864c 100644 --- a/docs/api-inference/_toctree.yml +++ b/docs/api-inference/_toctree.yml @@ -1,8 +1,8 @@ - sections: - local: index title: Serverless Inference API - - local: overview - title: Overview + - local: supported_models + title: Supported Models - local: getting_started title: Getting Started - local: rate_limits @@ -14,4 +14,5 @@ - sections: - local: tasks/fill_mask title: Fill Mask + title: Detailed Task Parameters title: API Reference \ No newline at end of file diff --git a/docs/api-inference/index.md b/docs/api-inference/index.md index eb1adf845..8b67979e3 100644 --- a/docs/api-inference/index.md +++ b/docs/api-inference/index.md @@ -29,6 +29,16 @@ TODO: add some flow chart image --- +## Main Features + +* Leverage over 800,000+ models from different open-source libraries (transformers, sentence transformers, adapter transformers, diffusers, timm, etc.). +* Use models for a variety of tasks, including text generation, image generation, document embeddings, NER, summarization, image classification, and more. +* Accelerate your prototyping by using GPU-powered models. +* Run very large models that are challenging to deploy in production. +* Production-grade platform without the hassle: built-in automatic scaling, load balancing and caching. + +--- + ## Contents The documentation is organized into two sections: diff --git a/docs/api-inference/overview.md b/docs/api-inference/supported_models.md similarity index 69% rename from docs/api-inference/overview.md rename to docs/api-inference/supported_models.md index f7d0301d8..866531a06 100644 --- a/docs/api-inference/overview.md +++ b/docs/api-inference/supported_models.md @@ -1,16 +1,6 @@ -# Overview +# Supported Models -## Main Features - -* Leverage over 800,000+ models from different open-source libraries (transformers, sentence transformers, adapter transformers, diffusers, timm, etc.). -* Use models for a variety of tasks, including text generation, image generation, document embeddings, NER, summarization, image classification, and more. -* Accelerate your prototyping by using GPU-powered models. -* Run very large models that are challenging to deploy in production. -* Production-grade platform without the hassle: built-in automatic scaling, load balancing and caching. - -## Eligibility - -Given the fast-paced nature of the open ML ecosystem, the Inference API exposes models that have large community interest and are in active use (based on recent likes, downloads, and usage). 
Because of this, deployed models can be swapped without prior notice. +Given the fast-paced nature of the open ML ecosystem, the Inference API exposes models that have large community interest and are in active use (based on recent likes, downloads, and usage). Because of this, deployed models can be swapped without prior notice. The Hugging Face stack aims to keep all the latest popular models warm and ready to use. You can find: @@ -20,13 +10,9 @@ You can find: TODO: add screenshot -## GPU vs CPU - -By default, the Inference API uses GPUs to run large models. For small models that can run well on CPU, such as small text classification and text embeddings, the API will automatically switch to CPU to save costs. - -## Inference for PRO +## What do I get with a PRO subscription? -In addition to thousands of public models available in the Hub, PRO and Enterprise users get higher rate limits and free access to the following models: +In addition to thousands of public models available in the Hub, PRO and Enterprise users get higher [rate limits](./rate_limits) and free access to the following models: | Model | Size | Context Length | Use | @@ -37,13 +23,6 @@ In addition to thousands of public models available in the Hub, PRO and Enterpri | Bark | [0.9B](https://huggingface.co/suno/bark) | - | Text to audio generation | -## FAQ - -### Running Private Models +## Running Private Models The free Serverless API is designed to run popular public models. If you have a private model, you can use [Inference Endpoints](https://huggingface.co/docs/inference/endpoints) to deploy your model. - -### Fine-tuning Models - -To automatically finetune a model on your data, please try [AutoTrain](https://huggingface.co/autotrain). It’s a no-code solution for automatically training a model; all you have to do is upload your data. - From 51750bf7b6fd8e6f401384794dd15c3cf49c3db3 Mon Sep 17 00:00:00 2001 From: Lucain Date: Tue, 27 Aug 2024 15:35:19 +0200 Subject: [PATCH 23/38] First draft for text-to-image, image-to-image + generate script (#1384) * First draft for text-to-image * add correct code snippets * Update docs/api-inference/tasks/text-to-image.md Co-authored-by: Omar Sanseviero * better table? * Generate tasks pages from script (#1386) * init project * first script to generate task pages * commit generated content * generate payload table as well * so undecisive * hey * better ? 
* Add image-to-image page * template for snippets section + few things * few things * Update scripts/api-inference/templates/specs_headers.handlebars Co-authored-by: Omar Sanseviero * Update scripts/api-inference/templates/specs_headers.handlebars Co-authored-by: Omar Sanseviero * generate * fetch inference status --------- Co-authored-by: Omar Sanseviero --- docs/api-inference/_toctree.yml | 4 + docs/api-inference/tasks/image_to_image.md | 63 ++ docs/api-inference/tasks/text_to_image.md | 116 ++++ scripts/api-inference/.gitignore | 1 + scripts/api-inference/.prettierignore | 5 + scripts/api-inference/README.md | 11 + scripts/api-inference/package.json | 26 + scripts/api-inference/pnpm-lock.yaml | 541 ++++++++++++++++++ scripts/api-inference/scripts/.gitignore | 1 + scripts/api-inference/scripts/generate.ts | 321 +++++++++++ .../templates/image_to_image.handlebars | 36 ++ .../templates/snippets_template.handlebars | 42 ++ .../templates/specs_headers.handlebars | 5 + .../templates/specs_output.handlebars | 5 + .../templates/specs_payload.handlebars | 5 + .../templates/text_to_image.handlebars | 29 + scripts/api-inference/tsconfig.json | 20 + 17 files changed, 1231 insertions(+) create mode 100644 docs/api-inference/tasks/image_to_image.md create mode 100644 docs/api-inference/tasks/text_to_image.md create mode 100644 scripts/api-inference/.gitignore create mode 100644 scripts/api-inference/.prettierignore create mode 100644 scripts/api-inference/README.md create mode 100644 scripts/api-inference/package.json create mode 100644 scripts/api-inference/pnpm-lock.yaml create mode 100644 scripts/api-inference/scripts/.gitignore create mode 100644 scripts/api-inference/scripts/generate.ts create mode 100644 scripts/api-inference/templates/image_to_image.handlebars create mode 100644 scripts/api-inference/templates/snippets_template.handlebars create mode 100644 scripts/api-inference/templates/specs_headers.handlebars create mode 100644 scripts/api-inference/templates/specs_output.handlebars create mode 100644 scripts/api-inference/templates/specs_payload.handlebars create mode 100644 scripts/api-inference/templates/text_to_image.handlebars create mode 100644 scripts/api-inference/tsconfig.json diff --git a/docs/api-inference/_toctree.yml b/docs/api-inference/_toctree.yml index defee864c..a68f3abfb 100644 --- a/docs/api-inference/_toctree.yml +++ b/docs/api-inference/_toctree.yml @@ -14,5 +14,9 @@ - sections: - local: tasks/fill_mask title: Fill Mask + - local: tasks/image_to_image + title: Image-to-image + - local: tasks/text_to_image + title: Text-to-image title: Detailed Task Parameters title: API Reference \ No newline at end of file diff --git a/docs/api-inference/tasks/image_to_image.md b/docs/api-inference/tasks/image_to_image.md new file mode 100644 index 000000000..1b5e2241e --- /dev/null +++ b/docs/api-inference/tasks/image_to_image.md @@ -0,0 +1,63 @@ +## Image-to-image + +Image-to-image is the task of transforming a source image to match the characteristics of a target image or a target image domain. +Any image manipulation and enhancement is possible with image to image models. + +Use cases heavily depend on the model and the dataset it was trained on, but some common use cases include: +- Style transfer +- Image colorization +- Image super-resolution +- Image inpainting + + + +For more details about the `image-to-image` task, check out its [dedicated page](https://huggingface.co/tasks/image-to-image)! You will find examples and related materials. 
+ + + +### Recommended models + +- [fal/AuraSR-v2](https://huggingface.co/fal/AuraSR-v2): An image-to-image model to improve image resolution. +- [keras-io/super-resolution](https://huggingface.co/keras-io/super-resolution): A model that increases the resolution of an image. +- [lambdalabs/sd-image-variations-diffusers](https://huggingface.co/lambdalabs/sd-image-variations-diffusers): A model that creates a set of variations of the input image in the style of DALL-E using Stable Diffusion. +- [mfidabel/controlnet-segment-anything](https://huggingface.co/mfidabel/controlnet-segment-anything): A model that generates images based on segments in the input image and the text prompt. +- [timbrooks/instruct-pix2pix](https://huggingface.co/timbrooks/instruct-pix2pix): A model that takes an image and an instruction to edit the image. + +This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=image-to-image&sort=trending). + +### API specification + +#### Request + +| Payload | | | +| :--- | :--- | :--- | +| **inputs** | _object, required_ | The input image data | +| **parameters** | _object, optional_ | Additional inference parameters for Image To Image | +| **        guidance_scale** | _number, optional_ | For diffusion models. A higher guidance scale value encourages the model to generate images closely linked to the text prompt at the expense of lower image quality. | +| **        negative_prompt** | _array, optional_ | One or several prompt to guide what NOT to include in image generation. | +| **        num_inference_steps** | _integer, optional_ | For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference. | +| **        target_size** | _object, optional_ | The size in pixel of the output image | +| **                width** | _integer, required_ | | +| **                height** | _integer, required_ | | + + +| Headers | | | +| :--- | :--- | :--- | +| **authorization** | _string, optional_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, optional, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, optional, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | + + +#### Response + +| Body | | +| :--- | :--- | +| **image** | The output image | + + +### Using the API + + +No snippet available for this task. 
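Until a generated code snippet is available for this task, the endpoint can still be called directly over HTTP. The example below is a minimal sketch using Python's `requests`: it assumes the endpoint accepts the raw bytes of the input image as the request body and returns the transformed image as bytes, and it uses `fal/AuraSR-v2` and the local file names purely as placeholders.

```py
# Minimal sketch (not an official snippet): the exact request/response format may vary per model.
import requests

API_URL = "https://api-inference.huggingface.co/models/fal/AuraSR-v2"  # placeholder model id
headers = {"Authorization": "Bearer hf_***"}

def query(image_path):
    # Assumption: the raw image bytes are sent as the request body.
    with open(image_path, "rb") as f:
        data = f.read()
    response = requests.post(API_URL, headers=headers, data=data)
    return response.content  # assumed to be the output image bytes

image_bytes = query("input.jpg")
with open("output.jpg", "wb") as f:
    f.write(image_bytes)
```

If you prefer a client library, `huggingface_hub`'s `InferenceClient.image_to_image` wraps this kind of call for you.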
+ + diff --git a/docs/api-inference/tasks/text_to_image.md b/docs/api-inference/tasks/text_to_image.md new file mode 100644 index 000000000..810c8f68e --- /dev/null +++ b/docs/api-inference/tasks/text_to_image.md @@ -0,0 +1,116 @@ +## Text-to-image + +Generate an image based on a given text prompt. + + + +For more details about the `text-to-image` task, check out its [dedicated page](https://huggingface.co/tasks/text-to-image)! You will find examples and related materials. + + + +### Recommended models + +- [black-forest-labs/FLUX.1-dev](https://huggingface.co/black-forest-labs/FLUX.1-dev): One of the most powerful image generation models that can generate realistic outputs. +- [latent-consistency/lcm-lora-sdxl](https://huggingface.co/latent-consistency/lcm-lora-sdxl): A powerful yet fast image generation model. +- [Kwai-Kolors/Kolors](https://huggingface.co/Kwai-Kolors/Kolors): Text-to-image model for photorealistic generation. +- [stabilityai/stable-diffusion-3-medium-diffusers](https://huggingface.co/stabilityai/stable-diffusion-3-medium-diffusers): A powerful text-to-image model. + +This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=text-to-image&sort=trending). + +### API specification + +#### Request + +| Payload | | | +| :--- | :--- | :--- | +| **inputs** | _string, required_ | The input text data (sometimes called "prompt" | +| **parameters** | _object, optional_ | Additional inference parameters for Text To Image | +| **        guidance_scale** | _number, optional_ | For diffusion models. A higher guidance scale value encourages the model to generate images closely linked to the text prompt at the expense of lower image quality. | +| **        negative_prompt** | _array, optional_ | One or several prompt to guide what NOT to include in image generation. | +| **        num_inference_steps** | _integer, optional_ | For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference. | +| **        target_size** | _object, optional_ | The size in pixel of the output image | +| **                width** | _integer, required_ | | +| **                height** | _integer, required_ | | +| **        scheduler** | _string, optional_ | For diffusion models. Override the scheduler with a compatible one | + + +| Headers | | | +| :--- | :--- | :--- | +| **authorization** | _string, optional_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, optional, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, optional, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. 
Read more about model availability [here](../overview#eligibility]). | + + +#### Response + +| Body | | +| :--- | :--- | +| **image** | The generated image | + + +### Using the API + + + + + +```bash +curl https://api-inference.huggingface.co/models/black-forest-labs/FLUX.1-dev \ + -X POST \ + -d '{"inputs": "Astronaut riding a horse"}' \ + -H 'Content-Type: application/json' \ + -H "Authorization: Bearer hf_***" + +``` + + + +```py +import requests + +API_URL = "https://api-inference.huggingface.co/models/black-forest-labs/FLUX.1-dev" +headers = {"Authorization": "Bearer hf_***"} + +def query(payload): + response = requests.post(API_URL, headers=headers, json=payload) + return response.content +image_bytes = query({ + "inputs": "Astronaut riding a horse", +}) +# You can access the image with PIL.Image for example +import io +from PIL import Image +image = Image.open(io.BytesIO(image_bytes)) +``` + +To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.text_to_image). + + + +```js +async function query(data) { + const response = await fetch( + "https://api-inference.huggingface.co/models/black-forest-labs/FLUX.1-dev", + { + headers: { + Authorization: "Bearer hf_***", + "Content-Type": "application/json", + }, + method: "POST", + body: JSON.stringify(data), + } + ); + const result = await response.blob(); + return result; +} +query({"inputs": "Astronaut riding a horse"}).then((response) => { + // Use image +}); +``` + +To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#texttoimage). + + + + + + diff --git a/scripts/api-inference/.gitignore b/scripts/api-inference/.gitignore new file mode 100644 index 000000000..53c37a166 --- /dev/null +++ b/scripts/api-inference/.gitignore @@ -0,0 +1 @@ +dist \ No newline at end of file diff --git a/scripts/api-inference/.prettierignore b/scripts/api-inference/.prettierignore new file mode 100644 index 000000000..d4b43ae6c --- /dev/null +++ b/scripts/api-inference/.prettierignore @@ -0,0 +1,5 @@ +pnpm-lock.yaml +# In order to avoid code samples to have tabs, they don't display well on npm +README.md +dist +*.handlebars \ No newline at end of file diff --git a/scripts/api-inference/README.md b/scripts/api-inference/README.md new file mode 100644 index 000000000..67d9c79e6 --- /dev/null +++ b/scripts/api-inference/README.md @@ -0,0 +1,11 @@ +Install dependencies. + +```sh +pnpm install +``` + +Generate documentation.
+ +```sh +pnpm run generate +``` \ No newline at end of file diff --git a/scripts/api-inference/package.json b/scripts/api-inference/package.json new file mode 100644 index 000000000..13f84e881 --- /dev/null +++ b/scripts/api-inference/package.json @@ -0,0 +1,26 @@ +{ + "name": "api-inference-generator", + "version": "1.0.0", + "description": "", + "main": "index.js", + "type": "module", + "scripts": { + "format": "prettier --write .", + "format:check": "prettier --check .", + "generate": "tsx scripts/generate.ts" + }, + "keywords": [], + "author": "", + "license": "ISC", + "dependencies": { + "@huggingface/tasks": "^0.11.11", + "@types/node": "^22.5.0", + "handlebars": "^4.7.8", + "node": "^20.17.0", + "prettier": "^3.3.3", + "ts-node": "^10.9.2", + "tsx": "^4.17.0", + "type-fest": "^4.25.0", + "typescript": "^5.5.4" + } +} diff --git a/scripts/api-inference/pnpm-lock.yaml b/scripts/api-inference/pnpm-lock.yaml new file mode 100644 index 000000000..58267667d --- /dev/null +++ b/scripts/api-inference/pnpm-lock.yaml @@ -0,0 +1,541 @@ +lockfileVersion: '9.0' + +settings: + autoInstallPeers: true + excludeLinksFromLockfile: false + +importers: + + .: + dependencies: + '@huggingface/tasks': + specifier: ^0.11.11 + version: 0.11.11 + '@types/node': + specifier: ^22.5.0 + version: 22.5.0 + handlebars: + specifier: ^4.7.8 + version: 4.7.8 + node: + specifier: ^20.17.0 + version: 20.17.0 + prettier: + specifier: ^3.3.3 + version: 3.3.3 + ts-node: + specifier: ^10.9.2 + version: 10.9.2(@types/node@22.5.0)(typescript@5.5.4) + tsx: + specifier: ^4.17.0 + version: 4.17.0 + type-fest: + specifier: ^4.25.0 + version: 4.25.0 + typescript: + specifier: ^5.5.4 + version: 5.5.4 + +packages: + + '@cspotcode/source-map-support@0.8.1': + resolution: {integrity: sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==} + engines: {node: '>=12'} + + '@esbuild/aix-ppc64@0.23.1': + resolution: {integrity: sha512-6VhYk1diRqrhBAqpJEdjASR/+WVRtfjpqKuNw11cLiaWpAT/Uu+nokB+UJnevzy/P9C/ty6AOe0dwueMrGh/iQ==} + engines: {node: '>=18'} + cpu: [ppc64] + os: [aix] + + '@esbuild/android-arm64@0.23.1': + resolution: {integrity: sha512-xw50ipykXcLstLeWH7WRdQuysJqejuAGPd30vd1i5zSyKK3WE+ijzHmLKxdiCMtH1pHz78rOg0BKSYOSB/2Khw==} + engines: {node: '>=18'} + cpu: [arm64] + os: [android] + + '@esbuild/android-arm@0.23.1': + resolution: {integrity: sha512-uz6/tEy2IFm9RYOyvKl88zdzZfwEfKZmnX9Cj1BHjeSGNuGLuMD1kR8y5bteYmwqKm1tj8m4cb/aKEorr6fHWQ==} + engines: {node: '>=18'} + cpu: [arm] + os: [android] + + '@esbuild/android-x64@0.23.1': + resolution: {integrity: sha512-nlN9B69St9BwUoB+jkyU090bru8L0NA3yFvAd7k8dNsVH8bi9a8cUAUSEcEEgTp2z3dbEDGJGfP6VUnkQnlReg==} + engines: {node: '>=18'} + cpu: [x64] + os: [android] + + '@esbuild/darwin-arm64@0.23.1': + resolution: {integrity: sha512-YsS2e3Wtgnw7Wq53XXBLcV6JhRsEq8hkfg91ESVadIrzr9wO6jJDMZnCQbHm1Guc5t/CdDiFSSfWP58FNuvT3Q==} + engines: {node: '>=18'} + cpu: [arm64] + os: [darwin] + + '@esbuild/darwin-x64@0.23.1': + resolution: {integrity: sha512-aClqdgTDVPSEGgoCS8QDG37Gu8yc9lTHNAQlsztQ6ENetKEO//b8y31MMu2ZaPbn4kVsIABzVLXYLhCGekGDqw==} + engines: {node: '>=18'} + cpu: [x64] + os: [darwin] + + '@esbuild/freebsd-arm64@0.23.1': + resolution: {integrity: sha512-h1k6yS8/pN/NHlMl5+v4XPfikhJulk4G+tKGFIOwURBSFzE8bixw1ebjluLOjfwtLqY0kewfjLSrO6tN2MgIhA==} + engines: {node: '>=18'} + cpu: [arm64] + os: [freebsd] + + '@esbuild/freebsd-x64@0.23.1': + resolution: {integrity: sha512-lK1eJeyk1ZX8UklqFd/3A60UuZ/6UVfGT2LuGo3Wp4/z7eRTRYY+0xOu2kpClP+vMTi9wKOfXi2vjUpO1Ro76g==} + 
engines: {node: '>=18'} + cpu: [x64] + os: [freebsd] + + '@esbuild/linux-arm64@0.23.1': + resolution: {integrity: sha512-/93bf2yxencYDnItMYV/v116zff6UyTjo4EtEQjUBeGiVpMmffDNUyD9UN2zV+V3LRV3/on4xdZ26NKzn6754g==} + engines: {node: '>=18'} + cpu: [arm64] + os: [linux] + + '@esbuild/linux-arm@0.23.1': + resolution: {integrity: sha512-CXXkzgn+dXAPs3WBwE+Kvnrf4WECwBdfjfeYHpMeVxWE0EceB6vhWGShs6wi0IYEqMSIzdOF1XjQ/Mkm5d7ZdQ==} + engines: {node: '>=18'} + cpu: [arm] + os: [linux] + + '@esbuild/linux-ia32@0.23.1': + resolution: {integrity: sha512-VTN4EuOHwXEkXzX5nTvVY4s7E/Krz7COC8xkftbbKRYAl96vPiUssGkeMELQMOnLOJ8k3BY1+ZY52tttZnHcXQ==} + engines: {node: '>=18'} + cpu: [ia32] + os: [linux] + + '@esbuild/linux-loong64@0.23.1': + resolution: {integrity: sha512-Vx09LzEoBa5zDnieH8LSMRToj7ir/Jeq0Gu6qJ/1GcBq9GkfoEAoXvLiW1U9J1qE/Y/Oyaq33w5p2ZWrNNHNEw==} + engines: {node: '>=18'} + cpu: [loong64] + os: [linux] + + '@esbuild/linux-mips64el@0.23.1': + resolution: {integrity: sha512-nrFzzMQ7W4WRLNUOU5dlWAqa6yVeI0P78WKGUo7lg2HShq/yx+UYkeNSE0SSfSure0SqgnsxPvmAUu/vu0E+3Q==} + engines: {node: '>=18'} + cpu: [mips64el] + os: [linux] + + '@esbuild/linux-ppc64@0.23.1': + resolution: {integrity: sha512-dKN8fgVqd0vUIjxuJI6P/9SSSe/mB9rvA98CSH2sJnlZ/OCZWO1DJvxj8jvKTfYUdGfcq2dDxoKaC6bHuTlgcw==} + engines: {node: '>=18'} + cpu: [ppc64] + os: [linux] + + '@esbuild/linux-riscv64@0.23.1': + resolution: {integrity: sha512-5AV4Pzp80fhHL83JM6LoA6pTQVWgB1HovMBsLQ9OZWLDqVY8MVobBXNSmAJi//Csh6tcY7e7Lny2Hg1tElMjIA==} + engines: {node: '>=18'} + cpu: [riscv64] + os: [linux] + + '@esbuild/linux-s390x@0.23.1': + resolution: {integrity: sha512-9ygs73tuFCe6f6m/Tb+9LtYxWR4c9yg7zjt2cYkjDbDpV/xVn+68cQxMXCjUpYwEkze2RcU/rMnfIXNRFmSoDw==} + engines: {node: '>=18'} + cpu: [s390x] + os: [linux] + + '@esbuild/linux-x64@0.23.1': + resolution: {integrity: sha512-EV6+ovTsEXCPAp58g2dD68LxoP/wK5pRvgy0J/HxPGB009omFPv3Yet0HiaqvrIrgPTBuC6wCH1LTOY91EO5hQ==} + engines: {node: '>=18'} + cpu: [x64] + os: [linux] + + '@esbuild/netbsd-x64@0.23.1': + resolution: {integrity: sha512-aevEkCNu7KlPRpYLjwmdcuNz6bDFiE7Z8XC4CPqExjTvrHugh28QzUXVOZtiYghciKUacNktqxdpymplil1beA==} + engines: {node: '>=18'} + cpu: [x64] + os: [netbsd] + + '@esbuild/openbsd-arm64@0.23.1': + resolution: {integrity: sha512-3x37szhLexNA4bXhLrCC/LImN/YtWis6WXr1VESlfVtVeoFJBRINPJ3f0a/6LV8zpikqoUg4hyXw0sFBt5Cr+Q==} + engines: {node: '>=18'} + cpu: [arm64] + os: [openbsd] + + '@esbuild/openbsd-x64@0.23.1': + resolution: {integrity: sha512-aY2gMmKmPhxfU+0EdnN+XNtGbjfQgwZj43k8G3fyrDM/UdZww6xrWxmDkuz2eCZchqVeABjV5BpildOrUbBTqA==} + engines: {node: '>=18'} + cpu: [x64] + os: [openbsd] + + '@esbuild/sunos-x64@0.23.1': + resolution: {integrity: sha512-RBRT2gqEl0IKQABT4XTj78tpk9v7ehp+mazn2HbUeZl1YMdaGAQqhapjGTCe7uw7y0frDi4gS0uHzhvpFuI1sA==} + engines: {node: '>=18'} + cpu: [x64] + os: [sunos] + + '@esbuild/win32-arm64@0.23.1': + resolution: {integrity: sha512-4O+gPR5rEBe2FpKOVyiJ7wNDPA8nGzDuJ6gN4okSA1gEOYZ67N8JPk58tkWtdtPeLz7lBnY6I5L3jdsr3S+A6A==} + engines: {node: '>=18'} + cpu: [arm64] + os: [win32] + + '@esbuild/win32-ia32@0.23.1': + resolution: {integrity: sha512-BcaL0Vn6QwCwre3Y717nVHZbAa4UBEigzFm6VdsVdT/MbZ38xoj1X9HPkZhbmaBGUD1W8vxAfffbDe8bA6AKnQ==} + engines: {node: '>=18'} + cpu: [ia32] + os: [win32] + + '@esbuild/win32-x64@0.23.1': + resolution: {integrity: sha512-BHpFFeslkWrXWyUPnbKm+xYYVYruCinGcftSBaa8zoF9hZO4BcSCFUvHVTtzpIY6YzUnYtuEhZ+C9iEXjxnasg==} + engines: {node: '>=18'} + cpu: [x64] + os: [win32] + + '@huggingface/tasks@0.11.11': + resolution: {integrity: 
sha512-YRleUv67oSqDOkcYm4pFdBeaw8I8Dh6/DYlXo02fxXj5iC/WiDi8PE1wBhAhTdASwkl/n1V4xbL69uKXwDNDGw==} + + '@jridgewell/resolve-uri@3.1.2': + resolution: {integrity: sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==} + engines: {node: '>=6.0.0'} + + '@jridgewell/sourcemap-codec@1.5.0': + resolution: {integrity: sha512-gv3ZRaISU3fjPAgNsriBRqGWQL6quFx04YMPW/zD8XMLsU32mhCCbfbO6KZFLjvYpCZ8zyDEgqsgf+PwPaM7GQ==} + + '@jridgewell/trace-mapping@0.3.9': + resolution: {integrity: sha512-3Belt6tdc8bPgAtbcmdtNJlirVoTmEb5e2gC94PnkwEW9jI6CAHUeoG85tjWP5WquqfavoMtMwiG4P926ZKKuQ==} + + '@tsconfig/node10@1.0.11': + resolution: {integrity: sha512-DcRjDCujK/kCk/cUe8Xz8ZSpm8mS3mNNpta+jGCA6USEDfktlNvm1+IuZ9eTcDbNk41BHwpHHeW+N1lKCz4zOw==} + + '@tsconfig/node12@1.0.11': + resolution: {integrity: sha512-cqefuRsh12pWyGsIoBKJA9luFu3mRxCA+ORZvA4ktLSzIuCUtWVxGIuXigEwO5/ywWFMZ2QEGKWvkZG1zDMTag==} + + '@tsconfig/node14@1.0.3': + resolution: {integrity: sha512-ysT8mhdixWK6Hw3i1V2AeRqZ5WfXg1G43mqoYlM2nc6388Fq5jcXyr5mRsqViLx/GJYdoL0bfXD8nmF+Zn/Iow==} + + '@tsconfig/node16@1.0.4': + resolution: {integrity: sha512-vxhUy4J8lyeyinH7Azl1pdd43GJhZH/tP2weN8TntQblOY+A0XbT8DJk1/oCPuOOyg/Ja757rG0CgHcWC8OfMA==} + + '@types/node@22.5.0': + resolution: {integrity: sha512-DkFrJOe+rfdHTqqMg0bSNlGlQ85hSoh2TPzZyhHsXnMtligRWpxUySiyw8FY14ITt24HVCiQPWxS3KO/QlGmWg==} + + acorn-walk@8.3.3: + resolution: {integrity: sha512-MxXdReSRhGO7VlFe1bRG/oI7/mdLV9B9JJT0N8vZOhF7gFRR5l3M8W9G8JxmKV+JC5mGqJ0QvqfSOLsCPa4nUw==} + engines: {node: '>=0.4.0'} + + acorn@8.12.1: + resolution: {integrity: sha512-tcpGyI9zbizT9JbV6oYE477V6mTlXvvi0T0G3SNIYE2apm/G5huBa1+K89VGeovbg+jycCrfhl3ADxErOuO6Jg==} + engines: {node: '>=0.4.0'} + hasBin: true + + arg@4.1.3: + resolution: {integrity: sha512-58S9QDqG0Xx27YwPSt9fJxivjYl432YCwfDMfZ+71RAqUrZef7LrKQZ3LHLOwCS4FLNBplP533Zx895SeOCHvA==} + + create-require@1.1.1: + resolution: {integrity: sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ==} + + diff@4.0.2: + resolution: {integrity: sha512-58lmxKSA4BNyLz+HHMUzlOEpg09FV+ev6ZMe3vJihgdxzgcwZ8VoEEPmALCZG9LmqfVoNMMKpttIYTVG6uDY7A==} + engines: {node: '>=0.3.1'} + + esbuild@0.23.1: + resolution: {integrity: sha512-VVNz/9Sa0bs5SELtn3f7qhJCDPCF5oMEl5cO9/SSinpE9hbPVvxbd572HH5AKiP7WD8INO53GgfDDhRjkylHEg==} + engines: {node: '>=18'} + hasBin: true + + fsevents@2.3.3: + resolution: {integrity: sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==} + engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0} + os: [darwin] + + get-tsconfig@4.7.6: + resolution: {integrity: sha512-ZAqrLlu18NbDdRaHq+AKXzAmqIUPswPWKUchfytdAjiRFnCe5ojG2bstg6mRiZabkKfCoL/e98pbBELIV/YCeA==} + + handlebars@4.7.8: + resolution: {integrity: sha512-vafaFqs8MZkRrSX7sFVUdo3ap/eNiLnb4IakshzvP56X5Nr1iGKAIqdX6tMlm6HcNRIkr6AxO5jFEoJzzpT8aQ==} + engines: {node: '>=0.4.7'} + hasBin: true + + make-error@1.3.6: + resolution: {integrity: sha512-s8UhlNe7vPKomQhC1qFelMokr/Sc3AgNbso3n74mVPA5LTZwkB9NlXf4XPamLxJE8h0gh73rM94xvwRT2CVInw==} + + minimist@1.2.8: + resolution: {integrity: sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==} + + neo-async@2.6.2: + resolution: {integrity: sha512-Yd3UES5mWCSqR+qNT93S3UoYUkqAZ9lLg8a7g9rimsWmYGK8cVToA4/sF3RrshdyV3sAGMXVUmpMYOw+dLpOuw==} + + node-bin-setup@1.1.3: + resolution: {integrity: sha512-opgw9iSCAzT2+6wJOETCpeRYAQxSopqQ2z+N6BXwIMsQQ7Zj5M8MaafQY8JMlolRR6R1UXg2WmhKp0p9lSOivg==} + + node@20.17.0: + resolution: {integrity: 
sha512-zjgqs6fjta3bWGrwCmtT42gIkupAmvdq5QerbnCgNiQHE+3HrYSXuNrTw5sxQAHG2sZGgMVCxsXQ5OXLV+dkjw==} + engines: {npm: '>=5.0.0'} + hasBin: true + + prettier@3.3.3: + resolution: {integrity: sha512-i2tDNA0O5IrMO757lfrdQZCc2jPNDVntV0m/+4whiDfWaTKfMNgR7Qz0NAeGz/nRqF4m5/6CLzbP4/liHt12Ew==} + engines: {node: '>=14'} + hasBin: true + + resolve-pkg-maps@1.0.0: + resolution: {integrity: sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==} + + source-map@0.6.1: + resolution: {integrity: sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==} + engines: {node: '>=0.10.0'} + + ts-node@10.9.2: + resolution: {integrity: sha512-f0FFpIdcHgn8zcPSbf1dRevwt047YMnaiJM3u2w2RewrB+fob/zePZcrOyQoLMMO7aBIddLcQIEK5dYjkLnGrQ==} + hasBin: true + peerDependencies: + '@swc/core': '>=1.2.50' + '@swc/wasm': '>=1.2.50' + '@types/node': '*' + typescript: '>=2.7' + peerDependenciesMeta: + '@swc/core': + optional: true + '@swc/wasm': + optional: true + + tsx@4.17.0: + resolution: {integrity: sha512-eN4mnDA5UMKDt4YZixo9tBioibaMBpoxBkD+rIPAjVmYERSG0/dWEY1CEFuV89CgASlKL499q8AhmkMnnjtOJg==} + engines: {node: '>=18.0.0'} + hasBin: true + + type-fest@4.25.0: + resolution: {integrity: sha512-bRkIGlXsnGBRBQRAY56UXBm//9qH4bmJfFvq83gSz41N282df+fjy8ofcEgc1sM8geNt5cl6mC2g9Fht1cs8Aw==} + engines: {node: '>=16'} + + typescript@5.5.4: + resolution: {integrity: sha512-Mtq29sKDAEYP7aljRgtPOpTvOfbwRWlS6dPRzwjdE+C0R4brX/GUyhHSecbHMFLNBLcJIPt9nl9yG5TZ1weH+Q==} + engines: {node: '>=14.17'} + hasBin: true + + uglify-js@3.19.2: + resolution: {integrity: sha512-S8KA6DDI47nQXJSi2ctQ629YzwOVs+bQML6DAtvy0wgNdpi+0ySpQK0g2pxBq2xfF2z3YCscu7NNA8nXT9PlIQ==} + engines: {node: '>=0.8.0'} + hasBin: true + + undici-types@6.19.8: + resolution: {integrity: sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==} + + v8-compile-cache-lib@3.0.1: + resolution: {integrity: sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg==} + + wordwrap@1.0.0: + resolution: {integrity: sha512-gvVzJFlPycKc5dZN4yPkP8w7Dc37BtP1yczEneOb4uq34pXZcvrtRTmWV8W+Ume+XCxKgbjM+nevkyFPMybd4Q==} + + yn@3.1.1: + resolution: {integrity: sha512-Ux4ygGWsu2c7isFWe8Yu1YluJmqVhxqK2cLXNQA5AcC3QfbGNpM7fu0Y8b/z16pXLnFxZYvWhd3fhBY9DLmC6Q==} + engines: {node: '>=6'} + +snapshots: + + '@cspotcode/source-map-support@0.8.1': + dependencies: + '@jridgewell/trace-mapping': 0.3.9 + + '@esbuild/aix-ppc64@0.23.1': + optional: true + + '@esbuild/android-arm64@0.23.1': + optional: true + + '@esbuild/android-arm@0.23.1': + optional: true + + '@esbuild/android-x64@0.23.1': + optional: true + + '@esbuild/darwin-arm64@0.23.1': + optional: true + + '@esbuild/darwin-x64@0.23.1': + optional: true + + '@esbuild/freebsd-arm64@0.23.1': + optional: true + + '@esbuild/freebsd-x64@0.23.1': + optional: true + + '@esbuild/linux-arm64@0.23.1': + optional: true + + '@esbuild/linux-arm@0.23.1': + optional: true + + '@esbuild/linux-ia32@0.23.1': + optional: true + + '@esbuild/linux-loong64@0.23.1': + optional: true + + '@esbuild/linux-mips64el@0.23.1': + optional: true + + '@esbuild/linux-ppc64@0.23.1': + optional: true + + '@esbuild/linux-riscv64@0.23.1': + optional: true + + '@esbuild/linux-s390x@0.23.1': + optional: true + + '@esbuild/linux-x64@0.23.1': + optional: true + + '@esbuild/netbsd-x64@0.23.1': + optional: true + + '@esbuild/openbsd-arm64@0.23.1': + optional: true + + '@esbuild/openbsd-x64@0.23.1': + optional: true + + '@esbuild/sunos-x64@0.23.1': + optional: 
true + + '@esbuild/win32-arm64@0.23.1': + optional: true + + '@esbuild/win32-ia32@0.23.1': + optional: true + + '@esbuild/win32-x64@0.23.1': + optional: true + + '@huggingface/tasks@0.11.11': {} + + '@jridgewell/resolve-uri@3.1.2': {} + + '@jridgewell/sourcemap-codec@1.5.0': {} + + '@jridgewell/trace-mapping@0.3.9': + dependencies: + '@jridgewell/resolve-uri': 3.1.2 + '@jridgewell/sourcemap-codec': 1.5.0 + + '@tsconfig/node10@1.0.11': {} + + '@tsconfig/node12@1.0.11': {} + + '@tsconfig/node14@1.0.3': {} + + '@tsconfig/node16@1.0.4': {} + + '@types/node@22.5.0': + dependencies: + undici-types: 6.19.8 + + acorn-walk@8.3.3: + dependencies: + acorn: 8.12.1 + + acorn@8.12.1: {} + + arg@4.1.3: {} + + create-require@1.1.1: {} + + diff@4.0.2: {} + + esbuild@0.23.1: + optionalDependencies: + '@esbuild/aix-ppc64': 0.23.1 + '@esbuild/android-arm': 0.23.1 + '@esbuild/android-arm64': 0.23.1 + '@esbuild/android-x64': 0.23.1 + '@esbuild/darwin-arm64': 0.23.1 + '@esbuild/darwin-x64': 0.23.1 + '@esbuild/freebsd-arm64': 0.23.1 + '@esbuild/freebsd-x64': 0.23.1 + '@esbuild/linux-arm': 0.23.1 + '@esbuild/linux-arm64': 0.23.1 + '@esbuild/linux-ia32': 0.23.1 + '@esbuild/linux-loong64': 0.23.1 + '@esbuild/linux-mips64el': 0.23.1 + '@esbuild/linux-ppc64': 0.23.1 + '@esbuild/linux-riscv64': 0.23.1 + '@esbuild/linux-s390x': 0.23.1 + '@esbuild/linux-x64': 0.23.1 + '@esbuild/netbsd-x64': 0.23.1 + '@esbuild/openbsd-arm64': 0.23.1 + '@esbuild/openbsd-x64': 0.23.1 + '@esbuild/sunos-x64': 0.23.1 + '@esbuild/win32-arm64': 0.23.1 + '@esbuild/win32-ia32': 0.23.1 + '@esbuild/win32-x64': 0.23.1 + + fsevents@2.3.3: + optional: true + + get-tsconfig@4.7.6: + dependencies: + resolve-pkg-maps: 1.0.0 + + handlebars@4.7.8: + dependencies: + minimist: 1.2.8 + neo-async: 2.6.2 + source-map: 0.6.1 + wordwrap: 1.0.0 + optionalDependencies: + uglify-js: 3.19.2 + + make-error@1.3.6: {} + + minimist@1.2.8: {} + + neo-async@2.6.2: {} + + node-bin-setup@1.1.3: {} + + node@20.17.0: + dependencies: + node-bin-setup: 1.1.3 + + prettier@3.3.3: {} + + resolve-pkg-maps@1.0.0: {} + + source-map@0.6.1: {} + + ts-node@10.9.2(@types/node@22.5.0)(typescript@5.5.4): + dependencies: + '@cspotcode/source-map-support': 0.8.1 + '@tsconfig/node10': 1.0.11 + '@tsconfig/node12': 1.0.11 + '@tsconfig/node14': 1.0.3 + '@tsconfig/node16': 1.0.4 + '@types/node': 22.5.0 + acorn: 8.12.1 + acorn-walk: 8.3.3 + arg: 4.1.3 + create-require: 1.1.1 + diff: 4.0.2 + make-error: 1.3.6 + typescript: 5.5.4 + v8-compile-cache-lib: 3.0.1 + yn: 3.1.1 + + tsx@4.17.0: + dependencies: + esbuild: 0.23.1 + get-tsconfig: 4.7.6 + optionalDependencies: + fsevents: 2.3.3 + + type-fest@4.25.0: {} + + typescript@5.5.4: {} + + uglify-js@3.19.2: + optional: true + + undici-types@6.19.8: {} + + v8-compile-cache-lib@3.0.1: {} + + wordwrap@1.0.0: {} + + yn@3.1.1: {} diff --git a/scripts/api-inference/scripts/.gitignore b/scripts/api-inference/scripts/.gitignore new file mode 100644 index 000000000..4c43fe68f --- /dev/null +++ b/scripts/api-inference/scripts/.gitignore @@ -0,0 +1 @@ +*.js \ No newline at end of file diff --git a/scripts/api-inference/scripts/generate.ts b/scripts/api-inference/scripts/generate.ts new file mode 100644 index 000000000..73144662a --- /dev/null +++ b/scripts/api-inference/scripts/generate.ts @@ -0,0 +1,321 @@ +import { snippets, PipelineType } from "@huggingface/tasks"; +import Handlebars from "handlebars"; +import * as fs from "node:fs/promises"; +import * as path from "node:path/posix"; +import type { JsonObject } from "type-fest"; + +const inferenceSnippetLanguages 
= ["python", "js", "curl"] as const; +type InferenceSnippetLanguage = (typeof inferenceSnippetLanguages)[number]; + +// Taken from https://stackoverflow.com/a/31632215 +Handlebars.registerHelper({ + eq: (v1, v2) => v1 === v2, + ne: (v1, v2) => v1 !== v2, + lt: (v1, v2) => v1 < v2, + gt: (v1, v2) => v1 > v2, + lte: (v1, v2) => v1 <= v2, + gte: (v1, v2) => v1 >= v2, + and() { + return Array.prototype.every.call(arguments, Boolean); + }, + or() { + return Array.prototype.slice.call(arguments, 0, -1).some(Boolean); + }, +}); + +console.log("🛠️ Preparing..."); + +//////////////////////// +//// Filepath utils //// +//////////////////////// + +const ROOT_DIR = path + .join(path.normalize(import.meta.url), "..", "..") + .replace(/^(file:)/, ""); +const TEMPLATE_DIR = path.join(ROOT_DIR, "templates"); +const DOCS_DIR = path.join(ROOT_DIR, "..", "..", "docs"); +const TASKS_DOCS_DIR = path.join(DOCS_DIR, "api-inference", "tasks"); + +function readTemplate(templateName: string): Promise { + const templateNameSnakeCase = templateName.replace(/-/g, "_"); + const templatePath = path.join( + TEMPLATE_DIR, + `${templateNameSnakeCase}.handlebars`, + ); + console.log(` 🔍 Reading ${templateNameSnakeCase}.handlebars`); + return fs.readFile(templatePath, { encoding: "utf-8" }); +} + +function writeTaskDoc(templateName: string, content: string): Promise { + const templateNameSnakeCase = templateName.replace(/-/g, "_"); + const taskDocPath = path.join(TASKS_DOCS_DIR, `${templateNameSnakeCase}.md`); + console.log(` 💾 Saving to ${taskDocPath}`); + return fs + .mkdir(TASKS_DOCS_DIR, { recursive: true }) + .then(() => fs.writeFile(taskDocPath, content, { encoding: "utf-8" })); +} + +///////////////////////// +//// Task page utils //// +///////////////////////// + +const TASKS_API_URL = "https://huggingface.co/api/tasks"; +console.log(` 🕸️ Fetching ${TASKS_API_URL}`); +const response = await fetch(TASKS_API_URL); +// eslint-disable-next-line @typescript-eslint/no-explicit-any +const TASKS_DATA = (await response.json()) as any; + +/////////////////////// +//// Snippet utils //// +/////////////////////// + +const GET_SNIPPET_FN = { + curl: snippets.curl.getCurlInferenceSnippet, + js: snippets.js.getJsInferenceSnippet, + python: snippets.python.getPythonInferenceSnippet, +} as const; + +const HAS_SNIPPET_FN = { + curl: snippets.curl.hasCurlInferenceSnippet, + js: snippets.js.hasJsInferenceSnippet, + python: snippets.python.hasPythonInferenceSnippet, +} as const; + +export function getInferenceSnippet( + id: string, + pipeline_tag: PipelineType, + language: InferenceSnippetLanguage, +): string | undefined { + const modelData = { + id, + pipeline_tag, + mask_token: "", + library_name: "", + config: {}, + }; + if (HAS_SNIPPET_FN[language](modelData)) { + return GET_SNIPPET_FN[language](modelData, "hf_***"); + } +} + +///////////////////// +//// Specs utils //// +///////////////////// + +type SpecNameType = "input" | "output" | "stream_output"; + +const SPECS_URL_TEMPLATE = Handlebars.compile( + `https://raw.githubusercontent.com/huggingface/huggingface.js/main/packages/tasks/src/tasks/{{task}}/spec/{{name}}.json`, +); + +async function fetchOneSpec( + task: PipelineType, + name: SpecNameType, +): Promise { + const url = SPECS_URL_TEMPLATE({ task, name }); + console.log(` 🕸️ Fetching ${task} ${name} specs`); + return fetch(url) + .then((res) => res.json()) + .catch(() => undefined); +} + +async function fetchSpecs( + task: PipelineType, +): Promise< + Record<"input" | "output" | "stream_output", JsonObject | undefined> +> { 
+ return { + input: await fetchOneSpec(task, "input"), + output: await fetchOneSpec(task, "output"), + stream_output: await fetchOneSpec(task, "stream_output"), + }; +} + +function processPayloadSchema(schema: any, prefix: string = ""): JsonObject[] { + let rows: JsonObject[] = []; + + Object.entries(schema.properties || {}).forEach( + ([key, value]: [string, any]) => { + const isRequired = schema.required?.includes(key); + let type = value.type || "object"; + + if (value.$ref) { + // Handle references + const refSchemaKey = value.$ref.split("/").pop(); + value = schema.$defs?.[refSchemaKey!]; + } + + const description = value.description || ""; + const isObject = type === "object" && value.properties; + const row = { + name: `${prefix}${key}`, + type: type, + description: description, + required: isRequired ? "required" : "optional", + }; + rows.push(row); + + if (isObject) { + // Recursively process nested objects + rows = rows.concat( + processPayloadSchema( + value, + prefix + "        ", + ), + ); + } + }, + ); + + return rows; +} + +////////////////////////// +//// Inline templates //// +////////////////////////// + +const TIP_LINK_TO_TASK_PAGE_TEMPLATE = Handlebars.compile(` + +For more details about the \`{{task}}\` task, check out its [dedicated page](https://huggingface.co/tasks/{{task}})! You will find examples and related materials. + +`); + +const TIP_LIST_MODELS_LINK_TEMPLATE = Handlebars.compile( + `This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag={{task}}&sort=trending).`, +); + +const SPECS_HEADERS = await readTemplate("specs-headers"); +const SNIPPETS_TEMPLATE = Handlebars.compile( + await readTemplate("snippets-template"), +); +const SPECS_PAYLOAD_TEMPLATE = Handlebars.compile( + await readTemplate("specs-payload"), +); +const SPECS_OUTPUT_TEMPLATE = Handlebars.compile( + await readTemplate("specs-output"), +); + +//////////////////// +//// Data utils //// +//////////////////// + +const TASKS: PipelineType[] = ["image-to-image", "text-to-image"]; + +const DATA: { + constants: { + specsHeaders: string; + }; + models: Record; + snippets: Record; + specs: Record< + string, + { + input: string | undefined; + output: string | undefined; + stream_output: string | undefined; + } + >; + tips: { + linksToTaskPage: Record; + listModelsLink: Record; + }; +} = { + constants: { + specsHeaders: SPECS_HEADERS, + }, + models: {}, + snippets: {}, + specs: {}, + tips: { linksToTaskPage: {}, listModelsLink: {} }, +}; + +// Check for each model if inference status is "warm" +await Promise.all( + TASKS.map(async (task) => { + await Promise.all( + TASKS_DATA[task].models.map( + async (model: { + id: string; + description: string; + inference: string | undefined; + }) => { + console.log(` ⚡ Checking inference status ${model.id}`); + const modelData = await fetch( + `https://huggingface.co/api/models/${model.id}?expand[]=inference`, + ).then((res) => res.json()); + model.inference = modelData.inference; + }, + ), + ); + }), +); + +// Fetch recommended models +TASKS.forEach((task) => { + DATA.models[task] = TASKS_DATA[task].models; +}); + +// Fetch snippets +// TODO: render snippets only if they are available +TASKS.forEach((task) => { + const mainModel = TASKS_DATA[task].models[0].id; + const taskSnippets = { + curl: getInferenceSnippet(mainModel, task, "curl"), + python: getInferenceSnippet(mainModel, task, "python"), + javascript: getInferenceSnippet(mainModel, task, "js"), + }; + 
DATA.snippets[task] = SNIPPETS_TEMPLATE({ + taskSnippets, + taskSnakeCase: task.replace("-", "_"), + taskAttached: task.replace("-", ""), + }); +}); + +// Render specs +await Promise.all( + TASKS.map(async (task) => { + const specs = await fetchSpecs(task); + DATA.specs[task] = { + input: specs.input + ? SPECS_PAYLOAD_TEMPLATE({ schema: processPayloadSchema(specs.input) }) + : undefined, + output: specs.output + ? SPECS_OUTPUT_TEMPLATE({ schema: processPayloadSchema(specs.output) }) + : undefined, + stream_output: specs.stream_output + ? SPECS_OUTPUT_TEMPLATE({ + schema: processPayloadSchema(specs.stream_output), + }) + : undefined, + }; + }), +); + +// Render tips +TASKS.forEach((task) => { + DATA.tips.linksToTaskPage[task] = TIP_LINK_TO_TASK_PAGE_TEMPLATE({ task }); + DATA.tips.listModelsLink[task] = TIP_LIST_MODELS_LINK_TEMPLATE({ task }); +}); + +///////////////////////// +//// Rendering utils //// +///////////////////////// + +async function renderTemplate( + templateName: string, + data: JsonObject, +): Promise { + console.log(`🎨 Rendering ${templateName}`); + const template = Handlebars.compile(await readTemplate(templateName)); + return template(data); +} + +await Promise.all( + TASKS.map(async (task) => { + // @ts-ignore + const rendered = await renderTemplate(task, DATA); + await writeTaskDoc(task, rendered); + }), +); + +console.log("✅ All done!"); diff --git a/scripts/api-inference/templates/image_to_image.handlebars b/scripts/api-inference/templates/image_to_image.handlebars new file mode 100644 index 000000000..b432eab19 --- /dev/null +++ b/scripts/api-inference/templates/image_to_image.handlebars @@ -0,0 +1,36 @@ +## Image-to-image + +Image-to-image is the task of transforming a source image to match the characteristics of a target image or a target image domain. +Any image manipulation and enhancement is possible with image to image models. + +Use cases heavily depend on the model and the dataset it was trained on, but some common use cases include: +- Style transfer +- Image colorization +- Image super-resolution +- Image inpainting + +{{{tips.linksToTaskPage.image-to-image}}} + +### Recommended models + +{{#each models.image-to-image}} +- [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}} +{{/each}} + +{{{tips.listModelsLink.image-to-image}}} + +### API specification + +#### Request + +{{{specs.image-to-image.input}}} + +{{{constants.specsHeaders}}} + +#### Response + +{{{specs.image-to-image.output}}} + +### Using the API + +{{{snippets.image-to-image}}} diff --git a/scripts/api-inference/templates/snippets_template.handlebars b/scripts/api-inference/templates/snippets_template.handlebars new file mode 100644 index 000000000..2d0f099e2 --- /dev/null +++ b/scripts/api-inference/templates/snippets_template.handlebars @@ -0,0 +1,42 @@ +{{#if (or taskSnippets.curl taskSnippets.python taskSnippets.javascript)}} + + + +{{!-- cURL snippet (if exists) --}} +{{#if taskSnippets.curl}} + +```bash +{{{taskSnippets.curl}}} +``` + +{{/if}} + +{{!-- Python snippet (if exists) --}} +{{#if taskSnippets.python}} + +```py +{{{taskSnippets.python}}} +``` + +To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.{{taskSnakeCase}}). 
+ +{{/if}} + +{{!-- JavaScript snippet (if exists) --}} +{{#if taskSnippets.javascript}} + +```js +{{{taskSnippets.javascript}}} +``` + +To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#{{taskAttached}}). + +{{/if}} + + + +{{else}} + +No snippet available for this task. + +{{/if}} \ No newline at end of file diff --git a/scripts/api-inference/templates/specs_headers.handlebars b/scripts/api-inference/templates/specs_headers.handlebars new file mode 100644 index 000000000..44b28ecc8 --- /dev/null +++ b/scripts/api-inference/templates/specs_headers.handlebars @@ -0,0 +1,5 @@ +| Headers | | | +| :--- | :--- | :--- | +| **authorization** | _string, optional_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, optional, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, optional, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | diff --git a/scripts/api-inference/templates/specs_output.handlebars b/scripts/api-inference/templates/specs_output.handlebars new file mode 100644 index 000000000..7f3391b98 --- /dev/null +++ b/scripts/api-inference/templates/specs_output.handlebars @@ -0,0 +1,5 @@ +| Body | | +| :--- | :--- | +{{#each schema}} +| **{{{name}}}** | {{{description}}} | +{{/each}} \ No newline at end of file diff --git a/scripts/api-inference/templates/specs_payload.handlebars b/scripts/api-inference/templates/specs_payload.handlebars new file mode 100644 index 000000000..70460b184 --- /dev/null +++ b/scripts/api-inference/templates/specs_payload.handlebars @@ -0,0 +1,5 @@ +| Payload | | | +| :--- | :--- | :--- | +{{#each schema}} +| **{{{name}}}** | _{{type}}, {{required}}_ | {{{description}}} | +{{/each}} \ No newline at end of file diff --git a/scripts/api-inference/templates/text_to_image.handlebars b/scripts/api-inference/templates/text_to_image.handlebars new file mode 100644 index 000000000..6c9c568d1 --- /dev/null +++ b/scripts/api-inference/templates/text_to_image.handlebars @@ -0,0 +1,29 @@ +## Text-to-image + +Generate an image based on a given text prompt. 
+ +{{{tips.linksToTaskPage.text-to-image}}} + +### Recommended models + +{{#each models.text-to-image}} +- [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}} +{{/each}} + +{{{tips.listModelsLink.text-to-image}}} + +### API specification + +#### Request + +{{{specs.text-to-image.input}}} + +{{{constants.specsHeaders}}} + +#### Response + +{{{specs.text-to-image.output}}} + +### Using the API + +{{{snippets.text-to-image}}} diff --git a/scripts/api-inference/tsconfig.json b/scripts/api-inference/tsconfig.json new file mode 100644 index 000000000..20b47e4ab --- /dev/null +++ b/scripts/api-inference/tsconfig.json @@ -0,0 +1,20 @@ +{ + "compilerOptions": { + "allowSyntheticDefaultImports": true, + "lib": ["ES2022", "DOM"], + "module": "ESNext", + "target": "ESNext", + "moduleResolution": "node", + "forceConsistentCasingInFileNames": true, + "strict": true, + "noImplicitAny": true, + "strictNullChecks": true, + "skipLibCheck": true, + "noImplicitOverride": true, + "outDir": "./dist", + "declaration": true, + "declarationMap": true + }, + "include": ["scripts"], + "exclude": ["dist"] +} From 0c3410677b2a5e555b99c9d67a0400c880d16712 Mon Sep 17 00:00:00 2001 From: osanseviero Date: Tue, 27 Aug 2024 15:45:31 +0200 Subject: [PATCH 24/38] Add getting started --- docs/api-inference/getting_started.md | 76 ++++++++++++++++++++++++++- docs/api-inference/parameters.md | 2 +- 2 files changed, 76 insertions(+), 2 deletions(-) diff --git a/docs/api-inference/getting_started.md b/docs/api-inference/getting_started.md index 6d668cf10..ad21542ec 100644 --- a/docs/api-inference/getting_started.md +++ b/docs/api-inference/getting_started.md @@ -1,3 +1,77 @@ # Getting Started -TODO: \ No newline at end of file +The Serverless Inference API allows you to easily do inference on a wide range of models and tasks. You can do requests with your favorite tools (Python, cURL, etc). We also provide a Python SDK (`huggingface_hub`) to make it even easier. + +We'll do a minimal example using a [sentiment classification model](https://huggingface.co/cardiffnlp/twitter-roberta-base-sentiment-latest). Please visit task-specific parameters and further documentation in our [API Reference](./parameters.md). + +## Getting a Token + +Using the Serverless Inference API requires passing a user token in the request headers. You can get a token by signing up on the Hugging Face website and then going to the [tokens page](https://huggingface.co/settings/tokens). We recommend creating a `Fine-grained` token with the scope to `Make calls to the serverless Inference API`. + +TODO: add screenshot + +## cURL + +```bash +curl https://api-inference.huggingface.co/models/cardiffnlp/twitter-roberta-base-sentiment-latest \ + -X POST \ + -d '{"inputs": "Today is a nice day"}' \ + -H "Authorization: Bearer hf_***" \ + -H "Content-Type: application/json" +``` + +## Python + +You can use the `requests` library to make a request to the Inference API. + +```python +import requests + +API_URL = "https://api-inference.huggingface.co/models/cardiffnlp/twitter-roberta-base-sentiment-latest" +headers = {"Authorization": "Bearer hf_***"} + +payload = {"inputs": "Today is a nice day"} +response = requests.post(API_URL, headers=headers, json=payload) +response.json() +``` + +Hugging Face also provides a [`InferenceClient`](https://huggingface.co/docs/huggingface_hub/guides/inference) that handles inference, caching, async, and more. 
Make sure to install it with `pip install huggingface_hub` first. + +```python +from huggingface_hub import InferenceClient + +client = InferenceClient(model="cardiffnlp/twitter-roberta-base-sentiment-latest", token="hf_***") +client.text_classification("Today is a nice day") +``` + +## JavaScript + +```js +import fetch from "node-fetch"; + +async function query(data) { + const response = await fetch( + "https://api-inference.huggingface.co/models/cardiffnlp/twitter-roberta-base-sentiment-latest", + { + method: "POST", + headers: { + Authorization: `Bearer hf_***`, + "Content-Type": "application/json", + }, + body: JSON.stringify(data), + } + ); + const result = await response.json(); + return result; +} + +query({ + inputs: "Today is a nice day" +}).then((response) => { + console.log(JSON.stringify(response, null, 2)); +}); +``` + +## Next Steps + +Now that you know the basics, you can explore the [API Reference](./parameters.md) to learn more about task-specific settings and parameters. \ No newline at end of file diff --git a/docs/api-inference/parameters.md b/docs/api-inference/parameters.md index a89413c29..905420fbe 100644 --- a/docs/api-inference/parameters.md +++ b/docs/api-inference/parameters.md @@ -14,7 +14,7 @@ Table with ### Caching -There is a cache layer on the inference API to speed up requests when the inputs are exactly the same. For many models, such as classifiers and embedding models, results are deterministic meaning you can safely use the cached results. However, if you use a nondeterministic model, you might want to disable the cache mechanism resulting in a real new query. +There is a cache layer on the inference API to speed up requests when the inputs are exactly the same. Many models, such as classifiers and embedding models, can use those results as is if they are deterministic, meaning the results will be the same. Howevr, if you use a nondeterministic model, you can disable the cache mechanism from being used, resulting in a real new query. To do this, you can add `x-use-cache:false` to the request headers. For example From ac640c8897057364f686357d0a19738de5e288c5 Mon Sep 17 00:00:00 2001 From: Omar Sanseviero Date: Wed, 28 Aug 2024 11:01:41 +0200 Subject: [PATCH 25/38] Update docs/api-inference/getting_started.md Co-authored-by: Lucain --- docs/api-inference/getting_started.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/api-inference/getting_started.md b/docs/api-inference/getting_started.md index ad21542ec..c0647b935 100644 --- a/docs/api-inference/getting_started.md +++ b/docs/api-inference/getting_started.md @@ -9,6 +9,7 @@ We'll do a minimal example using a [sentiment classification model](https://hugg Using the Serverless Inference API requires passing a user token in the request headers. You can get a token by signing up on the Hugging Face website and then going to the [tokens page](https://huggingface.co/settings/tokens). We recommend creating a `Fine-grained` token with the scope to `Make calls to the serverless Inference API`. TODO: add screenshot +For more details about user tokens, check out [this guide](https://huggingface.co/docs/hub/en/security-tokens).
## cURL From b785d8b4725928599ee9bef485b78eaea30c2781 Mon Sep 17 00:00:00 2001 From: Lucain Date: Wed, 28 Aug 2024 17:08:29 +0200 Subject: [PATCH 26/38] Draft to add text-generation parameters (#1393) * first draft to add text-generation parameters * headers * more structure * add chat-completion * better handling of arrays * better handling of parameters * Add new tasks pages (fill mask, summarization, question answering, sentence similarity) (#1394) * add fill mask * add summarization * add question answering * Table question answering * handle array output * Add sentence similarity * text classification (almost) * better with an enum * Add mask token * capitalize * remove sentence-similarity * Update docs/api-inference/tasks/table_question_answering.md Co-authored-by: Omar Sanseviero --------- Co-authored-by: Omar Sanseviero * mention chat completion in text generation docs * fix chat completion snippets --------- Co-authored-by: Omar Sanseviero --- docs/api-inference/_toctree.yml | 16 +- docs/api-inference/tasks/chat_completion.md | 202 +++++++++++++++ docs/api-inference/tasks/fill_mask.md | 114 +++++++- docs/api-inference/tasks/image_to_image.md | 31 ++- .../api-inference/tasks/question_answering.md | 127 +++++++++ docs/api-inference/tasks/summarization.md | 106 ++++++++ .../tasks/table_question_answering.md | 138 ++++++++++ .../tasks/text_classification.md | 112 ++++++++ docs/api-inference/tasks/text_generation.md | 203 +++++++++++++++ docs/api-inference/tasks/text_to_image.md | 33 +-- scripts/api-inference/scripts/generate.ts | 243 +++++++++++++++--- .../{ => common}/snippets_template.handlebars | 0 .../templates/common/specs_headers.handlebars | 9 + .../templates/common/specs_output.handlebars | 9 + .../templates/common/specs_payload.handlebars | 9 + .../templates/specs_headers.handlebars | 5 - .../templates/specs_output.handlebars | 5 - .../templates/specs_payload.handlebars | 5 - .../templates/task/chat_completion.handlebars | 38 +++ .../templates/task/fill_mask.handlebars | 29 +++ .../{ => task}/image_to_image.handlebars | 2 +- .../task/question_answering.handlebars | 29 +++ .../templates/task/summarization.handlebars | 29 +++ .../task/table_question_answering.handlebars | 29 +++ .../task/text_classification.handlebars | 29 +++ .../templates/task/text_generation.handlebars | 39 +++ .../{ => task}/text_to_image.handlebars | 2 +- 27 files changed, 1503 insertions(+), 90 deletions(-) create mode 100644 docs/api-inference/tasks/chat_completion.md create mode 100644 docs/api-inference/tasks/question_answering.md create mode 100644 docs/api-inference/tasks/summarization.md create mode 100644 docs/api-inference/tasks/table_question_answering.md create mode 100644 docs/api-inference/tasks/text_classification.md create mode 100644 docs/api-inference/tasks/text_generation.md rename scripts/api-inference/templates/{ => common}/snippets_template.handlebars (100%) create mode 100644 scripts/api-inference/templates/common/specs_headers.handlebars create mode 100644 scripts/api-inference/templates/common/specs_output.handlebars create mode 100644 scripts/api-inference/templates/common/specs_payload.handlebars delete mode 100644 scripts/api-inference/templates/specs_headers.handlebars delete mode 100644 scripts/api-inference/templates/specs_output.handlebars delete mode 100644 scripts/api-inference/templates/specs_payload.handlebars create mode 100644 scripts/api-inference/templates/task/chat_completion.handlebars create mode 100644 scripts/api-inference/templates/task/fill_mask.handlebars 
rename scripts/api-inference/templates/{ => task}/image_to_image.handlebars (97%) create mode 100644 scripts/api-inference/templates/task/question_answering.handlebars create mode 100644 scripts/api-inference/templates/task/summarization.handlebars create mode 100644 scripts/api-inference/templates/task/table_question_answering.handlebars create mode 100644 scripts/api-inference/templates/task/text_classification.handlebars create mode 100644 scripts/api-inference/templates/task/text_generation.handlebars rename scripts/api-inference/templates/{ => task}/text_to_image.handlebars (96%) diff --git a/docs/api-inference/_toctree.yml b/docs/api-inference/_toctree.yml index a68f3abfb..247a96201 100644 --- a/docs/api-inference/_toctree.yml +++ b/docs/api-inference/_toctree.yml @@ -12,11 +12,23 @@ - local: parameters title: Parameters - sections: + - local: tasks/chat_completion + title: Chat Completion - local: tasks/fill_mask title: Fill Mask - local: tasks/image_to_image - title: Image-to-image + title: Image to Image + - local: tasks/question_answering + title: Question Answering + - local: tasks/summarization + title: Summarization + - local: tasks/table_question_answering + title: Table Question Answering + - local: tasks/text_classification + title: Text Classification + - local: tasks/text_generation + title: Text Generation - local: tasks/text_to_image - title: Text-to-image + title: Text to Image title: Detailed Task Parameters title: API Reference \ No newline at end of file diff --git a/docs/api-inference/tasks/chat_completion.md b/docs/api-inference/tasks/chat_completion.md new file mode 100644 index 000000000..c01fe9ac1 --- /dev/null +++ b/docs/api-inference/tasks/chat_completion.md @@ -0,0 +1,202 @@ +## Chat Completion + +Generate a response given a list of messages. +This is a subtask of [`text-generation`](./text_generation) designed to generate responses in a conversational context. + + + +### Recommended models + +- [google/gemma-2-2b-it](https://huggingface.co/google/gemma-2-2b-it): A text-generation model trained to follow instructions. +- [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct): Very powerful text generation model trained to follow instructions. +- [microsoft/Phi-3-mini-4k-instruct](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct): Small yet powerful text generation model. +- [AI-MO/NuminaMath-7B-TIR](https://huggingface.co/AI-MO/NuminaMath-7B-TIR): A very powerful model that can solve mathematical problems. +- [HuggingFaceH4/starchat2-15b-v0.1](https://huggingface.co/HuggingFaceH4/starchat2-15b-v0.1): Strong coding assistant model. +- [mistralai/Mistral-Nemo-Instruct-2407](https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407): Very strong open-source large language model. + + + +### API specification + +#### Request + +| Payload | | | +| :--- | :--- | :--- | +| **frequency_penalty** | _number_ | Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. | +| **logprobs** | _boolean_ | Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the content of message. | +| **max_tokens** | _integer_ | The maximum number of tokens that can be generated in the chat completion. | +| **messages*** | _object[]_ | A list of messages comprising the conversation so far. 
| +| **        content** | _string_ | | +| **        name** | _string_ | | +| **        role*** | _string_ | | +| **        tool_calls** | _object[]_ | | +| **                function*** | _object_ | | +| **                        arguments*** | _object_ | | +| **                        description** | _string_ | | +| **                        name*** | _string_ | | +| **                id*** | _integer_ | | +| **                type*** | _string_ | | +| **presence_penalty** | _number_ | Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics | +| **seed** | _integer_ | | +| **stop** | _string[]_ | Up to 4 sequences where the API will stop generating further tokens. | +| **stream** | _boolean_ | | +| **temperature** | _number_ | What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. | +| **tool_choice** | _object_ | One of the following: | +| **         (#1)** | | | +| **                FunctionName*** | _string_ | | +| **         (#2)** | | Possible values: OneOf. | +| **tool_prompt** | _string_ | A prompt to be appended before the tools | +| **tools** | _object[]_ | A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs for. | +| **        function*** | _object_ | | +| **                arguments*** | _object_ | | +| **                description** | _string_ | | +| **                name*** | _string_ | | +| **        type*** | _string_ | | +| **top_logprobs** | _integer_ | An integer between 0 and 5 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to true if this parameter is used. | +| **top_p** | _number_ | An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. | + + +Some options can be configured by passing headers to the Inference API. Here are the available headers: + +| Headers | | | +| :--- | :--- | :--- | +| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). 
| + +For more information about Inference API headers, check out the parameters [guide](../parameters). + +#### Response + +Output type depends on the `stream` input parameter. +If `stream` is `false` (default), the response will be a JSON object with the following fields: + +| Body | | +| :--- | :--- | :--- | +| **choices** | _object[]_ | | +| **        finish_reason** | _string_ | | +| **        index** | _integer_ | | +| **        logprobs** | _object_ | | +| **                content** | _object[]_ | | +| **                        logprob** | _number_ | | +| **                        token** | _string_ | | +| **                        top_logprobs** | _object[]_ | | +| **                                logprob** | _number_ | | +| **                                token** | _string_ | | +| **        message** | _object_ | | +| **                content** | _string_ | | +| **                name** | _string_ | | +| **                role** | _string_ | | +| **                tool_calls** | _object[]_ | | +| **                        function** | _object_ | | +| **                                arguments** | _object_ | | +| **                                description** | _string_ | | +| **                                name** | _string_ | | +| **                        id** | _integer_ | | +| **                        type** | _string_ | | +| **created** | _integer_ | | +| **id** | _string_ | | +| **model** | _string_ | | +| **object** | _string_ | | +| **system_fingerprint** | _string_ | | +| **usage** | _object_ | | +| **        completion_tokens** | _integer_ | | +| **        prompt_tokens** | _integer_ | | +| **        total_tokens** | _integer_ | | + + +If `stream` is `true`, generated tokens are returned as a stream, using Server-Sent Events (SSE). +For more information about streaming, check out [this guide](https://huggingface.co/docs/text-generation-inference/conceptual/streaming). 
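For a quick sense of what this looks like on the wire, here is a minimal Python sketch that streams the same request as the snippets further down (token and model are placeholders; the exact chunk contents depend on the model):

```py
import requests

# Sketch only: with "stream": true the server replies with Server-Sent Events,
# one "data: {...}" line per generated chunk (typically followed by a final "data: [DONE]").
API_URL = "https://api-inference.huggingface.co/models/google/gemma-2-2b-it/v1/chat/completions"
headers = {"Authorization": "Bearer hf_***"}

payload = {
    "model": "google/gemma-2-2b-it",
    "messages": [{"role": "user", "content": "What is the capital of France?"}],
    "max_tokens": 500,
    "stream": True,
}

with requests.post(API_URL, headers=headers, json=payload, stream=True) as response:
    for line in response.iter_lines():
        if line:
            print(line.decode("utf-8"))  # raw "data: ..." chunks matching the schema below
```

Each streamed chunk follows the schema below.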
+ +| Body | | +| :--- | :--- | :--- | +| **choices** | _object[]_ | | +| **        delta** | _object_ | | +| **                content** | _string_ | | +| **                role** | _string_ | | +| **                tool_calls** | _object_ | | +| **                        function** | _object_ | | +| **                                arguments** | _string_ | | +| **                                name** | _string_ | | +| **                        id** | _string_ | | +| **                        index** | _integer_ | | +| **                        type** | _string_ | | +| **        finish_reason** | _string_ | | +| **        index** | _integer_ | | +| **        logprobs** | _object_ | | +| **                content** | _object[]_ | | +| **                        logprob** | _number_ | | +| **                        token** | _string_ | | +| **                        top_logprobs** | _object[]_ | | +| **                                logprob** | _number_ | | +| **                                token** | _string_ | | +| **created** | _integer_ | | +| **id** | _string_ | | +| **model** | _string_ | | +| **object** | _string_ | | +| **system_fingerprint** | _string_ | | + + +### Using the API + + + + + +```bash +curl 'https://api-inference.huggingface.co/models/google/gemma-2-2b-it/v1/chat/completions' \ +-H "Authorization: Bearer hf_***" \ +-H 'Content-Type: application/json' \ +-d '{ + "model": "google/gemma-2-2b-it", + "messages": [{"role": "user", "content": "What is the capital of France?"}], + "max_tokens": 500, + "stream": false +}' + +``` + + + +```py +from huggingface_hub import InferenceClient + +client = InferenceClient( + "google/gemma-2-2b-it", + token="hf_***", +) + +for message in client.chat_completion( + messages=[{"role": "user", "content": "What is the capital of France?"}], + max_tokens=500, + stream=True, +): + print(message.choices[0].delta.content, end="") + +``` + +To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion). + + + +```js +import { HfInference } from "@huggingface/inference"; + +const inference = new HfInference("hf_***"); + +for await (const chunk of inference.chatCompletionStream({ + model: "google/gemma-2-2b-it", + messages: [{ role: "user", content: "What is the capital of France?" }], + max_tokens: 500, +})) { + process.stdout.write(chunk.choices[0]?.delta?.content || ""); +} + +``` + +To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#chatcompletion). + + + + + diff --git a/docs/api-inference/tasks/fill_mask.md b/docs/api-inference/tasks/fill_mask.md index 64260ae39..197fef37c 100644 --- a/docs/api-inference/tasks/fill_mask.md +++ b/docs/api-inference/tasks/fill_mask.md @@ -1,6 +1,114 @@ -## Fill Mask +## Fill-mask -Mask filling is the task of predicting the right word (token to be precise) in the middle of a sequence. +Mask filling is the task of predicting the right word (token to be precise) in the middle of a sequence. + + + +For more details about the `fill-mask` task, check out its [dedicated page](https://huggingface.co/tasks/fill-mask)! You will find examples and related materials. + + + +### Recommended models + +- [distilbert-base-uncased](https://huggingface.co/distilbert-base-uncased): A faster and smaller model than the famous BERT model. 
+- [xlm-roberta-base](https://huggingface.co/xlm-roberta-base): A multilingual model trained on 100 languages. + +This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=fill-mask&sort=trending). + +### API specification + +#### Request + +| Payload | | | +| :--- | :--- | :--- | +| **inputs*** | _string_ | The text with masked tokens | +| **parameters** | _object_ | Additional inference parameters for Fill Mask | +| **        top_k** | _integer_ | When passed, overrides the number of predictions to return. | +| **        targets** | _string[]_ | When passed, the model will limit the scores to the passed targets instead of looking up in the whole vocabulary. If the provided targets are not in the model vocab, they will be tokenized and the first resulting token will be used (with a warning, and that might be slower). | + + +Some options can be configured by passing headers to the Inference API. Here are the available headers: + +| Headers | | | +| :--- | :--- | :--- | +| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | + +For more information about Inference API headers, check out the parameters [guide](../parameters). + +#### Response + +| Body | | +| :--- | :--- | :--- | +| **(array)** | _object[]_ | Output is an array of objects. | +| **        sequence** | _string_ | The corresponding input with the mask token prediction. | +| **        score** | _number_ | The corresponding probability | +| **        token** | _integer_ | The predicted token id (to replace the masked one). | +| **        token_str** | _string_ | The predicted token (to replace the masked one). 
| + + +### Using the API + + + + + +```bash +curl https://api-inference.huggingface.co/models/distilbert-base-uncased \ + -X POST \ + -d '{"inputs": "The answer to the universe is [MASK]."}' \ + -H 'Content-Type: application/json' \ + -H "Authorization: Bearer hf_***" + +``` + + + +```py +import requests + +API_URL = "https://api-inference.huggingface.co/models/distilbert-base-uncased" +headers = {"Authorization": "Bearer hf_***"} + +def query(payload): + response = requests.post(API_URL, headers=headers, json=payload) + return response.json() + +output = query({ + "inputs": "The answer to the universe is [MASK].", +}) +``` + +To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.fill_mask). + + + +```js +async function query(data) { + const response = await fetch( + "https://api-inference.huggingface.co/models/distilbert-base-uncased", + { + headers: { + Authorization: "Bearer hf_***" + "Content-Type": "application/json", + }, + method: "POST", + body: JSON.stringify(data), + } + ); + const result = await response.json(); + return result; +} + +query({"inputs": "The answer to the universe is [MASK]."}).then((response) => { + console.log(JSON.stringify(response)); +}); +``` + +To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#fillmask). + + + -Automated docs below diff --git a/docs/api-inference/tasks/image_to_image.md b/docs/api-inference/tasks/image_to_image.md index 1b5e2241e..eb197489e 100644 --- a/docs/api-inference/tasks/image_to_image.md +++ b/docs/api-inference/tasks/image_to_image.md @@ -1,4 +1,4 @@ -## Image-to-image +## Image to Image Image-to-image is the task of transforming a source image to match the characteristics of a target image or a target image domain. Any image manipulation and enhancement is possible with image to image models. @@ -31,28 +31,31 @@ This is only a subset of the supported models. Find the model that suits you bes | Payload | | | | :--- | :--- | :--- | -| **inputs** | _object, required_ | The input image data | -| **parameters** | _object, optional_ | Additional inference parameters for Image To Image | -| **        guidance_scale** | _number, optional_ | For diffusion models. A higher guidance scale value encourages the model to generate images closely linked to the text prompt at the expense of lower image quality. | -| **        negative_prompt** | _array, optional_ | One or several prompt to guide what NOT to include in image generation. | -| **        num_inference_steps** | _integer, optional_ | For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference. | -| **        target_size** | _object, optional_ | The size in pixel of the output image | -| **                width** | _integer, required_ | | -| **                height** | _integer, required_ | | +| **inputs*** | _object_ | The input image data | +| **parameters** | _object_ | Additional inference parameters for Image To Image | +| **        guidance_scale** | _number_ | For diffusion models. A higher guidance scale value encourages the model to generate images closely linked to the text prompt at the expense of lower image quality. | +| **        negative_prompt** | _string[]_ | One or several prompt to guide what NOT to include in image generation. 
| +| **        num_inference_steps** | _integer_ | For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference. | +| **        target_size** | _object_ | The size in pixel of the output image | +| **                width*** | _integer_ | | +| **                height*** | _integer_ | | +Some options can be configured by passing headers to the Inference API. Here are the available headers: + | Headers | | | | :--- | :--- | :--- | -| **authorization** | _string, optional_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | -| **x-use-cache** | _boolean, optional, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | -| **x-wait-for-model** | _boolean, optional, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | +| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | +For more information about Inference API headers, check out the parameters [guide](../parameters). #### Response | Body | | -| :--- | :--- | -| **image** | The output image | +| :--- | :--- | :--- | +| **image** | _object_ | The output image | ### Using the API diff --git a/docs/api-inference/tasks/question_answering.md b/docs/api-inference/tasks/question_answering.md new file mode 100644 index 000000000..3f724c9c2 --- /dev/null +++ b/docs/api-inference/tasks/question_answering.md @@ -0,0 +1,127 @@ +## Question Answering + +Question Answering models can retrieve the answer to a question from a given text, which is useful for searching for an answer in a document. 
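As an informal illustration before the full specification below, the same task can also be called through `huggingface_hub`'s `InferenceClient`; this is only a sketch, with a placeholder token and one of the recommended models:

```py
from huggingface_hub import InferenceClient

# Sketch only: assumes a valid user access token and a warm model.
client = InferenceClient("deepset/roberta-base-squad2", token="hf_***")

result = client.question_answering(
    question="What is my name?",
    context="My name is Clara and I live in Berkeley.",
)
# The output exposes answer/score/start/end fields (see the Response section below).
print(result.answer, result.score)
```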
+ + + +For more details about the `question-answering` task, check out its [dedicated page](https://huggingface.co/tasks/question-answering)! You will find examples and related materials. + + + +### Recommended models + +- [deepset/roberta-base-squad2](https://huggingface.co/deepset/roberta-base-squad2): A robust baseline model for most question answering domains. +- [google/tapas-base-finetuned-wtq](https://huggingface.co/google/tapas-base-finetuned-wtq): A special model that can answer questions from tables! + +This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=question-answering&sort=trending). + +### API specification + +#### Request + +| Payload | | | +| :--- | :--- | :--- | +| **inputs*** | _object_ | One (context, question) pair to answer | +| **        context*** | _string_ | The context to be used for answering the question | +| **        question*** | _string_ | The question to be answered | +| **parameters** | _object_ | Additional inference parameters for Question Answering | +| **        top_k** | _integer_ | The number of answers to return (will be chosen by order of likelihood). Note that we return less than topk answers if there are not enough options available within the context. | +| **        doc_stride** | _integer_ | If the context is too long to fit with the question for the model, it will be split in several chunks with some overlap. This argument controls the size of that overlap. | +| **        max_answer_len** | _integer_ | The maximum length of predicted answers (e.g., only answers with a shorter length are considered). | +| **        max_seq_len** | _integer_ | The maximum length of the total sentence (context + question) in tokens of each chunk passed to the model. The context will be split in several chunks (using docStride as overlap) if needed. | +| **        max_question_len** | _integer_ | The maximum length of the question after tokenization. It will be truncated if needed. | +| **        handle_impossible_answer** | _boolean_ | Whether to accept impossible as an answer. | +| **        align_to_words** | _boolean_ | Attempts to align the answer to real words. Improves quality on space separated languages. Might hurt on non-space-separated languages (like Japanese or Chinese) | + + +Some options can be configured by passing headers to the Inference API. Here are the available headers: + +| Headers | | | +| :--- | :--- | :--- | +| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. 
Read more about model availability [here](../overview#eligibility]). | + +For more information about Inference API headers, check out the parameters [guide](../parameters). + +#### Response + +| Body | | +| :--- | :--- | :--- | +| **(array)** | _object[]_ | Output is an array of objects. | +| **        answer** | _string_ | The answer to the question. | +| **        score** | _number_ | The probability associated to the answer. | +| **        start** | _integer_ | The character position in the input where the answer begins. | +| **        end** | _integer_ | The character position in the input where the answer ends. | + + +### Using the API + + + + + +```bash +curl https://api-inference.huggingface.co/models/deepset/roberta-base-squad2 \ + -X POST \ + -d '{"inputs": { "question": "What is my name?", "context": "My name is Clara and I live in Berkeley." }}' \ + -H 'Content-Type: application/json' \ + -H "Authorization: Bearer hf_***" + +``` + + + +```py +import requests + +API_URL = "https://api-inference.huggingface.co/models/deepset/roberta-base-squad2" +headers = {"Authorization": "Bearer hf_***"} + +def query(payload): + response = requests.post(API_URL, headers=headers, json=payload) + return response.json() + +output = query({ + "inputs": { + "question": "What is my name?", + "context": "My name is Clara and I live in Berkeley." +}, +}) +``` + +To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.question_answering). + + + +```js +async function query(data) { + const response = await fetch( + "https://api-inference.huggingface.co/models/deepset/roberta-base-squad2", + { + headers: { + Authorization: "Bearer hf_***" + "Content-Type": "application/json", + }, + method: "POST", + body: JSON.stringify(data), + } + ); + const result = await response.json(); + return result; +} + +query({"inputs": { + "question": "What is my name?", + "context": "My name is Clara and I live in Berkeley." +}}).then((response) => { + console.log(JSON.stringify(response)); +}); +``` + +To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#questionanswering). + + + + + diff --git a/docs/api-inference/tasks/summarization.md b/docs/api-inference/tasks/summarization.md new file mode 100644 index 000000000..f0ed74b66 --- /dev/null +++ b/docs/api-inference/tasks/summarization.md @@ -0,0 +1,106 @@ +## Summarization + +Summarization is the task of producing a shorter version of a document while preserving its important information. Some models can extract text from the original input, while other models can generate entirely new text. + + + +For more details about the `summarization` task, check out its [dedicated page](https://huggingface.co/tasks/summarization)! You will find examples and related materials. + + + +### Recommended models + +- [facebook/bart-large-cnn](https://huggingface.co/facebook/bart-large-cnn): A strong summarization model trained on English news articles. Excels at generating factual summaries. +- [google/bigbird-pegasus-large-pubmed](https://huggingface.co/google/bigbird-pegasus-large-pubmed): A summarization model trained on medical articles. + +This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=summarization&sort=trending). 
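Besides the raw HTTP snippets shown further down, the task is also exposed through `huggingface_hub`'s `InferenceClient`; a minimal sketch, assuming a valid token and a warm model:

```py
from huggingface_hub import InferenceClient

# Sketch only: placeholder token; model taken from the recommended list above.
client = InferenceClient("facebook/bart-large-cnn", token="hf_***")

result = client.summarization(
    "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building."
)
print(result.summary_text)  # recent huggingface_hub versions expose the text as `summary_text`
```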
+ +### API specification + +#### Request + +| Payload | | | +| :--- | :--- | :--- | + + +Some options can be configured by passing headers to the Inference API. Here are the available headers: + +| Headers | | | +| :--- | :--- | :--- | +| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | + +For more information about Inference API headers, check out the parameters [guide](../parameters). + +#### Response + +| Body | | +| :--- | :--- | :--- | +| **summary_text** | _string_ | The summarized text. | + + +### Using the API + + + + + +```bash +curl https://api-inference.huggingface.co/models/facebook/bart-large-cnn \ + -X POST \ + -d '{"inputs": "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct."}' \ + -H 'Content-Type: application/json' \ + -H "Authorization: Bearer hf_***" + +``` + + + +```py +import requests + +API_URL = "https://api-inference.huggingface.co/models/facebook/bart-large-cnn" +headers = {"Authorization": "Bearer hf_***"} + +def query(payload): + response = requests.post(API_URL, headers=headers, json=payload) + return response.json() + +output = query({ + "inputs": "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). 
Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct.", +}) +``` + +To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.summarization). + + + +```js +async function query(data) { + const response = await fetch( + "https://api-inference.huggingface.co/models/facebook/bart-large-cnn", + { + headers: { + Authorization: "Bearer hf_***" + "Content-Type": "application/json", + }, + method: "POST", + body: JSON.stringify(data), + } + ); + const result = await response.json(); + return result; +} + +query({"inputs": "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct."}).then((response) => { + console.log(JSON.stringify(response)); +}); +``` + +To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#summarization). + + + + + diff --git a/docs/api-inference/tasks/table_question_answering.md b/docs/api-inference/tasks/table_question_answering.md new file mode 100644 index 000000000..e3122e425 --- /dev/null +++ b/docs/api-inference/tasks/table_question_answering.md @@ -0,0 +1,138 @@ +## Table Question Answering + +Table Question Answering (Table QA) is the answering a question about an information on a given table. + + + +For more details about the `table-question-answering` task, check out its [dedicated page](https://huggingface.co/tasks/table-question-answering)! You will find examples and related materials. + + + +### Recommended models + +- [microsoft/tapex-base](https://huggingface.co/microsoft/tapex-base): A table question answering model that is capable of neural SQL execution, i.e., employ TAPEX to execute a SQL query on a given table. +- [google/tapas-base-finetuned-wtq](https://huggingface.co/google/tapas-base-finetuned-wtq): A robust table question answering model. + +This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=table-question-answering&sort=trending). + +### API specification + +#### Request + +| Payload | | | +| :--- | :--- | :--- | +| **inputs*** | _object_ | One (table, question) pair to answer | +| **        table*** | _object_ | The table to serve as context for the questions | +| **        question*** | _string_ | The question to be answered about the table | +| **parameters** | _object_ | Additional inference parameters for Table Question Answering | + + +Some options can be configured by passing headers to the Inference API. 
Here are the available headers: + +| Headers | | | +| :--- | :--- | :--- | +| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | + +For more information about Inference API headers, check out the parameters [guide](../parameters). + +#### Response + +| Body | | +| :--- | :--- | :--- | +| **(array)** | _object[]_ | Output is an array of objects. | +| **        answer** | _string_ | The answer of the question given the table. If there is an aggregator, the answer will be preceded by `AGGREGATOR >`. | +| **        coordinates** | _array[]_ | Coordinates of the cells of the answers. | +| **        cells** | _string[]_ | List of strings made up of the answer cell values. | +| **        aggregator** | _string_ | If the model has an aggregator, this returns the aggregator. | + + +### Using the API + + + + + +```bash +curl https://api-inference.huggingface.co/models/microsoft/tapex-base \ + -X POST \ + -d '{"inputs": { "query": "How many stars does the transformers repository have?", "table": { "Repository": ["Transformers", "Datasets", "Tokenizers"], "Stars": ["36542", "4512", "3934"], "Contributors": ["651", "77", "34"], "Programming language": [ "Python", "Python", "Rust, Python and NodeJS" ] } }}' \ + -H 'Content-Type: application/json' \ + -H "Authorization: Bearer hf_***" + +``` + + + +```py +import requests + +API_URL = "https://api-inference.huggingface.co/models/microsoft/tapex-base" +headers = {"Authorization": "Bearer hf_***"} + +def query(payload): + response = requests.post(API_URL, headers=headers, json=payload) + return response.json() + +output = query({ + "inputs": { + "query": "How many stars does the transformers repository have?", + "table": { + "Repository": ["Transformers", "Datasets", "Tokenizers"], + "Stars": ["36542", "4512", "3934"], + "Contributors": ["651", "77", "34"], + "Programming language": [ + "Python", + "Python", + "Rust, Python and NodeJS" + ] + } +}, +}) +``` + +To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.table_question-answering). 
+ + + +```js +async function query(data) { + const response = await fetch( + "https://api-inference.huggingface.co/models/microsoft/tapex-base", + { + headers: { + Authorization: "Bearer hf_***" + "Content-Type": "application/json", + }, + method: "POST", + body: JSON.stringify(data), + } + ); + const result = await response.json(); + return result; +} + +query({"inputs": { + "query": "How many stars does the transformers repository have?", + "table": { + "Repository": ["Transformers", "Datasets", "Tokenizers"], + "Stars": ["36542", "4512", "3934"], + "Contributors": ["651", "77", "34"], + "Programming language": [ + "Python", + "Python", + "Rust, Python and NodeJS" + ] + } +}}).then((response) => { + console.log(JSON.stringify(response)); +}); +``` + +To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#tablequestion-answering). + + + + + diff --git a/docs/api-inference/tasks/text_classification.md b/docs/api-inference/tasks/text_classification.md new file mode 100644 index 000000000..8fffb6654 --- /dev/null +++ b/docs/api-inference/tasks/text_classification.md @@ -0,0 +1,112 @@ +## Text Classification + +Text Classification is the task of assigning a label or class to a given text. Some use cases are sentiment analysis, natural language inference, and assessing grammatical correctness. + + + +For more details about the `text-classification` task, check out its [dedicated page](https://huggingface.co/tasks/text-classification)! You will find examples and related materials. + + + +### Recommended models + +- [distilbert-base-uncased-finetuned-sst-2-english](https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english): A robust model trained for sentiment analysis. +- [roberta-large-mnli](https://huggingface.co/roberta-large-mnli): Multi-genre natural language inference model. + +This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=text-classification&sort=trending). + +### API specification + +#### Request + +| Payload | | | +| :--- | :--- | :--- | +| **inputs*** | _string_ | The text to classify | +| **parameters** | _object_ | Additional inference parameters for Text Classification | +| **        function_to_apply** | _enum_ | Possible values: sigmoid, softmax, none. | +| **        top_k** | _integer_ | When specified, limits the output to the top K most probable classes. | + + +Some options can be configured by passing headers to the Inference API. Here are the available headers: + +| Headers | | | +| :--- | :--- | :--- | +| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. 
It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | + +For more information about Inference API headers, check out the parameters [guide](../parameters). + +#### Response + +| Body | | +| :--- | :--- | :--- | +| **(array)** | _undefined[]_ | Output is an array of undefineds. | +| **        label** | _string_ | The predicted class label. | +| **        score** | _number_ | The corresponding probability. | + + +### Using the API + + + + + +```bash +curl https://api-inference.huggingface.co/models/distilbert-base-uncased-finetuned-sst-2-english \ + -X POST \ + -d '{"inputs": "I like you. I love you"}' \ + -H 'Content-Type: application/json' \ + -H "Authorization: Bearer hf_***" + +``` + + + +```py +import requests + +API_URL = "https://api-inference.huggingface.co/models/distilbert-base-uncased-finetuned-sst-2-english" +headers = {"Authorization": "Bearer hf_***"} + +def query(payload): + response = requests.post(API_URL, headers=headers, json=payload) + return response.json() + +output = query({ + "inputs": "I like you. I love you", +}) +``` + +To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.text_classification). + + + +```js +async function query(data) { + const response = await fetch( + "https://api-inference.huggingface.co/models/distilbert-base-uncased-finetuned-sst-2-english", + { + headers: { + Authorization: "Bearer hf_***" + "Content-Type": "application/json", + }, + method: "POST", + body: JSON.stringify(data), + } + ); + const result = await response.json(); + return result; +} + +query({"inputs": "I like you. I love you"}).then((response) => { + console.log(JSON.stringify(response)); +}); +``` + +To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#textclassification). + + + + + diff --git a/docs/api-inference/tasks/text_generation.md b/docs/api-inference/tasks/text_generation.md new file mode 100644 index 000000000..fb3e41b3f --- /dev/null +++ b/docs/api-inference/tasks/text_generation.md @@ -0,0 +1,203 @@ +## Text Generation + +Generate text based on a prompt. + +If you are interested in a Chat Completion task, which generates a response based on a list of messages, check out the [`chat-completion`](./chat_completion) task. + + + +For more details about the `text-generation` task, check out its [dedicated page](https://huggingface.co/tasks/text-generation)! You will find examples and related materials. + + + +### Recommended models + +- [google/gemma-2-2b-it](https://huggingface.co/google/gemma-2-2b-it): A text-generation model trained to follow instructions. +- [bigcode/starcoder](https://huggingface.co/bigcode/starcoder): A code generation model that can generate code in 80+ languages. +- [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct): Very powerful text generation model trained to follow instructions. +- [microsoft/Phi-3-mini-4k-instruct](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct): Small yet powerful text generation model. +- [AI-MO/NuminaMath-7B-TIR](https://huggingface.co/AI-MO/NuminaMath-7B-TIR): A very powerful model that can solve mathematical problems. 
+- [HuggingFaceH4/starchat2-15b-v0.1](https://huggingface.co/HuggingFaceH4/starchat2-15b-v0.1): Strong coding assistant model. +- [mistralai/Mistral-Nemo-Instruct-2407](https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407): Very strong open-source large language model. + +This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=text-generation&sort=trending). + +### API specification + +#### Request + +| Payload | | | +| :--- | :--- | :--- | +| **inputs*** | _string_ | | +| **parameters** | _object_ | | +| **        best_of** | _integer_ | | +| **        decoder_input_details** | _boolean_ | | +| **        details** | _boolean_ | | +| **        do_sample** | _boolean_ | | +| **        frequency_penalty** | _number_ | | +| **        grammar** | _object_ | One of the following: | +| **                 (#1)** | | | +| **                        type*** | _enum_ | Possible values: json. | +| **                        value*** | _object_ | A string that represents a [JSON Schema](https://json-schema.org/). JSON Schema is a declarative language that allows to annotate JSON documents with types and descriptions. | +| **                 (#2)** | | | +| **                        type*** | _enum_ | Possible values: regex. | +| **                        value*** | _string_ | | +| **        max_new_tokens** | _integer_ | | +| **        repetition_penalty** | _number_ | | +| **        return_full_text** | _boolean_ | | +| **        seed** | _integer_ | | +| **        stop** | _string[]_ | | +| **        temperature** | _number_ | | +| **        top_k** | _integer_ | | +| **        top_n_tokens** | _integer_ | | +| **        top_p** | _number_ | | +| **        truncate** | _integer_ | | +| **        typical_p** | _number_ | | +| **        watermark** | _boolean_ | | +| **stream** | _boolean_ | | + + +Some options can be configured by passing headers to the Inference API. Here are the available headers: + +| Headers | | | +| :--- | :--- | :--- | +| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | + +For more information about Inference API headers, check out the parameters [guide](../parameters). + +#### Response + +Output type depends on the `stream` input parameter. 
+If `stream` is `false` (default), the response will be a JSON object with the following fields: + +| Body | | +| :--- | :--- | :--- | +| **details** | _object_ | | +| **        best_of_sequences** | _object[]_ | | +| **                finish_reason** | _enum_ | Possible values: length, eos_token, stop_sequence. | +| **                generated_text** | _string_ | | +| **                generated_tokens** | _integer_ | | +| **                prefill** | _object[]_ | | +| **                        id** | _integer_ | | +| **                        logprob** | _number_ | | +| **                        text** | _string_ | | +| **                seed** | _integer_ | | +| **                tokens** | _object[]_ | | +| **                        id** | _integer_ | | +| **                        logprob** | _number_ | | +| **                        special** | _boolean_ | | +| **                        text** | _string_ | | +| **                top_tokens** | _array[]_ | | +| **                        id** | _integer_ | | +| **                        logprob** | _number_ | | +| **                        special** | _boolean_ | | +| **                        text** | _string_ | | +| **        finish_reason** | _enum_ | Possible values: length, eos_token, stop_sequence. | +| **        generated_tokens** | _integer_ | | +| **        prefill** | _object[]_ | | +| **                id** | _integer_ | | +| **                logprob** | _number_ | | +| **                text** | _string_ | | +| **        seed** | _integer_ | | +| **        tokens** | _object[]_ | | +| **                id** | _integer_ | | +| **                logprob** | _number_ | | +| **                special** | _boolean_ | | +| **                text** | _string_ | | +| **        top_tokens** | _array[]_ | | +| **                id** | _integer_ | | +| **                logprob** | _number_ | | +| **                special** | _boolean_ | | +| **                text** | _string_ | | +| **generated_text** | _string_ | | + + +If `stream` is `true`, generated tokens are returned as a stream, using Server-Sent Events (SSE). +For more information about streaming, check out [this guide](https://huggingface.co/docs/text-generation-inference/conceptual/streaming). + +| Body | | +| :--- | :--- | :--- | +| **details** | _object_ | | +| **        finish_reason** | _enum_ | Possible values: length, eos_token, stop_sequence. 
| +| **        generated_tokens** | _integer_ | | +| **        seed** | _integer_ | | +| **generated_text** | _string_ | | +| **index** | _integer_ | | +| **token** | _object_ | | +| **        id** | _integer_ | | +| **        logprob** | _number_ | | +| **        special** | _boolean_ | | +| **        text** | _string_ | | +| **top_tokens** | _object[]_ | | +| **        id** | _integer_ | | +| **        logprob** | _number_ | | +| **        special** | _boolean_ | | +| **        text** | _string_ | | + + +### Using the API + + + + + +```bash +curl https://api-inference.huggingface.co/models/google/gemma-2-2b-it \ + -X POST \ + -d '{"inputs": "Can you please let us know more details about your "}' \ + -H 'Content-Type: application/json' \ + -H "Authorization: Bearer hf_***" + +``` + + + +```py +import requests + +API_URL = "https://api-inference.huggingface.co/models/google/gemma-2-2b-it" +headers = {"Authorization": "Bearer hf_***"} + +def query(payload): + response = requests.post(API_URL, headers=headers, json=payload) + return response.json() + +output = query({ + "inputs": "Can you please let us know more details about your ", +}) +``` + +To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation). + + + +```js +async function query(data) { + const response = await fetch( + "https://api-inference.huggingface.co/models/google/gemma-2-2b-it", + { + headers: { + Authorization: "Bearer hf_***" + "Content-Type": "application/json", + }, + method: "POST", + body: JSON.stringify(data), + } + ); + const result = await response.json(); + return result; +} + +query({"inputs": "Can you please let us know more details about your "}).then((response) => { + console.log(JSON.stringify(response)); +}); +``` + +To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#textgeneration). + + + + + diff --git a/docs/api-inference/tasks/text_to_image.md b/docs/api-inference/tasks/text_to_image.md index 810c8f68e..6697ca877 100644 --- a/docs/api-inference/tasks/text_to_image.md +++ b/docs/api-inference/tasks/text_to_image.md @@ -1,4 +1,4 @@ -## Text-to-image +## Text to Image Generate an image based on a given text prompt. @@ -23,29 +23,32 @@ This is only a subset of the supported models. Find the model that suits you bes | Payload | | | | :--- | :--- | :--- | -| **inputs** | _string, required_ | The input text data (sometimes called "prompt" | -| **parameters** | _object, optional_ | Additional inference parameters for Text To Image | -| **        guidance_scale** | _number, optional_ | For diffusion models. A higher guidance scale value encourages the model to generate images closely linked to the text prompt at the expense of lower image quality. | -| **        negative_prompt** | _array, optional_ | One or several prompt to guide what NOT to include in image generation. | -| **        num_inference_steps** | _integer, optional_ | For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference. | -| **        target_size** | _object, optional_ | The size in pixel of the output image | -| **                width** | _integer, required_ | | -| **                height** | _integer, required_ | | -| **        scheduler** | _string, optional_ | For diffusion models. 
Override the scheduler with a compatible one | +| **inputs*** | _string_ | The input text data (sometimes called "prompt") | +| **parameters** | _object_ | Additional inference parameters for Text To Image | +| **        guidance_scale** | _number_ | For diffusion models. A higher guidance scale value encourages the model to generate images closely linked to the text prompt at the expense of lower image quality. | +| **        negative_prompt** | _string[]_ | One or several prompt to guide what NOT to include in image generation. | +| **        num_inference_steps** | _integer_ | For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference. | +| **        target_size** | _object_ | The size in pixel of the output image | +| **                width*** | _integer_ | | +| **                height*** | _integer_ | | +| **        scheduler** | _string_ | For diffusion models. Override the scheduler with a compatible one | +Some options can be configured by passing headers to the Inference API. Here are the available headers: + | Headers | | | | :--- | :--- | :--- | -| **authorization** | _string, optional_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | -| **x-use-cache** | _boolean, optional, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | -| **x-wait-for-model** | _boolean, optional, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | +| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | +For more information about Inference API headers, check out the parameters [guide](../parameters). 
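As a sketch of how these headers are passed in practice (model id, prompt, and token are placeholders, not part of the official snippets):

```py
import requests

# Sketch only. As advised above, x-wait-for-model is best set only after a first 503,
# and x-use-cache is disabled here because image generation is typically non-deterministic.
API_URL = "https://api-inference.huggingface.co/models/<model-id>"
headers = {
    "Authorization": "Bearer hf_***",
    "x-wait-for-model": "true",
    "x-use-cache": "false",
}

response = requests.post(API_URL, headers=headers, json={"inputs": "Astronaut riding a horse"})
with open("output.jpg", "wb") as f:
    f.write(response.content)  # the endpoint returns raw image bytes (see Response below)
```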
#### Response | Body | | -| :--- | :--- | -| **image** | The generated image | +| :--- | :--- | :--- | +| **image** | _object_ | The generated image | ### Using the API diff --git a/scripts/api-inference/scripts/generate.ts b/scripts/api-inference/scripts/generate.ts index 73144662a..9ef681c07 100644 --- a/scripts/api-inference/scripts/generate.ts +++ b/scripts/api-inference/scripts/generate.ts @@ -4,6 +4,19 @@ import * as fs from "node:fs/promises"; import * as path from "node:path/posix"; import type { JsonObject } from "type-fest"; +const TASKS: PipelineType[] = [ + "fill-mask", + "image-to-image", + "question-answering", + "summarization", + "table-question-answering", + "text-classification", + "text-generation", + "text-to-image", +]; +const TASKS_EXTENDED = [...TASKS, "chat-completion"]; +const SPECS_REVISION = "update-specification-for-docs"; + const inferenceSnippetLanguages = ["python", "js", "curl"] as const; type InferenceSnippetLanguage = (typeof inferenceSnippetLanguages)[number]; @@ -36,10 +49,17 @@ const TEMPLATE_DIR = path.join(ROOT_DIR, "templates"); const DOCS_DIR = path.join(ROOT_DIR, "..", "..", "docs"); const TASKS_DOCS_DIR = path.join(DOCS_DIR, "api-inference", "tasks"); -function readTemplate(templateName: string): Promise { +const NBSP = " "; // non-breaking space +const TABLE_INDENT = NBSP.repeat(8); + +function readTemplate( + templateName: string, + namespace: string, +): Promise { const templateNameSnakeCase = templateName.replace(/-/g, "_"); const templatePath = path.join( TEMPLATE_DIR, + namespace, `${templateNameSnakeCase}.handlebars`, ); console.log(` 🔍 Reading ${templateNameSnakeCase}.handlebars`); @@ -89,7 +109,7 @@ export function getInferenceSnippet( const modelData = { id, pipeline_tag, - mask_token: "", + mask_token: "[MASK]", library_name: "", config: {}, }; @@ -105,8 +125,9 @@ export function getInferenceSnippet( type SpecNameType = "input" | "output" | "stream_output"; const SPECS_URL_TEMPLATE = Handlebars.compile( - `https://raw.githubusercontent.com/huggingface/huggingface.js/main/packages/tasks/src/tasks/{{task}}/spec/{{name}}.json`, + `https://raw.githubusercontent.com/huggingface/huggingface.js/${SPECS_REVISION}/packages/tasks/src/tasks/{{task}}/spec/{{name}}.json`, ); +const COMMON_DEFINITIONS_URL = `https://raw.githubusercontent.com/huggingface/huggingface.js/${SPECS_REVISION}/packages/tasks/src/tasks/common-definitions.json`; async function fetchOneSpec( task: PipelineType, @@ -131,41 +152,143 @@ async function fetchSpecs( }; } -function processPayloadSchema(schema: any, prefix: string = ""): JsonObject[] { +async function fetchCommonDefinitions(): Promise { + console.log(` 🕸️ Fetching common definitions`); + return fetch(COMMON_DEFINITIONS_URL).then((res) => res.json()); +} + +const COMMON_DEFINITIONS = await fetchCommonDefinitions(); + +function processPayloadSchema(schema: any): JsonObject[] { let rows: JsonObject[] = []; - Object.entries(schema.properties || {}).forEach( - ([key, value]: [string, any]) => { - const isRequired = schema.required?.includes(key); - let type = value.type || "object"; + // Helper function to resolve schema references + function resolveRef(ref: string) { + const refPath = ref.split("#/")[1].split("/"); + let refSchema = ref.includes("common-definitions.json") + ? 
COMMON_DEFINITIONS + : schema; + for (const part of refPath) { + refSchema = refSchema[part]; + } + return refSchema; + } + + // Helper function to process a schema node + function processSchemaNode( + key: string, + value: any, + required: boolean, + parentPrefix: string, + ): void { + const isRequired = required; + let type = value.type || "object"; + let description = value.description || ""; + + if (value.$ref) { + // Resolve the reference + value = resolveRef(value.$ref); + type = value.type || "object"; + description = value.description || ""; + } - if (value.$ref) { - // Handle references - const refSchemaKey = value.$ref.split("/").pop(); - value = schema.$defs?.[refSchemaKey!]; + if (value.enum) { + type = "enum"; + description = `Possible values: ${value.enum.join(", ")}.`; + } + + const isObject = type === "object" && value.properties; + const isArray = type === "array" && value.items; + const isCombinator = value.oneOf || value.allOf || value.anyOf; + const addRow = + !(isCombinator && isCombinator.length === 1) && + !description.includes("UNUSED") && + !key.includes("SKIP") && + key.length > 0; + + if (isCombinator && isCombinator.length > 1) { + description = "One of the following:"; + } + + if (isArray) { + if (value.items.$ref) { + type = "object[]"; + } else if (value.items.type) { + type = `${value.items.type}[]`; } + } - const description = value.description || ""; - const isObject = type === "object" && value.properties; + if (addRow) { + // Add the row to the table except if combination with only one option + if (key.includes("(#")) { + // If it's a combination, no need to re-specify the type + type = ""; + } const row = { - name: `${prefix}${key}`, + name: `${parentPrefix}${key}`, type: type, - description: description, - required: isRequired ? 
"required" : "optional", + description: description.replace(/\n/g, " "), + required: isRequired, }; rows.push(row); + } - if (isObject) { - // Recursively process nested objects - rows = rows.concat( - processPayloadSchema( - value, - prefix + "        ", - ), - ); + if (isObject) { + // Recursively process nested objects + Object.entries(value.properties || {}).forEach( + ([nestedKey, nestedValue]) => { + const nestedRequired = value.required?.includes(nestedKey); + processSchemaNode( + nestedKey, + nestedValue, + nestedRequired, + parentPrefix + TABLE_INDENT, + ); + }, + ); + } else if (isArray) { + // Process array items + processSchemaNode("SKIP", value.items, false, parentPrefix); + } else if (isCombinator) { + // Process combinators like oneOf, allOf, anyOf + const combinators = value.oneOf || value.allOf || value.anyOf; + if (combinators.length === 1) { + // If there is only one option, process it directly + processSchemaNode(key, combinators[0], isRequired, parentPrefix); + } else { + // If there are multiple options, process each one as options + combinators.forEach((subSchema: any, index: number) => { + processSchemaNode( + `${NBSP}(#${index + 1})`, + subSchema, + isRequired, + parentPrefix + TABLE_INDENT, + ); + }); } - }, - ); + } + } + + // Start processing based on the root type of the schema + if (schema.type === "array") { + // If the root schema is an array, process its items + const row = { + name: "(array)", + type: `${schema.items.type}[]`, + description: + schema.items.description || + `Output is an array of ${schema.items.type}s.`, + required: true, + }; + rows.push(row); + processSchemaNode("", schema.items, false, ""); + } else { + // Otherwise, start with the root object + Object.entries(schema.properties || {}).forEach(([key, value]) => { + const required = schema.required?.includes(key); + processSchemaNode(key, value, required, ""); + }); + } return rows; } @@ -184,23 +307,21 @@ const TIP_LIST_MODELS_LINK_TEMPLATE = Handlebars.compile( `This is only a subset of the supported models. 
Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag={{task}}&sort=trending).`, ); -const SPECS_HEADERS = await readTemplate("specs-headers"); +const SPECS_HEADERS = await readTemplate("specs-headers", "common"); const SNIPPETS_TEMPLATE = Handlebars.compile( - await readTemplate("snippets-template"), + await readTemplate("snippets-template", "common"), ); const SPECS_PAYLOAD_TEMPLATE = Handlebars.compile( - await readTemplate("specs-payload"), + await readTemplate("specs-payload", "common"), ); const SPECS_OUTPUT_TEMPLATE = Handlebars.compile( - await readTemplate("specs-output"), + await readTemplate("specs-output", "common"), ); //////////////////// //// Data utils //// //////////////////// -const TASKS: PipelineType[] = ["image-to-image", "text-to-image"]; - const DATA: { constants: { specsHeaders: string; @@ -238,12 +359,16 @@ await Promise.all( id: string; description: string; inference: string | undefined; + config: JsonObject | undefined; }) => { console.log(` ⚡ Checking inference status ${model.id}`); - const modelData = await fetch( - `https://huggingface.co/api/models/${model.id}?expand[]=inference`, - ).then((res) => res.json()); + let url = `https://huggingface.co/api/models/${model.id}?expand[]=inference`; + if (task === "text-generation") { + url += "&expand[]=config"; + } + const modelData = await fetch(url).then((res) => res.json()); model.inference = modelData.inference; + model.config = modelData.config; }, ), ); @@ -273,7 +398,8 @@ TASKS.forEach((task) => { // Render specs await Promise.all( - TASKS.map(async (task) => { + TASKS_EXTENDED.map(async (task) => { + // @ts-ignore const specs = await fetchSpecs(task); DATA.specs[task] = { input: specs.input @@ -297,6 +423,45 @@ TASKS.forEach((task) => { DATA.tips.listModelsLink[task] = TIP_LIST_MODELS_LINK_TEMPLATE({ task }); }); +/////////////////////////////////////////////// +//// Data for chat-completion special case //// +/////////////////////////////////////////////// + +function fetchChatCompletion() { + // Recommended models based on text-generation + DATA.models["chat-completion"] = DATA.models["text-generation"].filter( + // @ts-ignore + (model) => model.config?.tokenizer_config?.chat_template, + ); + + // Snippet specific to chat completion + const mainModel = DATA.models["chat-completion"][0]; + const mainModelData = { + // @ts-ignore + id: mainModel.id, + pipeline_tag: "text-generation", + mask_token: "", + library_name: "", + // @ts-ignore + config: mainModel.config, + }; + const taskSnippets = { + // @ts-ignore + curl: GET_SNIPPET_FN["curl"](mainModelData, "hf_***"), + // @ts-ignore + python: GET_SNIPPET_FN["python"](mainModelData, "hf_***"), + // @ts-ignore + javascript: GET_SNIPPET_FN["js"](mainModelData, "hf_***"), + }; + DATA.snippets["chat-completion"] = SNIPPETS_TEMPLATE({ + taskSnippets, + taskSnakeCase: "chat-completion".replace("-", "_"), + taskAttached: "chat-completion".replace("-", ""), + }); +} + +fetchChatCompletion(); + ///////////////////////// //// Rendering utils //// ///////////////////////// @@ -306,12 +471,12 @@ async function renderTemplate( data: JsonObject, ): Promise { console.log(`🎨 Rendering ${templateName}`); - const template = Handlebars.compile(await readTemplate(templateName)); + const template = Handlebars.compile(await readTemplate(templateName, "task")); return template(data); } await Promise.all( - TASKS.map(async (task) => { + TASKS_EXTENDED.map(async (task) => { // @ts-ignore const rendered = await renderTemplate(task, DATA); 
await writeTaskDoc(task, rendered); diff --git a/scripts/api-inference/templates/snippets_template.handlebars b/scripts/api-inference/templates/common/snippets_template.handlebars similarity index 100% rename from scripts/api-inference/templates/snippets_template.handlebars rename to scripts/api-inference/templates/common/snippets_template.handlebars diff --git a/scripts/api-inference/templates/common/specs_headers.handlebars b/scripts/api-inference/templates/common/specs_headers.handlebars new file mode 100644 index 000000000..32b6e9d94 --- /dev/null +++ b/scripts/api-inference/templates/common/specs_headers.handlebars @@ -0,0 +1,9 @@ +Some options can be configured by passing headers to the Inference API. Here are the available headers: + +| Headers | | | +| :--- | :--- | :--- | +| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | + +For more information about Inference API headers, check out the parameters [guide](../parameters). 
\ No newline at end of file diff --git a/scripts/api-inference/templates/common/specs_output.handlebars b/scripts/api-inference/templates/common/specs_output.handlebars new file mode 100644 index 000000000..7d0e7b4c0 --- /dev/null +++ b/scripts/api-inference/templates/common/specs_output.handlebars @@ -0,0 +1,9 @@ +| Body | | +| :--- | :--- | :--- | +{{#each schema}} +{{#if type}} +| **{{{name}}}** | _{{type}}_ | {{{description}}} | +{{else}} +| **{{{name}}}** | | {{{description}}} | +{{/if}} +{{/each}} \ No newline at end of file diff --git a/scripts/api-inference/templates/common/specs_payload.handlebars b/scripts/api-inference/templates/common/specs_payload.handlebars new file mode 100644 index 000000000..6459be5d9 --- /dev/null +++ b/scripts/api-inference/templates/common/specs_payload.handlebars @@ -0,0 +1,9 @@ +| Payload | | | +| :--- | :--- | :--- | +{{#each schema}} +{{#if type}} +| **{{{name}}}{{#if required}}*{{/if}}** | _{{type}}_ | {{{description}}} | +{{else}} +| **{{{name}}}** | | {{{description}}} | +{{/if}} +{{/each}} \ No newline at end of file diff --git a/scripts/api-inference/templates/specs_headers.handlebars b/scripts/api-inference/templates/specs_headers.handlebars deleted file mode 100644 index 44b28ecc8..000000000 --- a/scripts/api-inference/templates/specs_headers.handlebars +++ /dev/null @@ -1,5 +0,0 @@ -| Headers | | | -| :--- | :--- | :--- | -| **authorization** | _string, optional_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | -| **x-use-cache** | _boolean, optional, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | -| **x-wait-for-model** | _boolean, optional, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). 
| diff --git a/scripts/api-inference/templates/specs_output.handlebars b/scripts/api-inference/templates/specs_output.handlebars deleted file mode 100644 index 7f3391b98..000000000 --- a/scripts/api-inference/templates/specs_output.handlebars +++ /dev/null @@ -1,5 +0,0 @@ -| Body | | -| :--- | :--- | -{{#each schema}} -| **{{{name}}}** | {{{description}}} | -{{/each}} \ No newline at end of file diff --git a/scripts/api-inference/templates/specs_payload.handlebars b/scripts/api-inference/templates/specs_payload.handlebars deleted file mode 100644 index 70460b184..000000000 --- a/scripts/api-inference/templates/specs_payload.handlebars +++ /dev/null @@ -1,5 +0,0 @@ -| Payload | | | -| :--- | :--- | :--- | -{{#each schema}} -| **{{{name}}}** | _{{type}}, {{required}}_ | {{{description}}} | -{{/each}} \ No newline at end of file diff --git a/scripts/api-inference/templates/task/chat_completion.handlebars b/scripts/api-inference/templates/task/chat_completion.handlebars new file mode 100644 index 000000000..f1274f5c5 --- /dev/null +++ b/scripts/api-inference/templates/task/chat_completion.handlebars @@ -0,0 +1,38 @@ +## Chat Completion + +Generate a response given a list of messages. +This is a subtask of [`text-generation`](./text_generation) designed to generate responses in a conversational context. + +{{{tips.linksToTaskPage.chat-completion}}} + +### Recommended models + +{{#each models.chat-completion}} +- [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}} +{{/each}} + +{{{tips.listModelsLink.chat-completion}}} + +### API specification + +#### Request + +{{{specs.chat-completion.input}}} + +{{{constants.specsHeaders}}} + +#### Response + +Output type depends on the `stream` input parameter. +If `stream` is `false` (default), the response will be a JSON object with the following fields: + +{{{specs.chat-completion.output}}} + +If `stream` is `true`, generated tokens are returned as a stream, using Server-Sent Events (SSE). +For more information about streaming, check out [this guide](https://huggingface.co/docs/text-generation-inference/conceptual/streaming). + +{{{specs.chat-completion.stream_output}}} + +### Using the API + +{{{snippets.chat-completion}}} diff --git a/scripts/api-inference/templates/task/fill_mask.handlebars b/scripts/api-inference/templates/task/fill_mask.handlebars new file mode 100644 index 000000000..663d2ab9f --- /dev/null +++ b/scripts/api-inference/templates/task/fill_mask.handlebars @@ -0,0 +1,29 @@ +## Fill-mask + +Mask filling is the task of predicting the right word (token to be precise) in the middle of a sequence. 
+ +{{{tips.linksToTaskPage.fill-mask}}} + +### Recommended models + +{{#each models.fill-mask}} +- [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}} +{{/each}} + +{{{tips.listModelsLink.fill-mask}}} + +### API specification + +#### Request + +{{{specs.fill-mask.input}}} + +{{{constants.specsHeaders}}} + +#### Response + +{{{specs.fill-mask.output}}} + +### Using the API + +{{{snippets.fill-mask}}} diff --git a/scripts/api-inference/templates/image_to_image.handlebars b/scripts/api-inference/templates/task/image_to_image.handlebars similarity index 97% rename from scripts/api-inference/templates/image_to_image.handlebars rename to scripts/api-inference/templates/task/image_to_image.handlebars index b432eab19..258dec814 100644 --- a/scripts/api-inference/templates/image_to_image.handlebars +++ b/scripts/api-inference/templates/task/image_to_image.handlebars @@ -1,4 +1,4 @@ -## Image-to-image +## Image to Image Image-to-image is the task of transforming a source image to match the characteristics of a target image or a target image domain. Any image manipulation and enhancement is possible with image to image models. diff --git a/scripts/api-inference/templates/task/question_answering.handlebars b/scripts/api-inference/templates/task/question_answering.handlebars new file mode 100644 index 000000000..101d00fcc --- /dev/null +++ b/scripts/api-inference/templates/task/question_answering.handlebars @@ -0,0 +1,29 @@ +## Question Answering + +Question Answering models can retrieve the answer to a question from a given text, which is useful for searching for an answer in a document. + +{{{tips.linksToTaskPage.question-answering}}} + +### Recommended models + +{{#each models.question-answering}} +- [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}} +{{/each}} + +{{{tips.listModelsLink.question-answering}}} + +### API specification + +#### Request + +{{{specs.question-answering.input}}} + +{{{constants.specsHeaders}}} + +#### Response + +{{{specs.question-answering.output}}} + +### Using the API + +{{{snippets.question-answering}}} diff --git a/scripts/api-inference/templates/task/summarization.handlebars b/scripts/api-inference/templates/task/summarization.handlebars new file mode 100644 index 000000000..890487215 --- /dev/null +++ b/scripts/api-inference/templates/task/summarization.handlebars @@ -0,0 +1,29 @@ +## Summarization + +Summarization is the task of producing a shorter version of a document while preserving its important information. Some models can extract text from the original input, while other models can generate entirely new text. + +{{{tips.linksToTaskPage.summarization}}} + +### Recommended models + +{{#each models.summarization}} +- [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}} +{{/each}} + +{{{tips.listModelsLink.summarization}}} + +### API specification + +#### Request + +{{{specs.summarization.input}}} + +{{{constants.specsHeaders}}} + +#### Response + +{{{specs.summarization.output}}} + +### Using the API + +{{{snippets.summarization}}} diff --git a/scripts/api-inference/templates/task/table_question_answering.handlebars b/scripts/api-inference/templates/task/table_question_answering.handlebars new file mode 100644 index 000000000..4ae8b53fc --- /dev/null +++ b/scripts/api-inference/templates/task/table_question_answering.handlebars @@ -0,0 +1,29 @@ +## Table Question Answering + +Table Question Answering (Table QA) is the answering a question about an information on a given table. 
+ +{{{tips.linksToTaskPage.table-question-answering}}} + +### Recommended models + +{{#each models.table-question-answering}} +- [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}} +{{/each}} + +{{{tips.listModelsLink.table-question-answering}}} + +### API specification + +#### Request + +{{{specs.table-question-answering.input}}} + +{{{constants.specsHeaders}}} + +#### Response + +{{{specs.table-question-answering.output}}} + +### Using the API + +{{{snippets.table-question-answering}}} diff --git a/scripts/api-inference/templates/task/text_classification.handlebars b/scripts/api-inference/templates/task/text_classification.handlebars new file mode 100644 index 000000000..99c3cabe8 --- /dev/null +++ b/scripts/api-inference/templates/task/text_classification.handlebars @@ -0,0 +1,29 @@ +## Text Classification + +Text Classification is the task of assigning a label or class to a given text. Some use cases are sentiment analysis, natural language inference, and assessing grammatical correctness. + +{{{tips.linksToTaskPage.text-classification}}} + +### Recommended models + +{{#each models.text-classification}} +- [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}} +{{/each}} + +{{{tips.listModelsLink.text-classification}}} + +### API specification + +#### Request + +{{{specs.text-classification.input}}} + +{{{constants.specsHeaders}}} + +#### Response + +{{{specs.text-classification.output}}} + +### Using the API + +{{{snippets.text-classification}}} diff --git a/scripts/api-inference/templates/task/text_generation.handlebars b/scripts/api-inference/templates/task/text_generation.handlebars new file mode 100644 index 000000000..85bbba97a --- /dev/null +++ b/scripts/api-inference/templates/task/text_generation.handlebars @@ -0,0 +1,39 @@ +## Text Generation + +Generate text based on a prompt. + +If you are interested in a Chat Completion task, which generates a response based on a list of messages, check out the [`chat-completion`](./chat_completion) task. + +{{{tips.linksToTaskPage.text-generation}}} + +### Recommended models + +{{#each models.text-generation}} +- [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}} +{{/each}} + +{{{tips.listModelsLink.text-generation}}} + +### API specification + +#### Request + +{{{specs.text-generation.input}}} + +{{{constants.specsHeaders}}} + +#### Response + +Output type depends on the `stream` input parameter. +If `stream` is `false` (default), the response will be a JSON object with the following fields: + +{{{specs.text-generation.output}}} + +If `stream` is `true`, generated tokens are returned as a stream, using Server-Sent Events (SSE). +For more information about streaming, check out [this guide](https://huggingface.co/docs/text-generation-inference/conceptual/streaming). + +{{{specs.text-generation.stream_output}}} + +### Using the API + +{{{snippets.text-generation}}} diff --git a/scripts/api-inference/templates/text_to_image.handlebars b/scripts/api-inference/templates/task/text_to_image.handlebars similarity index 96% rename from scripts/api-inference/templates/text_to_image.handlebars rename to scripts/api-inference/templates/task/text_to_image.handlebars index 6c9c568d1..6e6ffd0c6 100644 --- a/scripts/api-inference/templates/text_to_image.handlebars +++ b/scripts/api-inference/templates/task/text_to_image.handlebars @@ -1,4 +1,4 @@ -## Text-to-image +## Text to Image Generate an image based on a given text prompt. 
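
As a quick illustration of the rendering pipeline introduced in this patch, the sketch below compiles a simplified copy of the `specs_payload.handlebars` template and feeds it rows shaped like the output of `processPayloadSchema`. The inline template string and the sample rows are illustrative only.

```ts
// Simplified sketch of the specs_payload rendering step (illustrative only).
import Handlebars from "handlebars";

const payloadTemplate = Handlebars.compile(
  [
    "| Payload |  |  |",
    "| :--- | :--- | :--- |",
    "{{#each schema}}",
    "| **{{{name}}}{{#if required}}*{{/if}}** | _{{type}}_ | {{{description}}} |",
    "{{/each}}",
  ].join("\n"),
);

// Rows shaped like the output of processPayloadSchema.
const rows = [
  { name: "inputs", type: "string", description: "The input text data", required: true },
  { name: "parameters", type: "object", description: "Additional inference parameters", required: false },
];

console.log(payloadTemplate({ schema: rows }));
// | **inputs*** | _string_ | The input text data |
// | **parameters** | _object_ | Additional inference parameters |
```

The real script reads the template from `templates/common/specs_payload.handlebars` and builds the rows from the task's JSON schema instead of hard-coding them.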
From 22c6baec60bdd9e3eae2f99497ebfc73fc946601 Mon Sep 17 00:00:00 2001 From: Lucain Date: Thu, 29 Aug 2024 10:55:17 +0200 Subject: [PATCH 27/38] Filter out frozen models from API docs for tasks (#1396) * Filter out frozen models * use placeholder --- docs/api-inference/tasks/chat_completion.md | 1 - docs/api-inference/tasks/image_to_image.md | 4 ---- docs/api-inference/tasks/question_answering.md | 1 - docs/api-inference/tasks/summarization.md | 1 - docs/api-inference/tasks/table_question_answering.md | 2 -- docs/api-inference/tasks/text_generation.md | 1 - docs/api-inference/tasks/text_to_image.md | 1 - scripts/api-inference/scripts/generate.ts | 9 +++++++-- 8 files changed, 7 insertions(+), 13 deletions(-) diff --git a/docs/api-inference/tasks/chat_completion.md b/docs/api-inference/tasks/chat_completion.md index c01fe9ac1..160a41f33 100644 --- a/docs/api-inference/tasks/chat_completion.md +++ b/docs/api-inference/tasks/chat_completion.md @@ -10,7 +10,6 @@ This is a subtask of [`text-generation`](./text_generation) designed to generate - [google/gemma-2-2b-it](https://huggingface.co/google/gemma-2-2b-it): A text-generation model trained to follow instructions. - [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct): Very powerful text generation model trained to follow instructions. - [microsoft/Phi-3-mini-4k-instruct](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct): Small yet powerful text generation model. -- [AI-MO/NuminaMath-7B-TIR](https://huggingface.co/AI-MO/NuminaMath-7B-TIR): A very powerful model that can solve mathematical problems. - [HuggingFaceH4/starchat2-15b-v0.1](https://huggingface.co/HuggingFaceH4/starchat2-15b-v0.1): Strong coding assistant model. - [mistralai/Mistral-Nemo-Instruct-2407](https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407): Very strong open-source large language model. diff --git a/docs/api-inference/tasks/image_to_image.md b/docs/api-inference/tasks/image_to_image.md index eb197489e..29731c4aa 100644 --- a/docs/api-inference/tasks/image_to_image.md +++ b/docs/api-inference/tasks/image_to_image.md @@ -17,10 +17,6 @@ For more details about the `image-to-image` task, check out its [dedicated page] ### Recommended models -- [fal/AuraSR-v2](https://huggingface.co/fal/AuraSR-v2): An image-to-image model to improve image resolution. -- [keras-io/super-resolution](https://huggingface.co/keras-io/super-resolution): A model that increases the resolution of an image. -- [lambdalabs/sd-image-variations-diffusers](https://huggingface.co/lambdalabs/sd-image-variations-diffusers): A model that creates a set of variations of the input image in the style of DALL-E using Stable Diffusion. -- [mfidabel/controlnet-segment-anything](https://huggingface.co/mfidabel/controlnet-segment-anything): A model that generates images based on segments in the input image and the text prompt. - [timbrooks/instruct-pix2pix](https://huggingface.co/timbrooks/instruct-pix2pix): A model that takes an image and an instruction to edit the image. This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=image-to-image&sort=trending). 
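
Since this patch prunes models that are no longer served, here is a rough sketch of the underlying availability check, mirroring the `expand[]=inference` query already used in `generate.ts`. The accepted status values are an assumption based on this patch series, not a documented contract.

```ts
// Rough sketch: keep a model only if its serverless status looks usable.
// The accepted statuses ("cold", "loading", "warm") are an assumption here.
async function isServerlessAvailable(modelId: string): Promise<boolean> {
  const res = await fetch(
    `https://huggingface.co/api/models/${modelId}?expand[]=inference`,
  );
  const data = (await res.json()) as { inference?: string };
  return ["cold", "loading", "warm"].includes(data.inference ?? "");
}

isServerlessAvailable("timbrooks/instruct-pix2pix").then((available) =>
  console.log(available ? "keep in docs" : "filter out"),
);
```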
diff --git a/docs/api-inference/tasks/question_answering.md b/docs/api-inference/tasks/question_answering.md index 3f724c9c2..e22b2e2b5 100644 --- a/docs/api-inference/tasks/question_answering.md +++ b/docs/api-inference/tasks/question_answering.md @@ -11,7 +11,6 @@ For more details about the `question-answering` task, check out its [dedicated p ### Recommended models - [deepset/roberta-base-squad2](https://huggingface.co/deepset/roberta-base-squad2): A robust baseline model for most question answering domains. -- [google/tapas-base-finetuned-wtq](https://huggingface.co/google/tapas-base-finetuned-wtq): A special model that can answer questions from tables! This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=question-answering&sort=trending). diff --git a/docs/api-inference/tasks/summarization.md b/docs/api-inference/tasks/summarization.md index f0ed74b66..266e43192 100644 --- a/docs/api-inference/tasks/summarization.md +++ b/docs/api-inference/tasks/summarization.md @@ -11,7 +11,6 @@ For more details about the `summarization` task, check out its [dedicated page]( ### Recommended models - [facebook/bart-large-cnn](https://huggingface.co/facebook/bart-large-cnn): A strong summarization model trained on English news articles. Excels at generating factual summaries. -- [google/bigbird-pegasus-large-pubmed](https://huggingface.co/google/bigbird-pegasus-large-pubmed): A summarization model trained on medical articles. This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=summarization&sort=trending). diff --git a/docs/api-inference/tasks/table_question_answering.md b/docs/api-inference/tasks/table_question_answering.md index e3122e425..192578319 100644 --- a/docs/api-inference/tasks/table_question_answering.md +++ b/docs/api-inference/tasks/table_question_answering.md @@ -10,8 +10,6 @@ For more details about the `table-question-answering` task, check out its [dedic ### Recommended models -- [microsoft/tapex-base](https://huggingface.co/microsoft/tapex-base): A table question answering model that is capable of neural SQL execution, i.e., employ TAPEX to execute a SQL query on a given table. -- [google/tapas-base-finetuned-wtq](https://huggingface.co/google/tapas-base-finetuned-wtq): A robust table question answering model. This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=table-question-answering&sort=trending). diff --git a/docs/api-inference/tasks/text_generation.md b/docs/api-inference/tasks/text_generation.md index fb3e41b3f..0976b1749 100644 --- a/docs/api-inference/tasks/text_generation.md +++ b/docs/api-inference/tasks/text_generation.md @@ -16,7 +16,6 @@ For more details about the `text-generation` task, check out its [dedicated page - [bigcode/starcoder](https://huggingface.co/bigcode/starcoder): A code generation model that can generate code in 80+ languages. - [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct): Very powerful text generation model trained to follow instructions. - [microsoft/Phi-3-mini-4k-instruct](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct): Small yet powerful text generation model. -- [AI-MO/NuminaMath-7B-TIR](https://huggingface.co/AI-MO/NuminaMath-7B-TIR): A very powerful model that can solve mathematical problems. 
- [HuggingFaceH4/starchat2-15b-v0.1](https://huggingface.co/HuggingFaceH4/starchat2-15b-v0.1): Strong coding assistant model. - [mistralai/Mistral-Nemo-Instruct-2407](https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407): Very strong open-source large language model. diff --git a/docs/api-inference/tasks/text_to_image.md b/docs/api-inference/tasks/text_to_image.md index 6697ca877..4d0d83757 100644 --- a/docs/api-inference/tasks/text_to_image.md +++ b/docs/api-inference/tasks/text_to_image.md @@ -12,7 +12,6 @@ For more details about the `text-to-image` task, check out its [dedicated page]( - [black-forest-labs/FLUX.1-dev](https://huggingface.co/black-forest-labs/FLUX.1-dev): One of the most powerful image generation models that can generate realistic outputs. - [latent-consistency/lcm-lora-sdxl](https://huggingface.co/latent-consistency/lcm-lora-sdxl): A powerful yet fast image generation model. -- [Kwai-Kolors/Kolors](https://huggingface.co/Kwai-Kolors/Kolors): Text-to-image model for photorealistic generation. - [stabilityai/stable-diffusion-3-medium-diffusers](https://huggingface.co/stabilityai/stable-diffusion-3-medium-diffusers): A powerful text-to-image model. This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=text-to-image&sort=trending). diff --git a/scripts/api-inference/scripts/generate.ts b/scripts/api-inference/scripts/generate.ts index 9ef681c07..3c86a24b1 100644 --- a/scripts/api-inference/scripts/generate.ts +++ b/scripts/api-inference/scripts/generate.ts @@ -377,13 +377,18 @@ await Promise.all( // Fetch recommended models TASKS.forEach((task) => { - DATA.models[task] = TASKS_DATA[task].models; + DATA.models[task] = TASKS_DATA[task].models.filter( + (model: { inference: string }) => + ["cold", "loading", "warm"].includes(model.inference), + ); }); // Fetch snippets // TODO: render snippets only if they are available TASKS.forEach((task) => { - const mainModel = TASKS_DATA[task].models[0].id; + // Let's take as example the first available model that is recommended. + // Otherwise, fallback to "". + const mainModel = DATA.models[task][0]?.id || ""; const taskSnippets = { curl: getInferenceSnippet(mainModel, task, "curl"), python: getInferenceSnippet(mainModel, task, "python"), From 4039c7eb276f14139e86d851259074e9631ad11e Mon Sep 17 00:00:00 2001 From: Lucain Date: Thu, 29 Aug 2024 11:09:09 +0200 Subject: [PATCH 28/38] New api docs suggestions (#1397) * show as diff * reorder toctree * wording update * diff --- docs/api-inference/_toctree.yml | 4 ++-- docs/api-inference/parameters.md | 24 ++++++++++++------------ docs/api-inference/supported_models.md | 2 +- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/docs/api-inference/_toctree.yml b/docs/api-inference/_toctree.yml index 247a96201..004542b96 100644 --- a/docs/api-inference/_toctree.yml +++ b/docs/api-inference/_toctree.yml @@ -1,10 +1,10 @@ - sections: - local: index title: Serverless Inference API - - local: supported_models - title: Supported Models - local: getting_started title: Getting Started + - local: supported_models + title: Supported Models - local: rate_limits title: Rate Limits title: Getting Started diff --git a/docs/api-inference/parameters.md b/docs/api-inference/parameters.md index 905420fbe..f1a1b9df4 100644 --- a/docs/api-inference/parameters.md +++ b/docs/api-inference/parameters.md @@ -21,25 +21,25 @@ To do this, you can add `x-use-cache:false` to the request headers. 
For example -```bash +```diff curl https://api-inference.huggingface.co/models/MODEL_ID \ -X POST \ -d '{"inputs": "Can you please let us know more details about your "}' \ -H "Authorization: Bearer hf_***" \ -H "Content-Type: application/json" \ - -H "x-use-cache: false" ++ -H "x-use-cache: false" ``` -```python +```diff import requests API_URL = "https://api-inference.huggingface.co/models/MODEL_ID" headers = { "Authorization": "Bearer hf_***", "Content-Type": "application/json", - "x-use-cache": "false" ++ "x-use-cache": "false" } data = { "inputs": "Can you please let us know more details about your " @@ -51,7 +51,7 @@ print(response.json()) -```js +```diff import fetch from "node-fetch"; async function query(data) { @@ -62,7 +62,7 @@ async function query(data) { headers: { Authorization: `Bearer hf_***`, "Content-Type": "application/json", - "x-use-cache": "false" ++ "x-use-cache": "false" }, body: JSON.stringify(data), } @@ -91,25 +91,25 @@ When a model is warm, it is ready to be used and you will get a response relativ -```bash +```diff curl https://api-inference.huggingface.co/models/MODEL_ID \ -X POST \ -d '{"inputs": "Can you please let us know more details about your "}' \ -H "Authorization: Bearer hf_***" \ -H "Content-Type: application/json" \ - -H "x-wait-for-model: true" ++ -H "x-wait-for-model: true" ``` -```python +```diff import requests API_URL = "https://api-inference.huggingface.co/models/MODEL_ID" headers = { "Authorization": "Bearer hf_***", "Content-Type": "application/json", - "x-wait-for-model": "true" ++ "x-wait-for-model": "true" } data = { "inputs": "Can you please let us know more details about your " @@ -121,7 +121,7 @@ print(response.json()) -```js +```diff import fetch from "node-fetch"; async function query(data) { @@ -132,7 +132,7 @@ async function query(data) { headers: { Authorization: `Bearer hf_***`, "Content-Type": "application/json", - "x-wait-for-model": "true" ++ "x-wait-for-model": "true" }, body: JSON.stringify(data), } diff --git a/docs/api-inference/supported_models.md b/docs/api-inference/supported_models.md index 866531a06..f3138699b 100644 --- a/docs/api-inference/supported_models.md +++ b/docs/api-inference/supported_models.md @@ -25,4 +25,4 @@ In addition to thousands of public models available in the Hub, PRO and Enterpri ## Running Private Models -The free Serverless API is designed to run popular public models. If you have a private model, you can use [Inference Endpoints](https://huggingface.co/docs/inference/endpoints) to deploy your model. +The free Serverless API is designed to run popular public models. If you have a private model, you can use [Inference Endpoints](https://huggingface.co/docs/inference/endpoints) to deploy it. 
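
The two headers covered in the `parameters.md` snippets above can also be combined in a single request. Below is a minimal sketch (Node 18+, placeholder model id and token) that disables the cache and waits for a cold model instead of failing with a 503.

```ts
// Minimal sketch combining both headers documented in parameters.md.
async function query(inputs: string): Promise<unknown> {
  const response = await fetch(
    "https://api-inference.huggingface.co/models/MODEL_ID",
    {
      method: "POST",
      headers: {
        Authorization: "Bearer hf_***",
        "Content-Type": "application/json",
        "x-use-cache": "false", // always run a fresh inference
        "x-wait-for-model": "true", // wait for the model to load instead of a 503
      },
      body: JSON.stringify({ inputs }),
    },
  );
  return response.json();
}

query("Can you please let us know more details about your ").then((output) =>
  console.log(JSON.stringify(output)),
);
```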
From 49e8f67e43693b89e7c6c59bc54fc5ae256c8243 Mon Sep 17 00:00:00 2001 From: Lucain Date: Fri, 30 Aug 2024 10:13:24 +0200 Subject: [PATCH 29/38] Add comment header on each task page (#1400) * Add comment header on each task page * add huggingface.co/api/tasks --- docs/api-inference/tasks/chat_completion.md | 14 +++++++++++++ docs/api-inference/tasks/fill_mask.md | 14 +++++++++++++ docs/api-inference/tasks/image_to_image.md | 18 +++++++++++++++-- .../api-inference/tasks/question_answering.md | 14 +++++++++++++ docs/api-inference/tasks/summarization.md | 19 ++++++++++++++++++ .../tasks/table_question_answering.md | 20 ++++++++++++++++--- .../tasks/text_classification.md | 17 ++++++++++++++-- docs/api-inference/tasks/text_generation.md | 14 +++++++++++++ docs/api-inference/tasks/text_to_image.md | 14 +++++++++++++ scripts/api-inference/scripts/generate.ts | 7 ++++++- .../templates/common/page_header.handlebars | 11 ++++++++++ 11 files changed, 154 insertions(+), 8 deletions(-) create mode 100644 scripts/api-inference/templates/common/page_header.handlebars diff --git a/docs/api-inference/tasks/chat_completion.md b/docs/api-inference/tasks/chat_completion.md index 160a41f33..37a1c59a5 100644 --- a/docs/api-inference/tasks/chat_completion.md +++ b/docs/api-inference/tasks/chat_completion.md @@ -1,3 +1,17 @@ + + ## Chat Completion Generate a response given a list of messages. diff --git a/docs/api-inference/tasks/fill_mask.md b/docs/api-inference/tasks/fill_mask.md index 197fef37c..efe000c8f 100644 --- a/docs/api-inference/tasks/fill_mask.md +++ b/docs/api-inference/tasks/fill_mask.md @@ -1,3 +1,17 @@ + + ## Fill-mask Mask filling is the task of predicting the right word (token to be precise) in the middle of a sequence. diff --git a/docs/api-inference/tasks/image_to_image.md b/docs/api-inference/tasks/image_to_image.md index 29731c4aa..6c9037663 100644 --- a/docs/api-inference/tasks/image_to_image.md +++ b/docs/api-inference/tasks/image_to_image.md @@ -1,3 +1,17 @@ + + ## Image to Image Image-to-image is the task of transforming a source image to match the characteristics of a target image or a target image domain. @@ -27,12 +41,12 @@ This is only a subset of the supported models. Find the model that suits you bes | Payload | | | | :--- | :--- | :--- | -| **inputs*** | _object_ | The input image data | +| **inputs*** | _string_ | The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload. | | **parameters** | _object_ | Additional inference parameters for Image To Image | | **        guidance_scale** | _number_ | For diffusion models. A higher guidance scale value encourages the model to generate images closely linked to the text prompt at the expense of lower image quality. | | **        negative_prompt** | _string[]_ | One or several prompt to guide what NOT to include in image generation. | | **        num_inference_steps** | _integer_ | For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference. | -| **        target_size** | _object_ | The size in pixel of the output image | +| **        target_size** | _object_ | The size in pixel of the output image. 
| | **                width*** | _integer_ | | | **                height*** | _integer_ | | diff --git a/docs/api-inference/tasks/question_answering.md b/docs/api-inference/tasks/question_answering.md index e22b2e2b5..ff7e471e4 100644 --- a/docs/api-inference/tasks/question_answering.md +++ b/docs/api-inference/tasks/question_answering.md @@ -1,3 +1,17 @@ + + ## Question Answering Question Answering models can retrieve the answer to a question from a given text, which is useful for searching for an answer in a document. diff --git a/docs/api-inference/tasks/summarization.md b/docs/api-inference/tasks/summarization.md index 266e43192..4cf5d706e 100644 --- a/docs/api-inference/tasks/summarization.md +++ b/docs/api-inference/tasks/summarization.md @@ -1,3 +1,17 @@ + + ## Summarization Summarization is the task of producing a shorter version of a document while preserving its important information. Some models can extract text from the original input, while other models can generate entirely new text. @@ -20,6 +34,11 @@ This is only a subset of the supported models. Find the model that suits you bes | Payload | | | | :--- | :--- | :--- | +| **inputs*** | _string_ | The input text to summarize. | +| **parameters** | _object_ | Additional inference parameters for summarization. | +| **        clean_up_tokenization_spaces** | _boolean_ | Whether to clean up the potential extra spaces in the text output. | +| **        truncation** | _enum_ | Possible values: do_not_truncate, longest_first, only_first, only_second. | +| **        generate_parameters** | _object_ | Additional parametrization of the text generation algorithm. | Some options can be configured by passing headers to the Inference API. Here are the available headers: diff --git a/docs/api-inference/tasks/table_question_answering.md b/docs/api-inference/tasks/table_question_answering.md index 192578319..f33677312 100644 --- a/docs/api-inference/tasks/table_question_answering.md +++ b/docs/api-inference/tasks/table_question_answering.md @@ -1,3 +1,17 @@ + + ## Table Question Answering Table Question Answering (Table QA) is the answering a question about an information on a given table. 
@@ -53,7 +67,7 @@ For more information about Inference API headers, check out the parameters [guid ```bash -curl https://api-inference.huggingface.co/models/microsoft/tapex-base \ +curl https://api-inference.huggingface.co/models/ \ -X POST \ -d '{"inputs": { "query": "How many stars does the transformers repository have?", "table": { "Repository": ["Transformers", "Datasets", "Tokenizers"], "Stars": ["36542", "4512", "3934"], "Contributors": ["651", "77", "34"], "Programming language": [ "Python", "Python", "Rust, Python and NodeJS" ] } }}' \ -H 'Content-Type: application/json' \ @@ -66,7 +80,7 @@ curl https://api-inference.huggingface.co/models/microsoft/tapex-base \ ```py import requests -API_URL = "https://api-inference.huggingface.co/models/microsoft/tapex-base" +API_URL = "https://api-inference.huggingface.co/models/" headers = {"Authorization": "Bearer hf_***"} def query(payload): @@ -97,7 +111,7 @@ To use the Python client, see `huggingface_hub`'s [package reference](https://hu ```js async function query(data) { const response = await fetch( - "https://api-inference.huggingface.co/models/microsoft/tapex-base", + "https://api-inference.huggingface.co/models/", { headers: { Authorization: "Bearer hf_***" diff --git a/docs/api-inference/tasks/text_classification.md b/docs/api-inference/tasks/text_classification.md index 8fffb6654..8d4a8a8e1 100644 --- a/docs/api-inference/tasks/text_classification.md +++ b/docs/api-inference/tasks/text_classification.md @@ -1,3 +1,17 @@ + + ## Text Classification Text Classification is the task of assigning a label or class to a given text. Some use cases are sentiment analysis, natural language inference, and assessing grammatical correctness. @@ -11,7 +25,6 @@ For more details about the `text-classification` task, check out its [dedicated ### Recommended models - [distilbert-base-uncased-finetuned-sst-2-english](https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english): A robust model trained for sentiment analysis. -- [roberta-large-mnli](https://huggingface.co/roberta-large-mnli): Multi-genre natural language inference model. This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=text-classification&sort=trending). @@ -41,7 +54,7 @@ For more information about Inference API headers, check out the parameters [guid | Body | | | :--- | :--- | :--- | -| **(array)** | _undefined[]_ | Output is an array of undefineds. | +| **(array)** | _object[]_ | Output is an array of objects. | | **        label** | _string_ | The predicted class label. | | **        score** | _number_ | The corresponding probability. | diff --git a/docs/api-inference/tasks/text_generation.md b/docs/api-inference/tasks/text_generation.md index 0976b1749..aeb0a2d90 100644 --- a/docs/api-inference/tasks/text_generation.md +++ b/docs/api-inference/tasks/text_generation.md @@ -1,3 +1,17 @@ + + ## Text Generation Generate text based on a prompt. diff --git a/docs/api-inference/tasks/text_to_image.md b/docs/api-inference/tasks/text_to_image.md index 4d0d83757..7fbf31c6f 100644 --- a/docs/api-inference/tasks/text_to_image.md +++ b/docs/api-inference/tasks/text_to_image.md @@ -1,3 +1,17 @@ + + ## Text to Image Generate an image based on a given text prompt. 
diff --git a/scripts/api-inference/scripts/generate.ts b/scripts/api-inference/scripts/generate.ts index 3c86a24b1..e81435a8a 100644 --- a/scripts/api-inference/scripts/generate.ts +++ b/scripts/api-inference/scripts/generate.ts @@ -70,9 +70,11 @@ function writeTaskDoc(templateName: string, content: string): Promise { const templateNameSnakeCase = templateName.replace(/-/g, "_"); const taskDocPath = path.join(TASKS_DOCS_DIR, `${templateNameSnakeCase}.md`); console.log(` 💾 Saving to ${taskDocPath}`); + const header = PAGE_HEADER({task:templateName, taskSnakeCase: templateNameSnakeCase}); + const contentWithHeader = `\n\n${content}`; return fs .mkdir(TASKS_DOCS_DIR, { recursive: true }) - .then(() => fs.writeFile(taskDocPath, content, { encoding: "utf-8" })); + .then(() => fs.writeFile(taskDocPath, contentWithHeader, { encoding: "utf-8" })); } ///////////////////////// @@ -308,6 +310,9 @@ const TIP_LIST_MODELS_LINK_TEMPLATE = Handlebars.compile( ); const SPECS_HEADERS = await readTemplate("specs-headers", "common"); +const PAGE_HEADER = Handlebars.compile( + await readTemplate("page-header", "common"), +); const SNIPPETS_TEMPLATE = Handlebars.compile( await readTemplate("snippets-template", "common"), ); diff --git a/scripts/api-inference/templates/common/page_header.handlebars b/scripts/api-inference/templates/common/page_header.handlebars new file mode 100644 index 000000000..f93c13c43 --- /dev/null +++ b/scripts/api-inference/templates/common/page_header.handlebars @@ -0,0 +1,11 @@ +This markdown file has been generated from a script. Please do not edit it directly. +For more details, check out: +- the `generate.ts` script: https://github.com/huggingface/hub-docs/blob/main/scripts/api-inference/scripts/generate.ts +- the task template defining the sections in the page: https://github.com/huggingface/hub-docs/tree/main/scripts/api-inference/templates/task/{{taskSnakeCase}}.handlebars +- the input jsonschema specifications used to generate the input markdown table: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/tasks/{{task}}/spec/input.json +- the output jsonschema specifications used to generate the output markdown table: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/tasks/{{task}}/spec/output.json +- the snippets used to generate the example: + - curl: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/snippets/curl.ts + - python: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/snippets/python.ts + - javascript: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/snippets/js.ts +- the "tasks" content for recommended models: https://huggingface.co/api/tasks \ No newline at end of file From 20c17d00b14e1d46ecdec8d2e9f1cc41e9701ea2 Mon Sep 17 00:00:00 2001 From: Lucain Date: Fri, 30 Aug 2024 10:26:18 +0200 Subject: [PATCH 30/38] Add even more tasks: token classification, translation and zero shot classification (#1398) * Add token classification * add translation task * add zero shot classification * more parameters * More tasks more tasks more tasks! 
(#1399) * add ASR * fix early stopping parameter * regenrate * add audio_classification * Image classification * Object detection * image segementation * unknown when we don't know * gen * feature extraction * update --- docs/api-inference/_toctree.yml | 18 +++ .../tasks/audio_classification.md | 110 +++++++++++++++ .../tasks/automatic_speech_recognition.md | 128 +++++++++++++++++ docs/api-inference/tasks/chat_completion.md | 8 +- .../api-inference/tasks/feature_extraction.md | 111 +++++++++++++++ .../tasks/image_classification.md | 111 +++++++++++++++ .../api-inference/tasks/image_segmentation.md | 114 ++++++++++++++++ docs/api-inference/tasks/image_to_image.md | 2 +- docs/api-inference/tasks/object_detection.md | 116 ++++++++++++++++ docs/api-inference/tasks/text_generation.md | 4 +- docs/api-inference/tasks/text_to_image.md | 2 +- .../tasks/token_classification.md | 129 ++++++++++++++++++ docs/api-inference/tasks/translation.md | 113 +++++++++++++++ .../tasks/zero_shot_classification.md | 114 ++++++++++++++++ scripts/api-inference/scripts/generate.ts | 18 ++- .../task/audio_classification.handlebars | 29 ++++ .../automatic_speech_recognition.handlebars | 28 ++++ .../task/feature_extraction.handlebars | 30 ++++ .../task/image_classification.handlebars | 29 ++++ .../task/image_segmentation.handlebars | 29 ++++ .../task/object_detection.handlebars | 29 ++++ .../task/token_classification.handlebars | 37 +++++ .../templates/task/translation.handlebars | 29 ++++ .../task/zero_shot_classification.handlebars | 29 ++++ 24 files changed, 1355 insertions(+), 12 deletions(-) create mode 100644 docs/api-inference/tasks/audio_classification.md create mode 100644 docs/api-inference/tasks/automatic_speech_recognition.md create mode 100644 docs/api-inference/tasks/feature_extraction.md create mode 100644 docs/api-inference/tasks/image_classification.md create mode 100644 docs/api-inference/tasks/image_segmentation.md create mode 100644 docs/api-inference/tasks/object_detection.md create mode 100644 docs/api-inference/tasks/token_classification.md create mode 100644 docs/api-inference/tasks/translation.md create mode 100644 docs/api-inference/tasks/zero_shot_classification.md create mode 100644 scripts/api-inference/templates/task/audio_classification.handlebars create mode 100644 scripts/api-inference/templates/task/automatic_speech_recognition.handlebars create mode 100644 scripts/api-inference/templates/task/feature_extraction.handlebars create mode 100644 scripts/api-inference/templates/task/image_classification.handlebars create mode 100644 scripts/api-inference/templates/task/image_segmentation.handlebars create mode 100644 scripts/api-inference/templates/task/object_detection.handlebars create mode 100644 scripts/api-inference/templates/task/token_classification.handlebars create mode 100644 scripts/api-inference/templates/task/translation.handlebars create mode 100644 scripts/api-inference/templates/task/zero_shot_classification.handlebars diff --git a/docs/api-inference/_toctree.yml b/docs/api-inference/_toctree.yml index 004542b96..fc03265c1 100644 --- a/docs/api-inference/_toctree.yml +++ b/docs/api-inference/_toctree.yml @@ -12,12 +12,24 @@ - local: parameters title: Parameters - sections: + - local: tasks/audio_classification + title: Audio Classification + - local: tasks/automatic_speech_recognition + title: Automatic Speech Recognition - local: tasks/chat_completion title: Chat Completion + - local: tasks/feature_extraction + title: Feature Extraction - local: tasks/fill_mask title: Fill 
Mask + - local: tasks/image_classification + title: Image Classification + - local: tasks/image_segmentation + title: Image Segmentation - local: tasks/image_to_image title: Image to Image + - local: tasks/object_detection + title: Object Detection - local: tasks/question_answering title: Question Answering - local: tasks/summarization @@ -30,5 +42,11 @@ title: Text Generation - local: tasks/text_to_image title: Text to Image + - local: tasks/token_classification + title: Token Classification + - local: tasks/translation + title: Translation + - local: tasks/zero_shot_classification + title: Zero Shot Classification title: Detailed Task Parameters title: API Reference \ No newline at end of file diff --git a/docs/api-inference/tasks/audio_classification.md b/docs/api-inference/tasks/audio_classification.md new file mode 100644 index 000000000..b4be60e6a --- /dev/null +++ b/docs/api-inference/tasks/audio_classification.md @@ -0,0 +1,110 @@ +## Audio Classification + +Audio classification is the task of assigning a label or class to a given audio. It can be used for recognizing which command a user is giving or the emotion of a statement, as well as identifying a speaker. + + + +For more details about the `audio-classification` task, check out its [dedicated page](https://huggingface.co/tasks/audio-classification)! You will find examples and related materials. + + + +### Recommended models + + +This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=audio-classification&sort=trending). + +### API specification + +#### Request + +| Payload | | | +| :--- | :--- | :--- | +| **inputs*** | _string_ | The input audio data as a base64-encoded string. If no `parameters` are provided, you can also provide the audio data as a raw bytes payload. | +| **parameters** | _object_ | Additional inference parameters for Audio Classification | +| **        function_to_apply** | _enum_ | Possible values: sigmoid, softmax, none. | +| **        top_k** | _integer_ | When specified, limits the output to the top K most probable classes. | + + +Some options can be configured by passing headers to the Inference API. Here are the available headers: + +| Headers | | | +| :--- | :--- | :--- | +| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | + +For more information about Inference API headers, check out the parameters [guide](../parameters). 
+ +#### Response + +| Body | | +| :--- | :--- | :--- | +| **(array)** | _object[]_ | Output is an array of objects. | +| **        label** | _string_ | The predicted class label. | +| **        score** | _number_ | The corresponding probability. | + + +### Using the API + + + + + +```bash +curl https://api-inference.huggingface.co/models/ \ + -X POST \ + --data-binary '@sample1.flac' \ + -H "Authorization: Bearer hf_***" + +``` + + + +```py +import requests + +API_URL = "https://api-inference.huggingface.co/models/" +headers = {"Authorization": "Bearer hf_***"} + +def query(filename): + with open(filename, "rb") as f: + data = f.read() + response = requests.post(API_URL, headers=headers, data=data) + return response.json() + +output = query("sample1.flac") +``` + +To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.audio_classification). + + + +```js +async function query(filename) { + const data = fs.readFileSync(filename); + const response = await fetch( + "https://api-inference.huggingface.co/models/", + { + headers: { + Authorization: "Bearer hf_***" + "Content-Type": "application/json", + }, + method: "POST", + body: data, + } + ); + const result = await response.json(); + return result; +} + +query("sample1.flac").then((response) => { + console.log(JSON.stringify(response)); +}); +``` + +To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#audioclassification). + + + + + diff --git a/docs/api-inference/tasks/automatic_speech_recognition.md b/docs/api-inference/tasks/automatic_speech_recognition.md new file mode 100644 index 000000000..ae84ed195 --- /dev/null +++ b/docs/api-inference/tasks/automatic_speech_recognition.md @@ -0,0 +1,128 @@ +## Automatic Speech Recognition + +Automatic Speech Recognition (ASR), also known as Speech to Text (STT), is the task of transcribing a given audio to text. It has many applications, such as voice user interfaces. + + +For more details about the `automatic-speech-recognition` task, check out its [dedicated page](https://huggingface.co/tasks/automatic-speech-recognition)! You will find examples and related materials. + + + +### Recommended models + +- [openai/whisper-large-v3](https://huggingface.co/openai/whisper-large-v3): A powerful ASR model by OpenAI. +- [facebook/seamless-m4t-v2-large](https://huggingface.co/facebook/seamless-m4t-v2-large): An end-to-end model that performs ASR and Speech Translation by MetaAI. + +This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=automatic-speech-recognition&sort=trending). + +### API specification + +#### Request + +| Payload | | | +| :--- | :--- | :--- | +| **inputs*** | _string_ | The input audio data as a base64-encoded string. If no `parameters` are provided, you can also provide the audio data as a raw bytes payload. | +| **parameters** | _object_ | Additional inference parameters for Automatic Speech Recognition | +| **        return_timestamps** | _boolean_ | Whether to output corresponding timestamps with the generated text | +| **        generate** | _object_ | Ad-hoc parametrization of the text generation process | +| **                temperature** | _number_ | The value used to modulate the next token probabilities. 
| +| **                top_k** | _integer_ | The number of highest probability vocabulary tokens to keep for top-k-filtering. | +| **                top_p** | _number_ | If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. | +| **                typical_p** | _number_ | Local typicality measures how similar the conditional probability of predicting a target token next is to the expected conditional probability of predicting a random token next, given the partial text already generated. If set to float < 1, the smallest set of the most locally typical tokens with probabilities that add up to typical_p or higher are kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details. | +| **                epsilon_cutoff** | _number_ | If set to float strictly between 0 and 1, only tokens with a conditional probability greater than epsilon_cutoff will be sampled. In the paper, suggested values range from 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191) for more details. | +| **                eta_cutoff** | _number_ | Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to float strictly between 0 and 1, a token is only considered if it is greater than either eta_cutoff or sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits))). The latter term is intuitively the expected next token probability, scaled by sqrt(eta_cutoff). In the paper, suggested values range from 3e-4 to 2e-3, depending on the size of the model. See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191) for more details. | +| **                max_length** | _integer_ | The maximum length (in tokens) of the generated text, including the input. | +| **                max_new_tokens** | _integer_ | The maximum number of tokens to generate. Takes precedence over maxLength. | +| **                min_length** | _integer_ | The minimum length (in tokens) of the generated text, including the input. | +| **                min_new_tokens** | _integer_ | The minimum number of tokens to generate. Takes precedence over maxLength. | +| **                do_sample** | _boolean_ | Whether to use sampling instead of greedy decoding when generating new tokens. | +| **                early_stopping** | _enum_ | Possible values: never, true, false. | +| **                num_beams** | _integer_ | Number of beams to use for beam search. | +| **                num_beam_groups** | _integer_ | Number of groups to divide num_beams into in order to ensure diversity among different groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details. | +| **                penalty_alpha** | _number_ | The value balances the model confidence and the degeneration penalty in contrastive search decoding. | +| **                use_cache** | _boolean_ | Whether the model should use the past last key/values attentions to speed up decoding | + + +Some options can be configured by passing headers to the Inference API. Here are the available headers: + +| Headers | | | +| :--- | :--- | :--- | +| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). 
| +| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | + +For more information about Inference API headers, check out the parameters [guide](../parameters). + +#### Response + +| Body | | +| :--- | :--- | :--- | +| **text** | _string_ | The recognized text. | +| **chunks** | _object[]_ | When returnTimestamps is enabled, chunks contains a list of audio chunks identified by the model. | +| **        text** | _string_ | A chunk of text identified by the model | +| **        timestamps** | _number[]_ | The start and end timestamps corresponding with the text | + + +### Using the API + + + + + +```bash +curl https://api-inference.huggingface.co/models/openai/whisper-large-v3 \ + -X POST \ + --data-binary '@sample1.flac' \ + -H "Authorization: Bearer hf_***" + +``` + + + +```py +import requests + +API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3" +headers = {"Authorization": "Bearer hf_***"} + +def query(filename): + with open(filename, "rb") as f: + data = f.read() + response = requests.post(API_URL, headers=headers, data=data) + return response.json() + +output = query("sample1.flac") +``` + +To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.automatic_speech-recognition). + + + +```js +async function query(filename) { + const data = fs.readFileSync(filename); + const response = await fetch( + "https://api-inference.huggingface.co/models/openai/whisper-large-v3", + { + headers: { + Authorization: "Bearer hf_***" + "Content-Type": "application/json", + }, + method: "POST", + body: data, + } + ); + const result = await response.json(); + return result; +} + +query("sample1.flac").then((response) => { + console.log(JSON.stringify(response)); +}); +``` + +To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#automaticspeech-recognition). 
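
For reference, here is a minimal sketch of the same request through `huggingface_hub`'s `InferenceClient` (the Python client mentioned above). It assumes a recent release of the library, a local `sample1.flac` file, and a valid user access token; in recent versions the call returns an object whose `.text` field holds the transcription.

```py
from huggingface_hub import InferenceClient

# Token and audio file are placeholders.
client = InferenceClient(token="hf_***")

result = client.automatic_speech_recognition(
    "sample1.flac",
    model="openai/whisper-large-v3",
)
print(result.text)  # the transcribed text
```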
+ + + + + diff --git a/docs/api-inference/tasks/chat_completion.md b/docs/api-inference/tasks/chat_completion.md index 37a1c59a5..585e31e70 100644 --- a/docs/api-inference/tasks/chat_completion.md +++ b/docs/api-inference/tasks/chat_completion.md @@ -44,7 +44,7 @@ This is a subtask of [`text-generation`](./text_generation) designed to generate | **        role*** | _string_ | | | **        tool_calls** | _object[]_ | | | **                function*** | _object_ | | -| **                        arguments*** | _object_ | | +| **                        arguments*** | _unknown_ | | | **                        description** | _string_ | | | **                        name*** | _string_ | | | **                id*** | _integer_ | | @@ -54,14 +54,14 @@ This is a subtask of [`text-generation`](./text_generation) designed to generate | **stop** | _string[]_ | Up to 4 sequences where the API will stop generating further tokens. | | **stream** | _boolean_ | | | **temperature** | _number_ | What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. | -| **tool_choice** | _object_ | One of the following: | +| **tool_choice** | _unknown_ | One of the following: | | **         (#1)** | | | | **                FunctionName*** | _string_ | | | **         (#2)** | | Possible values: OneOf. | | **tool_prompt** | _string_ | A prompt to be appended before the tools | | **tools** | _object[]_ | A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs for. | | **        function*** | _object_ | | -| **                arguments*** | _object_ | | +| **                arguments*** | _unknown_ | | | **                description** | _string_ | | | **                name*** | _string_ | | | **        type*** | _string_ | | @@ -102,7 +102,7 @@ If `stream` is `false` (default), the response will be a JSON object with the fo | **                role** | _string_ | | | **                tool_calls** | _object[]_ | | | **                        function** | _object_ | | -| **                                arguments** | _object_ | | +| **                                arguments** | _unknown_ | | | **                                description** | _string_ | | | **                                name** | _string_ | | | **                        id** | _integer_ | | diff --git a/docs/api-inference/tasks/feature_extraction.md b/docs/api-inference/tasks/feature_extraction.md new file mode 100644 index 000000000..76e61ca08 --- /dev/null +++ b/docs/api-inference/tasks/feature_extraction.md @@ -0,0 +1,111 @@ +## Feature Extraction + +Feature extraction is the task of converting a text into a vector (often called "embedding"). +Extracting features is useful for subtasks like sentence similarity, reranking and retrieval augmented generation (RAG). + + + +For more details about the `feature-extraction` task, check out its [dedicated page](https://huggingface.co/tasks/feature-extraction)! You will find examples and related materials. + + + +### Recommended models + +- [thenlper/gte-large](https://huggingface.co/thenlper/gte-large): A powerful feature extraction model for natural language processing tasks. + +This is only a subset of the supported models. 
Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=feature-extraction&sort=trending). + +### API specification + +#### Request + +| Payload | | | +| :--- | :--- | :--- | +| **inputs*** | _string_ | The text to embed. | +| **normalize** | _boolean_ | | +| **prompt_name** | _string_ | The name of the prompt that should be used by for encoding. If not set, no prompt will be applied. Must be a key in the `Sentence Transformers` configuration `prompts` dictionary. For example if ``prompt_name`` is "query" and the ``prompts`` is {"query": "query: ", ...}, then the sentence "What is the capital of France?" will be encoded as "query: What is the capital of France?" because the prompt text will be prepended before any text to encode. | +| **truncate** | _boolean_ | | +| **truncation_direction** | _enum_ | Possible values: Left, Right. | + + +Some options can be configured by passing headers to the Inference API. Here are the available headers: + +| Headers | | | +| :--- | :--- | :--- | +| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | + +For more information about Inference API headers, check out the parameters [guide](../parameters). + +#### Response + +| Body | | +| :--- | :--- | :--- | +| **(array)** | _array[]_ | Output is an array of arrays. | + + +### Using the API + + + + + +```bash +curl https://api-inference.huggingface.co/models/thenlper/gte-large \ + -X POST \ + -d '{"inputs": "Today is a sunny day and I will get some ice cream."}' \ + -H 'Content-Type: application/json' \ + -H "Authorization: Bearer hf_***" + +``` + + + +```py +import requests + +API_URL = "https://api-inference.huggingface.co/models/thenlper/gte-large" +headers = {"Authorization": "Bearer hf_***"} + +def query(payload): + response = requests.post(API_URL, headers=headers, json=payload) + return response.json() + +output = query({ + "inputs": "Today is a sunny day and I will get some ice cream.", +}) +``` + +To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.feature_extraction). 
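
As a rough sketch of what that client call looks like in practice, the snippet below embeds two sentences and compares them with cosine similarity, the typical building block for sentence similarity and retrieval. It assumes a recent `huggingface_hub` release and `numpy` installed; `feature_extraction` returns one embedding per input as an array.

```py
import numpy as np
from huggingface_hub import InferenceClient

client = InferenceClient(model="thenlper/gte-large", token="hf_***")

# One embedding vector per input text; squeeze() drops a possible leading batch axis.
emb_a = np.asarray(client.feature_extraction("Today is a sunny day and I will get some ice cream.")).squeeze()
emb_b = np.asarray(client.feature_extraction("I plan to buy ice cream while the sun is out.")).squeeze()

cosine = float(emb_a @ emb_b / (np.linalg.norm(emb_a) * np.linalg.norm(emb_b)))
print(f"cosine similarity: {cosine:.3f}")
```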
+ + + +```js +async function query(data) { + const response = await fetch( + "https://api-inference.huggingface.co/models/thenlper/gte-large", + { + headers: { + Authorization: "Bearer hf_***" + "Content-Type": "application/json", + }, + method: "POST", + body: JSON.stringify(data), + } + ); + const result = await response.json(); + return result; +} + +query({"inputs": "Today is a sunny day and I will get some ice cream."}).then((response) => { + console.log(JSON.stringify(response)); +}); +``` + +To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#featureextraction). + + + + + diff --git a/docs/api-inference/tasks/image_classification.md b/docs/api-inference/tasks/image_classification.md new file mode 100644 index 000000000..0825ce381 --- /dev/null +++ b/docs/api-inference/tasks/image_classification.md @@ -0,0 +1,111 @@ +## Image Classification + +Image classification is the task of assigning a label or class to an entire image. Images are expected to have only one class for each image. Image classification models take an image as input and return a prediction about which class the image belongs to. + + + +For more details about the `image-classification` task, check out its [dedicated page](https://huggingface.co/tasks/image-classification)! You will find examples and related materials. + + + +### Recommended models + +- [google/vit-base-patch16-224](https://huggingface.co/google/vit-base-patch16-224): A strong image classification model. + +This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=image-classification&sort=trending). + +### API specification + +#### Request + +| Payload | | | +| :--- | :--- | :--- | +| **inputs*** | _string_ | The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload. | +| **parameters** | _object_ | Additional inference parameters for Image Classification | +| **        function_to_apply** | _enum_ | Possible values: sigmoid, softmax, none. | +| **        top_k** | _integer_ | When specified, limits the output to the top K most probable classes. | + + +Some options can be configured by passing headers to the Inference API. Here are the available headers: + +| Headers | | | +| :--- | :--- | :--- | +| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). 
| + +For more information about Inference API headers, check out the parameters [guide](../parameters). + +#### Response + +| Body | | +| :--- | :--- | :--- | +| **(array)** | _object[]_ | Output is an array of objects. | +| **        label** | _string_ | The predicted class label. | +| **        score** | _number_ | The corresponding probability. | + + +### Using the API + + + + + +```bash +curl https://api-inference.huggingface.co/models/google/vit-base-patch16-224 \ + -X POST \ + --data-binary '@cats.jpg' \ + -H "Authorization: Bearer hf_***" + +``` + + + +```py +import requests + +API_URL = "https://api-inference.huggingface.co/models/google/vit-base-patch16-224" +headers = {"Authorization": "Bearer hf_***"} + +def query(filename): + with open(filename, "rb") as f: + data = f.read() + response = requests.post(API_URL, headers=headers, data=data) + return response.json() + +output = query("cats.jpg") +``` + +To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.image_classification). + + + +```js +async function query(filename) { + const data = fs.readFileSync(filename); + const response = await fetch( + "https://api-inference.huggingface.co/models/google/vit-base-patch16-224", + { + headers: { + Authorization: "Bearer hf_***" + "Content-Type": "application/json", + }, + method: "POST", + body: data, + } + ); + const result = await response.json(); + return result; +} + +query("cats.jpg").then((response) => { + console.log(JSON.stringify(response)); +}); +``` + +To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#imageclassification). + + + + + diff --git a/docs/api-inference/tasks/image_segmentation.md b/docs/api-inference/tasks/image_segmentation.md new file mode 100644 index 000000000..eb4dfd506 --- /dev/null +++ b/docs/api-inference/tasks/image_segmentation.md @@ -0,0 +1,114 @@ +## Image Segmentation + +Image Segmentation divides an image into segments where each pixel in the image is mapped to an object. This task has multiple variants such as instance segmentation, panoptic segmentation and semantic segmentation. + + + +For more details about the `image-segmentation` task, check out its [dedicated page](https://huggingface.co/tasks/image-segmentation)! You will find examples and related materials. + + + +### Recommended models + +- [nvidia/segformer-b0-finetuned-ade-512-512](https://huggingface.co/nvidia/segformer-b0-finetuned-ade-512-512): Semantic segmentation model trained on ADE20k benchmark dataset with 512x512 resolution. + +This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=image-segmentation&sort=trending). + +### API specification + +#### Request + +| Payload | | | +| :--- | :--- | :--- | +| **inputs*** | _string_ | The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload. | +| **parameters** | _object_ | Additional inference parameters for Image Segmentation | +| **        mask_threshold** | _number_ | Threshold to use when turning the predicted masks into binary values. | +| **        overlap_mask_area_threshold** | _number_ | Mask overlap threshold to eliminate small, disconnected segments. | +| **        subtask** | _enum_ | Possible values: instance, panoptic, semantic. 
| +| **        threshold** | _number_ | Probability threshold to filter out predicted masks. | + + +Some options can be configured by passing headers to the Inference API. Here are the available headers: + +| Headers | | | +| :--- | :--- | :--- | +| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | + +For more information about Inference API headers, check out the parameters [guide](../parameters). + +#### Response + +| Body | | +| :--- | :--- | :--- | +| **(array)** | _object[]_ | A predicted mask / segment | +| **        label** | _string_ | The label of the predicted segment. | +| **        mask** | _string_ | The corresponding mask as a black-and-white image (base64-encoded). | +| **        score** | _number_ | The score or confidence degree the model has. | + + +### Using the API + + + + + +```bash +curl https://api-inference.huggingface.co/models/nvidia/segformer-b0-finetuned-ade-512-512 \ + -X POST \ + --data-binary '@cats.jpg' \ + -H "Authorization: Bearer hf_***" + +``` + + + +```py +import requests + +API_URL = "https://api-inference.huggingface.co/models/nvidia/segformer-b0-finetuned-ade-512-512" +headers = {"Authorization": "Bearer hf_***"} + +def query(filename): + with open(filename, "rb") as f: + data = f.read() + response = requests.post(API_URL, headers=headers, data=data) + return response.json() + +output = query("cats.jpg") +``` + +To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.image_segmentation). + + + +```js +async function query(filename) { + const data = fs.readFileSync(filename); + const response = await fetch( + "https://api-inference.huggingface.co/models/nvidia/segformer-b0-finetuned-ade-512-512", + { + headers: { + Authorization: "Bearer hf_***" + "Content-Type": "application/json", + }, + method: "POST", + body: data, + } + ); + const result = await response.json(); + return result; +} + +query("cats.jpg").then((response) => { + console.log(JSON.stringify(response)); +}); +``` + +To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#imagesegmentation). 
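
Because each returned `mask` is a base64-encoded image, a little post-processing is needed before it can be inspected. The following is a minimal sketch with `requests` and `Pillow`, reusing the same model and image as the examples above:

```py
import base64
import io

import requests
from PIL import Image

API_URL = "https://api-inference.huggingface.co/models/nvidia/segformer-b0-finetuned-ade-512-512"
headers = {"Authorization": "Bearer hf_***"}

with open("cats.jpg", "rb") as f:
    segments = requests.post(API_URL, headers=headers, data=f.read()).json()

# Each entry carries a label, a confidence score and a base64-encoded black-and-white mask.
for segment in segments:
    mask = Image.open(io.BytesIO(base64.b64decode(segment["mask"])))
    print(segment["label"], round(segment["score"], 3), mask.size)
    # mask.save(f"{segment['label']}.png")  # optionally write each mask to disk
```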
+ + + + + diff --git a/docs/api-inference/tasks/image_to_image.md b/docs/api-inference/tasks/image_to_image.md index 6c9037663..c116548d2 100644 --- a/docs/api-inference/tasks/image_to_image.md +++ b/docs/api-inference/tasks/image_to_image.md @@ -65,7 +65,7 @@ For more information about Inference API headers, check out the parameters [guid | Body | | | :--- | :--- | :--- | -| **image** | _object_ | The output image | +| **image** | _unknown_ | The output image returned as raw bytes in the payload. | ### Using the API diff --git a/docs/api-inference/tasks/object_detection.md b/docs/api-inference/tasks/object_detection.md new file mode 100644 index 000000000..6c83e973f --- /dev/null +++ b/docs/api-inference/tasks/object_detection.md @@ -0,0 +1,116 @@ +## Object detection + +Object Detection models allow users to identify objects of certain defined classes. Object detection models receive an image as input and output the images with bounding boxes and labels on detected objects. + + + +For more details about the `object-detection` task, check out its [dedicated page](https://huggingface.co/tasks/object-detection)! You will find examples and related materials. + + + +### Recommended models + +- [facebook/detr-resnet-50](https://huggingface.co/facebook/detr-resnet-50): Solid object detection model trained on the benchmark dataset COCO 2017. +- [microsoft/beit-base-patch16-224-pt22k-ft22k](https://huggingface.co/microsoft/beit-base-patch16-224-pt22k-ft22k): Strong object detection model trained on ImageNet-21k dataset. + +This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=object-detection&sort=trending). + +### API specification + +#### Request + +| Payload | | | +| :--- | :--- | :--- | +| **inputs*** | _string_ | The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload. | +| **parameters** | _object_ | Additional inference parameters for Object Detection | +| **        threshold** | _number_ | The probability necessary to make a prediction. | + + +Some options can be configured by passing headers to the Inference API. Here are the available headers: + +| Headers | | | +| :--- | :--- | :--- | +| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | + +For more information about Inference API headers, check out the parameters [guide](../parameters). 
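
Note that as soon as `parameters` are used, the image has to be sent as a base64-encoded string inside a JSON payload instead of as raw bytes. A minimal sketch of such a request with a `threshold` follows; the exact payload shape may vary slightly across deployments, so treat it as an illustration rather than a definitive reference.

```py
import base64

import requests

API_URL = "https://api-inference.huggingface.co/models/facebook/detr-resnet-50"
headers = {"Authorization": "Bearer hf_***"}

with open("cats.jpg", "rb") as f:
    encoded_image = base64.b64encode(f.read()).decode("utf-8")

payload = {
    "inputs": encoded_image,             # image as a base64-encoded string
    "parameters": {"threshold": 0.9},    # only keep high-confidence detections
}
response = requests.post(API_URL, headers=headers, json=payload)
print(response.json())  # [{"label": ..., "score": ..., "box": {...}}, ...]
```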
+ +#### Response + +| Body | | +| :--- | :--- | :--- | +| **(array)** | _object[]_ | Output is an array of objects. | +| **        label** | _string_ | The predicted label for the bounding box. | +| **        score** | _number_ | The associated score / probability. | +| **        box** | _object_ | | +| **                xmin** | _integer_ | The x-coordinate of the top-left corner of the bounding box. | +| **                xmax** | _integer_ | The x-coordinate of the bottom-right corner of the bounding box. | +| **                ymin** | _integer_ | The y-coordinate of the top-left corner of the bounding box. | +| **                ymax** | _integer_ | The y-coordinate of the bottom-right corner of the bounding box. | + + +### Using the API + + + + + +```bash +curl https://api-inference.huggingface.co/models/facebook/detr-resnet-50 \ + -X POST \ + --data-binary '@cats.jpg' \ + -H "Authorization: Bearer hf_***" + +``` + + + +```py +import requests + +API_URL = "https://api-inference.huggingface.co/models/facebook/detr-resnet-50" +headers = {"Authorization": "Bearer hf_***"} + +def query(filename): + with open(filename, "rb") as f: + data = f.read() + response = requests.post(API_URL, headers=headers, data=data) + return response.json() + +output = query("cats.jpg") +``` + +To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.object_detection). + + + +```js +async function query(filename) { + const data = fs.readFileSync(filename); + const response = await fetch( + "https://api-inference.huggingface.co/models/facebook/detr-resnet-50", + { + headers: { + Authorization: "Bearer hf_***" + "Content-Type": "application/json", + }, + method: "POST", + body: data, + } + ); + const result = await response.json(); + return result; +} + +query("cats.jpg").then((response) => { + console.log(JSON.stringify(response)); +}); +``` + +To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#objectdetection). + + + + + diff --git a/docs/api-inference/tasks/text_generation.md b/docs/api-inference/tasks/text_generation.md index aeb0a2d90..2b75a6222 100644 --- a/docs/api-inference/tasks/text_generation.md +++ b/docs/api-inference/tasks/text_generation.md @@ -48,10 +48,10 @@ This is only a subset of the supported models. Find the model that suits you bes | **        details** | _boolean_ | | | **        do_sample** | _boolean_ | | | **        frequency_penalty** | _number_ | | -| **        grammar** | _object_ | One of the following: | +| **        grammar** | _unknown_ | One of the following: | | **                 (#1)** | | | | **                        type*** | _enum_ | Possible values: json. | -| **                        value*** | _object_ | A string that represents a [JSON Schema](https://json-schema.org/). JSON Schema is a declarative language that allows to annotate JSON documents with types and descriptions. | +| **                        value*** | _unknown_ | A string that represents a [JSON Schema](https://json-schema.org/). JSON Schema is a declarative language that allows to annotate JSON documents with types and descriptions. | | **                 (#2)** | | | | **                        type*** | _enum_ | Possible values: regex. 
| | **                        value*** | _string_ | | diff --git a/docs/api-inference/tasks/text_to_image.md b/docs/api-inference/tasks/text_to_image.md index 7fbf31c6f..29783810a 100644 --- a/docs/api-inference/tasks/text_to_image.md +++ b/docs/api-inference/tasks/text_to_image.md @@ -61,7 +61,7 @@ For more information about Inference API headers, check out the parameters [guid | Body | | | :--- | :--- | :--- | -| **image** | _object_ | The generated image | +| **image** | _unknown_ | The generated image returned as raw bytes in the payload. | ### Using the API diff --git a/docs/api-inference/tasks/token_classification.md b/docs/api-inference/tasks/token_classification.md new file mode 100644 index 000000000..df34ab08b --- /dev/null +++ b/docs/api-inference/tasks/token_classification.md @@ -0,0 +1,129 @@ +## Token Classification + +Token classification is a task in which a label is assigned to some tokens in a text. Some popular token classification subtasks are Named Entity Recognition (NER) and Part-of-Speech (PoS) tagging. + + + +For more details about the `token-classification` task, check out its [dedicated page](https://huggingface.co/tasks/token-classification)! You will find examples and related materials. + + + +### Recommended models + +- [dslim/bert-base-NER](https://huggingface.co/dslim/bert-base-NER): A robust performance model to identify people, locations, organizations and names of miscellaneous entities. +- [flair/ner-english](https://huggingface.co/flair/ner-english): Flair models are typically the state of the art in named entity recognition tasks. + +This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=token-classification&sort=trending). + +### API specification + +#### Request + +| Payload | | | +| :--- | :--- | :--- | +| **inputs*** | _string_ | The input text data | +| **parameters** | _object_ | Additional inference parameters for Token Classification | +| **        ignore_labels** | _string[]_ | A list of labels to ignore | +| **        stride** | _integer_ | The number of overlapping tokens between chunks when splitting the input text. | +| **        aggregation_strategy** | _string_ | One of the following: | +| **                 (#1)** | _'none'_ | Do not aggregate tokens | +| **                 (#2)** | _'simple'_ | Group consecutive tokens with the same label in a single entity. | +| **                 (#3)** | _'first'_ | Similar to "simple", also preserves word integrity (use the label predicted for the first token in a word). | +| **                 (#4)** | _'average'_ | Similar to "simple", also preserves word integrity (uses the label with the highest score, averaged across the word's tokens). | +| **                 (#5)** | _'max'_ | Similar to "simple", also preserves word integrity (uses the label with the highest score across the word's tokens). | + + +Some options can be configured by passing headers to the Inference API. Here are the available headers: + +| Headers | | | +| :--- | :--- | :--- | +| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. 
Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | + +For more information about Inference API headers, check out the parameters [guide](../parameters). + +#### Response + +Output type depends on the `stream` input parameter. +If `stream` is `false` (default), the response will be a JSON object with the following fields: + +| Body | | +| :--- | :--- | :--- | +| **(array)** | _object[]_ | Output is an array of objects. | +| **        entity_group** | _string_ | The predicted label for that group of tokens | +| **        score** | _number_ | The associated score / probability | +| **        word** | _string_ | The corresponding text | +| **        start** | _integer_ | The character position in the input where this group begins. | +| **        end** | _integer_ | The character position in the input where this group ends. | + + +If `stream` is `true`, generated tokens are returned as a stream, using Server-Sent Events (SSE). +For more information about streaming, check out [this guide](https://huggingface.co/docs/token-classification-inference/conceptual/streaming). + + + +### Using the API + + + + + +```bash +curl https://api-inference.huggingface.co/models/dslim/bert-base-NER \ + -X POST \ + -d '{"inputs": "My name is Sarah Jessica Parker but you can call me Jessica"}' \ + -H 'Content-Type: application/json' \ + -H "Authorization: Bearer hf_***" + +``` + + + +```py +import requests + +API_URL = "https://api-inference.huggingface.co/models/dslim/bert-base-NER" +headers = {"Authorization": "Bearer hf_***"} + +def query(payload): + response = requests.post(API_URL, headers=headers, json=payload) + return response.json() + +output = query({ + "inputs": "My name is Sarah Jessica Parker but you can call me Jessica", +}) +``` + +To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.token_classification). + + + +```js +async function query(data) { + const response = await fetch( + "https://api-inference.huggingface.co/models/dslim/bert-base-NER", + { + headers: { + Authorization: "Bearer hf_***" + "Content-Type": "application/json", + }, + method: "POST", + body: JSON.stringify(data), + } + ); + const result = await response.json(); + return result; +} + +query({"inputs": "My name is Sarah Jessica Parker but you can call me Jessica"}).then((response) => { + console.log(JSON.stringify(response)); +}); +``` + +To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#tokenclassification). 
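
For completeness, a rough equivalent through `huggingface_hub`'s `InferenceClient` (the Python client mentioned above) might look as follows. This assumes a recent release of the library, in which the returned elements expose the same fields as the response table above.

```py
from huggingface_hub import InferenceClient

client = InferenceClient(model="dslim/bert-base-NER", token="hf_***")

entities = client.token_classification(
    "My name is Sarah Jessica Parker but you can call me Jessica"
)
for entity in entities:
    # Field names mirror the response table: entity_group, word, score, start, end.
    print(entity.entity_group, entity.word, round(entity.score, 3))
```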
+ + + + + diff --git a/docs/api-inference/tasks/translation.md b/docs/api-inference/tasks/translation.md new file mode 100644 index 000000000..b92c0abe6 --- /dev/null +++ b/docs/api-inference/tasks/translation.md @@ -0,0 +1,113 @@ +## Translation + +Translation is the task of converting text from one language to another. + + + +For more details about the `translation` task, check out its [dedicated page](https://huggingface.co/tasks/translation)! You will find examples and related materials. + + + +### Recommended models + +- [Helsinki-NLP/opus-mt-en-fr](https://huggingface.co/Helsinki-NLP/opus-mt-en-fr): A model that translates from English to French. +- [t5-base](https://huggingface.co/t5-base): A general-purpose Transformer that can be used to translate from English to German, French, or Romanian. + +This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=translation&sort=trending). + +### API specification + +#### Request + +| Payload | | | +| :--- | :--- | :--- | +| **inputs*** | _string_ | The text to translate. | +| **parameters** | _object_ | Additional inference parameters for Translation | +| **        src_lang** | _string_ | The source language of the text. Required for models that can translate from multiple languages. | +| **        tgt_lang** | _string_ | Target language to translate to. Required for models that can translate to multiple languages. | +| **        clean_up_tokenization_spaces** | _boolean_ | Whether to clean up the potential extra spaces in the text output. | +| **        truncation** | _enum_ | Possible values: do_not_truncate, longest_first, only_first, only_second. | +| **        generate_parameters** | _object_ | Additional parametrization of the text generation algorithm. | + + +Some options can be configured by passing headers to the Inference API. Here are the available headers: + +| Headers | | | +| :--- | :--- | :--- | +| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | + +For more information about Inference API headers, check out the parameters [guide](../parameters). + +#### Response + +| Body | | +| :--- | :--- | :--- | +| **translation_text** | _string_ | The translated text. 
| + + +### Using the API + + + + + +```bash +curl https://api-inference.huggingface.co/models/Helsinki-NLP/opus-mt-en-fr \ + -X POST \ + -d '{"inputs": "Меня зовут Вольфганг и я живу в Берлине"}' \ + -H 'Content-Type: application/json' \ + -H "Authorization: Bearer hf_***" + +``` + + + +```py +import requests + +API_URL = "https://api-inference.huggingface.co/models/Helsinki-NLP/opus-mt-en-fr" +headers = {"Authorization": "Bearer hf_***"} + +def query(payload): + response = requests.post(API_URL, headers=headers, json=payload) + return response.json() + +output = query({ + "inputs": "Меня зовут Вольфганг и я живу в Берлине", +}) +``` + +To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.translation). + + + +```js +async function query(data) { + const response = await fetch( + "https://api-inference.huggingface.co/models/Helsinki-NLP/opus-mt-en-fr", + { + headers: { + Authorization: "Bearer hf_***" + "Content-Type": "application/json", + }, + method: "POST", + body: JSON.stringify(data), + } + ); + const result = await response.json(); + return result; +} + +query({"inputs": "Меня зовут Вольфганг и я живу в Берлине"}).then((response) => { + console.log(JSON.stringify(response)); +}); +``` + +To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#translation). + + + + + diff --git a/docs/api-inference/tasks/zero_shot_classification.md b/docs/api-inference/tasks/zero_shot_classification.md new file mode 100644 index 000000000..7460ddbdd --- /dev/null +++ b/docs/api-inference/tasks/zero_shot_classification.md @@ -0,0 +1,114 @@ +## Zero-Shot Classification + +Zero-shot text classification is super useful to try out classification with zero code, you simply pass a sentence/paragraph and the possible labels for that sentence, and you get a result. The model has not been necessarily trained on the labels you provide, but it can still predict the correct label. + + + +For more details about the `zero-shot-classification` task, check out its [dedicated page](https://huggingface.co/tasks/zero-shot-classification)! You will find examples and related materials. + + + +### Recommended models + +- [facebook/bart-large-mnli](https://huggingface.co/facebook/bart-large-mnli): Powerful zero-shot text classification model + +This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=zero-shot-classification&sort=trending). + +### API specification + +#### Request + +| Payload | | | +| :--- | :--- | :--- | +| **inputs*** | _object_ | The input text data, with candidate labels | +| **        text*** | _string_ | The text to classify | +| **        candidateLabels*** | _string[]_ | The set of possible class labels to classify the text into. | +| **parameters** | _object_ | Additional inference parameters for Zero Shot Classification | +| **        hypothesis_template** | _string_ | The sentence used in conjunction with candidateLabels to attempt the text classification by replacing the placeholder with the candidate labels. | +| **        multi_label** | _boolean_ | Whether multiple candidate labels can be true. If false, the scores are normalized such that the sum of the label likelihoods for each sequence is 1. 
If true, the labels are considered independent and probabilities are normalized for each candidate. | + + +Some options can be configured by passing headers to the Inference API. Here are the available headers: + +| Headers | | | +| :--- | :--- | :--- | +| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | + +For more information about Inference API headers, check out the parameters [guide](../parameters). + +#### Response + +| Body | | +| :--- | :--- | :--- | +| **(array)** | _object[]_ | Output is an array of objects. | +| **        label** | _string_ | The predicted class label. | +| **        score** | _number_ | The corresponding probability. | + + +### Using the API + + + + + +```bash +curl https://api-inference.huggingface.co/models/facebook/bart-large-mnli \ + -X POST \ + -d '{"inputs": "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!", "parameters": {"candidate_labels": ["refund", "legal", "faq"]}}' \ + -H 'Content-Type: application/json' \ + -H "Authorization: Bearer hf_***" + +``` + + + +```py +import requests + +API_URL = "https://api-inference.huggingface.co/models/facebook/bart-large-mnli" +headers = {"Authorization": "Bearer hf_***"} + +def query(payload): + response = requests.post(API_URL, headers=headers, json=payload) + return response.json() + +output = query({ + "inputs": "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!", + "parameters": {"candidate_labels": ["refund", "legal", "faq"]}, +}) +``` + +To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.zero_shot-classification). 
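
A rough equivalent with `huggingface_hub`'s `InferenceClient` (assuming a recent version of the library) is sketched below; the candidate labels are passed alongside the text, exactly as in the raw payload above.

```py
from huggingface_hub import InferenceClient

client = InferenceClient(model="facebook/bart-large-mnli", token="hf_***")

text = (
    "Hi, I recently bought a device from your company but it is not working "
    "as advertised and I would like to get reimbursed!"
)
# The model has not been trained on these labels; they are supplied at request time.
results = client.zero_shot_classification(text, ["refund", "legal", "faq"])
for result in results:
    print(result.label, round(result.score, 3))
```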
+ + + +```js +async function query(data) { + const response = await fetch( + "https://api-inference.huggingface.co/models/facebook/bart-large-mnli", + { + headers: { + Authorization: "Bearer hf_***" + "Content-Type": "application/json", + }, + method: "POST", + body: JSON.stringify(data), + } + ); + const result = await response.json(); + return result; +} + +query({"inputs": "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!", "parameters": {"candidate_labels": ["refund", "legal", "faq"]}}).then((response) => { + console.log(JSON.stringify(response)); +}); +``` + +To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#zeroshot-classification). + + + + + diff --git a/scripts/api-inference/scripts/generate.ts b/scripts/api-inference/scripts/generate.ts index e81435a8a..96e92d6c4 100644 --- a/scripts/api-inference/scripts/generate.ts +++ b/scripts/api-inference/scripts/generate.ts @@ -5,14 +5,23 @@ import * as path from "node:path/posix"; import type { JsonObject } from "type-fest"; const TASKS: PipelineType[] = [ + "automatic-speech-recognition", + "audio-classification", + "feature-extraction", "fill-mask", + "image-classification", + "image-segmentation", "image-to-image", + "object-detection", "question-answering", "summarization", "table-question-answering", "text-classification", "text-generation", "text-to-image", + "token-classification", + "translation", + "zero-shot-classification", ]; const TASKS_EXTENDED = [...TASKS, "chat-completion"]; const SPECS_REVISION = "update-specification-for-docs"; @@ -184,13 +193,13 @@ function processPayloadSchema(schema: any): JsonObject[] { parentPrefix: string, ): void { const isRequired = required; - let type = value.type || "object"; + let type = value.type || "unknown"; let description = value.description || ""; if (value.$ref) { // Resolve the reference value = resolveRef(value.$ref); - type = value.type || "object"; + type = value.type || "unknown"; description = value.description || ""; } @@ -223,8 +232,9 @@ function processPayloadSchema(schema: any): JsonObject[] { if (addRow) { // Add the row to the table except if combination with only one option if (key.includes("(#")) { - // If it's a combination, no need to re-specify the type - type = ""; + // If it's a combination, no need to re-specify the type except if it's to + // specify a constant value. + type = value.const ? `'${value.const}'` : ""; } const row = { name: `${parentPrefix}${key}`, diff --git a/scripts/api-inference/templates/task/audio_classification.handlebars b/scripts/api-inference/templates/task/audio_classification.handlebars new file mode 100644 index 000000000..5f866f728 --- /dev/null +++ b/scripts/api-inference/templates/task/audio_classification.handlebars @@ -0,0 +1,29 @@ +## Audio Classification + +Audio classification is the task of assigning a label or class to a given audio. It can be used for recognizing which command a user is giving or the emotion of a statement, as well as identifying a speaker. 
+ +{{{tips.linksToTaskPage.audio-classification}}} + +### Recommended models + +{{#each models.audio-classification}} +- [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}} +{{/each}} + +{{{tips.listModelsLink.audio-classification}}} + +### API specification + +#### Request + +{{{specs.audio-classification.input}}} + +{{{constants.specsHeaders}}} + +#### Response + +{{{specs.audio-classification.output}}} + +### Using the API + +{{{snippets.audio-classification}}} diff --git a/scripts/api-inference/templates/task/automatic_speech_recognition.handlebars b/scripts/api-inference/templates/task/automatic_speech_recognition.handlebars new file mode 100644 index 000000000..008c65030 --- /dev/null +++ b/scripts/api-inference/templates/task/automatic_speech_recognition.handlebars @@ -0,0 +1,28 @@ +## Automatic Speech Recognition + +Automatic Speech Recognition (ASR), also known as Speech to Text (STT), is the task of transcribing a given audio to text. It has many applications, such as voice user interfaces. +{{{tips.linksToTaskPage.automatic-speech-recognition}}} + +### Recommended models + +{{#each models.automatic-speech-recognition}} +- [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}} +{{/each}} + +{{{tips.listModelsLink.automatic-speech-recognition}}} + +### API specification + +#### Request + +{{{specs.automatic-speech-recognition.input}}} + +{{{constants.specsHeaders}}} + +#### Response + +{{{specs.automatic-speech-recognition.output}}} + +### Using the API + +{{{snippets.automatic-speech-recognition}}} diff --git a/scripts/api-inference/templates/task/feature_extraction.handlebars b/scripts/api-inference/templates/task/feature_extraction.handlebars new file mode 100644 index 000000000..7e6f1b4be --- /dev/null +++ b/scripts/api-inference/templates/task/feature_extraction.handlebars @@ -0,0 +1,30 @@ +## Feature Extraction + +Feature extraction is the task of converting a text into a vector (often called "embedding"). +Extracting features is useful for subtasks like sentence similarity, reranking and retrieval augmented generation (RAG). + +{{{tips.linksToTaskPage.feature-extraction}}} + +### Recommended models + +{{#each models.feature-extraction}} +- [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}} +{{/each}} + +{{{tips.listModelsLink.feature-extraction}}} + +### API specification + +#### Request + +{{{specs.feature-extraction.input}}} + +{{{constants.specsHeaders}}} + +#### Response + +{{{specs.feature-extraction.output}}} + +### Using the API + +{{{snippets.feature-extraction}}} diff --git a/scripts/api-inference/templates/task/image_classification.handlebars b/scripts/api-inference/templates/task/image_classification.handlebars new file mode 100644 index 000000000..abfa0a147 --- /dev/null +++ b/scripts/api-inference/templates/task/image_classification.handlebars @@ -0,0 +1,29 @@ +## Image Classification + +Image classification is the task of assigning a label or class to an entire image. Images are expected to have only one class for each image. Image classification models take an image as input and return a prediction about which class the image belongs to. 
+ +{{{tips.linksToTaskPage.image-classification}}} + +### Recommended models + +{{#each models.image-classification}} +- [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}} +{{/each}} + +{{{tips.listModelsLink.image-classification}}} + +### API specification + +#### Request + +{{{specs.image-classification.input}}} + +{{{constants.specsHeaders}}} + +#### Response + +{{{specs.image-classification.output}}} + +### Using the API + +{{{snippets.image-classification}}} diff --git a/scripts/api-inference/templates/task/image_segmentation.handlebars b/scripts/api-inference/templates/task/image_segmentation.handlebars new file mode 100644 index 000000000..8f81ad5d2 --- /dev/null +++ b/scripts/api-inference/templates/task/image_segmentation.handlebars @@ -0,0 +1,29 @@ +## Image Segmentation + +Image Segmentation divides an image into segments where each pixel in the image is mapped to an object. This task has multiple variants such as instance segmentation, panoptic segmentation and semantic segmentation. + +{{{tips.linksToTaskPage.image-segmentation}}} + +### Recommended models + +{{#each models.image-segmentation}} +- [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}} +{{/each}} + +{{{tips.listModelsLink.image-segmentation}}} + +### API specification + +#### Request + +{{{specs.image-segmentation.input}}} + +{{{constants.specsHeaders}}} + +#### Response + +{{{specs.image-segmentation.output}}} + +### Using the API + +{{{snippets.image-segmentation}}} diff --git a/scripts/api-inference/templates/task/object_detection.handlebars b/scripts/api-inference/templates/task/object_detection.handlebars new file mode 100644 index 000000000..5e90a3092 --- /dev/null +++ b/scripts/api-inference/templates/task/object_detection.handlebars @@ -0,0 +1,29 @@ +## Object detection + +Object Detection models allow users to identify objects of certain defined classes. Object detection models receive an image as input and output the images with bounding boxes and labels on detected objects. + +{{{tips.linksToTaskPage.object-detection}}} + +### Recommended models + +{{#each models.object-detection}} +- [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}} +{{/each}} + +{{{tips.listModelsLink.object-detection}}} + +### API specification + +#### Request + +{{{specs.object-detection.input}}} + +{{{constants.specsHeaders}}} + +#### Response + +{{{specs.object-detection.output}}} + +### Using the API + +{{{snippets.object-detection}}} diff --git a/scripts/api-inference/templates/task/token_classification.handlebars b/scripts/api-inference/templates/task/token_classification.handlebars new file mode 100644 index 000000000..44f682145 --- /dev/null +++ b/scripts/api-inference/templates/task/token_classification.handlebars @@ -0,0 +1,37 @@ +## Token Classification + +Token classification is a task in which a label is assigned to some tokens in a text. Some popular token classification subtasks are Named Entity Recognition (NER) and Part-of-Speech (PoS) tagging. + +{{{tips.linksToTaskPage.token-classification}}} + +### Recommended models + +{{#each models.token-classification}} +- [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}} +{{/each}} + +{{{tips.listModelsLink.token-classification}}} + +### API specification + +#### Request + +{{{specs.token-classification.input}}} + +{{{constants.specsHeaders}}} + +#### Response + +Output type depends on the `stream` input parameter. 
+If `stream` is `false` (default), the response will be a JSON object with the following fields: + +{{{specs.token-classification.output}}} + +If `stream` is `true`, generated tokens are returned as a stream, using Server-Sent Events (SSE). +For more information about streaming, check out [this guide](https://huggingface.co/docs/token-classification-inference/conceptual/streaming). + +{{{specs.token-classification.stream_output}}} + +### Using the API + +{{{snippets.token-classification}}} diff --git a/scripts/api-inference/templates/task/translation.handlebars b/scripts/api-inference/templates/task/translation.handlebars new file mode 100644 index 000000000..02892102b --- /dev/null +++ b/scripts/api-inference/templates/task/translation.handlebars @@ -0,0 +1,29 @@ +## Translation + +Translation is the task of converting text from one language to another. + +{{{tips.linksToTaskPage.translation}}} + +### Recommended models + +{{#each models.translation}} +- [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}} +{{/each}} + +{{{tips.listModelsLink.translation}}} + +### API specification + +#### Request + +{{{specs.translation.input}}} + +{{{constants.specsHeaders}}} + +#### Response + +{{{specs.translation.output}}} + +### Using the API + +{{{snippets.translation}}} diff --git a/scripts/api-inference/templates/task/zero_shot_classification.handlebars b/scripts/api-inference/templates/task/zero_shot_classification.handlebars new file mode 100644 index 000000000..fd631a656 --- /dev/null +++ b/scripts/api-inference/templates/task/zero_shot_classification.handlebars @@ -0,0 +1,29 @@ +## Zero-Shot Classification + +Zero-shot text classification is super useful to try out classification with zero code, you simply pass a sentence/paragraph and the possible labels for that sentence, and you get a result. The model has not been necessarily trained on the labels you provide, but it can still predict the correct label. 
+ +{{{tips.linksToTaskPage.zero-shot-classification}}} + +### Recommended models + +{{#each models.zero-shot-classification}} +- [{{this.id}}](https://huggingface.co/{{this.id}}): {{this.description}} +{{/each}} + +{{{tips.listModelsLink.zero-shot-classification}}} + +### API specification + +#### Request + +{{{specs.zero-shot-classification.input}}} + +{{{constants.specsHeaders}}} + +#### Response + +{{{specs.zero-shot-classification.output}}} + +### Using the API + +{{{snippets.zero-shot-classification}}} From 528ea95d8f2fbd5ef2c14ca2f477fc1dfe83c4f5 Mon Sep 17 00:00:00 2001 From: Wauplin Date: Fri, 30 Aug 2024 10:32:34 +0200 Subject: [PATCH 31/38] regenerate --- docs/api-inference/tasks/audio_classification.md | 14 ++++++++++++++ .../tasks/automatic_speech_recognition.md | 14 ++++++++++++++ docs/api-inference/tasks/feature_extraction.md | 14 ++++++++++++++ docs/api-inference/tasks/image_classification.md | 14 ++++++++++++++ docs/api-inference/tasks/image_segmentation.md | 14 ++++++++++++++ docs/api-inference/tasks/object_detection.md | 14 ++++++++++++++ docs/api-inference/tasks/token_classification.md | 14 ++++++++++++++ docs/api-inference/tasks/translation.md | 14 ++++++++++++++ .../tasks/zero_shot_classification.md | 14 ++++++++++++++ 9 files changed, 126 insertions(+) diff --git a/docs/api-inference/tasks/audio_classification.md b/docs/api-inference/tasks/audio_classification.md index b4be60e6a..78aacebd3 100644 --- a/docs/api-inference/tasks/audio_classification.md +++ b/docs/api-inference/tasks/audio_classification.md @@ -1,3 +1,17 @@ + + ## Audio Classification Audio classification is the task of assigning a label or class to a given audio. It can be used for recognizing which command a user is giving or the emotion of a statement, as well as identifying a speaker. diff --git a/docs/api-inference/tasks/automatic_speech_recognition.md b/docs/api-inference/tasks/automatic_speech_recognition.md index ae84ed195..1362e0ccc 100644 --- a/docs/api-inference/tasks/automatic_speech_recognition.md +++ b/docs/api-inference/tasks/automatic_speech_recognition.md @@ -1,3 +1,17 @@ + + ## Automatic Speech Recognition Automatic Speech Recognition (ASR), also known as Speech to Text (STT), is the task of transcribing a given audio to text. It has many applications, such as voice user interfaces. diff --git a/docs/api-inference/tasks/feature_extraction.md b/docs/api-inference/tasks/feature_extraction.md index 76e61ca08..0d2f46dcf 100644 --- a/docs/api-inference/tasks/feature_extraction.md +++ b/docs/api-inference/tasks/feature_extraction.md @@ -1,3 +1,17 @@ + + ## Feature Extraction Feature extraction is the task of converting a text into a vector (often called "embedding"). diff --git a/docs/api-inference/tasks/image_classification.md b/docs/api-inference/tasks/image_classification.md index 0825ce381..7060c35a8 100644 --- a/docs/api-inference/tasks/image_classification.md +++ b/docs/api-inference/tasks/image_classification.md @@ -1,3 +1,17 @@ + + ## Image Classification Image classification is the task of assigning a label or class to an entire image. Images are expected to have only one class for each image. Image classification models take an image as input and return a prediction about which class the image belongs to. 
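As a concrete sketch of the request this generated page documents, the snippet below posts raw image bytes to the task endpoint and reads back label/score pairs. It is only a minimal illustration: the model ID (`google/vit-base-patch16-224`) and the `hf_***` token are placeholder assumptions, not the recommended model injected by the `{{#each models.image-classification}}` loop.

```python
import requests

# Illustrative model ID and token placeholder -- substitute your own values.
API_URL = "https://api-inference.huggingface.co/models/google/vit-base-patch16-224"
headers = {"Authorization": "Bearer hf_***"}

def classify(image_path: str):
    # The image is sent as raw bytes in the request body; the API is expected
    # to return a list of {"label": ..., "score": ...} entries.
    with open(image_path, "rb") as f:
        response = requests.post(API_URL, headers=headers, data=f.read())
    response.raise_for_status()
    return response.json()

print(classify("cats.jpg"))
```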
diff --git a/docs/api-inference/tasks/image_segmentation.md b/docs/api-inference/tasks/image_segmentation.md index eb4dfd506..c88e29655 100644 --- a/docs/api-inference/tasks/image_segmentation.md +++ b/docs/api-inference/tasks/image_segmentation.md @@ -1,3 +1,17 @@ + + ## Image Segmentation Image Segmentation divides an image into segments where each pixel in the image is mapped to an object. This task has multiple variants such as instance segmentation, panoptic segmentation and semantic segmentation. diff --git a/docs/api-inference/tasks/object_detection.md b/docs/api-inference/tasks/object_detection.md index 6c83e973f..76b9dbcc5 100644 --- a/docs/api-inference/tasks/object_detection.md +++ b/docs/api-inference/tasks/object_detection.md @@ -1,3 +1,17 @@ + + ## Object detection Object Detection models allow users to identify objects of certain defined classes. Object detection models receive an image as input and output the images with bounding boxes and labels on detected objects. diff --git a/docs/api-inference/tasks/token_classification.md b/docs/api-inference/tasks/token_classification.md index df34ab08b..1113cf39e 100644 --- a/docs/api-inference/tasks/token_classification.md +++ b/docs/api-inference/tasks/token_classification.md @@ -1,3 +1,17 @@ + + ## Token Classification Token classification is a task in which a label is assigned to some tokens in a text. Some popular token classification subtasks are Named Entity Recognition (NER) and Part-of-Speech (PoS) tagging. diff --git a/docs/api-inference/tasks/translation.md b/docs/api-inference/tasks/translation.md index b92c0abe6..c924a8bd0 100644 --- a/docs/api-inference/tasks/translation.md +++ b/docs/api-inference/tasks/translation.md @@ -1,3 +1,17 @@ + + ## Translation Translation is the task of converting text from one language to another. diff --git a/docs/api-inference/tasks/zero_shot_classification.md b/docs/api-inference/tasks/zero_shot_classification.md index 7460ddbdd..0c2f4dfa9 100644 --- a/docs/api-inference/tasks/zero_shot_classification.md +++ b/docs/api-inference/tasks/zero_shot_classification.md @@ -1,3 +1,17 @@ + + ## Zero-Shot Classification Zero-shot text classification is super useful to try out classification with zero code, you simply pass a sentence/paragraph and the possible labels for that sentence, and you get a result. The model has not been necessarily trained on the labels you provide, but it can still predict the correct label. 
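To make the zero-shot flow described above concrete, the sketch below passes a sentence together with a `candidate_labels` list under `parameters`. The model (`facebook/bart-large-mnli`), token placeholder (`hf_***`), and labels are illustrative assumptions; the generated page's own recommended models may differ.

```python
import requests

# Illustrative model ID and token placeholder -- substitute your own values.
API_URL = "https://api-inference.huggingface.co/models/facebook/bart-large-mnli"
headers = {"Authorization": "Bearer hf_***"}

payload = {
    "inputs": "Hi, I recently bought a device from your company but it is not working as advertised.",
    # Labels the model was not explicitly trained on; it still ranks them.
    "parameters": {"candidate_labels": ["refund", "legal", "faq"]},
}

response = requests.post(API_URL, headers=headers, json=payload)
response.raise_for_status()
# The response ranks the candidate labels by score.
print(response.json())
```

Because the labels are supplied at request time and simply come back ranked by score, no extra code or fine-tuning is needed, which is the "zero code" behavior the description above refers to.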
From f267d8601180c03551cf1649e04a999861c98930 Mon Sep 17 00:00:00 2001 From: Wauplin Date: Fri, 30 Aug 2024 10:50:35 +0200 Subject: [PATCH 32/38] pull from main --- scripts/api-inference/scripts/generate.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/api-inference/scripts/generate.ts b/scripts/api-inference/scripts/generate.ts index 96e92d6c4..c83b7c1c6 100644 --- a/scripts/api-inference/scripts/generate.ts +++ b/scripts/api-inference/scripts/generate.ts @@ -24,7 +24,7 @@ const TASKS: PipelineType[] = [ "zero-shot-classification", ]; const TASKS_EXTENDED = [...TASKS, "chat-completion"]; -const SPECS_REVISION = "update-specification-for-docs"; +const SPECS_REVISION = "main"; const inferenceSnippetLanguages = ["python", "js", "curl"] as const; type InferenceSnippetLanguage = (typeof inferenceSnippetLanguages)[number]; From ed5e37b936c3de8e384e950ce38ec6520b8cc962 Mon Sep 17 00:00:00 2001 From: Wauplin Date: Wed, 4 Sep 2024 12:17:21 +0200 Subject: [PATCH 33/38] coding style --- scripts/api-inference/scripts/generate.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/api-inference/scripts/generate.ts b/scripts/api-inference/scripts/generate.ts index c83b7c1c6..442a8ab7f 100644 --- a/scripts/api-inference/scripts/generate.ts +++ b/scripts/api-inference/scripts/generate.ts @@ -403,7 +403,7 @@ TASKS.forEach((task) => { TASKS.forEach((task) => { // Let's take as example the first available model that is recommended. // Otherwise, fallback to "". - const mainModel = DATA.models[task][0]?.id || ""; + const mainModel = DATA.models[task][0]?.id ?? ""; const taskSnippets = { curl: getInferenceSnippet(mainModel, task, "curl"), python: getInferenceSnippet(mainModel, task, "python"), From 2e1e64d01c654619f7c94875a391b2d9d65e70c6 Mon Sep 17 00:00:00 2001 From: Omar Sanseviero Date: Wed, 4 Sep 2024 13:04:56 +0200 Subject: [PATCH 34/38] Update _redirects.yml --- docs/api-inference/_redirects.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/api-inference/_redirects.yml b/docs/api-inference/_redirects.yml index f26e94330..aab354ba5 100644 --- a/docs/api-inference/_redirects.yml +++ b/docs/api-inference/_redirects.yml @@ -1,5 +1,5 @@ quicktour: index detailed_parameters: parameters -parallelism: TODO +parallelism: getting_started usage: getting_started -faq: index \ No newline at end of file +faq: index From bf973e0f8a7310faa219931f973c838a3472b924 Mon Sep 17 00:00:00 2001 From: Lucain Date: Wed, 4 Sep 2024 17:41:04 +0200 Subject: [PATCH 35/38] Rename all tasks '_' to '-' (#1405) * Rename all tasks '_' to '-' * also for other urls --- docs/api-inference/_toctree.yml | 38 +++++++++---------- ...{getting_started.md => getting-started.md} | 0 .../{rate_limits.md => rate-limits.md} | 0 ...upported_models.md => supported-models.md} | 2 +- ...ssification.md => audio-classification.md} | 2 +- ...ion.md => automatic-speech-recognition.md} | 2 +- ...{chat_completion.md => chat-completion.md} | 2 +- ...re_extraction.md => feature-extraction.md} | 2 +- .../tasks/{fill_mask.md => fill-mask.md} | 2 +- ...ssification.md => image-classification.md} | 2 +- ..._segmentation.md => image-segmentation.md} | 2 +- .../{image_to_image.md => image-to-image.md} | 2 +- ...bject_detection.md => object-detection.md} | 2 +- ...ion_answering.md => question-answering.md} | 2 +- ...swering.md => table-question-answering.md} | 2 +- ...assification.md => text-classification.md} | 2 +- ...{text_generation.md => text-generation.md} | 2 +- .../{text_to_image.md 
=> text-to-image.md} | 9 +++-- ...ssification.md => token-classification.md} | 2 +- ...ication.md => zero-shot-classification.md} | 2 +- scripts/api-inference/scripts/generate.ts | 10 ++--- ...ader.handlebars => page-header.handlebars} | 2 +- ...andlebars => snippets-template.handlebars} | 0 ...rs.handlebars => specs-headers.handlebars} | 0 ...put.handlebars => specs-output.handlebars} | 0 ...ad.handlebars => specs-payload.handlebars} | 0 ...lebars => audio-classification.handlebars} | 0 ...> automatic-speech-recognition.handlebars} | 0 ....handlebars => chat-completion.handlebars} | 0 ...ndlebars => feature-extraction.handlebars} | 0 ...l_mask.handlebars => fill-mask.handlebars} | 0 ...lebars => image-classification.handlebars} | 0 ...ndlebars => image-segmentation.handlebars} | 0 ...e.handlebars => image-to-image.handlebars} | 0 ...handlebars => object-detection.handlebars} | 0 ...ndlebars => question-answering.handlebars} | 0 ...rs => table-question-answering.handlebars} | 0 ...dlebars => text-classification.handlebars} | 0 ....handlebars => text-generation.handlebars} | 0 ...ge.handlebars => text-to-image.handlebars} | 0 ...lebars => token-classification.handlebars} | 0 ...rs => zero-shot-classification.handlebars} | 0 42 files changed, 45 insertions(+), 46 deletions(-) rename docs/api-inference/{getting_started.md => getting-started.md} (100%) rename docs/api-inference/{rate_limits.md => rate-limits.md} (100%) rename docs/api-inference/{supported_models.md => supported-models.md} (97%) rename docs/api-inference/tasks/{audio_classification.md => audio-classification.md} (99%) rename docs/api-inference/tasks/{automatic_speech_recognition.md => automatic-speech-recognition.md} (99%) rename docs/api-inference/tasks/{chat_completion.md => chat-completion.md} (99%) rename docs/api-inference/tasks/{feature_extraction.md => feature-extraction.md} (99%) rename docs/api-inference/tasks/{fill_mask.md => fill-mask.md} (99%) rename docs/api-inference/tasks/{image_classification.md => image-classification.md} (99%) rename docs/api-inference/tasks/{image_segmentation.md => image-segmentation.md} (99%) rename docs/api-inference/tasks/{image_to_image.md => image-to-image.md} (99%) rename docs/api-inference/tasks/{object_detection.md => object-detection.md} (99%) rename docs/api-inference/tasks/{question_answering.md => question-answering.md} (99%) rename docs/api-inference/tasks/{table_question_answering.md => table-question-answering.md} (99%) rename docs/api-inference/tasks/{text_classification.md => text-classification.md} (99%) rename docs/api-inference/tasks/{text_generation.md => text-generation.md} (99%) rename docs/api-inference/tasks/{text_to_image.md => text-to-image.md} (91%) rename docs/api-inference/tasks/{token_classification.md => token-classification.md} (99%) rename docs/api-inference/tasks/{zero_shot_classification.md => zero-shot-classification.md} (99%) rename scripts/api-inference/templates/common/{page_header.handlebars => page-header.handlebars} (96%) rename scripts/api-inference/templates/common/{snippets_template.handlebars => snippets-template.handlebars} (100%) rename scripts/api-inference/templates/common/{specs_headers.handlebars => specs-headers.handlebars} (100%) rename scripts/api-inference/templates/common/{specs_output.handlebars => specs-output.handlebars} (100%) rename scripts/api-inference/templates/common/{specs_payload.handlebars => specs-payload.handlebars} (100%) rename scripts/api-inference/templates/task/{audio_classification.handlebars => 
audio-classification.handlebars} (100%) rename scripts/api-inference/templates/task/{automatic_speech_recognition.handlebars => automatic-speech-recognition.handlebars} (100%) rename scripts/api-inference/templates/task/{chat_completion.handlebars => chat-completion.handlebars} (100%) rename scripts/api-inference/templates/task/{feature_extraction.handlebars => feature-extraction.handlebars} (100%) rename scripts/api-inference/templates/task/{fill_mask.handlebars => fill-mask.handlebars} (100%) rename scripts/api-inference/templates/task/{image_classification.handlebars => image-classification.handlebars} (100%) rename scripts/api-inference/templates/task/{image_segmentation.handlebars => image-segmentation.handlebars} (100%) rename scripts/api-inference/templates/task/{image_to_image.handlebars => image-to-image.handlebars} (100%) rename scripts/api-inference/templates/task/{object_detection.handlebars => object-detection.handlebars} (100%) rename scripts/api-inference/templates/task/{question_answering.handlebars => question-answering.handlebars} (100%) rename scripts/api-inference/templates/task/{table_question_answering.handlebars => table-question-answering.handlebars} (100%) rename scripts/api-inference/templates/task/{text_classification.handlebars => text-classification.handlebars} (100%) rename scripts/api-inference/templates/task/{text_generation.handlebars => text-generation.handlebars} (100%) rename scripts/api-inference/templates/task/{text_to_image.handlebars => text-to-image.handlebars} (100%) rename scripts/api-inference/templates/task/{token_classification.handlebars => token-classification.handlebars} (100%) rename scripts/api-inference/templates/task/{zero_shot_classification.handlebars => zero-shot-classification.handlebars} (100%) diff --git a/docs/api-inference/_toctree.yml b/docs/api-inference/_toctree.yml index fc03265c1..c3cea310e 100644 --- a/docs/api-inference/_toctree.yml +++ b/docs/api-inference/_toctree.yml @@ -1,52 +1,52 @@ - sections: - local: index title: Serverless Inference API - - local: getting_started + - local: getting-started title: Getting Started - - local: supported_models + - local: supported-models title: Supported Models - - local: rate_limits + - local: rate-limits title: Rate Limits title: Getting Started - sections: - local: parameters title: Parameters - sections: - - local: tasks/audio_classification + - local: tasks/audio-classification title: Audio Classification - - local: tasks/automatic_speech_recognition + - local: tasks/automatic-speech-recognition title: Automatic Speech Recognition - - local: tasks/chat_completion + - local: tasks/chat-completion title: Chat Completion - - local: tasks/feature_extraction + - local: tasks/feature-extraction title: Feature Extraction - - local: tasks/fill_mask + - local: tasks/fill-mask title: Fill Mask - - local: tasks/image_classification + - local: tasks/image-classification title: Image Classification - - local: tasks/image_segmentation + - local: tasks/image-segmentation title: Image Segmentation - - local: tasks/image_to_image + - local: tasks/image-to-image title: Image to Image - - local: tasks/object_detection + - local: tasks/object-detection title: Object Detection - - local: tasks/question_answering + - local: tasks/question-answering title: Question Answering - local: tasks/summarization title: Summarization - - local: tasks/table_question_answering + - local: tasks/table-question-answering title: Table Question Answering - - local: tasks/text_classification + - local: 
tasks/text-classification title: Text Classification - - local: tasks/text_generation + - local: tasks/text-generation title: Text Generation - - local: tasks/text_to_image + - local: tasks/text-to-image title: Text to Image - - local: tasks/token_classification + - local: tasks/token-classification title: Token Classification - local: tasks/translation title: Translation - - local: tasks/zero_shot_classification + - local: tasks/zero-shot-classification title: Zero Shot Classification title: Detailed Task Parameters title: API Reference \ No newline at end of file diff --git a/docs/api-inference/getting_started.md b/docs/api-inference/getting-started.md similarity index 100% rename from docs/api-inference/getting_started.md rename to docs/api-inference/getting-started.md diff --git a/docs/api-inference/rate_limits.md b/docs/api-inference/rate-limits.md similarity index 100% rename from docs/api-inference/rate_limits.md rename to docs/api-inference/rate-limits.md diff --git a/docs/api-inference/supported_models.md b/docs/api-inference/supported-models.md similarity index 97% rename from docs/api-inference/supported_models.md rename to docs/api-inference/supported-models.md index f3138699b..94ad3e9f4 100644 --- a/docs/api-inference/supported_models.md +++ b/docs/api-inference/supported-models.md @@ -12,7 +12,7 @@ TODO: add screenshot ## What do I get with a PRO subscription? -In addition to thousands of public models available in the Hub, PRO and Enterprise users get higher [rate limits](./rate_limits) and free access to the following models: +In addition to thousands of public models available in the Hub, PRO and Enterprise users get higher [rate limits](./rate-limits) and free access to the following models: | Model | Size | Context Length | Use | diff --git a/docs/api-inference/tasks/audio_classification.md b/docs/api-inference/tasks/audio-classification.md similarity index 99% rename from docs/api-inference/tasks/audio_classification.md rename to docs/api-inference/tasks/audio-classification.md index 78aacebd3..099afedf6 100644 --- a/docs/api-inference/tasks/audio_classification.md +++ b/docs/api-inference/tasks/audio-classification.md @@ -2,7 +2,7 @@ This markdown file has been generated from a script. Please do not edit it directly. 
For more details, check out: - the `generate.ts` script: https://github.com/huggingface/hub-docs/blob/main/scripts/api-inference/scripts/generate.ts -- the task template defining the sections in the page: https://github.com/huggingface/hub-docs/tree/main/scripts/api-inference/templates/task/audio_classification.handlebars +- the task template defining the sections in the page: https://github.com/huggingface/hub-docs/tree/main/scripts/api-inference/templates/task/audio-classification.handlebars - the input jsonschema specifications used to generate the input markdown table: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/tasks/audio-classification/spec/input.json - the output jsonschema specifications used to generate the output markdown table: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/tasks/audio-classification/spec/output.json - the snippets used to generate the example: diff --git a/docs/api-inference/tasks/automatic_speech_recognition.md b/docs/api-inference/tasks/automatic-speech-recognition.md similarity index 99% rename from docs/api-inference/tasks/automatic_speech_recognition.md rename to docs/api-inference/tasks/automatic-speech-recognition.md index 1362e0ccc..88e98003a 100644 --- a/docs/api-inference/tasks/automatic_speech_recognition.md +++ b/docs/api-inference/tasks/automatic-speech-recognition.md @@ -2,7 +2,7 @@ This markdown file has been generated from a script. Please do not edit it directly. For more details, check out: - the `generate.ts` script: https://github.com/huggingface/hub-docs/blob/main/scripts/api-inference/scripts/generate.ts -- the task template defining the sections in the page: https://github.com/huggingface/hub-docs/tree/main/scripts/api-inference/templates/task/automatic_speech_recognition.handlebars +- the task template defining the sections in the page: https://github.com/huggingface/hub-docs/tree/main/scripts/api-inference/templates/task/automatic-speech-recognition.handlebars - the input jsonschema specifications used to generate the input markdown table: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/tasks/automatic-speech-recognition/spec/input.json - the output jsonschema specifications used to generate the output markdown table: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/tasks/automatic-speech-recognition/spec/output.json - the snippets used to generate the example: diff --git a/docs/api-inference/tasks/chat_completion.md b/docs/api-inference/tasks/chat-completion.md similarity index 99% rename from docs/api-inference/tasks/chat_completion.md rename to docs/api-inference/tasks/chat-completion.md index 585e31e70..561081309 100644 --- a/docs/api-inference/tasks/chat_completion.md +++ b/docs/api-inference/tasks/chat-completion.md @@ -2,7 +2,7 @@ This markdown file has been generated from a script. Please do not edit it directly. 
For more details, check out: - the `generate.ts` script: https://github.com/huggingface/hub-docs/blob/main/scripts/api-inference/scripts/generate.ts -- the task template defining the sections in the page: https://github.com/huggingface/hub-docs/tree/main/scripts/api-inference/templates/task/chat_completion.handlebars +- the task template defining the sections in the page: https://github.com/huggingface/hub-docs/tree/main/scripts/api-inference/templates/task/chat-completion.handlebars - the input jsonschema specifications used to generate the input markdown table: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/tasks/chat-completion/spec/input.json - the output jsonschema specifications used to generate the output markdown table: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/tasks/chat-completion/spec/output.json - the snippets used to generate the example: diff --git a/docs/api-inference/tasks/feature_extraction.md b/docs/api-inference/tasks/feature-extraction.md similarity index 99% rename from docs/api-inference/tasks/feature_extraction.md rename to docs/api-inference/tasks/feature-extraction.md index 0d2f46dcf..2762418b8 100644 --- a/docs/api-inference/tasks/feature_extraction.md +++ b/docs/api-inference/tasks/feature-extraction.md @@ -2,7 +2,7 @@ This markdown file has been generated from a script. Please do not edit it directly. For more details, check out: - the `generate.ts` script: https://github.com/huggingface/hub-docs/blob/main/scripts/api-inference/scripts/generate.ts -- the task template defining the sections in the page: https://github.com/huggingface/hub-docs/tree/main/scripts/api-inference/templates/task/feature_extraction.handlebars +- the task template defining the sections in the page: https://github.com/huggingface/hub-docs/tree/main/scripts/api-inference/templates/task/feature-extraction.handlebars - the input jsonschema specifications used to generate the input markdown table: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/tasks/feature-extraction/spec/input.json - the output jsonschema specifications used to generate the output markdown table: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/tasks/feature-extraction/spec/output.json - the snippets used to generate the example: diff --git a/docs/api-inference/tasks/fill_mask.md b/docs/api-inference/tasks/fill-mask.md similarity index 99% rename from docs/api-inference/tasks/fill_mask.md rename to docs/api-inference/tasks/fill-mask.md index efe000c8f..d9bdacc2a 100644 --- a/docs/api-inference/tasks/fill_mask.md +++ b/docs/api-inference/tasks/fill-mask.md @@ -2,7 +2,7 @@ This markdown file has been generated from a script. Please do not edit it directly. 
For more details, check out: - the `generate.ts` script: https://github.com/huggingface/hub-docs/blob/main/scripts/api-inference/scripts/generate.ts -- the task template defining the sections in the page: https://github.com/huggingface/hub-docs/tree/main/scripts/api-inference/templates/task/fill_mask.handlebars +- the task template defining the sections in the page: https://github.com/huggingface/hub-docs/tree/main/scripts/api-inference/templates/task/fill-mask.handlebars - the input jsonschema specifications used to generate the input markdown table: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/tasks/fill-mask/spec/input.json - the output jsonschema specifications used to generate the output markdown table: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/tasks/fill-mask/spec/output.json - the snippets used to generate the example: diff --git a/docs/api-inference/tasks/image_classification.md b/docs/api-inference/tasks/image-classification.md similarity index 99% rename from docs/api-inference/tasks/image_classification.md rename to docs/api-inference/tasks/image-classification.md index 7060c35a8..3f418adc9 100644 --- a/docs/api-inference/tasks/image_classification.md +++ b/docs/api-inference/tasks/image-classification.md @@ -2,7 +2,7 @@ This markdown file has been generated from a script. Please do not edit it directly. For more details, check out: - the `generate.ts` script: https://github.com/huggingface/hub-docs/blob/main/scripts/api-inference/scripts/generate.ts -- the task template defining the sections in the page: https://github.com/huggingface/hub-docs/tree/main/scripts/api-inference/templates/task/image_classification.handlebars +- the task template defining the sections in the page: https://github.com/huggingface/hub-docs/tree/main/scripts/api-inference/templates/task/image-classification.handlebars - the input jsonschema specifications used to generate the input markdown table: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/tasks/image-classification/spec/input.json - the output jsonschema specifications used to generate the output markdown table: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/tasks/image-classification/spec/output.json - the snippets used to generate the example: diff --git a/docs/api-inference/tasks/image_segmentation.md b/docs/api-inference/tasks/image-segmentation.md similarity index 99% rename from docs/api-inference/tasks/image_segmentation.md rename to docs/api-inference/tasks/image-segmentation.md index c88e29655..610da6fa5 100644 --- a/docs/api-inference/tasks/image_segmentation.md +++ b/docs/api-inference/tasks/image-segmentation.md @@ -2,7 +2,7 @@ This markdown file has been generated from a script. Please do not edit it directly. 
For more details, check out: - the `generate.ts` script: https://github.com/huggingface/hub-docs/blob/main/scripts/api-inference/scripts/generate.ts -- the task template defining the sections in the page: https://github.com/huggingface/hub-docs/tree/main/scripts/api-inference/templates/task/image_segmentation.handlebars +- the task template defining the sections in the page: https://github.com/huggingface/hub-docs/tree/main/scripts/api-inference/templates/task/image-segmentation.handlebars - the input jsonschema specifications used to generate the input markdown table: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/tasks/image-segmentation/spec/input.json - the output jsonschema specifications used to generate the output markdown table: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/tasks/image-segmentation/spec/output.json - the snippets used to generate the example: diff --git a/docs/api-inference/tasks/image_to_image.md b/docs/api-inference/tasks/image-to-image.md similarity index 99% rename from docs/api-inference/tasks/image_to_image.md rename to docs/api-inference/tasks/image-to-image.md index c116548d2..078c6f0f3 100644 --- a/docs/api-inference/tasks/image_to_image.md +++ b/docs/api-inference/tasks/image-to-image.md @@ -2,7 +2,7 @@ This markdown file has been generated from a script. Please do not edit it directly. For more details, check out: - the `generate.ts` script: https://github.com/huggingface/hub-docs/blob/main/scripts/api-inference/scripts/generate.ts -- the task template defining the sections in the page: https://github.com/huggingface/hub-docs/tree/main/scripts/api-inference/templates/task/image_to_image.handlebars +- the task template defining the sections in the page: https://github.com/huggingface/hub-docs/tree/main/scripts/api-inference/templates/task/image-to-image.handlebars - the input jsonschema specifications used to generate the input markdown table: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/tasks/image-to-image/spec/input.json - the output jsonschema specifications used to generate the output markdown table: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/tasks/image-to-image/spec/output.json - the snippets used to generate the example: diff --git a/docs/api-inference/tasks/object_detection.md b/docs/api-inference/tasks/object-detection.md similarity index 99% rename from docs/api-inference/tasks/object_detection.md rename to docs/api-inference/tasks/object-detection.md index 76b9dbcc5..57cf86143 100644 --- a/docs/api-inference/tasks/object_detection.md +++ b/docs/api-inference/tasks/object-detection.md @@ -2,7 +2,7 @@ This markdown file has been generated from a script. Please do not edit it directly. 
For more details, check out: - the `generate.ts` script: https://github.com/huggingface/hub-docs/blob/main/scripts/api-inference/scripts/generate.ts -- the task template defining the sections in the page: https://github.com/huggingface/hub-docs/tree/main/scripts/api-inference/templates/task/object_detection.handlebars +- the task template defining the sections in the page: https://github.com/huggingface/hub-docs/tree/main/scripts/api-inference/templates/task/object-detection.handlebars - the input jsonschema specifications used to generate the input markdown table: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/tasks/object-detection/spec/input.json - the output jsonschema specifications used to generate the output markdown table: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/tasks/object-detection/spec/output.json - the snippets used to generate the example: diff --git a/docs/api-inference/tasks/question_answering.md b/docs/api-inference/tasks/question-answering.md similarity index 99% rename from docs/api-inference/tasks/question_answering.md rename to docs/api-inference/tasks/question-answering.md index ff7e471e4..5fc2b9766 100644 --- a/docs/api-inference/tasks/question_answering.md +++ b/docs/api-inference/tasks/question-answering.md @@ -2,7 +2,7 @@ This markdown file has been generated from a script. Please do not edit it directly. For more details, check out: - the `generate.ts` script: https://github.com/huggingface/hub-docs/blob/main/scripts/api-inference/scripts/generate.ts -- the task template defining the sections in the page: https://github.com/huggingface/hub-docs/tree/main/scripts/api-inference/templates/task/question_answering.handlebars +- the task template defining the sections in the page: https://github.com/huggingface/hub-docs/tree/main/scripts/api-inference/templates/task/question-answering.handlebars - the input jsonschema specifications used to generate the input markdown table: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/tasks/question-answering/spec/input.json - the output jsonschema specifications used to generate the output markdown table: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/tasks/question-answering/spec/output.json - the snippets used to generate the example: diff --git a/docs/api-inference/tasks/table_question_answering.md b/docs/api-inference/tasks/table-question-answering.md similarity index 99% rename from docs/api-inference/tasks/table_question_answering.md rename to docs/api-inference/tasks/table-question-answering.md index f33677312..d1d115f3b 100644 --- a/docs/api-inference/tasks/table_question_answering.md +++ b/docs/api-inference/tasks/table-question-answering.md @@ -2,7 +2,7 @@ This markdown file has been generated from a script. Please do not edit it directly. 
For more details, check out: - the `generate.ts` script: https://github.com/huggingface/hub-docs/blob/main/scripts/api-inference/scripts/generate.ts -- the task template defining the sections in the page: https://github.com/huggingface/hub-docs/tree/main/scripts/api-inference/templates/task/table_question_answering.handlebars +- the task template defining the sections in the page: https://github.com/huggingface/hub-docs/tree/main/scripts/api-inference/templates/task/table-question-answering.handlebars - the input jsonschema specifications used to generate the input markdown table: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/tasks/table-question-answering/spec/input.json - the output jsonschema specifications used to generate the output markdown table: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/tasks/table-question-answering/spec/output.json - the snippets used to generate the example: diff --git a/docs/api-inference/tasks/text_classification.md b/docs/api-inference/tasks/text-classification.md similarity index 99% rename from docs/api-inference/tasks/text_classification.md rename to docs/api-inference/tasks/text-classification.md index 8d4a8a8e1..7f99c5cfd 100644 --- a/docs/api-inference/tasks/text_classification.md +++ b/docs/api-inference/tasks/text-classification.md @@ -2,7 +2,7 @@ This markdown file has been generated from a script. Please do not edit it directly. For more details, check out: - the `generate.ts` script: https://github.com/huggingface/hub-docs/blob/main/scripts/api-inference/scripts/generate.ts -- the task template defining the sections in the page: https://github.com/huggingface/hub-docs/tree/main/scripts/api-inference/templates/task/text_classification.handlebars +- the task template defining the sections in the page: https://github.com/huggingface/hub-docs/tree/main/scripts/api-inference/templates/task/text-classification.handlebars - the input jsonschema specifications used to generate the input markdown table: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/tasks/text-classification/spec/input.json - the output jsonschema specifications used to generate the output markdown table: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/tasks/text-classification/spec/output.json - the snippets used to generate the example: diff --git a/docs/api-inference/tasks/text_generation.md b/docs/api-inference/tasks/text-generation.md similarity index 99% rename from docs/api-inference/tasks/text_generation.md rename to docs/api-inference/tasks/text-generation.md index 2b75a6222..eca329d91 100644 --- a/docs/api-inference/tasks/text_generation.md +++ b/docs/api-inference/tasks/text-generation.md @@ -2,7 +2,7 @@ This markdown file has been generated from a script. Please do not edit it directly. 
For more details, check out: - the `generate.ts` script: https://github.com/huggingface/hub-docs/blob/main/scripts/api-inference/scripts/generate.ts -- the task template defining the sections in the page: https://github.com/huggingface/hub-docs/tree/main/scripts/api-inference/templates/task/text_generation.handlebars +- the task template defining the sections in the page: https://github.com/huggingface/hub-docs/tree/main/scripts/api-inference/templates/task/text-generation.handlebars - the input jsonschema specifications used to generate the input markdown table: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/tasks/text-generation/spec/input.json - the output jsonschema specifications used to generate the output markdown table: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/tasks/text-generation/spec/output.json - the snippets used to generate the example: diff --git a/docs/api-inference/tasks/text_to_image.md b/docs/api-inference/tasks/text-to-image.md similarity index 91% rename from docs/api-inference/tasks/text_to_image.md rename to docs/api-inference/tasks/text-to-image.md index 29783810a..0ac92293c 100644 --- a/docs/api-inference/tasks/text_to_image.md +++ b/docs/api-inference/tasks/text-to-image.md @@ -2,7 +2,7 @@ This markdown file has been generated from a script. Please do not edit it directly. For more details, check out: - the `generate.ts` script: https://github.com/huggingface/hub-docs/blob/main/scripts/api-inference/scripts/generate.ts -- the task template defining the sections in the page: https://github.com/huggingface/hub-docs/tree/main/scripts/api-inference/templates/task/text_to_image.handlebars +- the task template defining the sections in the page: https://github.com/huggingface/hub-docs/tree/main/scripts/api-inference/templates/task/text-to-image.handlebars - the input jsonschema specifications used to generate the input markdown table: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/tasks/text-to-image/spec/input.json - the output jsonschema specifications used to generate the output markdown table: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/tasks/text-to-image/spec/output.json - the snippets used to generate the example: @@ -38,13 +38,14 @@ This is only a subset of the supported models. Find the model that suits you bes | :--- | :--- | :--- | | **inputs*** | _string_ | The input text data (sometimes called "prompt") | | **parameters** | _object_ | Additional inference parameters for Text To Image | -| **        guidance_scale** | _number_ | For diffusion models. A higher guidance scale value encourages the model to generate images closely linked to the text prompt at the expense of lower image quality. | +| **        guidance_scale** | _number_ | A higher guidance scale value encourages the model to generate images closely linked to the text prompt, but values too high may cause saturation and other artifacts. | | **        negative_prompt** | _string[]_ | One or several prompt to guide what NOT to include in image generation. | -| **        num_inference_steps** | _integer_ | For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference. | +| **        num_inference_steps** | _integer_ | The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference. 
| | **        target_size** | _object_ | The size in pixel of the output image | | **                width*** | _integer_ | | | **                height*** | _integer_ | | -| **        scheduler** | _string_ | For diffusion models. Override the scheduler with a compatible one | +| **        scheduler** | _string_ | Override the scheduler with a compatible one. | +| **        seed** | _integer_ | Seed for the random number generator. | Some options can be configured by passing headers to the Inference API. Here are the available headers: diff --git a/docs/api-inference/tasks/token_classification.md b/docs/api-inference/tasks/token-classification.md similarity index 99% rename from docs/api-inference/tasks/token_classification.md rename to docs/api-inference/tasks/token-classification.md index 1113cf39e..888ef8093 100644 --- a/docs/api-inference/tasks/token_classification.md +++ b/docs/api-inference/tasks/token-classification.md @@ -2,7 +2,7 @@ This markdown file has been generated from a script. Please do not edit it directly. For more details, check out: - the `generate.ts` script: https://github.com/huggingface/hub-docs/blob/main/scripts/api-inference/scripts/generate.ts -- the task template defining the sections in the page: https://github.com/huggingface/hub-docs/tree/main/scripts/api-inference/templates/task/token_classification.handlebars +- the task template defining the sections in the page: https://github.com/huggingface/hub-docs/tree/main/scripts/api-inference/templates/task/token-classification.handlebars - the input jsonschema specifications used to generate the input markdown table: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/tasks/token-classification/spec/input.json - the output jsonschema specifications used to generate the output markdown table: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/tasks/token-classification/spec/output.json - the snippets used to generate the example: diff --git a/docs/api-inference/tasks/zero_shot_classification.md b/docs/api-inference/tasks/zero-shot-classification.md similarity index 99% rename from docs/api-inference/tasks/zero_shot_classification.md rename to docs/api-inference/tasks/zero-shot-classification.md index 0c2f4dfa9..ab3404d62 100644 --- a/docs/api-inference/tasks/zero_shot_classification.md +++ b/docs/api-inference/tasks/zero-shot-classification.md @@ -2,7 +2,7 @@ This markdown file has been generated from a script. Please do not edit it directly. 
For more details, check out: - the `generate.ts` script: https://github.com/huggingface/hub-docs/blob/main/scripts/api-inference/scripts/generate.ts -- the task template defining the sections in the page: https://github.com/huggingface/hub-docs/tree/main/scripts/api-inference/templates/task/zero_shot_classification.handlebars +- the task template defining the sections in the page: https://github.com/huggingface/hub-docs/tree/main/scripts/api-inference/templates/task/zero-shot-classification.handlebars - the input jsonschema specifications used to generate the input markdown table: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/tasks/zero-shot-classification/spec/input.json - the output jsonschema specifications used to generate the output markdown table: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/tasks/zero-shot-classification/spec/output.json - the snippets used to generate the example: diff --git a/scripts/api-inference/scripts/generate.ts b/scripts/api-inference/scripts/generate.ts index 442a8ab7f..286594bbb 100644 --- a/scripts/api-inference/scripts/generate.ts +++ b/scripts/api-inference/scripts/generate.ts @@ -65,21 +65,19 @@ function readTemplate( templateName: string, namespace: string, ): Promise { - const templateNameSnakeCase = templateName.replace(/-/g, "_"); const templatePath = path.join( TEMPLATE_DIR, namespace, - `${templateNameSnakeCase}.handlebars`, + `${templateName}.handlebars`, ); - console.log(` 🔍 Reading ${templateNameSnakeCase}.handlebars`); + console.log(` 🔍 Reading ${templateName}.handlebars`); return fs.readFile(templatePath, { encoding: "utf-8" }); } function writeTaskDoc(templateName: string, content: string): Promise { - const templateNameSnakeCase = templateName.replace(/-/g, "_"); - const taskDocPath = path.join(TASKS_DOCS_DIR, `${templateNameSnakeCase}.md`); + const taskDocPath = path.join(TASKS_DOCS_DIR, `${templateName}.md`); console.log(` 💾 Saving to ${taskDocPath}`); - const header = PAGE_HEADER({task:templateName, taskSnakeCase: templateNameSnakeCase}); + const header = PAGE_HEADER({task:templateName}); const contentWithHeader = `\n\n${content}`; return fs .mkdir(TASKS_DOCS_DIR, { recursive: true }) diff --git a/scripts/api-inference/templates/common/page_header.handlebars b/scripts/api-inference/templates/common/page-header.handlebars similarity index 96% rename from scripts/api-inference/templates/common/page_header.handlebars rename to scripts/api-inference/templates/common/page-header.handlebars index f93c13c43..54aa6c861 100644 --- a/scripts/api-inference/templates/common/page_header.handlebars +++ b/scripts/api-inference/templates/common/page-header.handlebars @@ -1,7 +1,7 @@ This markdown file has been generated from a script. Please do not edit it directly. 
For more details, check out: - the `generate.ts` script: https://github.com/huggingface/hub-docs/blob/main/scripts/api-inference/scripts/generate.ts -- the task template defining the sections in the page: https://github.com/huggingface/hub-docs/tree/main/scripts/api-inference/templates/task/{{taskSnakeCase}}.handlebars +- the task template defining the sections in the page: https://github.com/huggingface/hub-docs/tree/main/scripts/api-inference/templates/task/{{task}}.handlebars - the input jsonschema specifications used to generate the input markdown table: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/tasks/{{task}}/spec/input.json - the output jsonschema specifications used to generate the output markdown table: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/tasks/{{task}}/spec/output.json - the snippets used to generate the example: diff --git a/scripts/api-inference/templates/common/snippets_template.handlebars b/scripts/api-inference/templates/common/snippets-template.handlebars similarity index 100% rename from scripts/api-inference/templates/common/snippets_template.handlebars rename to scripts/api-inference/templates/common/snippets-template.handlebars diff --git a/scripts/api-inference/templates/common/specs_headers.handlebars b/scripts/api-inference/templates/common/specs-headers.handlebars similarity index 100% rename from scripts/api-inference/templates/common/specs_headers.handlebars rename to scripts/api-inference/templates/common/specs-headers.handlebars diff --git a/scripts/api-inference/templates/common/specs_output.handlebars b/scripts/api-inference/templates/common/specs-output.handlebars similarity index 100% rename from scripts/api-inference/templates/common/specs_output.handlebars rename to scripts/api-inference/templates/common/specs-output.handlebars diff --git a/scripts/api-inference/templates/common/specs_payload.handlebars b/scripts/api-inference/templates/common/specs-payload.handlebars similarity index 100% rename from scripts/api-inference/templates/common/specs_payload.handlebars rename to scripts/api-inference/templates/common/specs-payload.handlebars diff --git a/scripts/api-inference/templates/task/audio_classification.handlebars b/scripts/api-inference/templates/task/audio-classification.handlebars similarity index 100% rename from scripts/api-inference/templates/task/audio_classification.handlebars rename to scripts/api-inference/templates/task/audio-classification.handlebars diff --git a/scripts/api-inference/templates/task/automatic_speech_recognition.handlebars b/scripts/api-inference/templates/task/automatic-speech-recognition.handlebars similarity index 100% rename from scripts/api-inference/templates/task/automatic_speech_recognition.handlebars rename to scripts/api-inference/templates/task/automatic-speech-recognition.handlebars diff --git a/scripts/api-inference/templates/task/chat_completion.handlebars b/scripts/api-inference/templates/task/chat-completion.handlebars similarity index 100% rename from scripts/api-inference/templates/task/chat_completion.handlebars rename to scripts/api-inference/templates/task/chat-completion.handlebars diff --git a/scripts/api-inference/templates/task/feature_extraction.handlebars b/scripts/api-inference/templates/task/feature-extraction.handlebars similarity index 100% rename from scripts/api-inference/templates/task/feature_extraction.handlebars rename to scripts/api-inference/templates/task/feature-extraction.handlebars diff --git 
a/scripts/api-inference/templates/task/fill_mask.handlebars b/scripts/api-inference/templates/task/fill-mask.handlebars similarity index 100% rename from scripts/api-inference/templates/task/fill_mask.handlebars rename to scripts/api-inference/templates/task/fill-mask.handlebars diff --git a/scripts/api-inference/templates/task/image_classification.handlebars b/scripts/api-inference/templates/task/image-classification.handlebars similarity index 100% rename from scripts/api-inference/templates/task/image_classification.handlebars rename to scripts/api-inference/templates/task/image-classification.handlebars diff --git a/scripts/api-inference/templates/task/image_segmentation.handlebars b/scripts/api-inference/templates/task/image-segmentation.handlebars similarity index 100% rename from scripts/api-inference/templates/task/image_segmentation.handlebars rename to scripts/api-inference/templates/task/image-segmentation.handlebars diff --git a/scripts/api-inference/templates/task/image_to_image.handlebars b/scripts/api-inference/templates/task/image-to-image.handlebars similarity index 100% rename from scripts/api-inference/templates/task/image_to_image.handlebars rename to scripts/api-inference/templates/task/image-to-image.handlebars diff --git a/scripts/api-inference/templates/task/object_detection.handlebars b/scripts/api-inference/templates/task/object-detection.handlebars similarity index 100% rename from scripts/api-inference/templates/task/object_detection.handlebars rename to scripts/api-inference/templates/task/object-detection.handlebars diff --git a/scripts/api-inference/templates/task/question_answering.handlebars b/scripts/api-inference/templates/task/question-answering.handlebars similarity index 100% rename from scripts/api-inference/templates/task/question_answering.handlebars rename to scripts/api-inference/templates/task/question-answering.handlebars diff --git a/scripts/api-inference/templates/task/table_question_answering.handlebars b/scripts/api-inference/templates/task/table-question-answering.handlebars similarity index 100% rename from scripts/api-inference/templates/task/table_question_answering.handlebars rename to scripts/api-inference/templates/task/table-question-answering.handlebars diff --git a/scripts/api-inference/templates/task/text_classification.handlebars b/scripts/api-inference/templates/task/text-classification.handlebars similarity index 100% rename from scripts/api-inference/templates/task/text_classification.handlebars rename to scripts/api-inference/templates/task/text-classification.handlebars diff --git a/scripts/api-inference/templates/task/text_generation.handlebars b/scripts/api-inference/templates/task/text-generation.handlebars similarity index 100% rename from scripts/api-inference/templates/task/text_generation.handlebars rename to scripts/api-inference/templates/task/text-generation.handlebars diff --git a/scripts/api-inference/templates/task/text_to_image.handlebars b/scripts/api-inference/templates/task/text-to-image.handlebars similarity index 100% rename from scripts/api-inference/templates/task/text_to_image.handlebars rename to scripts/api-inference/templates/task/text-to-image.handlebars diff --git a/scripts/api-inference/templates/task/token_classification.handlebars b/scripts/api-inference/templates/task/token-classification.handlebars similarity index 100% rename from scripts/api-inference/templates/task/token_classification.handlebars rename to scripts/api-inference/templates/task/token-classification.handlebars diff --git 
a/scripts/api-inference/templates/task/zero_shot_classification.handlebars b/scripts/api-inference/templates/task/zero-shot-classification.handlebars similarity index 100% rename from scripts/api-inference/templates/task/zero_shot_classification.handlebars rename to scripts/api-inference/templates/task/zero-shot-classification.handlebars From 2b6f0511fb7d559196d5d1f7201ebe8e4ed3998b Mon Sep 17 00:00:00 2001 From: Lucain Date: Thu, 5 Sep 2024 10:35:21 +0200 Subject: [PATCH 36/38] Update docs/api-inference/index.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Victor Muštar --- docs/api-inference/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api-inference/index.md b/docs/api-inference/index.md index 8b67979e3..cf73a33f3 100644 --- a/docs/api-inference/index.md +++ b/docs/api-inference/index.md @@ -1,6 +1,6 @@ # Serverless Inference API -**Instant Access to 800,000+ ML Models for Fast Prototyping** +**Instant Access to thousands of ML Models for Fast Prototyping** Explore the most popular models for text, image, speech, and more — all with a simple API request. Build, test, and experiment without worrying about infrastructure or setup. From 92baadc2eefa36b9a1c68e797e818441e922636e Mon Sep 17 00:00:00 2001 From: Lucain Date: Thu, 5 Sep 2024 10:46:42 +0200 Subject: [PATCH 37/38] Apply feedback for "new_api_docs" (#1408) * Update getting started examples * Move snippets above specification * custom link for finegrained token --- docs/api-inference/getting-started.md | 67 +++++++-- .../tasks/audio-classification.md | 62 ++++----- .../tasks/automatic-speech-recognition.md | 96 ++++++------- docs/api-inference/tasks/chat-completion.md | 127 ++++++++--------- .../api-inference/tasks/feature-extraction.md | 61 ++++---- docs/api-inference/tasks/fill-mask.md | 66 ++++----- .../tasks/image-classification.md | 63 ++++----- .../api-inference/tasks/image-segmentation.md | 76 +++++----- docs/api-inference/tasks/image-to-image.md | 13 +- docs/api-inference/tasks/object-detection.md | 70 +++++----- .../api-inference/tasks/question-answering.md | 80 +++++------ docs/api-inference/tasks/summarization.md | 60 ++++---- .../tasks/table-question-answering.md | 66 ++++----- .../tasks/text-classification.md | 62 ++++----- docs/api-inference/tasks/text-generation.md | 130 +++++++++--------- docs/api-inference/tasks/text-to-image.md | 71 +++++----- .../tasks/token-classification.md | 97 ++++++------- docs/api-inference/tasks/translation.md | 64 ++++----- .../tasks/zero-shot-classification.md | 66 ++++----- .../task/audio-classification.handlebars | 8 +- .../automatic-speech-recognition.handlebars | 8 +- .../templates/task/chat-completion.handlebars | 7 +- .../task/feature-extraction.handlebars | 7 +- .../templates/task/fill-mask.handlebars | 8 +- .../task/image-classification.handlebars | 7 +- .../task/image-segmentation.handlebars | 7 +- .../templates/task/image-to-image.handlebars | 7 +- .../task/object-detection.handlebars | 8 +- .../task/question-answering.handlebars | 8 +- .../templates/task/summarization.handlebars | 8 +- .../task/table-question-answering.handlebars | 8 +- .../task/text-classification.handlebars | 8 +- .../templates/task/text-generation.handlebars | 8 +- .../templates/task/text-to-image.handlebars | 8 +- .../task/token-classification.handlebars | 7 +- .../templates/task/translation.handlebars | 8 +- .../task/zero-shot-classification.handlebars | 8 +- 37 files changed, 793 insertions(+), 742 deletions(-) 
diff --git a/docs/api-inference/getting-started.md b/docs/api-inference/getting-started.md index c0647b935..31a77e4b3 100644 --- a/docs/api-inference/getting-started.md +++ b/docs/api-inference/getting-started.md @@ -6,7 +6,7 @@ We'll do a minimal example using a [sentiment classification model](https://hugg ## Getting a Token -Using the Serverless Inference API requires passing a user token in the request headers. You can get a token by signing up on the Hugging Face website and then going to the [tokens page](https://huggingface.co/settings/tokens). We recommend creating a `Fine-grained` token with the scope to `Make calls to the serverless Inference API`. +Using the Serverless Inference API requires passing a user token in the request headers. You can get a token by signing up on the Hugging Face website and then going to the [tokens page](https://huggingface.co/settings/tokens/new?globalPermissions=inference.serverless.write&tokenType=fineGrained). We recommend creating a `Fine-grained` token with the scope to `Make calls to the serverless Inference API`. TODO: add screenshot For more details about user tokens, check out [this guide](https://huggingface.co/docs/hub/en/security-tokens). @@ -14,11 +14,15 @@ For more details about user tokens, check out [this guide](https://huggingface.c ## cURL ```bash -curl https://api-inference.huggingface.co/models/cardiffnlp/twitter-roberta-base-sentiment-latest \ - -X POST \ - -d '{"inputs": "Today is a nice day"}' \ - -H "Authorization: Bearer hf_***" \ - -H "Content-Type: application/json" +curl 'https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3.1-8B-Instruct/v1/chat/completions' \ +-H "Authorization: Bearer hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" \ +-H 'Content-Type: application/json' \ +-d '{ + "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "messages": [{"role": "user", "content": "What is the capital of France?"}], + "max_tokens": 500, + "stream": false +}' ``` ## Python @@ -28,21 +32,35 @@ You can use the `requests` library to make a request to the Inference API. ```python import requests -API_URL = "https://api-inference.huggingface.co/models/cardiffnlp/twitter-roberta-base-sentiment-latest" -headers = {"Authorization": "Bearer hf_***"} +API_URL = "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3.1-8B-Instruct/v1/chat/completions" +headers = {"Authorization": "Bearer hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"} +payload = { + "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "messages": [{"role": "user", "content": "What is the capital of France?"}], + "max_tokens": 500, + "stream": False +} -payload = {"inputs": "Today is a nice day"} response = requests.post(API_URL, headers=headers, json=payload) response.json() ``` -Hugging Face also provides a [`InferenceClient`](https://huggingface.co/docs/huggingface_hub/guides/inference) that handles inference, caching, async, and more. Make sure to install it with `pip install huggingface_hub` first +Hugging Face also provides a [`InferenceClient`](https://huggingface.co/docs/huggingface_hub/guides/inference) that handles inference for you. Make sure to install it with `pip install huggingface_hub` first. 
```python from huggingface_hub import InferenceClient -client = InferenceClient(model="cardiffnlp/twitter-roberta-base-sentiment-latest", token="hf_***") -client.text_classification("Today is a nice day") +client = InferenceClient( + "meta-llama/Meta-Llama-3.1-8B-Instruct", + token="hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", +) + +for message in client.chat_completion( + messages=[{"role": "user", "content": "What is the capital of France?"}], + max_tokens=500, + stream=True, +): + print(message.choices[0].delta.content, end="") ``` ## JavaScript @@ -52,11 +70,11 @@ import fetch from "node-fetch"; async function query(data) { const response = await fetch( - "https://api-inference.huggingface.co/models/MODEL_ID", + "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3.1-8B-Instruct/v1/chat/completions", { method: "POST", headers: { - Authorization: `Bearer cardiffnlp/twitter-roberta-base-sentiment-latest`, + Authorization: `Bearer hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx`, "Content-Type": "application/json", }, body: JSON.stringify(data), @@ -67,12 +85,31 @@ async function query(data) { } query({ - inputs: "Today is a nice day" + "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "messages": [{"role": "user", "content": "What is the capital of France?"}], + "max_tokens": 500, + "stream": false }).then((response) => { console.log(JSON.stringify(response, null, 2)); }); ``` +Hugging Face also provides a [`HfInference`](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference) client that handles inference. Make sure to install it with `npm install @huggingface/inference` first. + +```js +import { HfInference } from "@huggingface/inference"; + +const inference = new HfInference("hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"); + +for await (const chunk of inference.chatCompletionStream({ + model: "meta-llama/Meta-Llama-3.1-8B-Instruct", + messages: [{ role: "user", content: "What is the capital of France?" }], + max_tokens: 500, +})) { + process.stdout.write(chunk.choices[0]?.delta?.content || ""); +} +``` + ## Next Steps Now that you know the basics, you can explore the [API Reference](./parameters.md) to learn more about task-specific settings and parameters. \ No newline at end of file diff --git a/docs/api-inference/tasks/audio-classification.md b/docs/api-inference/tasks/audio-classification.md index 099afedf6..2819beed8 100644 --- a/docs/api-inference/tasks/audio-classification.md +++ b/docs/api-inference/tasks/audio-classification.md @@ -27,37 +27,6 @@ For more details about the `audio-classification` task, check out its [dedicated This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=audio-classification&sort=trending). -### API specification - -#### Request - -| Payload | | | -| :--- | :--- | :--- | -| **inputs*** | _string_ | The input audio data as a base64-encoded string. If no `parameters` are provided, you can also provide the audio data as a raw bytes payload. | -| **parameters** | _object_ | Additional inference parameters for Audio Classification | -| **        function_to_apply** | _enum_ | Possible values: sigmoid, softmax, none. | -| **        top_k** | _integer_ | When specified, limits the output to the top K most probable classes. | - - -Some options can be configured by passing headers to the Inference API. 
Here are the available headers: - -| Headers | | | -| :--- | :--- | :--- | -| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | -| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | -| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | - -For more information about Inference API headers, check out the parameters [guide](../parameters). - -#### Response - -| Body | | -| :--- | :--- | :--- | -| **(array)** | _object[]_ | Output is an array of objects. | -| **        label** | _string_ | The predicted class label. | -| **        score** | _number_ | The corresponding probability. | - - ### Using the API @@ -122,3 +91,34 @@ To use the JavaScript client, see `huggingface.js`'s [package reference](https:/ + +### API specification + +#### Request + +| Payload | | | +| :--- | :--- | :--- | +| **inputs*** | _string_ | The input audio data as a base64-encoded string. If no `parameters` are provided, you can also provide the audio data as a raw bytes payload. | +| **parameters** | _object_ | Additional inference parameters for Audio Classification | +| **        function_to_apply** | _enum_ | Possible values: sigmoid, softmax, none. | +| **        top_k** | _integer_ | When specified, limits the output to the top K most probable classes. | + + +Some options can be configured by passing headers to the Inference API. Here are the available headers: + +| Headers | | | +| :--- | :--- | :--- | +| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). 
| + +For more information about Inference API headers, check out the parameters [guide](../parameters). + +#### Response + +| Body | | +| :--- | :--- | :--- | +| **(array)** | _object[]_ | Output is an array of objects. | +| **        label** | _string_ | The predicted class label. | +| **        score** | _number_ | The corresponding probability. | + diff --git a/docs/api-inference/tasks/automatic-speech-recognition.md b/docs/api-inference/tasks/automatic-speech-recognition.md index 88e98003a..81a456f00 100644 --- a/docs/api-inference/tasks/automatic-speech-recognition.md +++ b/docs/api-inference/tasks/automatic-speech-recognition.md @@ -28,54 +28,6 @@ For more details about the `automatic-speech-recognition` task, check out its [d This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=automatic-speech-recognition&sort=trending). -### API specification - -#### Request - -| Payload | | | -| :--- | :--- | :--- | -| **inputs*** | _string_ | The input audio data as a base64-encoded string. If no `parameters` are provided, you can also provide the audio data as a raw bytes payload. | -| **parameters** | _object_ | Additional inference parameters for Automatic Speech Recognition | -| **        return_timestamps** | _boolean_ | Whether to output corresponding timestamps with the generated text | -| **        generate** | _object_ | Ad-hoc parametrization of the text generation process | -| **                temperature** | _number_ | The value used to modulate the next token probabilities. | -| **                top_k** | _integer_ | The number of highest probability vocabulary tokens to keep for top-k-filtering. | -| **                top_p** | _number_ | If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. | -| **                typical_p** | _number_ | Local typicality measures how similar the conditional probability of predicting a target token next is to the expected conditional probability of predicting a random token next, given the partial text already generated. If set to float < 1, the smallest set of the most locally typical tokens with probabilities that add up to typical_p or higher are kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details. | -| **                epsilon_cutoff** | _number_ | If set to float strictly between 0 and 1, only tokens with a conditional probability greater than epsilon_cutoff will be sampled. In the paper, suggested values range from 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191) for more details. | -| **                eta_cutoff** | _number_ | Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to float strictly between 0 and 1, a token is only considered if it is greater than either eta_cutoff or sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits))). The latter term is intuitively the expected next token probability, scaled by sqrt(eta_cutoff). In the paper, suggested values range from 3e-4 to 2e-3, depending on the size of the model. See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191) for more details. | -| **                max_length** | _integer_ | The maximum length (in tokens) of the generated text, including the input. 
| -| **                max_new_tokens** | _integer_ | The maximum number of tokens to generate. Takes precedence over maxLength. | -| **                min_length** | _integer_ | The minimum length (in tokens) of the generated text, including the input. | -| **                min_new_tokens** | _integer_ | The minimum number of tokens to generate. Takes precedence over maxLength. | -| **                do_sample** | _boolean_ | Whether to use sampling instead of greedy decoding when generating new tokens. | -| **                early_stopping** | _enum_ | Possible values: never, true, false. | -| **                num_beams** | _integer_ | Number of beams to use for beam search. | -| **                num_beam_groups** | _integer_ | Number of groups to divide num_beams into in order to ensure diversity among different groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details. | -| **                penalty_alpha** | _number_ | The value balances the model confidence and the degeneration penalty in contrastive search decoding. | -| **                use_cache** | _boolean_ | Whether the model should use the past last key/values attentions to speed up decoding | - - -Some options can be configured by passing headers to the Inference API. Here are the available headers: - -| Headers | | | -| :--- | :--- | :--- | -| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | -| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | -| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | - -For more information about Inference API headers, check out the parameters [guide](../parameters). - -#### Response - -| Body | | -| :--- | :--- | :--- | -| **text** | _string_ | The recognized text. | -| **chunks** | _object[]_ | When returnTimestamps is enabled, chunks contains a list of audio chunks identified by the model. | -| **        text** | _string_ | A chunk of text identified by the model | -| **        timestamps** | _number[]_ | The start and end timestamps corresponding with the text | - - ### Using the API @@ -140,3 +92,51 @@ To use the JavaScript client, see `huggingface.js`'s [package reference](https:/ + +### API specification + +#### Request + +| Payload | | | +| :--- | :--- | :--- | +| **inputs*** | _string_ | The input audio data as a base64-encoded string. If no `parameters` are provided, you can also provide the audio data as a raw bytes payload. 
| +| **parameters** | _object_ | Additional inference parameters for Automatic Speech Recognition | +| **        return_timestamps** | _boolean_ | Whether to output corresponding timestamps with the generated text | +| **        generate** | _object_ | Ad-hoc parametrization of the text generation process | +| **                temperature** | _number_ | The value used to modulate the next token probabilities. | +| **                top_k** | _integer_ | The number of highest probability vocabulary tokens to keep for top-k-filtering. | +| **                top_p** | _number_ | If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. | +| **                typical_p** | _number_ | Local typicality measures how similar the conditional probability of predicting a target token next is to the expected conditional probability of predicting a random token next, given the partial text already generated. If set to float < 1, the smallest set of the most locally typical tokens with probabilities that add up to typical_p or higher are kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details. | +| **                epsilon_cutoff** | _number_ | If set to float strictly between 0 and 1, only tokens with a conditional probability greater than epsilon_cutoff will be sampled. In the paper, suggested values range from 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191) for more details. | +| **                eta_cutoff** | _number_ | Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to float strictly between 0 and 1, a token is only considered if it is greater than either eta_cutoff or sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits))). The latter term is intuitively the expected next token probability, scaled by sqrt(eta_cutoff). In the paper, suggested values range from 3e-4 to 2e-3, depending on the size of the model. See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191) for more details. | +| **                max_length** | _integer_ | The maximum length (in tokens) of the generated text, including the input. | +| **                max_new_tokens** | _integer_ | The maximum number of tokens to generate. Takes precedence over maxLength. | +| **                min_length** | _integer_ | The minimum length (in tokens) of the generated text, including the input. | +| **                min_new_tokens** | _integer_ | The minimum number of tokens to generate. Takes precedence over maxLength. | +| **                do_sample** | _boolean_ | Whether to use sampling instead of greedy decoding when generating new tokens. | +| **                early_stopping** | _enum_ | Possible values: never, true, false. | +| **                num_beams** | _integer_ | Number of beams to use for beam search. | +| **                num_beam_groups** | _integer_ | Number of groups to divide num_beams into in order to ensure diversity among different groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details. | +| **                penalty_alpha** | _number_ | The value balances the model confidence and the degeneration penalty in contrastive search decoding. 
| +| **                use_cache** | _boolean_ | Whether the model should use the past last key/values attentions to speed up decoding | + + +Some options can be configured by passing headers to the Inference API. Here are the available headers: + +| Headers | | | +| :--- | :--- | :--- | +| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | + +For more information about Inference API headers, check out the parameters [guide](../parameters). + +#### Response + +| Body | | +| :--- | :--- | :--- | +| **text** | _string_ | The recognized text. | +| **chunks** | _object[]_ | When returnTimestamps is enabled, chunks contains a list of audio chunks identified by the model. | +| **        text** | _string_ | A chunk of text identified by the model | +| **        timestamps** | _number[]_ | The start and end timestamps corresponding with the text | + diff --git a/docs/api-inference/tasks/chat-completion.md b/docs/api-inference/tasks/chat-completion.md index 561081309..213cf5cae 100644 --- a/docs/api-inference/tasks/chat-completion.md +++ b/docs/api-inference/tasks/chat-completion.md @@ -29,6 +29,70 @@ This is a subtask of [`text-generation`](./text_generation) designed to generate +### Using the API + + + + + +```bash +curl 'https://api-inference.huggingface.co/models/google/gemma-2-2b-it/v1/chat/completions' \ +-H "Authorization: Bearer hf_***" \ +-H 'Content-Type: application/json' \ +-d '{ + "model": "google/gemma-2-2b-it", + "messages": [{"role": "user", "content": "What is the capital of France?"}], + "max_tokens": 500, + "stream": false +}' + +``` + + + +```py +from huggingface_hub import InferenceClient + +client = InferenceClient( + "google/gemma-2-2b-it", + token="hf_***", +) + +for message in client.chat_completion( + messages=[{"role": "user", "content": "What is the capital of France?"}], + max_tokens=500, + stream=True, +): + print(message.choices[0].delta.content, end="") + +``` + +To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion). + + + +```js +import { HfInference } from "@huggingface/inference"; + +const inference = new HfInference("hf_***"); + +for await (const chunk of inference.chatCompletionStream({ + model: "google/gemma-2-2b-it", + messages: [{ role: "user", content: "What is the capital of France?" 
}], + max_tokens: 500, +})) { + process.stdout.write(chunk.choices[0]?.delta?.content || ""); +} + +``` + +To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#chatcompletion). + + + + + + ### API specification #### Request @@ -150,66 +214,3 @@ For more information about streaming, check out [this guide](https://huggingface | **system_fingerprint** | _string_ | | -### Using the API - - - - - -```bash -curl 'https://api-inference.huggingface.co/models/google/gemma-2-2b-it/v1/chat/completions' \ --H "Authorization: Bearer hf_***" \ --H 'Content-Type: application/json' \ --d '{ - "model": "google/gemma-2-2b-it", - "messages": [{"role": "user", "content": "What is the capital of France?"}], - "max_tokens": 500, - "stream": false -}' - -``` - - - -```py -from huggingface_hub import InferenceClient - -client = InferenceClient( - "google/gemma-2-2b-it", - token="hf_***", -) - -for message in client.chat_completion( - messages=[{"role": "user", "content": "What is the capital of France?"}], - max_tokens=500, - stream=True, -): - print(message.choices[0].delta.content, end="") - -``` - -To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion). - - - -```js -import { HfInference } from "@huggingface/inference"; - -const inference = new HfInference("hf_***"); - -for await (const chunk of inference.chatCompletionStream({ - model: "google/gemma-2-2b-it", - messages: [{ role: "user", content: "What is the capital of France?" }], - max_tokens: 500, -})) { - process.stdout.write(chunk.choices[0]?.delta?.content || ""); -} - -``` - -To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#chatcompletion). - - - - - diff --git a/docs/api-inference/tasks/feature-extraction.md b/docs/api-inference/tasks/feature-extraction.md index 2762418b8..5c37b4e9c 100644 --- a/docs/api-inference/tasks/feature-extraction.md +++ b/docs/api-inference/tasks/feature-extraction.md @@ -29,36 +29,6 @@ For more details about the `feature-extraction` task, check out its [dedicated p This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=feature-extraction&sort=trending). -### API specification - -#### Request - -| Payload | | | -| :--- | :--- | :--- | -| **inputs*** | _string_ | The text to embed. | -| **normalize** | _boolean_ | | -| **prompt_name** | _string_ | The name of the prompt that should be used by for encoding. If not set, no prompt will be applied. Must be a key in the `Sentence Transformers` configuration `prompts` dictionary. For example if ``prompt_name`` is "query" and the ``prompts`` is {"query": "query: ", ...}, then the sentence "What is the capital of France?" will be encoded as "query: What is the capital of France?" because the prompt text will be prepended before any text to encode. | -| **truncate** | _boolean_ | | -| **truncation_direction** | _enum_ | Possible values: Left, Right. | - - -Some options can be configured by passing headers to the Inference API. Here are the available headers: - -| Headers | | | -| :--- | :--- | :--- | -| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. 
You can generate one from [your settings page](https://huggingface.co/settings/tokens). | -| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | -| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | - -For more information about Inference API headers, check out the parameters [guide](../parameters). - -#### Response - -| Body | | -| :--- | :--- | :--- | -| **(array)** | _array[]_ | Output is an array of arrays. | - - ### Using the API @@ -123,3 +93,34 @@ To use the JavaScript client, see `huggingface.js`'s [package reference](https:/ + +### API specification + +#### Request + +| Payload | | | +| :--- | :--- | :--- | +| **inputs*** | _string_ | The text to embed. | +| **normalize** | _boolean_ | | +| **prompt_name** | _string_ | The name of the prompt that should be used by for encoding. If not set, no prompt will be applied. Must be a key in the `Sentence Transformers` configuration `prompts` dictionary. For example if ``prompt_name`` is "query" and the ``prompts`` is {"query": "query: ", ...}, then the sentence "What is the capital of France?" will be encoded as "query: What is the capital of France?" because the prompt text will be prepended before any text to encode. | +| **truncate** | _boolean_ | | +| **truncation_direction** | _enum_ | Possible values: Left, Right. | + + +Some options can be configured by passing headers to the Inference API. Here are the available headers: + +| Headers | | | +| :--- | :--- | :--- | +| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | + +For more information about Inference API headers, check out the parameters [guide](../parameters). + +#### Response + +| Body | | +| :--- | :--- | :--- | +| **(array)** | _array[]_ | Output is an array of arrays. 
| + + diff --git a/docs/api-inference/tasks/fill-mask.md b/docs/api-inference/tasks/fill-mask.md index d9bdacc2a..54b93832e 100644 --- a/docs/api-inference/tasks/fill-mask.md +++ b/docs/api-inference/tasks/fill-mask.md @@ -29,39 +29,6 @@ For more details about the `fill-mask` task, check out its [dedicated page](http This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=fill-mask&sort=trending). -### API specification - -#### Request - -| Payload | | | -| :--- | :--- | :--- | -| **inputs*** | _string_ | The text with masked tokens | -| **parameters** | _object_ | Additional inference parameters for Fill Mask | -| **        top_k** | _integer_ | When passed, overrides the number of predictions to return. | -| **        targets** | _string[]_ | When passed, the model will limit the scores to the passed targets instead of looking up in the whole vocabulary. If the provided targets are not in the model vocab, they will be tokenized and the first resulting token will be used (with a warning, and that might be slower). | - - -Some options can be configured by passing headers to the Inference API. Here are the available headers: - -| Headers | | | -| :--- | :--- | :--- | -| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | -| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | -| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | - -For more information about Inference API headers, check out the parameters [guide](../parameters). - -#### Response - -| Body | | -| :--- | :--- | :--- | -| **(array)** | _object[]_ | Output is an array of objects. | -| **        sequence** | _string_ | The corresponding input with the mask token prediction. | -| **        score** | _number_ | The corresponding probability | -| **        token** | _integer_ | The predicted token id (to replace the masked one). | -| **        token_str** | _string_ | The predicted token (to replace the masked one). | - - ### Using the API @@ -126,3 +93,36 @@ To use the JavaScript client, see `huggingface.js`'s [package reference](https:/ + +### API specification + +#### Request + +| Payload | | | +| :--- | :--- | :--- | +| **inputs*** | _string_ | The text with masked tokens | +| **parameters** | _object_ | Additional inference parameters for Fill Mask | +| **        top_k** | _integer_ | When passed, overrides the number of predictions to return. | +| **        targets** | _string[]_ | When passed, the model will limit the scores to the passed targets instead of looking up in the whole vocabulary. 
If the provided targets are not in the model vocab, they will be tokenized and the first resulting token will be used (with a warning, and that might be slower). | + + +Some options can be configured by passing headers to the Inference API. Here are the available headers: + +| Headers | | | +| :--- | :--- | :--- | +| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | + +For more information about Inference API headers, check out the parameters [guide](../parameters). + +#### Response + +| Body | | +| :--- | :--- | :--- | +| **(array)** | _object[]_ | Output is an array of objects. | +| **        sequence** | _string_ | The corresponding input with the mask token prediction. | +| **        score** | _number_ | The corresponding probability | +| **        token** | _integer_ | The predicted token id (to replace the masked one). | +| **        token_str** | _string_ | The predicted token (to replace the masked one). | + diff --git a/docs/api-inference/tasks/image-classification.md b/docs/api-inference/tasks/image-classification.md index 3f418adc9..c906acd75 100644 --- a/docs/api-inference/tasks/image-classification.md +++ b/docs/api-inference/tasks/image-classification.md @@ -28,37 +28,6 @@ For more details about the `image-classification` task, check out its [dedicated This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=image-classification&sort=trending). -### API specification - -#### Request - -| Payload | | | -| :--- | :--- | :--- | -| **inputs*** | _string_ | The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload. | -| **parameters** | _object_ | Additional inference parameters for Image Classification | -| **        function_to_apply** | _enum_ | Possible values: sigmoid, softmax, none. | -| **        top_k** | _integer_ | When specified, limits the output to the top K most probable classes. | - - -Some options can be configured by passing headers to the Inference API. Here are the available headers: - -| Headers | | | -| :--- | :--- | :--- | -| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). 
| -| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | -| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | - -For more information about Inference API headers, check out the parameters [guide](../parameters). - -#### Response - -| Body | | -| :--- | :--- | :--- | -| **(array)** | _object[]_ | Output is an array of objects. | -| **        label** | _string_ | The predicted class label. | -| **        score** | _number_ | The corresponding probability. | - - ### Using the API @@ -123,3 +92,35 @@ To use the JavaScript client, see `huggingface.js`'s [package reference](https:/ + +### API specification + +#### Request + +| Payload | | | +| :--- | :--- | :--- | +| **inputs*** | _string_ | The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload. | +| **parameters** | _object_ | Additional inference parameters for Image Classification | +| **        function_to_apply** | _enum_ | Possible values: sigmoid, softmax, none. | +| **        top_k** | _integer_ | When specified, limits the output to the top K most probable classes. | + + +Some options can be configured by passing headers to the Inference API. Here are the available headers: + +| Headers | | | +| :--- | :--- | :--- | +| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | + +For more information about Inference API headers, check out the parameters [guide](../parameters). + +#### Response + +| Body | | +| :--- | :--- | :--- | +| **(array)** | _object[]_ | Output is an array of objects. | +| **        label** | _string_ | The predicted class label. | +| **        score** | _number_ | The corresponding probability. 
| + + diff --git a/docs/api-inference/tasks/image-segmentation.md b/docs/api-inference/tasks/image-segmentation.md index 610da6fa5..d5956506b 100644 --- a/docs/api-inference/tasks/image-segmentation.md +++ b/docs/api-inference/tasks/image-segmentation.md @@ -24,44 +24,11 @@ For more details about the `image-segmentation` task, check out its [dedicated p ### Recommended models +- [facebook/detr-resnet-50-panoptic](https://huggingface.co/facebook/detr-resnet-50-panoptic): Solid panoptic segmentation model trained on the COCO 2017 benchmark dataset. - [nvidia/segformer-b0-finetuned-ade-512-512](https://huggingface.co/nvidia/segformer-b0-finetuned-ade-512-512): Semantic segmentation model trained on ADE20k benchmark dataset with 512x512 resolution. This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=image-segmentation&sort=trending). -### API specification - -#### Request - -| Payload | | | -| :--- | :--- | :--- | -| **inputs*** | _string_ | The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload. | -| **parameters** | _object_ | Additional inference parameters for Image Segmentation | -| **        mask_threshold** | _number_ | Threshold to use when turning the predicted masks into binary values. | -| **        overlap_mask_area_threshold** | _number_ | Mask overlap threshold to eliminate small, disconnected segments. | -| **        subtask** | _enum_ | Possible values: instance, panoptic, semantic. | -| **        threshold** | _number_ | Probability threshold to filter out predicted masks. | - - -Some options can be configured by passing headers to the Inference API. Here are the available headers: - -| Headers | | | -| :--- | :--- | :--- | -| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | -| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | -| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | - -For more information about Inference API headers, check out the parameters [guide](../parameters). - -#### Response - -| Body | | -| :--- | :--- | :--- | -| **(array)** | _object[]_ | A predicted mask / segment | -| **        label** | _string_ | The label of the predicted segment. | -| **        mask** | _string_ | The corresponding mask as a black-and-white image (base64-encoded). | -| **        score** | _number_ | The score or confidence degree the model has. 
| - - ### Using the API @@ -69,7 +36,7 @@ For more information about Inference API headers, check out the parameters [guid ```bash -curl https://api-inference.huggingface.co/models/nvidia/segformer-b0-finetuned-ade-512-512 \ +curl https://api-inference.huggingface.co/models/facebook/detr-resnet-50-panoptic \ -X POST \ --data-binary '@cats.jpg' \ -H "Authorization: Bearer hf_***" @@ -81,7 +48,7 @@ curl https://api-inference.huggingface.co/models/nvidia/segformer-b0-finetuned-a ```py import requests -API_URL = "https://api-inference.huggingface.co/models/nvidia/segformer-b0-finetuned-ade-512-512" +API_URL = "https://api-inference.huggingface.co/models/facebook/detr-resnet-50-panoptic" headers = {"Authorization": "Bearer hf_***"} def query(filename): @@ -101,7 +68,7 @@ To use the Python client, see `huggingface_hub`'s [package reference](https://hu async function query(filename) { const data = fs.readFileSync(filename); const response = await fetch( - "https://api-inference.huggingface.co/models/nvidia/segformer-b0-finetuned-ade-512-512", + "https://api-inference.huggingface.co/models/facebook/detr-resnet-50-panoptic", { headers: { Authorization: "Bearer hf_***" @@ -126,3 +93,38 @@ To use the JavaScript client, see `huggingface.js`'s [package reference](https:/ + +### API specification + +#### Request + +| Payload | | | +| :--- | :--- | :--- | +| **inputs*** | _string_ | The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload. | +| **parameters** | _object_ | Additional inference parameters for Image Segmentation | +| **        mask_threshold** | _number_ | Threshold to use when turning the predicted masks into binary values. | +| **        overlap_mask_area_threshold** | _number_ | Mask overlap threshold to eliminate small, disconnected segments. | +| **        subtask** | _enum_ | Possible values: instance, panoptic, semantic. | +| **        threshold** | _number_ | Probability threshold to filter out predicted masks. | + + +Some options can be configured by passing headers to the Inference API. Here are the available headers: + +| Headers | | | +| :--- | :--- | :--- | +| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | + +For more information about Inference API headers, check out the parameters [guide](../parameters). 
+ +#### Response + +| Body | | +| :--- | :--- | :--- | +| **(array)** | _object[]_ | A predicted mask / segment | +| **        label** | _string_ | The label of the predicted segment. | +| **        mask** | _string_ | The corresponding mask as a black-and-white image (base64-encoded). | +| **        score** | _number_ | The score or confidence degree the model has. | + + diff --git a/docs/api-inference/tasks/image-to-image.md b/docs/api-inference/tasks/image-to-image.md index 078c6f0f3..9605c0a64 100644 --- a/docs/api-inference/tasks/image-to-image.md +++ b/docs/api-inference/tasks/image-to-image.md @@ -35,6 +35,13 @@ For more details about the `image-to-image` task, check out its [dedicated page] This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=image-to-image&sort=trending). +### Using the API + + +No snippet available for this task. + + + ### API specification #### Request @@ -68,9 +75,3 @@ For more information about Inference API headers, check out the parameters [guid | **image** | _unknown_ | The output image returned as raw bytes in the payload. | -### Using the API - - -No snippet available for this task. - - diff --git a/docs/api-inference/tasks/object-detection.md b/docs/api-inference/tasks/object-detection.md index 57cf86143..c3ba578f1 100644 --- a/docs/api-inference/tasks/object-detection.md +++ b/docs/api-inference/tasks/object-detection.md @@ -29,41 +29,6 @@ For more details about the `object-detection` task, check out its [dedicated pag This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=object-detection&sort=trending). -### API specification - -#### Request - -| Payload | | | -| :--- | :--- | :--- | -| **inputs*** | _string_ | The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload. | -| **parameters** | _object_ | Additional inference parameters for Object Detection | -| **        threshold** | _number_ | The probability necessary to make a prediction. | - - -Some options can be configured by passing headers to the Inference API. Here are the available headers: - -| Headers | | | -| :--- | :--- | :--- | -| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | -| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | -| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | - -For more information about Inference API headers, check out the parameters [guide](../parameters). 
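
Note on the image-to-image page above: it currently reads "No snippet available for this task." Until an official snippet is generated for that page, a minimal sketch of a raw call — following the same raw-bytes request pattern the other vision task pages use, with a placeholder model id that you would need to substitute yourself — could look like this:

```python
import requests

# Placeholder model id for illustration only: substitute any warm image-to-image
# model from the Hub listing linked on the page above
# (https://huggingface.co/models?inference=warm&pipeline_tag=image-to-image&sort=trending).
API_URL = "https://api-inference.huggingface.co/models/<IMAGE-TO-IMAGE-MODEL-ID>"
headers = {"Authorization": "Bearer hf_***"}

def query(filename):
    # Send the input image as a raw bytes payload, as described in the request spec above.
    with open(filename, "rb") as f:
        data = f.read()
    response = requests.post(API_URL, headers=headers, data=data)
    # Per the response spec above, the output image is returned as raw bytes in the payload.
    return response.content

output_bytes = query("cats.jpg")
with open("output.jpg", "wb") as f:
    f.write(output_bytes)
```
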
- -#### Response - -| Body | | -| :--- | :--- | :--- | -| **(array)** | _object[]_ | Output is an array of objects. | -| **        label** | _string_ | The predicted label for the bounding box. | -| **        score** | _number_ | The associated score / probability. | -| **        box** | _object_ | | -| **                xmin** | _integer_ | The x-coordinate of the top-left corner of the bounding box. | -| **                xmax** | _integer_ | The x-coordinate of the bottom-right corner of the bounding box. | -| **                ymin** | _integer_ | The y-coordinate of the top-left corner of the bounding box. | -| **                ymax** | _integer_ | The y-coordinate of the bottom-right corner of the bounding box. | - - ### Using the API @@ -128,3 +93,38 @@ To use the JavaScript client, see `huggingface.js`'s [package reference](https:/ + +### API specification + +#### Request + +| Payload | | | +| :--- | :--- | :--- | +| **inputs*** | _string_ | The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload. | +| **parameters** | _object_ | Additional inference parameters for Object Detection | +| **        threshold** | _number_ | The probability necessary to make a prediction. | + + +Some options can be configured by passing headers to the Inference API. Here are the available headers: + +| Headers | | | +| :--- | :--- | :--- | +| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | + +For more information about Inference API headers, check out the parameters [guide](../parameters). + +#### Response + +| Body | | +| :--- | :--- | :--- | +| **(array)** | _object[]_ | Output is an array of objects. | +| **        label** | _string_ | The predicted label for the bounding box. | +| **        score** | _number_ | The associated score / probability. | +| **        box** | _object_ | | +| **                xmin** | _integer_ | The x-coordinate of the top-left corner of the bounding box. | +| **                xmax** | _integer_ | The x-coordinate of the bottom-right corner of the bounding box. | +| **                ymin** | _integer_ | The y-coordinate of the top-left corner of the bounding box. | +| **                ymax** | _integer_ | The y-coordinate of the bottom-right corner of the bounding box. 
| + diff --git a/docs/api-inference/tasks/question-answering.md b/docs/api-inference/tasks/question-answering.md index 5fc2b9766..0a62e9a33 100644 --- a/docs/api-inference/tasks/question-answering.md +++ b/docs/api-inference/tasks/question-answering.md @@ -28,46 +28,6 @@ For more details about the `question-answering` task, check out its [dedicated p This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=question-answering&sort=trending). -### API specification - -#### Request - -| Payload | | | -| :--- | :--- | :--- | -| **inputs*** | _object_ | One (context, question) pair to answer | -| **        context*** | _string_ | The context to be used for answering the question | -| **        question*** | _string_ | The question to be answered | -| **parameters** | _object_ | Additional inference parameters for Question Answering | -| **        top_k** | _integer_ | The number of answers to return (will be chosen by order of likelihood). Note that we return less than topk answers if there are not enough options available within the context. | -| **        doc_stride** | _integer_ | If the context is too long to fit with the question for the model, it will be split in several chunks with some overlap. This argument controls the size of that overlap. | -| **        max_answer_len** | _integer_ | The maximum length of predicted answers (e.g., only answers with a shorter length are considered). | -| **        max_seq_len** | _integer_ | The maximum length of the total sentence (context + question) in tokens of each chunk passed to the model. The context will be split in several chunks (using docStride as overlap) if needed. | -| **        max_question_len** | _integer_ | The maximum length of the question after tokenization. It will be truncated if needed. | -| **        handle_impossible_answer** | _boolean_ | Whether to accept impossible as an answer. | -| **        align_to_words** | _boolean_ | Attempts to align the answer to real words. Improves quality on space separated languages. Might hurt on non-space-separated languages (like Japanese or Chinese) | - - -Some options can be configured by passing headers to the Inference API. Here are the available headers: - -| Headers | | | -| :--- | :--- | :--- | -| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | -| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | -| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | - -For more information about Inference API headers, check out the parameters [guide](../parameters). 
- -#### Response - -| Body | | -| :--- | :--- | :--- | -| **(array)** | _object[]_ | Output is an array of objects. | -| **        answer** | _string_ | The answer to the question. | -| **        score** | _number_ | The probability associated to the answer. | -| **        start** | _integer_ | The character position in the input where the answer begins. | -| **        end** | _integer_ | The character position in the input where the answer ends. | - - ### Using the API @@ -138,3 +98,43 @@ To use the JavaScript client, see `huggingface.js`'s [package reference](https:/ + +### API specification + +#### Request + +| Payload | | | +| :--- | :--- | :--- | +| **inputs*** | _object_ | One (context, question) pair to answer | +| **        context*** | _string_ | The context to be used for answering the question | +| **        question*** | _string_ | The question to be answered | +| **parameters** | _object_ | Additional inference parameters for Question Answering | +| **        top_k** | _integer_ | The number of answers to return (will be chosen by order of likelihood). Note that we return less than topk answers if there are not enough options available within the context. | +| **        doc_stride** | _integer_ | If the context is too long to fit with the question for the model, it will be split in several chunks with some overlap. This argument controls the size of that overlap. | +| **        max_answer_len** | _integer_ | The maximum length of predicted answers (e.g., only answers with a shorter length are considered). | +| **        max_seq_len** | _integer_ | The maximum length of the total sentence (context + question) in tokens of each chunk passed to the model. The context will be split in several chunks (using docStride as overlap) if needed. | +| **        max_question_len** | _integer_ | The maximum length of the question after tokenization. It will be truncated if needed. | +| **        handle_impossible_answer** | _boolean_ | Whether to accept impossible as an answer. | +| **        align_to_words** | _boolean_ | Attempts to align the answer to real words. Improves quality on space separated languages. Might hurt on non-space-separated languages (like Japanese or Chinese) | + + +Some options can be configured by passing headers to the Inference API. Here are the available headers: + +| Headers | | | +| :--- | :--- | :--- | +| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). 
| + +For more information about Inference API headers, check out the parameters [guide](../parameters). + +#### Response + +| Body | | +| :--- | :--- | :--- | +| **(array)** | _object[]_ | Output is an array of objects. | +| **        answer** | _string_ | The answer to the question. | +| **        score** | _number_ | The probability associated to the answer. | +| **        start** | _integer_ | The character position in the input where the answer begins. | +| **        end** | _integer_ | The character position in the input where the answer ends. | + diff --git a/docs/api-inference/tasks/summarization.md b/docs/api-inference/tasks/summarization.md index 4cf5d706e..c10a1828b 100644 --- a/docs/api-inference/tasks/summarization.md +++ b/docs/api-inference/tasks/summarization.md @@ -28,36 +28,6 @@ For more details about the `summarization` task, check out its [dedicated page]( This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=summarization&sort=trending). -### API specification - -#### Request - -| Payload | | | -| :--- | :--- | :--- | -| **inputs*** | _string_ | The input text to summarize. | -| **parameters** | _object_ | Additional inference parameters for summarization. | -| **        clean_up_tokenization_spaces** | _boolean_ | Whether to clean up the potential extra spaces in the text output. | -| **        truncation** | _enum_ | Possible values: do_not_truncate, longest_first, only_first, only_second. | -| **        generate_parameters** | _object_ | Additional parametrization of the text generation algorithm. | - - -Some options can be configured by passing headers to the Inference API. Here are the available headers: - -| Headers | | | -| :--- | :--- | :--- | -| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | -| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | -| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | - -For more information about Inference API headers, check out the parameters [guide](../parameters). - -#### Response - -| Body | | -| :--- | :--- | :--- | -| **summary_text** | _string_ | The summarized text. | - - ### Using the API @@ -122,3 +92,33 @@ To use the JavaScript client, see `huggingface.js`'s [package reference](https:/ + +### API specification + +#### Request + +| Payload | | | +| :--- | :--- | :--- | +| **inputs*** | _string_ | The input text to summarize. | +| **parameters** | _object_ | Additional inference parameters for summarization. 
| +| **        clean_up_tokenization_spaces** | _boolean_ | Whether to clean up the potential extra spaces in the text output. | +| **        truncation** | _enum_ | Possible values: do_not_truncate, longest_first, only_first, only_second. | +| **        generate_parameters** | _object_ | Additional parametrization of the text generation algorithm. | + + +Some options can be configured by passing headers to the Inference API. Here are the available headers: + +| Headers | | | +| :--- | :--- | :--- | +| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | + +For more information about Inference API headers, check out the parameters [guide](../parameters). + +#### Response + +| Body | | +| :--- | :--- | :--- | +| **summary_text** | _string_ | The summarized text. | + diff --git a/docs/api-inference/tasks/table-question-answering.md b/docs/api-inference/tasks/table-question-answering.md index d1d115f3b..3eb659892 100644 --- a/docs/api-inference/tasks/table-question-answering.md +++ b/docs/api-inference/tasks/table-question-answering.md @@ -27,39 +27,6 @@ For more details about the `table-question-answering` task, check out its [dedic This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=table-question-answering&sort=trending). -### API specification - -#### Request - -| Payload | | | -| :--- | :--- | :--- | -| **inputs*** | _object_ | One (table, question) pair to answer | -| **        table*** | _object_ | The table to serve as context for the questions | -| **        question*** | _string_ | The question to be answered about the table | -| **parameters** | _object_ | Additional inference parameters for Table Question Answering | - - -Some options can be configured by passing headers to the Inference API. Here are the available headers: - -| Headers | | | -| :--- | :--- | :--- | -| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | -| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). 
However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | -| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | - -For more information about Inference API headers, check out the parameters [guide](../parameters). - -#### Response - -| Body | | -| :--- | :--- | :--- | -| **(array)** | _object[]_ | Output is an array of objects. | -| **        answer** | _string_ | The answer of the question given the table. If there is an aggregator, the answer will be preceded by `AGGREGATOR >`. | -| **        coordinates** | _array[]_ | Coordinates of the cells of the answers. | -| **        cells** | _string[]_ | List of strings made up of the answer cell values. | -| **        aggregator** | _string_ | If the model has an aggregator, this returns the aggregator. | - - ### Using the API @@ -148,3 +115,36 @@ To use the JavaScript client, see `huggingface.js`'s [package reference](https:/ + +### API specification + +#### Request + +| Payload | | | +| :--- | :--- | :--- | +| **inputs*** | _object_ | One (table, question) pair to answer | +| **        table*** | _object_ | The table to serve as context for the questions | +| **        question*** | _string_ | The question to be answered about the table | +| **parameters** | _object_ | Additional inference parameters for Table Question Answering | + + +Some options can be configured by passing headers to the Inference API. Here are the available headers: + +| Headers | | | +| :--- | :--- | :--- | +| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | + +For more information about Inference API headers, check out the parameters [guide](../parameters). + +#### Response + +| Body | | +| :--- | :--- | :--- | +| **(array)** | _object[]_ | Output is an array of objects. | +| **        answer** | _string_ | The answer of the question given the table. If there is an aggregator, the answer will be preceded by `AGGREGATOR >`. | +| **        coordinates** | _array[]_ | Coordinates of the cells of the answers. 
| +| **        cells** | _string[]_ | List of strings made up of the answer cell values. | +| **        aggregator** | _string_ | If the model has an aggregator, this returns the aggregator. | + diff --git a/docs/api-inference/tasks/text-classification.md b/docs/api-inference/tasks/text-classification.md index 7f99c5cfd..bf932c4f3 100644 --- a/docs/api-inference/tasks/text-classification.md +++ b/docs/api-inference/tasks/text-classification.md @@ -28,37 +28,6 @@ For more details about the `text-classification` task, check out its [dedicated This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=text-classification&sort=trending). -### API specification - -#### Request - -| Payload | | | -| :--- | :--- | :--- | -| **inputs*** | _string_ | The text to classify | -| **parameters** | _object_ | Additional inference parameters for Text Classification | -| **        function_to_apply** | _enum_ | Possible values: sigmoid, softmax, none. | -| **        top_k** | _integer_ | When specified, limits the output to the top K most probable classes. | - - -Some options can be configured by passing headers to the Inference API. Here are the available headers: - -| Headers | | | -| :--- | :--- | :--- | -| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | -| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | -| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | - -For more information about Inference API headers, check out the parameters [guide](../parameters). - -#### Response - -| Body | | -| :--- | :--- | :--- | -| **(array)** | _object[]_ | Output is an array of objects. | -| **        label** | _string_ | The predicted class label. | -| **        score** | _number_ | The corresponding probability. | - - ### Using the API @@ -123,3 +92,34 @@ To use the JavaScript client, see `huggingface.js`'s [package reference](https:/ + +### API specification + +#### Request + +| Payload | | | +| :--- | :--- | :--- | +| **inputs*** | _string_ | The text to classify | +| **parameters** | _object_ | Additional inference parameters for Text Classification | +| **        function_to_apply** | _enum_ | Possible values: sigmoid, softmax, none. | +| **        top_k** | _integer_ | When specified, limits the output to the top K most probable classes. | + + +Some options can be configured by passing headers to the Inference API. 
Here are the available headers: + +| Headers | | | +| :--- | :--- | :--- | +| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | + +For more information about Inference API headers, check out the parameters [guide](../parameters). + +#### Response + +| Body | | +| :--- | :--- | :--- | +| **(array)** | _object[]_ | Output is an array of objects. | +| **        label** | _string_ | The predicted class label. | +| **        score** | _number_ | The corresponding probability. | + diff --git a/docs/api-inference/tasks/text-generation.md b/docs/api-inference/tasks/text-generation.md index eca329d91..22ee84e1a 100644 --- a/docs/api-inference/tasks/text-generation.md +++ b/docs/api-inference/tasks/text-generation.md @@ -35,6 +35,71 @@ For more details about the `text-generation` task, check out its [dedicated page This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=text-generation&sort=trending). +### Using the API + + + + + +```bash +curl https://api-inference.huggingface.co/models/google/gemma-2-2b-it \ + -X POST \ + -d '{"inputs": "Can you please let us know more details about your "}' \ + -H 'Content-Type: application/json' \ + -H "Authorization: Bearer hf_***" + +``` + + + +```py +import requests + +API_URL = "https://api-inference.huggingface.co/models/google/gemma-2-2b-it" +headers = {"Authorization": "Bearer hf_***"} + +def query(payload): + response = requests.post(API_URL, headers=headers, json=payload) + return response.json() + +output = query({ + "inputs": "Can you please let us know more details about your ", +}) +``` + +To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation). 
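If you prefer the Python client mentioned above over raw HTTP calls, a minimal sketch with `huggingface_hub`'s `InferenceClient` could look like the following. The prompt and model match the examples above; the `max_new_tokens` value is only an illustrative assumption, not a documented default.

```py
from huggingface_hub import InferenceClient

client = InferenceClient(token="hf_***")  # personal access token (placeholder)

# Equivalent to the raw "inputs" payload used in the curl and requests examples above.
output = client.text_generation(
    "Can you please let us know more details about your ",
    model="google/gemma-2-2b-it",
    max_new_tokens=50,  # illustrative value
)
print(output)
```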
+ + + +```js +async function query(data) { + const response = await fetch( + "https://api-inference.huggingface.co/models/google/gemma-2-2b-it", + { + headers: { + Authorization: "Bearer hf_***" + "Content-Type": "application/json", + }, + method: "POST", + body: JSON.stringify(data), + } + ); + const result = await response.json(); + return result; +} + +query({"inputs": "Can you please let us know more details about your "}).then((response) => { + console.log(JSON.stringify(response)); +}); +``` + +To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#textgeneration). + + + + + + ### API specification #### Request @@ -149,68 +214,3 @@ For more information about streaming, check out [this guide](https://huggingface | **        special** | _boolean_ | | | **        text** | _string_ | | - -### Using the API - - - - - -```bash -curl https://api-inference.huggingface.co/models/google/gemma-2-2b-it \ - -X POST \ - -d '{"inputs": "Can you please let us know more details about your "}' \ - -H 'Content-Type: application/json' \ - -H "Authorization: Bearer hf_***" - -``` - - - -```py -import requests - -API_URL = "https://api-inference.huggingface.co/models/google/gemma-2-2b-it" -headers = {"Authorization": "Bearer hf_***"} - -def query(payload): - response = requests.post(API_URL, headers=headers, json=payload) - return response.json() - -output = query({ - "inputs": "Can you please let us know more details about your ", -}) -``` - -To use the Python client, see `huggingface_hub`'s [package reference](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation). - - - -```js -async function query(data) { - const response = await fetch( - "https://api-inference.huggingface.co/models/google/gemma-2-2b-it", - { - headers: { - Authorization: "Bearer hf_***" - "Content-Type": "application/json", - }, - method: "POST", - body: JSON.stringify(data), - } - ); - const result = await response.json(); - return result; -} - -query({"inputs": "Can you please let us know more details about your "}).then((response) => { - console.log(JSON.stringify(response)); -}); -``` - -To use the JavaScript client, see `huggingface.js`'s [package reference](https://huggingface.co/docs/huggingface.js/inference/classes/HfInference#textgeneration). - - - - - diff --git a/docs/api-inference/tasks/text-to-image.md b/docs/api-inference/tasks/text-to-image.md index 0ac92293c..77d525d41 100644 --- a/docs/api-inference/tasks/text-to-image.md +++ b/docs/api-inference/tasks/text-to-image.md @@ -26,45 +26,11 @@ For more details about the `text-to-image` task, check out its [dedicated page]( - [black-forest-labs/FLUX.1-dev](https://huggingface.co/black-forest-labs/FLUX.1-dev): One of the most powerful image generation models that can generate realistic outputs. - [latent-consistency/lcm-lora-sdxl](https://huggingface.co/latent-consistency/lcm-lora-sdxl): A powerful yet fast image generation model. +- [Kwai-Kolors/Kolors](https://huggingface.co/Kwai-Kolors/Kolors): Text-to-image model for photorealistic generation. - [stabilityai/stable-diffusion-3-medium-diffusers](https://huggingface.co/stabilityai/stable-diffusion-3-medium-diffusers): A powerful text-to-image model. This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=text-to-image&sort=trending). 
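As a quick illustration of querying one of the text-to-image models listed above, the sketch below sends a prompt and saves the returned image bytes. The prompt and output filename are arbitrary placeholders; the raw-bytes response format is described in the API specification further down the page.

```py
import requests

API_URL = "https://api-inference.huggingface.co/models/black-forest-labs/FLUX.1-dev"  # one of the models listed above
headers = {"Authorization": "Bearer hf_***"}  # placeholder token

response = requests.post(
    API_URL,
    headers=headers,
    json={"inputs": "Astronaut riding a horse"},
)

# The response payload is the generated image as raw bytes.
# The actual format may vary by model; check the Content-Type response header.
with open("astronaut.png", "wb") as f:
    f.write(response.content)
```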
-### API specification - -#### Request - -| Payload | | | -| :--- | :--- | :--- | -| **inputs*** | _string_ | The input text data (sometimes called "prompt") | -| **parameters** | _object_ | Additional inference parameters for Text To Image | -| **        guidance_scale** | _number_ | A higher guidance scale value encourages the model to generate images closely linked to the text prompt, but values too high may cause saturation and other artifacts. | -| **        negative_prompt** | _string[]_ | One or several prompt to guide what NOT to include in image generation. | -| **        num_inference_steps** | _integer_ | The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference. | -| **        target_size** | _object_ | The size in pixel of the output image | -| **                width*** | _integer_ | | -| **                height*** | _integer_ | | -| **        scheduler** | _string_ | Override the scheduler with a compatible one. | -| **        seed** | _integer_ | Seed for the random number generator. | - - -Some options can be configured by passing headers to the Inference API. Here are the available headers: - -| Headers | | | -| :--- | :--- | :--- | -| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | -| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | -| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | - -For more information about Inference API headers, check out the parameters [guide](../parameters). - -#### Response - -| Body | | -| :--- | :--- | :--- | -| **image** | _unknown_ | The generated image returned as raw bytes in the payload. | - - ### Using the API @@ -131,3 +97,38 @@ To use the JavaScript client, see `huggingface.js`'s [package reference](https:/ + +### API specification + +#### Request + +| Payload | | | +| :--- | :--- | :--- | +| **inputs*** | _string_ | The input text data (sometimes called "prompt") | +| **parameters** | _object_ | Additional inference parameters for Text To Image | +| **        guidance_scale** | _number_ | A higher guidance scale value encourages the model to generate images closely linked to the text prompt, but values too high may cause saturation and other artifacts. | +| **        negative_prompt** | _string[]_ | One or several prompt to guide what NOT to include in image generation. | +| **        num_inference_steps** | _integer_ | The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference. 
| +| **        target_size** | _object_ | The size in pixel of the output image | +| **                width*** | _integer_ | | +| **                height*** | _integer_ | | +| **        scheduler** | _string_ | Override the scheduler with a compatible one. | +| **        seed** | _integer_ | Seed for the random number generator. | + + +Some options can be configured by passing headers to the Inference API. Here are the available headers: + +| Headers | | | +| :--- | :--- | :--- | +| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | + +For more information about Inference API headers, check out the parameters [guide](../parameters). + +#### Response + +| Body | | +| :--- | :--- | :--- | +| **image** | _unknown_ | The generated image returned as raw bytes in the payload. | + diff --git a/docs/api-inference/tasks/token-classification.md b/docs/api-inference/tasks/token-classification.md index 888ef8093..9da5edcb2 100644 --- a/docs/api-inference/tasks/token-classification.md +++ b/docs/api-inference/tasks/token-classification.md @@ -29,54 +29,6 @@ For more details about the `token-classification` task, check out its [dedicated This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=token-classification&sort=trending). -### API specification - -#### Request - -| Payload | | | -| :--- | :--- | :--- | -| **inputs*** | _string_ | The input text data | -| **parameters** | _object_ | Additional inference parameters for Token Classification | -| **        ignore_labels** | _string[]_ | A list of labels to ignore | -| **        stride** | _integer_ | The number of overlapping tokens between chunks when splitting the input text. | -| **        aggregation_strategy** | _string_ | One of the following: | -| **                 (#1)** | _'none'_ | Do not aggregate tokens | -| **                 (#2)** | _'simple'_ | Group consecutive tokens with the same label in a single entity. | -| **                 (#3)** | _'first'_ | Similar to "simple", also preserves word integrity (use the label predicted for the first token in a word). | -| **                 (#4)** | _'average'_ | Similar to "simple", also preserves word integrity (uses the label with the highest score, averaged across the word's tokens). | -| **                 (#5)** | _'max'_ | Similar to "simple", also preserves word integrity (uses the label with the highest score across the word's tokens). 
| - - -Some options can be configured by passing headers to the Inference API. Here are the available headers: - -| Headers | | | -| :--- | :--- | :--- | -| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | -| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | -| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | - -For more information about Inference API headers, check out the parameters [guide](../parameters). - -#### Response - -Output type depends on the `stream` input parameter. -If `stream` is `false` (default), the response will be a JSON object with the following fields: - -| Body | | -| :--- | :--- | :--- | -| **(array)** | _object[]_ | Output is an array of objects. | -| **        entity_group** | _string_ | The predicted label for that group of tokens | -| **        score** | _number_ | The associated score / probability | -| **        word** | _string_ | The corresponding text | -| **        start** | _integer_ | The character position in the input where this group begins. | -| **        end** | _integer_ | The character position in the input where this group ends. | - - -If `stream` is `true`, generated tokens are returned as a stream, using Server-Sent Events (SSE). -For more information about streaming, check out [this guide](https://huggingface.co/docs/token-classification-inference/conceptual/streaming). - - - ### Using the API @@ -141,3 +93,52 @@ To use the JavaScript client, see `huggingface.js`'s [package reference](https:/ + +### API specification + +#### Request + +| Payload | | | +| :--- | :--- | :--- | +| **inputs*** | _string_ | The input text data | +| **parameters** | _object_ | Additional inference parameters for Token Classification | +| **        ignore_labels** | _string[]_ | A list of labels to ignore | +| **        stride** | _integer_ | The number of overlapping tokens between chunks when splitting the input text. | +| **        aggregation_strategy** | _string_ | One of the following: | +| **                 (#1)** | _'none'_ | Do not aggregate tokens | +| **                 (#2)** | _'simple'_ | Group consecutive tokens with the same label in a single entity. | +| **                 (#3)** | _'first'_ | Similar to "simple", also preserves word integrity (use the label predicted for the first token in a word). | +| **                 (#4)** | _'average'_ | Similar to "simple", also preserves word integrity (uses the label with the highest score, averaged across the word's tokens). 
| +| **                 (#5)** | _'max'_ | Similar to "simple", also preserves word integrity (uses the label with the highest score across the word's tokens). | + + +Some options can be configured by passing headers to the Inference API. Here are the available headers: + +| Headers | | | +| :--- | :--- | :--- | +| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | + +For more information about Inference API headers, check out the parameters [guide](../parameters). + +#### Response + +Output type depends on the `stream` input parameter. +If `stream` is `false` (default), the response will be a JSON object with the following fields: + +| Body | | +| :--- | :--- | :--- | +| **(array)** | _object[]_ | Output is an array of objects. | +| **        entity_group** | _string_ | The predicted label for that group of tokens | +| **        score** | _number_ | The associated score / probability | +| **        word** | _string_ | The corresponding text | +| **        start** | _integer_ | The character position in the input where this group begins. | +| **        end** | _integer_ | The character position in the input where this group ends. | + + +If `stream` is `true`, generated tokens are returned as a stream, using Server-Sent Events (SSE). +For more information about streaming, check out [this guide](https://huggingface.co/docs/token-classification-inference/conceptual/streaming). + + + diff --git a/docs/api-inference/tasks/translation.md b/docs/api-inference/tasks/translation.md index c924a8bd0..1b6284abe 100644 --- a/docs/api-inference/tasks/translation.md +++ b/docs/api-inference/tasks/translation.md @@ -29,38 +29,6 @@ For more details about the `translation` task, check out its [dedicated page](ht This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=translation&sort=trending). -### API specification - -#### Request - -| Payload | | | -| :--- | :--- | :--- | -| **inputs*** | _string_ | The text to translate. | -| **parameters** | _object_ | Additional inference parameters for Translation | -| **        src_lang** | _string_ | The source language of the text. Required for models that can translate from multiple languages. | -| **        tgt_lang** | _string_ | Target language to translate to. Required for models that can translate to multiple languages. 
| -| **        clean_up_tokenization_spaces** | _boolean_ | Whether to clean up the potential extra spaces in the text output. | -| **        truncation** | _enum_ | Possible values: do_not_truncate, longest_first, only_first, only_second. | -| **        generate_parameters** | _object_ | Additional parametrization of the text generation algorithm. | - - -Some options can be configured by passing headers to the Inference API. Here are the available headers: - -| Headers | | | -| :--- | :--- | :--- | -| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | -| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | -| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | - -For more information about Inference API headers, check out the parameters [guide](../parameters). - -#### Response - -| Body | | -| :--- | :--- | :--- | -| **translation_text** | _string_ | The translated text. | - - ### Using the API @@ -125,3 +93,35 @@ To use the JavaScript client, see `huggingface.js`'s [package reference](https:/ + +### API specification + +#### Request + +| Payload | | | +| :--- | :--- | :--- | +| **inputs*** | _string_ | The text to translate. | +| **parameters** | _object_ | Additional inference parameters for Translation | +| **        src_lang** | _string_ | The source language of the text. Required for models that can translate from multiple languages. | +| **        tgt_lang** | _string_ | Target language to translate to. Required for models that can translate to multiple languages. | +| **        clean_up_tokenization_spaces** | _boolean_ | Whether to clean up the potential extra spaces in the text output. | +| **        truncation** | _enum_ | Possible values: do_not_truncate, longest_first, only_first, only_second. | +| **        generate_parameters** | _object_ | Additional parametrization of the text generation algorithm. | + + +Some options can be configured by passing headers to the Inference API. Here are the available headers: + +| Headers | | | +| :--- | :--- | :--- | +| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). 
However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | + +For more information about Inference API headers, check out the parameters [guide](../parameters). + +#### Response + +| Body | | +| :--- | :--- | :--- | +| **translation_text** | _string_ | The translated text. | + diff --git a/docs/api-inference/tasks/zero-shot-classification.md b/docs/api-inference/tasks/zero-shot-classification.md index ab3404d62..89b505be1 100644 --- a/docs/api-inference/tasks/zero-shot-classification.md +++ b/docs/api-inference/tasks/zero-shot-classification.md @@ -28,39 +28,6 @@ For more details about the `zero-shot-classification` task, check out its [dedic This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=zero-shot-classification&sort=trending). -### API specification - -#### Request - -| Payload | | | -| :--- | :--- | :--- | -| **inputs*** | _object_ | The input text data, with candidate labels | -| **        text*** | _string_ | The text to classify | -| **        candidateLabels*** | _string[]_ | The set of possible class labels to classify the text into. | -| **parameters** | _object_ | Additional inference parameters for Zero Shot Classification | -| **        hypothesis_template** | _string_ | The sentence used in conjunction with candidateLabels to attempt the text classification by replacing the placeholder with the candidate labels. | -| **        multi_label** | _boolean_ | Whether multiple candidate labels can be true. If false, the scores are normalized such that the sum of the label likelihoods for each sequence is 1. If true, the labels are considered independent and probabilities are normalized for each candidate. | - - -Some options can be configured by passing headers to the Inference API. Here are the available headers: - -| Headers | | | -| :--- | :--- | :--- | -| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | -| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | -| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. 
Read more about model availability [here](../overview#eligibility]). | - -For more information about Inference API headers, check out the parameters [guide](../parameters). - -#### Response - -| Body | | -| :--- | :--- | :--- | -| **(array)** | _object[]_ | Output is an array of objects. | -| **        label** | _string_ | The predicted class label. | -| **        score** | _number_ | The corresponding probability. | - - ### Using the API @@ -126,3 +93,36 @@ To use the JavaScript client, see `huggingface.js`'s [package reference](https:/ + +### API specification + +#### Request + +| Payload | | | +| :--- | :--- | :--- | +| **inputs*** | _object_ | The input text data, with candidate labels | +| **        text*** | _string_ | The text to classify | +| **        candidateLabels*** | _string[]_ | The set of possible class labels to classify the text into. | +| **parameters** | _object_ | Additional inference parameters for Zero Shot Classification | +| **        hypothesis_template** | _string_ | The sentence used in conjunction with candidateLabels to attempt the text classification by replacing the placeholder with the candidate labels. | +| **        multi_label** | _boolean_ | Whether multiple candidate labels can be true. If false, the scores are normalized such that the sum of the label likelihoods for each sequence is 1. If true, the labels are considered independent and probabilities are normalized for each candidate. | + + +Some options can be configured by passing headers to the Inference API. Here are the available headers: + +| Headers | | | +| :--- | :--- | :--- | +| **authorization** | _string_ | Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with Inference API permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens). | +| **x-use-cache** | _boolean, default to `true`_ | There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as they are deterministic (meaning the outputs will be the same anyway). However, if you use a nondeterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query. Read more about caching [here](../parameters#caching]). | +| **x-wait-for-model** | _boolean, default to `false`_ | If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error, as it will limit hanging in your application to known places. Read more about model availability [here](../overview#eligibility]). | + +For more information about Inference API headers, check out the parameters [guide](../parameters). + +#### Response + +| Body | | +| :--- | :--- | :--- | +| **(array)** | _object[]_ | Output is an array of objects. | +| **        label** | _string_ | The predicted class label. | +| **        score** | _number_ | The corresponding probability. | + diff --git a/scripts/api-inference/templates/task/audio-classification.handlebars b/scripts/api-inference/templates/task/audio-classification.handlebars index 5f866f728..9567f39ca 100644 --- a/scripts/api-inference/templates/task/audio-classification.handlebars +++ b/scripts/api-inference/templates/task/audio-classification.handlebars @@ -12,6 +12,10 @@ Audio classification is the task of assigning a label or class to a given audio. 
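For readers who want to see what such a call looks like in practice, here is a hedged sketch: the model ID is a hypothetical placeholder, the audio file is assumed to exist locally, and the response shape shown in the comment is an assumption based on the label/score format used by the other classification tasks on this page.

```py
import requests

# Hypothetical placeholder: substitute any warm audio-classification model.
API_URL = "https://api-inference.huggingface.co/models/<audio-classification-model>"
headers = {"Authorization": "Bearer hf_***"}

# The request body is the raw bytes of the audio file.
with open("sample.flac", "rb") as f:
    data = f.read()

response = requests.post(API_URL, headers=headers, data=data)
print(response.json())  # assumed shape: [{"label": "...", "score": 0.9}, ...]
```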
{{{tips.listModelsLink.audio-classification}}} +### Using the API + +{{{snippets.audio-classification}}} + ### API specification #### Request @@ -23,7 +27,3 @@ Audio classification is the task of assigning a label or class to a given audio. #### Response {{{specs.audio-classification.output}}} - -### Using the API - -{{{snippets.audio-classification}}} diff --git a/scripts/api-inference/templates/task/automatic-speech-recognition.handlebars b/scripts/api-inference/templates/task/automatic-speech-recognition.handlebars index 008c65030..8e200fd2a 100644 --- a/scripts/api-inference/templates/task/automatic-speech-recognition.handlebars +++ b/scripts/api-inference/templates/task/automatic-speech-recognition.handlebars @@ -11,6 +11,10 @@ Automatic Speech Recognition (ASR), also known as Speech to Text (STT), is the t {{{tips.listModelsLink.automatic-speech-recognition}}} +### Using the API + +{{{snippets.automatic-speech-recognition}}} + ### API specification #### Request @@ -22,7 +26,3 @@ Automatic Speech Recognition (ASR), also known as Speech to Text (STT), is the t #### Response {{{specs.automatic-speech-recognition.output}}} - -### Using the API - -{{{snippets.automatic-speech-recognition}}} diff --git a/scripts/api-inference/templates/task/chat-completion.handlebars b/scripts/api-inference/templates/task/chat-completion.handlebars index f1274f5c5..fd2d189fa 100644 --- a/scripts/api-inference/templates/task/chat-completion.handlebars +++ b/scripts/api-inference/templates/task/chat-completion.handlebars @@ -13,6 +13,10 @@ This is a subtask of [`text-generation`](./text_generation) designed to generate {{{tips.listModelsLink.chat-completion}}} +### Using the API + +{{{snippets.chat-completion}}} + ### API specification #### Request @@ -33,6 +37,3 @@ For more information about streaming, check out [this guide](https://huggingface {{{specs.chat-completion.stream_output}}} -### Using the API - -{{{snippets.chat-completion}}} diff --git a/scripts/api-inference/templates/task/feature-extraction.handlebars b/scripts/api-inference/templates/task/feature-extraction.handlebars index 7e6f1b4be..adc28262d 100644 --- a/scripts/api-inference/templates/task/feature-extraction.handlebars +++ b/scripts/api-inference/templates/task/feature-extraction.handlebars @@ -13,6 +13,10 @@ Extracting features is useful for subtasks like sentence similarity, reranking a {{{tips.listModelsLink.feature-extraction}}} +### Using the API + +{{{snippets.feature-extraction}}} + ### API specification #### Request @@ -25,6 +29,3 @@ Extracting features is useful for subtasks like sentence similarity, reranking a {{{specs.feature-extraction.output}}} -### Using the API - -{{{snippets.feature-extraction}}} diff --git a/scripts/api-inference/templates/task/fill-mask.handlebars b/scripts/api-inference/templates/task/fill-mask.handlebars index 663d2ab9f..c9c131e22 100644 --- a/scripts/api-inference/templates/task/fill-mask.handlebars +++ b/scripts/api-inference/templates/task/fill-mask.handlebars @@ -12,6 +12,10 @@ Mask filling is the task of predicting the right word (token to be precise) in t {{{tips.listModelsLink.fill-mask}}} +### Using the API + +{{{snippets.fill-mask}}} + ### API specification #### Request @@ -23,7 +27,3 @@ Mask filling is the task of predicting the right word (token to be precise) in t #### Response {{{specs.fill-mask.output}}} - -### Using the API - -{{{snippets.fill-mask}}} diff --git a/scripts/api-inference/templates/task/image-classification.handlebars 
b/scripts/api-inference/templates/task/image-classification.handlebars index abfa0a147..88461e7be 100644 --- a/scripts/api-inference/templates/task/image-classification.handlebars +++ b/scripts/api-inference/templates/task/image-classification.handlebars @@ -12,6 +12,10 @@ Image classification is the task of assigning a label or class to an entire imag {{{tips.listModelsLink.image-classification}}} +### Using the API + +{{{snippets.image-classification}}} + ### API specification #### Request @@ -24,6 +28,3 @@ Image classification is the task of assigning a label or class to an entire imag {{{specs.image-classification.output}}} -### Using the API - -{{{snippets.image-classification}}} diff --git a/scripts/api-inference/templates/task/image-segmentation.handlebars b/scripts/api-inference/templates/task/image-segmentation.handlebars index 8f81ad5d2..e4cec3a01 100644 --- a/scripts/api-inference/templates/task/image-segmentation.handlebars +++ b/scripts/api-inference/templates/task/image-segmentation.handlebars @@ -12,6 +12,10 @@ Image Segmentation divides an image into segments where each pixel in the image {{{tips.listModelsLink.image-segmentation}}} +### Using the API + +{{{snippets.image-segmentation}}} + ### API specification #### Request @@ -24,6 +28,3 @@ Image Segmentation divides an image into segments where each pixel in the image {{{specs.image-segmentation.output}}} -### Using the API - -{{{snippets.image-segmentation}}} diff --git a/scripts/api-inference/templates/task/image-to-image.handlebars b/scripts/api-inference/templates/task/image-to-image.handlebars index 258dec814..93d5f6f00 100644 --- a/scripts/api-inference/templates/task/image-to-image.handlebars +++ b/scripts/api-inference/templates/task/image-to-image.handlebars @@ -19,6 +19,10 @@ Use cases heavily depend on the model and the dataset it was trained on, but som {{{tips.listModelsLink.image-to-image}}} +### Using the API + +{{{snippets.image-to-image}}} + ### API specification #### Request @@ -31,6 +35,3 @@ Use cases heavily depend on the model and the dataset it was trained on, but som {{{specs.image-to-image.output}}} -### Using the API - -{{{snippets.image-to-image}}} diff --git a/scripts/api-inference/templates/task/object-detection.handlebars b/scripts/api-inference/templates/task/object-detection.handlebars index 5e90a3092..f3b4e085b 100644 --- a/scripts/api-inference/templates/task/object-detection.handlebars +++ b/scripts/api-inference/templates/task/object-detection.handlebars @@ -12,6 +12,10 @@ Object Detection models allow users to identify objects of certain defined class {{{tips.listModelsLink.object-detection}}} +### Using the API + +{{{snippets.object-detection}}} + ### API specification #### Request @@ -23,7 +27,3 @@ Object Detection models allow users to identify objects of certain defined class #### Response {{{specs.object-detection.output}}} - -### Using the API - -{{{snippets.object-detection}}} diff --git a/scripts/api-inference/templates/task/question-answering.handlebars b/scripts/api-inference/templates/task/question-answering.handlebars index 101d00fcc..3ca4e93d3 100644 --- a/scripts/api-inference/templates/task/question-answering.handlebars +++ b/scripts/api-inference/templates/task/question-answering.handlebars @@ -12,6 +12,10 @@ Question Answering models can retrieve the answer to a question from a given tex {{{tips.listModelsLink.question-answering}}} +### Using the API + +{{{snippets.question-answering}}} + ### API specification #### Request @@ -23,7 +27,3 @@ Question Answering models can 
retrieve the answer to a question from a given tex #### Response {{{specs.question-answering.output}}} - -### Using the API - -{{{snippets.question-answering}}} diff --git a/scripts/api-inference/templates/task/summarization.handlebars b/scripts/api-inference/templates/task/summarization.handlebars index 890487215..1df382189 100644 --- a/scripts/api-inference/templates/task/summarization.handlebars +++ b/scripts/api-inference/templates/task/summarization.handlebars @@ -12,6 +12,10 @@ Summarization is the task of producing a shorter version of a document while pre {{{tips.listModelsLink.summarization}}} +### Using the API + +{{{snippets.summarization}}} + ### API specification #### Request @@ -23,7 +27,3 @@ Summarization is the task of producing a shorter version of a document while pre #### Response {{{specs.summarization.output}}} - -### Using the API - -{{{snippets.summarization}}} diff --git a/scripts/api-inference/templates/task/table-question-answering.handlebars b/scripts/api-inference/templates/task/table-question-answering.handlebars index 4ae8b53fc..087ff53bf 100644 --- a/scripts/api-inference/templates/task/table-question-answering.handlebars +++ b/scripts/api-inference/templates/task/table-question-answering.handlebars @@ -12,6 +12,10 @@ Table Question Answering (Table QA) is the answering a question about an informa {{{tips.listModelsLink.table-question-answering}}} +### Using the API + +{{{snippets.table-question-answering}}} + ### API specification #### Request @@ -23,7 +27,3 @@ Table Question Answering (Table QA) is the answering a question about an informa #### Response {{{specs.table-question-answering.output}}} - -### Using the API - -{{{snippets.table-question-answering}}} diff --git a/scripts/api-inference/templates/task/text-classification.handlebars b/scripts/api-inference/templates/task/text-classification.handlebars index 99c3cabe8..123d1f92a 100644 --- a/scripts/api-inference/templates/task/text-classification.handlebars +++ b/scripts/api-inference/templates/task/text-classification.handlebars @@ -12,6 +12,10 @@ Text Classification is the task of assigning a label or class to a given text. S {{{tips.listModelsLink.text-classification}}} +### Using the API + +{{{snippets.text-classification}}} + ### API specification #### Request @@ -23,7 +27,3 @@ Text Classification is the task of assigning a label or class to a given text. S #### Response {{{specs.text-classification.output}}} - -### Using the API - -{{{snippets.text-classification}}} diff --git a/scripts/api-inference/templates/task/text-generation.handlebars b/scripts/api-inference/templates/task/text-generation.handlebars index 85bbba97a..9720cc175 100644 --- a/scripts/api-inference/templates/task/text-generation.handlebars +++ b/scripts/api-inference/templates/task/text-generation.handlebars @@ -14,6 +14,10 @@ If you are interested in a Chat Completion task, which generates a response base {{{tips.listModelsLink.text-generation}}} +### Using the API + +{{{snippets.text-generation}}} + ### API specification #### Request @@ -33,7 +37,3 @@ If `stream` is `true`, generated tokens are returned as a stream, using Server-S For more information about streaming, check out [this guide](https://huggingface.co/docs/text-generation-inference/conceptual/streaming). 
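To make the streaming behaviour described above more tangible, here is a minimal sketch that requests Server-Sent Events and prints tokens as they arrive. The model ID, token, and `max_new_tokens` value are placeholders, and the exact event fields should be checked against the stream output specification below.

```py
import json
import requests

API_URL = "https://api-inference.huggingface.co/models/google/gemma-2-2b-it"  # placeholder model
headers = {"Authorization": "Bearer hf_***"}  # placeholder token

payload = {
    "inputs": "Can you please let us know more details about your ",
    "parameters": {"max_new_tokens": 50},  # illustrative value
    "stream": True,
}

# With stream set to true, the endpoint answers with Server-Sent Events,
# one "data: {...}" line per generated token.
with requests.post(API_URL, headers=headers, json=payload, stream=True) as response:
    for line in response.iter_lines():
        if line.startswith(b"data:"):
            chunk = line[len(b"data:"):].strip()
            if chunk == b"[DONE]":  # some deployments close the stream with a sentinel
                break
            event = json.loads(chunk)
            print(event["token"]["text"], end="", flush=True)
```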
{{{specs.text-generation.stream_output}}} - -### Using the API - -{{{snippets.text-generation}}} diff --git a/scripts/api-inference/templates/task/text-to-image.handlebars b/scripts/api-inference/templates/task/text-to-image.handlebars index 6e6ffd0c6..ac65056e6 100644 --- a/scripts/api-inference/templates/task/text-to-image.handlebars +++ b/scripts/api-inference/templates/task/text-to-image.handlebars @@ -12,6 +12,10 @@ Generate an image based on a given text prompt. {{{tips.listModelsLink.text-to-image}}} +### Using the API + +{{{snippets.text-to-image}}} + ### API specification #### Request @@ -23,7 +27,3 @@ Generate an image based on a given text prompt. #### Response {{{specs.text-to-image.output}}} - -### Using the API - -{{{snippets.text-to-image}}} diff --git a/scripts/api-inference/templates/task/token-classification.handlebars b/scripts/api-inference/templates/task/token-classification.handlebars index 44f682145..4a627783f 100644 --- a/scripts/api-inference/templates/task/token-classification.handlebars +++ b/scripts/api-inference/templates/task/token-classification.handlebars @@ -12,6 +12,10 @@ Token classification is a task in which a label is assigned to some tokens in a {{{tips.listModelsLink.token-classification}}} +### Using the API + +{{{snippets.token-classification}}} + ### API specification #### Request @@ -32,6 +36,3 @@ For more information about streaming, check out [this guide](https://huggingface {{{specs.token-classification.stream_output}}} -### Using the API - -{{{snippets.token-classification}}} diff --git a/scripts/api-inference/templates/task/translation.handlebars b/scripts/api-inference/templates/task/translation.handlebars index 02892102b..7cbede05d 100644 --- a/scripts/api-inference/templates/task/translation.handlebars +++ b/scripts/api-inference/templates/task/translation.handlebars @@ -12,6 +12,10 @@ Translation is the task of converting text from one language to another. {{{tips.listModelsLink.translation}}} +### Using the API + +{{{snippets.translation}}} + ### API specification #### Request @@ -23,7 +27,3 @@ Translation is the task of converting text from one language to another. 
#### Response {{{specs.translation.output}}} - -### Using the API - -{{{snippets.translation}}} diff --git a/scripts/api-inference/templates/task/zero-shot-classification.handlebars b/scripts/api-inference/templates/task/zero-shot-classification.handlebars index fd631a656..e0e830e93 100644 --- a/scripts/api-inference/templates/task/zero-shot-classification.handlebars +++ b/scripts/api-inference/templates/task/zero-shot-classification.handlebars @@ -12,6 +12,10 @@ Zero-shot text classification is super useful to try out classification with zer {{{tips.listModelsLink.zero-shot-classification}}} +### Using the API + +{{{snippets.zero-shot-classification}}} + ### API specification #### Request @@ -23,7 +27,3 @@ Zero-shot text classification is super useful to try out classification with zer #### Response {{{specs.zero-shot-classification.output}}} - -### Using the API - -{{{snippets.zero-shot-classification}}} From e9eff75dd74261dbc41f47156727e604f102dcf5 Mon Sep 17 00:00:00 2001 From: Omar Sanseviero Date: Thu, 12 Sep 2024 16:21:01 +0200 Subject: [PATCH 38/38] Fixes new docs (#1413) * Misc changes * Wrap up * Apply suggestions from code review * generate * Add todos to avoid forgetting about them --------- Co-authored-by: Lucain Co-authored-by: Wauplin --- docs/TODOs.md | 11 ++++ docs/api-inference/_toctree.yml | 2 + docs/api-inference/getting-started.md | 52 ++++++------------- docs/api-inference/index.md | 7 --- docs/api-inference/parameters.md | 11 +--- docs/api-inference/security.md | 15 ++++++ docs/api-inference/supported-models.md | 2 - .../tasks/audio-classification.md | 7 ++- .../tasks/automatic-speech-recognition.md | 9 +++- docs/api-inference/tasks/chat-completion.md | 6 +++ .../api-inference/tasks/feature-extraction.md | 6 ++- docs/api-inference/tasks/fill-mask.md | 10 ++-- .../tasks/image-classification.md | 2 +- .../api-inference/tasks/image-segmentation.md | 9 ++-- docs/api-inference/tasks/image-to-image.md | 10 ++-- docs/api-inference/tasks/object-detection.md | 2 +- .../api-inference/tasks/question-answering.md | 1 + .../tasks/text-classification.md | 12 +++-- docs/api-inference/tasks/text-to-image.md | 1 - .../tasks/token-classification.md | 2 + docs/api-inference/tasks/translation.md | 9 ++-- .../tasks/zero-shot-classification.md | 3 +- .../task/audio-classification.handlebars | 7 ++- .../automatic-speech-recognition.handlebars | 8 ++- .../templates/task/chat-completion.handlebars | 6 +++ .../task/feature-extraction.handlebars | 6 ++- .../task/image-classification.handlebars | 2 +- .../task/image-segmentation.handlebars | 2 +- .../templates/task/image-to-image.handlebars | 10 ++-- .../task/object-detection.handlebars | 2 +- 30 files changed, 133 insertions(+), 99 deletions(-) create mode 100644 docs/TODOs.md create mode 100644 docs/api-inference/security.md diff --git a/docs/TODOs.md b/docs/TODOs.md new file mode 100644 index 000000000..659ee30ac --- /dev/null +++ b/docs/TODOs.md @@ -0,0 +1,11 @@ +## For API-Inference docs: + +From https://github.com/huggingface/hub-docs/pull/1413: +* Use ` for getting started +* Add some screenshots: supported models +* Add flow chart of how API works +* Add table with all tasks +* Add missing tasks: depth estimation and zero shot image classification +* Some tasks have no warm models, should we remove them for now? E.g. 
https://huggingface.co/models?inference=warm&pipeline_tag=fill-mask&sort=trending BUT many are cold and working, so actually linking to both could make sense - internal issue https://github.com/huggingface-internal/moon-landing/issues/10966 +* See also this [google doc](https://docs.google.com/document/d/1xy5Ug4C_qGbqp4x3T3rj_VOyjQzQLlyce-L6I_hYi94/edit?usp=sharing) +* Add CI to auto-generate the docs when handlebars template are updated \ No newline at end of file diff --git a/docs/api-inference/_toctree.yml b/docs/api-inference/_toctree.yml index c3cea310e..123f62ca4 100644 --- a/docs/api-inference/_toctree.yml +++ b/docs/api-inference/_toctree.yml @@ -7,6 +7,8 @@ title: Supported Models - local: rate-limits title: Rate Limits + - local: security + title: Security title: Getting Started - sections: - local: parameters diff --git a/docs/api-inference/getting-started.md b/docs/api-inference/getting-started.md index 31a77e4b3..ea0007ba9 100644 --- a/docs/api-inference/getting-started.md +++ b/docs/api-inference/getting-started.md @@ -2,27 +2,21 @@ The Serverless Inference API allows you to easily do inference on a wide range of models and tasks. You can do requests with your favorite tools (Python, cURL, etc). We also provide a Python SDK (`huggingface_hub`) to make it even easier. -We'll do a minimal example using a [sentiment classification model](https://huggingface.co/cardiffnlp/twitter-roberta-base-sentiment-latest). Please visit task-specific parameters and further documentation in our [API Reference](./parameters.md). +We'll do a minimal example using a [sentiment classification model](https://huggingface.co/cardiffnlp/twitter-roberta-base-sentiment-latest). Please visit task-specific parameters and further documentation in our [API Reference](./parameters). ## Getting a Token -Using the Serverless Inference API requires passing a user token in the request headers. You can get a token by signing up on the Hugging Face website and then going to the [tokens page](https://huggingface.co/settings/tokens/new?globalPermissions=inference.serverless.write&tokenType=fineGrained). We recommend creating a `Fine-grained` token with the scope to `Make calls to the serverless Inference API`. +Using the Serverless Inference API requires passing a user token in the request headers. You can get a token by signing up on the Hugging Face website and then going to the [tokens page](https://huggingface.co/settings/tokens/new?globalPermissions=inference.serverless.write&tokenType=fineGrained). We recommend creating a `fine-grained` token with the scope to `Make calls to the serverless Inference API`. -TODO: add screenshot For more details about user tokens, check out [this guide](https://huggingface.co/docs/hub/en/security-tokens). ## cURL ```bash -curl 'https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3.1-8B-Instruct/v1/chat/completions' \ +curl 'https://api-inference.huggingface.co/models/cardiffnlp/twitter-roberta-base-sentiment-latest' \ -H "Authorization: Bearer hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" \ -H 'Content-Type: application/json' \ --d '{ - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", - "messages": [{"role": "user", "content": "What is the capital of France?"}], - "max_tokens": 500, - "stream": false -}' +-d '{"inputs": "Today is a great day"}' ``` ## Python @@ -32,13 +26,10 @@ You can use the `requests` library to make a request to the Inference API. 
```python import requests -API_URL = "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3.1-8B-Instruct/v1/chat/completions" +API_URL = "https://api-inference.huggingface.co/models/cardiffnlp/twitter-roberta-base-sentiment-latest" headers = {"Authorization": "Bearer hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"} payload = { - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", - "messages": [{"role": "user", "content": "What is the capital of France?"}], - "max_tokens": 500, - "stream": False + "inputs": "Today is a great day", } response = requests.post(API_URL, headers=headers, json=payload) @@ -51,16 +42,11 @@ Hugging Face also provides a [`InferenceClient`](https://huggingface.co/docs/hug from huggingface_hub import InferenceClient client = InferenceClient( - "meta-llama/Meta-Llama-3.1-8B-Instruct", + "cardiffnlp/twitter-roberta-base-sentiment-latest", token="hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", ) -for message in client.chat_completion( - messages=[{"role": "user", "content": "What is the capital of France?"}], - max_tokens=500, - stream=True, -): - print(message.choices[0].delta.content, end="") +client.text_classification("Today is a great day") ``` ## JavaScript @@ -70,7 +56,7 @@ import fetch from "node-fetch"; async function query(data) { const response = await fetch( - "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3.1-8B-Instruct/v1/chat/completions", + "https://api-inference.huggingface.co/models/cardiffnlp/twitter-roberta-base-sentiment-latest", { method: "POST", headers: { @@ -84,12 +70,7 @@ async function query(data) { return result; } -query({ - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", - "messages": [{"role": "user", "content": "What is the capital of France?"}], - "max_tokens": 500, - "stream": false -}).then((response) => { +query({inputs: "Today is a great day"}).then((response) => { console.log(JSON.stringify(response, null, 2)); }); ``` @@ -101,13 +82,12 @@ import { HfInference } from "@huggingface/inference"; const inference = new HfInference("hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"); -for await (const chunk of inference.chatCompletionStream({ - model: "meta-llama/Meta-Llama-3.1-8B-Instruct", - messages: [{ role: "user", content: "What is the capital of France?" }], - max_tokens: 500, -})) { - process.stdout.write(chunk.choices[0]?.delta?.content || ""); -} +const result = await inference.textClassification({ + model: "cardiffnlp/twitter-roberta-base-sentiment-latest", + inputs: "Today is a great day", +}); + +console.log(result); ``` ## Next Steps diff --git a/docs/api-inference/index.md b/docs/api-inference/index.md index cf73a33f3..689bdf63b 100644 --- a/docs/api-inference/index.md +++ b/docs/api-inference/index.md @@ -15,8 +15,6 @@ The Serverless Inference API offers a fast and free way to explore thousands of * **Document Embeddings:** Build search and retrieval systems with SOTA embeddings. * **Classical AI Tasks:** Ready-to-use models for text classification, image classification, speech recognition, and more. -TODO: add some flow chart image - ⚡ **Fast and Free to Get Started**: The Inference API is free with higher rate limits for PRO users. For production needs, explore [Inference Endpoints](https://ui.endpoints.huggingface.co/) for dedicated resources, autoscaling, advanced security features, and more. --- @@ -53,8 +51,3 @@ The documentation is organized into two sections: HuggingFace Expert Acceleration Program
- -## Hugging Face is trusted in production by over 10,000 companies - - - \ No newline at end of file diff --git a/docs/api-inference/parameters.md b/docs/api-inference/parameters.md index f1a1b9df4..b225cafd5 100644 --- a/docs/api-inference/parameters.md +++ b/docs/api-inference/parameters.md @@ -1,20 +1,11 @@ # Parameters -Table with -- Domain -- Task -- Whether it's supported in Inference API -- Supported libraries (not sure) -- Recommended model -- Link to model specific page - - ## Additional Options ### Caching -There is a cache layer on the inference API to speed up requests when the inputs are exactly the same. Many models, such as classifiers and embedding models, can use those results as is if they are deterministic, meaning the results will be the same. Howevr, if you use a nondeterministic model, you can disable the cache mechanism from being used, resulting in a real new query. +There is a cache layer on the inference API to speed up requests when the inputs are exactly the same. Many models, such as classifiers and embedding models, can use those results as is if they are deterministic, meaning the results will be the same. However, if you use a nondeterministic model, you can disable the cache mechanism from being used, resulting in a real new query. To do this, you can add `x-use-cache:false` to the request headers. For example diff --git a/docs/api-inference/security.md b/docs/api-inference/security.md new file mode 100644 index 000000000..428734361 --- /dev/null +++ b/docs/api-inference/security.md @@ -0,0 +1,15 @@ +# Security & Compliance + +The Inference API is not designed for heavy production requirements. For production needs, explore [Inference Endpoints](https://ui.endpoints.huggingface.co/) for dedicated resources, autoscaling, advanced security features, and more. + +## Data Security/Privacy + +Hugging Face does not store any user data for training purposes. Tokens sent to the API might be stored in a short-term (few minutes) cache mechanism to speed-up repeated requests. Logs are stored for debugging for up to 30 days. Any additional data in terms of user data or tokens are not stored. + +Serverless Inference API use TLS/SSL to encrypt the data in transit. + +## Hub Security + +The Hugging Face Hub, which Serverless Inference API is part, is SOC2 Type 2 certified. For more on Hub security: https://huggingface.co/docs/hub/security + + \ No newline at end of file diff --git a/docs/api-inference/supported-models.md b/docs/api-inference/supported-models.md index 94ad3e9f4..81c511f60 100644 --- a/docs/api-inference/supported-models.md +++ b/docs/api-inference/supported-models.md @@ -8,8 +8,6 @@ You can find: * **[Cold models](https://huggingface.co/models?inference=cold&sort=trending):** models that are not loaded but can be used. * **[Frozen models](https://huggingface.co/models?inference=frozen&sort=trending):** models that currently can't be run with the API. -TODO: add screenshot - ## What do I get with a PRO subscription? 
In addition to thousands of public models available in the Hub, PRO and Enterprise users get higher [rate limits](./rate-limits) and free access to the following models: diff --git a/docs/api-inference/tasks/audio-classification.md b/docs/api-inference/tasks/audio-classification.md index 2819beed8..b752e9ee3 100644 --- a/docs/api-inference/tasks/audio-classification.md +++ b/docs/api-inference/tasks/audio-classification.md @@ -14,7 +14,12 @@ For more details, check out: ## Audio Classification -Audio classification is the task of assigning a label or class to a given audio. It can be used for recognizing which command a user is giving or the emotion of a statement, as well as identifying a speaker. +Audio classification is the task of assigning a label or class to a given audio. + +Example applications: +* Recognizing which command a user is giving +* Identifying a speaker +* Detecting the genre of a song diff --git a/docs/api-inference/tasks/automatic-speech-recognition.md b/docs/api-inference/tasks/automatic-speech-recognition.md index 81a456f00..7d7a2cc0a 100644 --- a/docs/api-inference/tasks/automatic-speech-recognition.md +++ b/docs/api-inference/tasks/automatic-speech-recognition.md @@ -14,7 +14,13 @@ For more details, check out: ## Automatic Speech Recognition -Automatic Speech Recognition (ASR), also known as Speech to Text (STT), is the task of transcribing a given audio to text. It has many applications, such as voice user interfaces. +Automatic Speech Recognition (ASR), also known as Speech to Text (STT), is the task of transcribing a given audio to text. + +Example applications: +* Transcribing a podcast +* Building a voice assistant +* Generating subtitles for a video + For more details about the `automatic-speech-recognition` task, check out its [dedicated page](https://huggingface.co/tasks/automatic-speech-recognition)! You will find examples and related materials. @@ -25,6 +31,7 @@ For more details about the `automatic-speech-recognition` task, check out its [d - [openai/whisper-large-v3](https://huggingface.co/openai/whisper-large-v3): A powerful ASR model by OpenAI. - [facebook/seamless-m4t-v2-large](https://huggingface.co/facebook/seamless-m4t-v2-large): An end-to-end model that performs ASR and Speech Translation by MetaAI. +- [pyannote/speaker-diarization-3.1](https://huggingface.co/pyannote/speaker-diarization-3.1): Powerful speaker diarization model. This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=automatic-speech-recognition&sort=trending). diff --git a/docs/api-inference/tasks/chat-completion.md b/docs/api-inference/tasks/chat-completion.md index 213cf5cae..249318eda 100644 --- a/docs/api-inference/tasks/chat-completion.md +++ b/docs/api-inference/tasks/chat-completion.md @@ -31,6 +31,12 @@ This is a subtask of [`text-generation`](./text_generation) designed to generate ### Using the API +The API supports: + +* Using the chat completion API compatible with the OpenAI SDK. +* Using grammars, constraints, and tools. +* Streaming the output + diff --git a/docs/api-inference/tasks/feature-extraction.md b/docs/api-inference/tasks/feature-extraction.md index 5c37b4e9c..6eb99703f 100644 --- a/docs/api-inference/tasks/feature-extraction.md +++ b/docs/api-inference/tasks/feature-extraction.md @@ -15,7 +15,11 @@ For more details, check out: ## Feature Extraction Feature extraction is the task of converting a text into a vector (often called "embedding"). 
-Extracting features is useful for subtasks like sentence similarity, reranking and retrieval augmented generation (RAG). + +Example applications: +* Retrieving the most relevant documents for a query (for RAG applications). +* Reranking a list of documents based on their similarity to a query. +* Calculating the similarity between two sentences. diff --git a/docs/api-inference/tasks/fill-mask.md b/docs/api-inference/tasks/fill-mask.md index 54b93832e..d25591df6 100644 --- a/docs/api-inference/tasks/fill-mask.md +++ b/docs/api-inference/tasks/fill-mask.md @@ -24,8 +24,8 @@ For more details about the `fill-mask` task, check out its [dedicated page](http ### Recommended models -- [distilbert-base-uncased](https://huggingface.co/distilbert-base-uncased): A faster and smaller model than the famous BERT model. -- [xlm-roberta-base](https://huggingface.co/xlm-roberta-base): A multilingual model trained on 100 languages. +- [google-bert/bert-base-uncased](https://huggingface.co/google-bert/bert-base-uncased): The famous BERT model. +- [FacebookAI/xlm-roberta-base](https://huggingface.co/FacebookAI/xlm-roberta-base): A multilingual model trained on 100 languages. This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=fill-mask&sort=trending). @@ -36,7 +36,7 @@ This is only a subset of the supported models. Find the model that suits you bes ```bash -curl https://api-inference.huggingface.co/models/distilbert-base-uncased \ +curl https://api-inference.huggingface.co/models/google-bert/bert-base-uncased \ -X POST \ -d '{"inputs": "The answer to the universe is [MASK]."}' \ -H 'Content-Type: application/json' \ @@ -49,7 +49,7 @@ curl https://api-inference.huggingface.co/models/distilbert-base-uncased \ ```py import requests -API_URL = "https://api-inference.huggingface.co/models/distilbert-base-uncased" +API_URL = "https://api-inference.huggingface.co/models/google-bert/bert-base-uncased" headers = {"Authorization": "Bearer hf_***"} def query(payload): @@ -68,7 +68,7 @@ To use the Python client, see `huggingface_hub`'s [package reference](https://hu ```js async function query(data) { const response = await fetch( - "https://api-inference.huggingface.co/models/distilbert-base-uncased", + "https://api-inference.huggingface.co/models/google-bert/bert-base-uncased", { headers: { Authorization: "Bearer hf_***" diff --git a/docs/api-inference/tasks/image-classification.md b/docs/api-inference/tasks/image-classification.md index c906acd75..53f5f734f 100644 --- a/docs/api-inference/tasks/image-classification.md +++ b/docs/api-inference/tasks/image-classification.md @@ -14,7 +14,7 @@ For more details, check out: ## Image Classification -Image classification is the task of assigning a label or class to an entire image. Images are expected to have only one class for each image. Image classification models take an image as input and return a prediction about which class the image belongs to. +Image classification is the task of assigning a label or class to an entire image. Images are expected to have only one class for each image. 
diff --git a/docs/api-inference/tasks/image-segmentation.md b/docs/api-inference/tasks/image-segmentation.md index d5956506b..367e4b397 100644 --- a/docs/api-inference/tasks/image-segmentation.md +++ b/docs/api-inference/tasks/image-segmentation.md @@ -14,7 +14,7 @@ For more details, check out: ## Image Segmentation -Image Segmentation divides an image into segments where each pixel in the image is mapped to an object. This task has multiple variants such as instance segmentation, panoptic segmentation and semantic segmentation. +Image Segmentation divides an image into segments where each pixel in the image is mapped to an object. @@ -24,7 +24,6 @@ For more details about the `image-segmentation` task, check out its [dedicated p ### Recommended models -- [facebook/detr-resnet-50-panoptic](https://huggingface.co/facebook/detr-resnet-50-panoptic): Solid panoptic segmentation model trained on the COCO 2017 benchmark dataset. - [nvidia/segformer-b0-finetuned-ade-512-512](https://huggingface.co/nvidia/segformer-b0-finetuned-ade-512-512): Semantic segmentation model trained on ADE20k benchmark dataset with 512x512 resolution. This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=image-segmentation&sort=trending). @@ -36,7 +35,7 @@ This is only a subset of the supported models. Find the model that suits you bes ```bash -curl https://api-inference.huggingface.co/models/facebook/detr-resnet-50-panoptic \ +curl https://api-inference.huggingface.co/models/nvidia/segformer-b0-finetuned-ade-512-512 \ -X POST \ --data-binary '@cats.jpg' \ -H "Authorization: Bearer hf_***" @@ -48,7 +47,7 @@ curl https://api-inference.huggingface.co/models/facebook/detr-resnet-50-panopti ```py import requests -API_URL = "https://api-inference.huggingface.co/models/facebook/detr-resnet-50-panoptic" +API_URL = "https://api-inference.huggingface.co/models/nvidia/segformer-b0-finetuned-ade-512-512" headers = {"Authorization": "Bearer hf_***"} def query(filename): @@ -68,7 +67,7 @@ To use the Python client, see `huggingface_hub`'s [package reference](https://hu async function query(filename) { const data = fs.readFileSync(filename); const response = await fetch( - "https://api-inference.huggingface.co/models/facebook/detr-resnet-50-panoptic", + "https://api-inference.huggingface.co/models/nvidia/segformer-b0-finetuned-ade-512-512", { headers: { Authorization: "Bearer hf_***" diff --git a/docs/api-inference/tasks/image-to-image.md b/docs/api-inference/tasks/image-to-image.md index 9605c0a64..7b5cfaad4 100644 --- a/docs/api-inference/tasks/image-to-image.md +++ b/docs/api-inference/tasks/image-to-image.md @@ -15,13 +15,11 @@ For more details, check out: ## Image to Image Image-to-image is the task of transforming a source image to match the characteristics of a target image or a target image domain. -Any image manipulation and enhancement is possible with image to image models. 
-Use cases heavily depend on the model and the dataset it was trained on, but some common use cases include: -- Style transfer -- Image colorization -- Image super-resolution -- Image inpainting +Example applications: +* Transferring the style of an image to another image +* Colorizing a black and white image +* Increasing the resolution of an image diff --git a/docs/api-inference/tasks/object-detection.md b/docs/api-inference/tasks/object-detection.md index c3ba578f1..fc8d989c1 100644 --- a/docs/api-inference/tasks/object-detection.md +++ b/docs/api-inference/tasks/object-detection.md @@ -14,7 +14,7 @@ For more details, check out: ## Object detection -Object Detection models allow users to identify objects of certain defined classes. Object detection models receive an image as input and output the images with bounding boxes and labels on detected objects. +Object Detection models allow users to identify objects of certain defined classes. These models receive an image as input and output the images with bounding boxes and labels on detected objects. diff --git a/docs/api-inference/tasks/question-answering.md b/docs/api-inference/tasks/question-answering.md index 0a62e9a33..73ccfa13b 100644 --- a/docs/api-inference/tasks/question-answering.md +++ b/docs/api-inference/tasks/question-answering.md @@ -25,6 +25,7 @@ For more details about the `question-answering` task, check out its [dedicated p ### Recommended models - [deepset/roberta-base-squad2](https://huggingface.co/deepset/roberta-base-squad2): A robust baseline model for most question answering domains. +- [distilbert/distilbert-base-cased-distilled-squad](https://huggingface.co/distilbert/distilbert-base-cased-distilled-squad): Small yet robust model that can answer questions. This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=question-answering&sort=trending). diff --git a/docs/api-inference/tasks/text-classification.md b/docs/api-inference/tasks/text-classification.md index bf932c4f3..2ddea6833 100644 --- a/docs/api-inference/tasks/text-classification.md +++ b/docs/api-inference/tasks/text-classification.md @@ -24,7 +24,11 @@ For more details about the `text-classification` task, check out its [dedicated ### Recommended models -- [distilbert-base-uncased-finetuned-sst-2-english](https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english): A robust model trained for sentiment analysis. +- [distilbert/distilbert-base-uncased-finetuned-sst-2-english](https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english): A robust model trained for sentiment analysis. +- [ProsusAI/finbert](https://huggingface.co/ProsusAI/finbert): A sentiment analysis model specialized in financial sentiment. +- [cardiffnlp/twitter-roberta-base-sentiment-latest](https://huggingface.co/cardiffnlp/twitter-roberta-base-sentiment-latest): A sentiment analysis model specialized in analyzing tweets. +- [papluca/xlm-roberta-base-language-detection](https://huggingface.co/papluca/xlm-roberta-base-language-detection): A model that can classify languages. +- [meta-llama/Prompt-Guard-86M](https://huggingface.co/meta-llama/Prompt-Guard-86M): A model that can classify text generation attacks. This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=text-classification&sort=trending). @@ -35,7 +39,7 @@ This is only a subset of the supported models. 
Find the model that suits you bes ```bash -curl https://api-inference.huggingface.co/models/distilbert-base-uncased-finetuned-sst-2-english \ +curl https://api-inference.huggingface.co/models/distilbert/distilbert-base-uncased-finetuned-sst-2-english \ -X POST \ -d '{"inputs": "I like you. I love you"}' \ -H 'Content-Type: application/json' \ @@ -48,7 +52,7 @@ curl https://api-inference.huggingface.co/models/distilbert-base-uncased-finetun ```py import requests -API_URL = "https://api-inference.huggingface.co/models/distilbert-base-uncased-finetuned-sst-2-english" +API_URL = "https://api-inference.huggingface.co/models/distilbert/distilbert-base-uncased-finetuned-sst-2-english" headers = {"Authorization": "Bearer hf_***"} def query(payload): @@ -67,7 +71,7 @@ To use the Python client, see `huggingface_hub`'s [package reference](https://hu ```js async function query(data) { const response = await fetch( - "https://api-inference.huggingface.co/models/distilbert-base-uncased-finetuned-sst-2-english", + "https://api-inference.huggingface.co/models/distilbert/distilbert-base-uncased-finetuned-sst-2-english", { headers: { Authorization: "Bearer hf_***" diff --git a/docs/api-inference/tasks/text-to-image.md b/docs/api-inference/tasks/text-to-image.md index 77d525d41..ec719cba0 100644 --- a/docs/api-inference/tasks/text-to-image.md +++ b/docs/api-inference/tasks/text-to-image.md @@ -26,7 +26,6 @@ For more details about the `text-to-image` task, check out its [dedicated page]( - [black-forest-labs/FLUX.1-dev](https://huggingface.co/black-forest-labs/FLUX.1-dev): One of the most powerful image generation models that can generate realistic outputs. - [latent-consistency/lcm-lora-sdxl](https://huggingface.co/latent-consistency/lcm-lora-sdxl): A powerful yet fast image generation model. -- [Kwai-Kolors/Kolors](https://huggingface.co/Kwai-Kolors/Kolors): Text-to-image model for photorealistic generation. - [stabilityai/stable-diffusion-3-medium-diffusers](https://huggingface.co/stabilityai/stable-diffusion-3-medium-diffusers): A powerful text-to-image model. This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=text-to-image&sort=trending). diff --git a/docs/api-inference/tasks/token-classification.md b/docs/api-inference/tasks/token-classification.md index 9da5edcb2..035582250 100644 --- a/docs/api-inference/tasks/token-classification.md +++ b/docs/api-inference/tasks/token-classification.md @@ -25,6 +25,8 @@ For more details about the `token-classification` task, check out its [dedicated ### Recommended models - [dslim/bert-base-NER](https://huggingface.co/dslim/bert-base-NER): A robust performance model to identify people, locations, organizations and names of miscellaneous entities. +- [FacebookAI/xlm-roberta-large-finetuned-conll03-english](https://huggingface.co/FacebookAI/xlm-roberta-large-finetuned-conll03-english): A strong model to identify people, locations, organizations and names in multiple languages. +- [blaze999/Medical-NER](https://huggingface.co/blaze999/Medical-NER): A token classification model specialized on medical entity recognition. - [flair/ner-english](https://huggingface.co/flair/ner-english): Flair models are typically the state of the art in named entity recognition tasks. This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=token-classification&sort=trending). 
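As a concrete illustration of how the token-classification documentation above is typically exercised from Python, here is a minimal sketch using `huggingface_hub`'s `InferenceClient` with the `dslim/bert-base-NER` model recommended in that page; the input sentence is a made-up example, and the exact shape of the returned items may vary slightly across `huggingface_hub` versions.

```py
# Minimal sketch, assuming a valid token and the dslim/bert-base-NER model
# recommended above. The input sentence is an arbitrary example.
from huggingface_hub import InferenceClient

client = InferenceClient("dslim/bert-base-NER", token="hf_***")

# Returns one entry per detected entity, typically with an entity group
# (e.g. PER, LOC), a confidence score, the matched word and its offsets.
entities = client.token_classification("My name is Sarah and I live in London.")
for entity in entities:
    print(entity)
```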
diff --git a/docs/api-inference/tasks/translation.md b/docs/api-inference/tasks/translation.md index 1b6284abe..908aa972e 100644 --- a/docs/api-inference/tasks/translation.md +++ b/docs/api-inference/tasks/translation.md @@ -24,8 +24,7 @@ For more details about the `translation` task, check out its [dedicated page](ht ### Recommended models -- [Helsinki-NLP/opus-mt-en-fr](https://huggingface.co/Helsinki-NLP/opus-mt-en-fr): A model that translates from English to French. -- [t5-base](https://huggingface.co/t5-base): A general-purpose Transformer that can be used to translate from English to German, French, or Romanian. +- [google-t5/t5-base](https://huggingface.co/google-t5/t5-base): A general-purpose Transformer that can be used to translate from English to German, French, or Romanian. This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=translation&sort=trending). @@ -36,7 +35,7 @@ This is only a subset of the supported models. Find the model that suits you bes ```bash -curl https://api-inference.huggingface.co/models/Helsinki-NLP/opus-mt-en-fr \ +curl https://api-inference.huggingface.co/models/google-t5/t5-base \ -X POST \ -d '{"inputs": "Меня зовут Вольфганг и я живу в Берлине"}' \ -H 'Content-Type: application/json' \ @@ -49,7 +48,7 @@ curl https://api-inference.huggingface.co/models/Helsinki-NLP/opus-mt-en-fr \ ```py import requests -API_URL = "https://api-inference.huggingface.co/models/Helsinki-NLP/opus-mt-en-fr" +API_URL = "https://api-inference.huggingface.co/models/google-t5/t5-base" headers = {"Authorization": "Bearer hf_***"} def query(payload): @@ -68,7 +67,7 @@ To use the Python client, see `huggingface_hub`'s [package reference](https://hu ```js async function query(data) { const response = await fetch( - "https://api-inference.huggingface.co/models/Helsinki-NLP/opus-mt-en-fr", + "https://api-inference.huggingface.co/models/google-t5/t5-base", { headers: { Authorization: "Bearer hf_***" diff --git a/docs/api-inference/tasks/zero-shot-classification.md b/docs/api-inference/tasks/zero-shot-classification.md index 89b505be1..7ccf024aa 100644 --- a/docs/api-inference/tasks/zero-shot-classification.md +++ b/docs/api-inference/tasks/zero-shot-classification.md @@ -24,7 +24,8 @@ For more details about the `zero-shot-classification` task, check out its [dedic ### Recommended models -- [facebook/bart-large-mnli](https://huggingface.co/facebook/bart-large-mnli): Powerful zero-shot text classification model +- [facebook/bart-large-mnli](https://huggingface.co/facebook/bart-large-mnli): Powerful zero-shot text classification model. +- [MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7](https://huggingface.co/MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7): Powerful zero-shot multilingual text classification model that can accomplish multiple tasks. This is only a subset of the supported models. Find the model that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=zero-shot-classification&sort=trending). 
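Zero-shot classification, covered just above, is the one task in this group whose request body needs an extra `candidate_labels` parameter. A minimal sketch of such a request is shown below, assuming the `facebook/bart-large-mnli` model recommended in that page; the input text and label set are arbitrary examples.

```py
# Minimal sketch of a zero-shot classification request, assuming a valid token.
# The input text and candidate labels are arbitrary examples.
import requests

API_URL = "https://api-inference.huggingface.co/models/facebook/bart-large-mnli"
headers = {"Authorization": "Bearer hf_***"}

payload = {
    "inputs": "I just bought a new laptop and it works wonderfully!",
    "parameters": {"candidate_labels": ["technology", "sports", "politics"]},
}

# The response ranks the candidate labels by score.
response = requests.post(API_URL, headers=headers, json=payload)
print(response.json())
```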
diff --git a/scripts/api-inference/templates/task/audio-classification.handlebars b/scripts/api-inference/templates/task/audio-classification.handlebars index 9567f39ca..8530b7de2 100644 --- a/scripts/api-inference/templates/task/audio-classification.handlebars +++ b/scripts/api-inference/templates/task/audio-classification.handlebars @@ -1,6 +1,11 @@ ## Audio Classification -Audio classification is the task of assigning a label or class to a given audio. It can be used for recognizing which command a user is giving or the emotion of a statement, as well as identifying a speaker. +Audio classification is the task of assigning a label or class to a given audio. + +Example applications: +* Recognizing which command a user is giving +* Identifying a speaker +* Detecting the genre of a song {{{tips.linksToTaskPage.audio-classification}}} diff --git a/scripts/api-inference/templates/task/automatic-speech-recognition.handlebars b/scripts/api-inference/templates/task/automatic-speech-recognition.handlebars index 8e200fd2a..fc81651df 100644 --- a/scripts/api-inference/templates/task/automatic-speech-recognition.handlebars +++ b/scripts/api-inference/templates/task/automatic-speech-recognition.handlebars @@ -1,6 +1,12 @@ ## Automatic Speech Recognition -Automatic Speech Recognition (ASR), also known as Speech to Text (STT), is the task of transcribing a given audio to text. It has many applications, such as voice user interfaces. +Automatic Speech Recognition (ASR), also known as Speech to Text (STT), is the task of transcribing a given audio to text. + +Example applications: +* Transcribing a podcast +* Building a voice assistant +* Generating subtitles for a video + {{{tips.linksToTaskPage.automatic-speech-recognition}}} ### Recommended models diff --git a/scripts/api-inference/templates/task/chat-completion.handlebars b/scripts/api-inference/templates/task/chat-completion.handlebars index fd2d189fa..31acb2d21 100644 --- a/scripts/api-inference/templates/task/chat-completion.handlebars +++ b/scripts/api-inference/templates/task/chat-completion.handlebars @@ -15,6 +15,12 @@ This is a subtask of [`text-generation`](./text_generation) designed to generate ### Using the API +The API supports: + +* Using the chat completion API compatible with the OpenAI SDK. +* Using grammars, constraints, and tools. +* Streaming the output + {{{snippets.chat-completion}}} ### API specification diff --git a/scripts/api-inference/templates/task/feature-extraction.handlebars b/scripts/api-inference/templates/task/feature-extraction.handlebars index adc28262d..0b7b9748f 100644 --- a/scripts/api-inference/templates/task/feature-extraction.handlebars +++ b/scripts/api-inference/templates/task/feature-extraction.handlebars @@ -1,7 +1,11 @@ ## Feature Extraction Feature extraction is the task of converting a text into a vector (often called "embedding"). -Extracting features is useful for subtasks like sentence similarity, reranking and retrieval augmented generation (RAG). + +Example applications: +* Retrieving the most relevant documents for a query (for RAG applications). +* Reranking a list of documents based on their similarity to a query. +* Calculating the similarity between two sentences. 
{{{tips.linksToTaskPage.feature-extraction}}} diff --git a/scripts/api-inference/templates/task/image-classification.handlebars b/scripts/api-inference/templates/task/image-classification.handlebars index 88461e7be..96a6ff49a 100644 --- a/scripts/api-inference/templates/task/image-classification.handlebars +++ b/scripts/api-inference/templates/task/image-classification.handlebars @@ -1,6 +1,6 @@ ## Image Classification -Image classification is the task of assigning a label or class to an entire image. Images are expected to have only one class for each image. Image classification models take an image as input and return a prediction about which class the image belongs to. +Image classification is the task of assigning a label or class to an entire image. Images are expected to have only one class for each image. {{{tips.linksToTaskPage.image-classification}}} diff --git a/scripts/api-inference/templates/task/image-segmentation.handlebars b/scripts/api-inference/templates/task/image-segmentation.handlebars index e4cec3a01..11ea77f47 100644 --- a/scripts/api-inference/templates/task/image-segmentation.handlebars +++ b/scripts/api-inference/templates/task/image-segmentation.handlebars @@ -1,6 +1,6 @@ ## Image Segmentation -Image Segmentation divides an image into segments where each pixel in the image is mapped to an object. This task has multiple variants such as instance segmentation, panoptic segmentation and semantic segmentation. +Image Segmentation divides an image into segments where each pixel in the image is mapped to an object. {{{tips.linksToTaskPage.image-segmentation}}} diff --git a/scripts/api-inference/templates/task/image-to-image.handlebars b/scripts/api-inference/templates/task/image-to-image.handlebars index 93d5f6f00..ba21bf4fe 100644 --- a/scripts/api-inference/templates/task/image-to-image.handlebars +++ b/scripts/api-inference/templates/task/image-to-image.handlebars @@ -1,13 +1,11 @@ ## Image to Image Image-to-image is the task of transforming a source image to match the characteristics of a target image or a target image domain. -Any image manipulation and enhancement is possible with image to image models. -Use cases heavily depend on the model and the dataset it was trained on, but some common use cases include: -- Style transfer -- Image colorization -- Image super-resolution -- Image inpainting +Example applications: +* Transferring the style of an image to another image +* Colorizing a black and white image +* Increasing the resolution of an image {{{tips.linksToTaskPage.image-to-image}}} diff --git a/scripts/api-inference/templates/task/object-detection.handlebars b/scripts/api-inference/templates/task/object-detection.handlebars index f3b4e085b..3892e34a3 100644 --- a/scripts/api-inference/templates/task/object-detection.handlebars +++ b/scripts/api-inference/templates/task/object-detection.handlebars @@ -1,6 +1,6 @@ ## Object detection -Object Detection models allow users to identify objects of certain defined classes. Object detection models receive an image as input and output the images with bounding boxes and labels on detected objects. +Object Detection models allow users to identify objects of certain defined classes. These models receive an image as input and output the images with bounding boxes and labels on detected objects. {{{tips.linksToTaskPage.object-detection}}}
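To make the object-detection description above concrete, here is a minimal sketch of a request that mirrors the binary-upload pattern used by the other vision tasks in these docs. The model ID `facebook/detr-resnet-50` and the file name `cats.jpg` are assumptions for the example, not part of the generated documentation.

```py
# Minimal sketch of an object-detection request, assuming a valid token,
# a local image file "cats.jpg" and the facebook/detr-resnet-50 model
# (an assumed choice; any warm object-detection model should behave similarly).
import requests

API_URL = "https://api-inference.huggingface.co/models/facebook/detr-resnet-50"
headers = {"Authorization": "Bearer hf_***"}

def query(filename):
    with open(filename, "rb") as f:
        data = f.read()
    response = requests.post(API_URL, headers=headers, data=data)
    return response.json()

# Each detection typically carries a label, a confidence score and a bounding box.
print(query("cats.jpg"))
```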