diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 28a657d82..d7815aada 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -41,6 +41,10 @@ jobs: run: VCR_MODE=playback pnpm --filter ...[${{ steps.since.outputs.SINCE }}] test env: HF_TOKEN: ${{ secrets.HF_TOKEN }} + HF_FAL_KEY: dummy + HF_REPLICATE_KEY: dummy + HF_SAMBANOVA_KEY: dummy + HF_TOGETHER_KEY: dummy browser: runs-on: ubuntu-latest @@ -77,6 +81,10 @@ jobs: run: VCR_MODE=playback pnpm --filter ...[${{ steps.since.outputs.SINCE }}] test:browser env: HF_TOKEN: ${{ secrets.HF_TOKEN }} + HF_FAL_KEY: dummy + HF_REPLICATE_KEY: dummy + HF_SAMBANOVA_KEY: dummy + HF_TOGETHER_KEY: dummy e2e: runs-on: ubuntu-latest @@ -140,3 +148,7 @@ jobs: env: NPM_CONFIG_REGISTRY: http://localhost:4874/ HF_TOKEN: ${{ secrets.HF_TOKEN }} + HF_FAL_KEY: dummy + HF_REPLICATE_KEY: dummy + HF_SAMBANOVA_KEY: dummy + HF_TOGETHER_KEY: dummy diff --git a/README.md b/README.md index 838f80045..1774b8de5 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ // Programatically interact with the Hub await createRepo({ - repo: {type: "model", name: "my-user/nlp-model"}, + repo: { type: "model", name: "my-user/nlp-model" }, accessToken: HF_TOKEN }); @@ -53,11 +53,13 @@ await inference.textToImage({ This is a collection of JS libraries to interact with the Hugging Face API, with TS types included. -- [@huggingface/inference](packages/inference/README.md): Use Inference Endpoints (dedicated) and Inference API (serverless) to make calls to 100,000+ Machine Learning models +- [@huggingface/inference](packages/inference/README.md): Use Inference API (serverless) and Inference Endpoints (dedicated) to make calls to 100,000+ Machine Learning models - [@huggingface/hub](packages/hub/README.md): Interact with huggingface.co to create or delete repos and commit / download files - [@huggingface/agents](packages/agents/README.md): Interact with HF models through a natural language interface - [@huggingface/gguf](packages/gguf/README.md): A GGUF parser that works on remotely hosted files. +- [@huggingface/dduf](packages/dduf/README.md): Similar package for DDUF (DDUF Diffusers Unified Format) - [@huggingface/tasks](packages/tasks/README.md): The definition files and source-of-truth for the Hub's main primitives like pipeline tasks, model libraries, etc. +- [@huggingface/jinja](packages/jinja/README.md): A minimalistic JS implementation of the Jinja templating engine, to be used for ML chat templates. - [@huggingface/space-header](packages/space-header/README.md): Use the Space `mini_header` outside Hugging Face @@ -165,7 +167,7 @@ await inference.imageToText({ const gpt2 = inference.endpoint('https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/gpt2'); const { generated_text } = await gpt2.textGeneration({inputs: 'The answer to the universe is'}); -//Chat Completion +// Chat Completion const llamaEndpoint = inference.endpoint( "https://api-inference.huggingface.co/models/meta-llama/Llama-3.1-8B-Instruct" ); @@ -185,7 +187,7 @@ import { createRepo, uploadFile, deleteFiles } from "@huggingface/hub"; const HF_TOKEN = "hf_..."; await createRepo({ - repo: "my-user/nlp-model", // or {type: "model", name: "my-user/nlp-test"}, + repo: "my-user/nlp-model", // or { type: "model", name: "my-user/nlp-test" }, accessToken: HF_TOKEN }); @@ -200,7 +202,7 @@ await uploadFile({ }); await deleteFiles({ - repo: {type: "space", name: "my-user/my-space"}, // or "spaces/my-user/my-space" + repo: { type: "space", name: "my-user/my-space" }, // or "spaces/my-user/my-space" accessToken: HF_TOKEN, paths: ["README.md", ".gitattributes"] }); @@ -209,7 +211,7 @@ await deleteFiles({ ### @huggingface/agents example ```ts -import {HfAgent, LLMFromHub, defaultTools} from '@huggingface/agents'; +import { HfAgent, LLMFromHub, defaultTools } from '@huggingface/agents'; const HF_TOKEN = "hf_..."; diff --git a/packages/inference/LICENSE b/packages/inference/LICENSE index 7b1576042..7a6064e54 100644 --- a/packages/inference/LICENSE +++ b/packages/inference/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2022 Tim Mikeladze +Copyright (c) 2022 Tim Mikeladze and the Hugging Face team Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/packages/inference/README.md b/packages/inference/README.md index 11939338b..9c20c418b 100644 --- a/packages/inference/README.md +++ b/packages/inference/README.md @@ -1,7 +1,7 @@ # 🤗 Hugging Face Inference Endpoints -A Typescript powered wrapper for the Hugging Face Inference Endpoints API. Learn more about Inference Endpoints at [Hugging Face](https://huggingface.co/inference-endpoints). -It works with both [Inference API (serverless)](https://huggingface.co/docs/api-inference/index) and [Inference Endpoints (dedicated)](https://huggingface.co/docs/inference-endpoints/index). +A Typescript powered wrapper for the Hugging Face Inference API (serverless), Inference Endpoints (dedicated), and third-party Inference Providers. +It works with [Inference API (serverless)](https://huggingface.co/docs/api-inference/index) and [Inference Endpoints (dedicated)](https://huggingface.co/docs/inference-endpoints/index), and even with supported third-party Inference Providers. Check out the [full documentation](https://huggingface.co/docs/huggingface.js/inference/README). diff --git a/packages/inference/src/lib/makeRequestOptions.ts b/packages/inference/src/lib/makeRequestOptions.ts index 50eff4943..0f79b0db0 100644 --- a/packages/inference/src/lib/makeRequestOptions.ts +++ b/packages/inference/src/lib/makeRequestOptions.ts @@ -1,4 +1,8 @@ -import type { InferenceTask, Options, RequestArgs } from "../types"; +import { FAL_AI_API_BASE_URL, FAL_AI_MODEL_IDS } from "../providers/fal-ai"; +import { REPLICATE_API_BASE_URL, REPLICATE_MODEL_IDS } from "../providers/replicate"; +import { SAMBANOVA_API_BASE_URL, SAMBANOVA_MODEL_IDS } from "../providers/sambanova"; +import { TOGETHER_API_BASE_URL, TOGETHER_MODEL_IDS } from "../providers/together"; +import { INFERENCE_PROVIDERS, type InferenceTask, type Options, type RequestArgs } from "../types"; import { omit } from "../utils/omit"; import { HF_HUB_URL } from "./getDefaultTask"; import { isUrl } from "./isUrl"; @@ -6,7 +10,8 @@ import { isUrl } from "./isUrl"; const HF_INFERENCE_API_BASE_URL = "https://api-inference.huggingface.co"; /** - * Loaded from huggingface.co/api/tasks if needed + * Lazy-loaded from huggingface.co/api/tasks when needed + * Used to determine the default model to use when it's not user defined */ let tasks: Record | null = null; @@ -26,21 +31,14 @@ export async function makeRequestOptions( chatCompletion?: boolean; } ): Promise<{ url: string; info: RequestInit }> { - const { accessToken, endpointUrl, ...otherArgs } = args; + const { accessToken, endpointUrl, provider, ...otherArgs } = args; let { model } = args; - const { - forceTask: task, - includeCredentials, - taskHint, - wait_for_model, - use_cache, - dont_load_model, - chatCompletion, - } = options ?? {}; + const { forceTask, includeCredentials, taskHint, wait_for_model, use_cache, dont_load_model, chatCompletion } = + options ?? {}; const headers: Record = {}; if (accessToken) { - headers["Authorization"] = `Bearer ${accessToken}`; + headers["Authorization"] = provider === "fal-ai" ? `Key ${accessToken}` : `Bearer ${accessToken}`; } if (!model && !tasks && taskHint) { @@ -61,6 +59,35 @@ export async function makeRequestOptions( if (!model) { throw new Error("No model provided, and no default model found for this task"); } + if (provider) { + if (!INFERENCE_PROVIDERS.includes(provider)) { + throw new Error("Unknown Inference provider"); + } + if (!accessToken) { + throw new Error("Specifying an Inference provider requires an accessToken"); + } + + const modelId = (() => { + switch (provider) { + case "replicate": + return REPLICATE_MODEL_IDS[model]; + case "sambanova": + return SAMBANOVA_MODEL_IDS[model]; + case "together": + return TOGETHER_MODEL_IDS[model]?.id; + case "fal-ai": + return FAL_AI_MODEL_IDS[model]; + default: + return model; + } + })(); + + if (!modelId) { + throw new Error(`Model ${model} is not supported for provider ${provider}`); + } + + model = modelId; + } const binary = "data" in args && !!args.data; @@ -77,6 +104,9 @@ export async function makeRequestOptions( if (dont_load_model) { headers["X-Load-Model"] = "0"; } + if (provider === "replicate") { + headers["Prefer"] = "wait"; + } let url = (() => { if (endpointUrl && isUrl(model)) { @@ -89,8 +119,33 @@ export async function makeRequestOptions( if (endpointUrl) { return endpointUrl; } - if (task) { - return `${HF_INFERENCE_API_BASE_URL}/pipeline/${task}/${model}`; + if (forceTask) { + return `${HF_INFERENCE_API_BASE_URL}/pipeline/${forceTask}/${model}`; + } + if (provider) { + if (!accessToken) { + throw new Error("Specifying an Inference provider requires an accessToken"); + } + if (accessToken.startsWith("hf_")) { + /// TODO we wil proxy the request server-side (using our own keys) and handle billing for it on the user's HF account. + throw new Error("Inference proxying is not implemented yet"); + } else { + switch (provider) { + case "fal-ai": + return `${FAL_AI_API_BASE_URL}/${model}`; + case "replicate": + return `${REPLICATE_API_BASE_URL}/v1/models/${model}/predictions`; + case "sambanova": + return SAMBANOVA_API_BASE_URL; + case "together": + if (taskHint === "text-to-image") { + return `${TOGETHER_API_BASE_URL}/v1/images/generations`; + } + return TOGETHER_API_BASE_URL; + default: + break; + } + } } return `${HF_INFERENCE_API_BASE_URL}/models/${model}`; @@ -99,6 +154,9 @@ export async function makeRequestOptions( if (chatCompletion && !url.endsWith("/chat/completions")) { url += "/v1/chat/completions"; } + if (provider === "together" && taskHint === "text-generation" && !chatCompletion) { + url += "/v1/completions"; + } /** * For edge runtimes, leave 'credentials' undefined, otherwise cloudflare workers will error @@ -116,9 +174,11 @@ export async function makeRequestOptions( body: binary ? args.data : JSON.stringify({ - ...(otherArgs.model && isUrl(otherArgs.model) ? omit(otherArgs, "model") : otherArgs), + ...((otherArgs.model && isUrl(otherArgs.model)) || provider === "replicate" || provider === "fal-ai" + ? omit(otherArgs, "model") + : { ...otherArgs, model }), }), - ...(credentials && { credentials }), + ...(credentials ? { credentials } : undefined), signal: options?.signal, }; diff --git a/packages/inference/src/providers/fal-ai.ts b/packages/inference/src/providers/fal-ai.ts new file mode 100644 index 000000000..2639bf5d1 --- /dev/null +++ b/packages/inference/src/providers/fal-ai.ts @@ -0,0 +1,17 @@ +import type { ModelId } from "../types"; + +export const FAL_AI_API_BASE_URL = "https://fal.run"; + +type FalAiId = string; + +/** + * Mapping from HF model ID -> fal.ai app id + */ +export const FAL_AI_MODEL_IDS: Record = { + /** text-to-image */ + "black-forest-labs/FLUX.1-schnell": "fal-ai/flux/schnell", + "black-forest-labs/FLUX.1-dev": "fal-ai/flux/dev", + + /** automatic-speech-recognition */ + "openai/whisper-large-v3": "fal-ai/whisper", +}; diff --git a/packages/inference/src/providers/replicate.ts b/packages/inference/src/providers/replicate.ts new file mode 100644 index 000000000..9b19dad46 --- /dev/null +++ b/packages/inference/src/providers/replicate.ts @@ -0,0 +1,21 @@ +import type { ModelId } from "../types"; + +export const REPLICATE_API_BASE_URL = "https://api.replicate.com"; + +type ReplicateId = string; + +/** + * Mapping from HF model ID -> Replicate model ID + * + * Available models can be fetched with: + * ``` + * curl -s \ + * -H "Authorization: Bearer $REPLICATE_API_TOKEN" \ + * 'https://api.replicate.com/v1/models' + * ``` + */ +export const REPLICATE_MODEL_IDS: Record = { + /** text-to-image */ + "black-forest-labs/FLUX.1-schnell": "black-forest-labs/flux-schnell", + "ByteDance/SDXL-Lightning": "bytedance/sdxl-lightning-4step", +}; diff --git a/packages/inference/src/providers/sambanova.ts b/packages/inference/src/providers/sambanova.ts new file mode 100644 index 000000000..745f7b51f --- /dev/null +++ b/packages/inference/src/providers/sambanova.ts @@ -0,0 +1,32 @@ +import type { ModelId } from "../types"; + +export const SAMBANOVA_API_BASE_URL = "https://api.sambanova.ai"; + +/** + * Note for reviewers: our goal would be to ask Sambanova to support + * our model ids too, so we don't have to define a mapping + * or keep it up-to-date. + * + * As a fallback, if the above is not possible, ask Sambanova to + * provide the mapping as an fetchable API. + */ +type SambanovaId = string; + +/** + * https://community.sambanova.ai/t/supported-models/193 + */ +export const SAMBANOVA_MODEL_IDS: Record = { + /** Chat completion / conversational */ + "Qwen/Qwen2.5-Coder-32B-Instruct": "Qwen2.5-Coder-32B-Instruct", + "Qwen/Qwen2.5-72B-Instruct": "Qwen2.5-72B-Instruct", + "Qwen/QwQ-32B-Preview": "QwQ-32B-Preview", + "meta-llama/Llama-3.3-70B-Instruct": "Meta-Llama-3.3-70B-Instruct", + "meta-llama/Llama-3.2-1B": "Meta-Llama-3.2-1B-Instruct", + "meta-llama/Llama-3.2-3B": "Meta-Llama-3.2-3B-Instruct", + "meta-llama/Llama-3.2-11B-Vision-Instruct": "Llama-3.2-11B-Vision-Instruct", + "meta-llama/Llama-3.2-90B-Vision-Instruct": "Llama-3.2-90B-Vision-Instruct", + "meta-llama/Llama-3.1-8B-Instruct": "Meta-Llama-3.1-8B-Instruct", + "meta-llama/Llama-3.1-70B-Instruct": "Meta-Llama-3.1-70B-Instruct", + "meta-llama/Llama-3.1-405B-Instruct": "Meta-Llama-3.1-405B-Instruct", + "meta-llama/Llama-Guard-3-8B": "Meta-Llama-Guard-3-8B", +}; diff --git a/packages/inference/src/providers/together.ts b/packages/inference/src/providers/together.ts new file mode 100644 index 000000000..799b98637 --- /dev/null +++ b/packages/inference/src/providers/together.ts @@ -0,0 +1,60 @@ +import type { ModelId } from "../types"; + +export const TOGETHER_API_BASE_URL = "https://api.together.xyz"; + +/** + * Same comment as in sambanova.ts + */ +type TogetherId = string; + +/** + * https://docs.together.ai/reference/models-1 + */ +export const TOGETHER_MODEL_IDS: Record< + ModelId, + { id: TogetherId; type: "chat" | "embedding" | "image" | "language" | "moderation" } +> = { + /** text-to-image */ + "black-forest-labs/FLUX.1-Canny-dev": { id: "black-forest-labs/FLUX.1-canny", type: "image" }, + "black-forest-labs/FLUX.1-Depth-dev": { id: "black-forest-labs/FLUX.1-depth", type: "image" }, + "black-forest-labs/FLUX.1-dev": { id: "black-forest-labs/FLUX.1-dev", type: "image" }, + "black-forest-labs/FLUX.1-Redux-dev": { id: "black-forest-labs/FLUX.1-redux", type: "image" }, + "black-forest-labs/FLUX.1-schnell": { id: "black-forest-labs/FLUX.1-pro", type: "image" }, + "stabilityai/stable-diffusion-xl-base-1.0": { id: "stabilityai/stable-diffusion-xl-base-1.0", type: "image" }, + + /** chat completion */ + "databricks/dbrx-instruct": { id: "databricks/dbrx-instruct", type: "chat" }, + "deepseek-ai/deepseek-llm-67b-chat": { id: "deepseek-ai/deepseek-llm-67b-chat", type: "chat" }, + "google/gemma-2-9b-it": { id: "google/gemma-2-9b-it", type: "chat" }, + "google/gemma-2b-it": { id: "google/gemma-2-27b-it", type: "chat" }, + "llava-hf/llava-v1.6-mistral-7b-hf": { id: "llava-hf/llava-v1.6-mistral-7b-hf", type: "chat" }, + "meta-llama/Llama-2-13b-chat-hf": { id: "meta-llama/Llama-2-13b-chat-hf", type: "chat" }, + "meta-llama/Llama-2-70b-hf": { id: "meta-llama/Llama-2-70b-hf", type: "language" }, + "meta-llama/Llama-2-7b-chat-hf": { id: "meta-llama/Llama-2-7b-chat-hf", type: "chat" }, + "meta-llama/Llama-3.2-11B-Vision-Instruct": { id: "meta-llama/Llama-Vision-Free", type: "chat" }, + "meta-llama/Llama-3.2-3B-Instruct": { id: "meta-llama/Llama-3.2-3B-Instruct-Turbo", type: "chat" }, + "meta-llama/Llama-3.2-90B-Vision-Instruct": { id: "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo", type: "chat" }, + "meta-llama/Llama-3.3-70B-Instruct": { id: "meta-llama/Llama-3.3-70B-Instruct-Turbo", type: "chat" }, + "meta-llama/Meta-Llama-3-70B-Instruct": { id: "meta-llama/Llama-3-70b-chat-hf", type: "chat" }, + "meta-llama/Meta-Llama-3-8B-Instruct": { id: "togethercomputer/Llama-3-8b-chat-hf-int4", type: "chat" }, + "meta-llama/Meta-Llama-3.1-405B-Instruct": { id: "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo", type: "chat" }, + "meta-llama/Meta-Llama-3.1-70B-Instruct": { id: "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo", type: "chat" }, + "meta-llama/Meta-Llama-3.1-8B-Instruct": { id: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo-128K", type: "chat" }, + "microsoft/WizardLM-2-8x22B": { id: "microsoft/WizardLM-2-8x22B", type: "chat" }, + "mistralai/Mistral-7B-Instruct-v0.3": { id: "mistralai/Mistral-7B-Instruct-v0.3", type: "chat" }, + "mistralai/Mixtral-8x22B-Instruct-v0.1": { id: "mistralai/Mixtral-8x22B-Instruct-v0.1", type: "chat" }, + "mistralai/Mixtral-8x7B-Instruct-v0.1": { id: "mistralai/Mixtral-8x7B-Instruct-v0.1", type: "chat" }, + "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": { id: "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", type: "chat" }, + "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF": { id: "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF", type: "chat" }, + "Qwen/Qwen2-72B-Instruct": { id: "Qwen/Qwen2-72B-Instruct", type: "chat" }, + "Qwen/Qwen2.5-72B-Instruct": { id: "Qwen/Qwen2.5-72B-Instruct-Turbo", type: "chat" }, + "Qwen/Qwen2.5-7B-Instruct": { id: "Qwen/Qwen2.5-7B-Instruct-Turbo", type: "chat" }, + "Qwen/Qwen2.5-Coder-32B-Instruct": { id: "Qwen/Qwen2.5-Coder-32B-Instruct", type: "chat" }, + "Qwen/QwQ-32B-Preview": { id: "Qwen/QwQ-32B-Preview", type: "chat" }, + "scb10x/llama-3-typhoon-v1.5-8b-instruct": { id: "scb10x/scb10x-llama3-typhoon-v1-5-8b-instruct", type: "chat" }, + "scb10x/llama-3-typhoon-v1.5x-70b-instruct-awq": { id: "scb10x/scb10x-llama3-typhoon-v1-5x-4f316", type: "chat" }, + + /** text-generation */ + "meta-llama/Meta-Llama-3-8B": { id: "meta-llama/Meta-Llama-3-8B", type: "language" }, + "mistralai/Mixtral-8x7B-v0.1": { id: "mistralai/Mixtral-8x7B-v0.1", type: "language" }, +}; diff --git a/packages/inference/src/tasks/audio/automaticSpeechRecognition.ts b/packages/inference/src/tasks/audio/automaticSpeechRecognition.ts index 600d5b6c7..c56090c08 100644 --- a/packages/inference/src/tasks/audio/automaticSpeechRecognition.ts +++ b/packages/inference/src/tasks/audio/automaticSpeechRecognition.ts @@ -1,5 +1,6 @@ import { InferenceOutputError } from "../../lib/InferenceOutputError"; -import type { BaseArgs, Options } from "../../types"; +import type { BaseArgs, Options, RequestArgs } from "../../types"; +import { base64FromBytes } from "../../utils/base64FromBytes"; import { request } from "../custom/request"; export type AutomaticSpeechRecognitionArgs = BaseArgs & { @@ -24,6 +25,14 @@ export async function automaticSpeechRecognition( args: AutomaticSpeechRecognitionArgs, options?: Options ): Promise { + if (args.provider === "fal-ai") { + const contentType = args.data instanceof Blob ? args.data.type : "audio/mpeg"; + const base64audio = base64FromBytes( + new Uint8Array(args.data instanceof ArrayBuffer ? args.data : await args.data.arrayBuffer()) + ); + (args as RequestArgs & { audio_url: string }).audio_url = `data:${contentType};base64,${base64audio}`; + delete (args as RequestArgs & { data: unknown }).data; + } const res = await request(args, { ...options, taskHint: "automatic-speech-recognition", diff --git a/packages/inference/src/tasks/custom/request.ts b/packages/inference/src/tasks/custom/request.ts index 99a4beac2..b6838c9d0 100644 --- a/packages/inference/src/tasks/custom/request.ts +++ b/packages/inference/src/tasks/custom/request.ts @@ -2,7 +2,7 @@ import type { InferenceTask, Options, RequestArgs } from "../../types"; import { makeRequestOptions } from "../../lib/makeRequestOptions"; /** - * Primitive to make custom calls to Inference Endpoints + * Primitive to make custom calls to the inference provider */ export async function request( args: RequestArgs, @@ -26,16 +26,22 @@ export async function request( } if (!response.ok) { - if (response.headers.get("Content-Type")?.startsWith("application/json")) { + const contentType = response.headers.get("Content-Type"); + if (["application/json", "application/problem+json"].some((ct) => contentType?.startsWith(ct))) { const output = await response.json(); if ([400, 422, 404, 500].includes(response.status) && options?.chatCompletion) { - throw new Error(`Server ${args.model} does not seem to support chat completion. Error: ${output.error}`); + throw new Error( + `Server ${args.model} does not seem to support chat completion. Error: ${JSON.stringify(output.error)}` + ); } - if (output.error) { - throw new Error(JSON.stringify(output.error)); + if (output.error || output.detail) { + throw new Error(JSON.stringify(output.error ?? output.detail)); + } else { + throw new Error(output); } } - throw new Error("An error occurred while fetching the blob"); + const message = contentType?.startsWith("text/plain;") ? await response.text() : undefined; + throw new Error(message ?? "An error occurred while fetching the blob"); } if (response.headers.get("Content-Type")?.startsWith("application/json")) { diff --git a/packages/inference/src/tasks/custom/streamingRequest.ts b/packages/inference/src/tasks/custom/streamingRequest.ts index 7f4179811..fac10b9e7 100644 --- a/packages/inference/src/tasks/custom/streamingRequest.ts +++ b/packages/inference/src/tasks/custom/streamingRequest.ts @@ -68,7 +68,9 @@ export async function* streamingRequest( try { while (true) { const { done, value } = await reader.read(); - if (done) return; + if (done) { + return; + } onChunk(value); for (const event of events) { if (event.data.length > 0) { diff --git a/packages/inference/src/tasks/cv/textToImage.ts b/packages/inference/src/tasks/cv/textToImage.ts index 677b3bc5c..d8527d653 100644 --- a/packages/inference/src/tasks/cv/textToImage.ts +++ b/packages/inference/src/tasks/cv/textToImage.ts @@ -8,6 +8,15 @@ export type TextToImageArgs = BaseArgs & { */ inputs: string; + /** + * Same param but for external providers like Together, Replicate + */ + prompt?: string; + response_format?: "base64"; + input?: { + prompt: string; + }; + parameters?: { /** * An optional negative prompt for the image generation @@ -34,15 +43,49 @@ export type TextToImageArgs = BaseArgs & { export type TextToImageOutput = Blob; +interface Base64ImageGeneration { + data: Array<{ + b64_json: string; + }>; +} +interface OutputUrlImageGeneration { + output: string[]; +} + /** * This task reads some text input and outputs an image. * Recommended model: stabilityai/stable-diffusion-2 */ export async function textToImage(args: TextToImageArgs, options?: Options): Promise { - const res = await request(args, { + if (args.provider === "together" || args.provider === "fal-ai") { + args.prompt = args.inputs; + args.inputs = ""; + args.response_format = "base64"; + } else if (args.provider === "replicate") { + args.input = { prompt: args.inputs }; + delete (args as unknown as { inputs: unknown }).inputs; + } + const res = await request(args, { ...options, taskHint: "text-to-image", }); + if (res && typeof res === "object") { + if (args.provider === "fal-ai" && "images" in res && Array.isArray(res.images) && res.images[0].url) { + const image = await fetch(res.images[0].url); + return await image.blob(); + } + if ("data" in res && Array.isArray(res.data) && res.data[0].b64_json) { + const base64Data = res.data[0].b64_json; + const base64Response = await fetch(`data:image/jpeg;base64,${base64Data}`); + const blob = await base64Response.blob(); + return blob; + } + if ("output" in res && Array.isArray(res.output)) { + const urlResponse = await fetch(res.output[0]); + const blob = await urlResponse.blob(); + return blob; + } + } const isValidOutput = res && res instanceof Blob; if (!isValidOutput) { throw new InferenceOutputError("Expected Blob"); diff --git a/packages/inference/src/tasks/nlp/chatCompletion.ts b/packages/inference/src/tasks/nlp/chatCompletion.ts index fbc7e0cc1..740362095 100644 --- a/packages/inference/src/tasks/nlp/chatCompletion.ts +++ b/packages/inference/src/tasks/nlp/chatCompletion.ts @@ -6,7 +6,6 @@ import type { ChatCompletionInput, ChatCompletionOutput } from "@huggingface/tas /** * Use the chat completion endpoint to generate a response to a prompt, using OpenAI message completion API no stream */ - export async function chatCompletion( args: BaseArgs & ChatCompletionInput, options?: Options @@ -22,7 +21,8 @@ export async function chatCompletion( typeof res?.created === "number" && typeof res?.id === "string" && typeof res?.model === "string" && - typeof res?.system_fingerprint === "string" && + /// Together.ai does not output a system_fingerprint + (res.system_fingerprint === undefined || typeof res.system_fingerprint === "string") && typeof res?.usage === "object"; if (!isValidOutput) { diff --git a/packages/inference/src/tasks/nlp/textGeneration.ts b/packages/inference/src/tasks/nlp/textGeneration.ts index bda33cee5..7d5906f12 100644 --- a/packages/inference/src/tasks/nlp/textGeneration.ts +++ b/packages/inference/src/tasks/nlp/textGeneration.ts @@ -1,4 +1,9 @@ -import type { TextGenerationInput, TextGenerationOutput } from "@huggingface/tasks"; +import type { + ChatCompletionOutput, + TextGenerationInput, + TextGenerationOutput, + TextGenerationOutputFinishReason, +} from "@huggingface/tasks"; import { InferenceOutputError } from "../../lib/InferenceOutputError"; import type { BaseArgs, Options } from "../../types"; import { toArray } from "../../utils/toArray"; @@ -6,6 +11,16 @@ import { request } from "../custom/request"; export type { TextGenerationInput, TextGenerationOutput }; +interface TogeteherTextCompletionOutput extends Omit { + choices: Array<{ + text: string; + finish_reason: TextGenerationOutputFinishReason; + seed: number; + logprobs: unknown; + index: number; + }>; +} + /** * Use to continue text from a prompt. This is a very generic task. Recommended model: gpt2 (it’s a simple model, but fun to play with). */ @@ -13,15 +28,34 @@ export async function textGeneration( args: BaseArgs & TextGenerationInput, options?: Options ): Promise { - const res = toArray( - await request(args, { + if (args.provider === "together") { + args.prompt = args.inputs; + const raw = await request(args, { ...options, taskHint: "text-generation", - }) - ); - const isValidOutput = Array.isArray(res) && res.every((x) => typeof x?.generated_text === "string"); - if (!isValidOutput) { - throw new InferenceOutputError("Expected Array<{generated_text: string}>"); + }); + const isValidOutput = + typeof raw === "object" && "choices" in raw && Array.isArray(raw?.choices) && typeof raw?.model === "string"; + if (!isValidOutput) { + throw new InferenceOutputError("Expected ChatCompletionOutput"); + } + const completion = raw.choices[0]; + return { + generated_text: completion.text, + }; + } else { + const res = toArray( + await request(args, { + ...options, + taskHint: "text-generation", + }) + ); + + const isValidOutput = + Array.isArray(res) && res.every((x) => "generated_text" in x && typeof x?.generated_text === "string"); + if (!isValidOutput) { + throw new InferenceOutputError("Expected Array<{generated_text: string}>"); + } + return (res as TextGenerationOutput[])?.[0]; } - return res?.[0]; } diff --git a/packages/inference/src/types.ts b/packages/inference/src/types.ts index 66490bf2c..3b70538b3 100644 --- a/packages/inference/src/types.ts +++ b/packages/inference/src/types.ts @@ -1,6 +1,11 @@ import type { PipelineType } from "@huggingface/tasks"; import type { ChatCompletionInput } from "@huggingface/tasks"; +/** + * HF model id, like "meta-llama/Llama-3.3-70B-Instruct" + */ +export type ModelId = string; + export interface Options { /** * (Default: true) Boolean. If a request 503s and wait_for_model is set to false, the request will be retried with the same parameters but with wait_for_model set to true. @@ -40,22 +45,28 @@ export interface Options { export type InferenceTask = Exclude; +export const INFERENCE_PROVIDERS = ["fal-ai", "replicate", "sambanova", "together", "hf-inference"] as const; +export type InferenceProvider = (typeof INFERENCE_PROVIDERS)[number]; + export interface BaseArgs { /** * The access token to use. Without it, you'll get rate-limited quickly. * * Can be created for free in hf.co/settings/token + * + * You can also pass an external Inference provider's key if you intend to call a compatible provider like Sambanova, Together, Replicate... */ accessToken?: string; + /** - * The model to use. + * The HF model to use. * * If not specified, will call huggingface.co/api/tasks to get the default model for the task. * * /!\ Legacy behavior allows this to be an URL, but this is deprecated and will be removed in the future. * Use the `endpointUrl` parameter instead. */ - model?: string; + model?: ModelId; /** * The URL of the endpoint to use. If not specified, will call huggingface.co/api/tasks to get the default endpoint for the task. @@ -63,6 +74,13 @@ export interface BaseArgs { * If specified, will use this URL instead of the default one. */ endpointUrl?: string; + + /** + * Set an Inference provider to run this model on. + * + * Defaults to the first provider in your user settings that is compatible with this model. + */ + provider?: InferenceProvider; } export type RequestArgs = BaseArgs & diff --git a/packages/inference/test/HfInference.spec.ts b/packages/inference/test/HfInference.spec.ts index cc2a95ef7..eddb670cc 100644 --- a/packages/inference/test/HfInference.spec.ts +++ b/packages/inference/test/HfInference.spec.ts @@ -13,378 +13,353 @@ if (!env.HF_TOKEN) { console.warn("Set HF_TOKEN in the env to run the tests for better rate limits"); } -describe.concurrent( - "HfInference", - () => { - // Individual tests can be ran without providing an api key, however running all tests without an api key will result in rate limiting error. - const hf = new HfInference(env.HF_TOKEN); - - it("throws error if model does not exist", () => { - expect( - hf.fillMask({ - model: "this-model-does-not-exist-123", - inputs: "[MASK] world!", - }) - ).rejects.toThrowError("Model this-model-does-not-exist-123 does not exist"); - }); - - it("fillMask", async () => { - expect( - await hf.fillMask({ - model: "bert-base-uncased", - inputs: "[MASK] world!", - }) - ).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - score: expect.any(Number), - token: expect.any(Number), - token_str: expect.any(String), - sequence: expect.any(String), - }), - ]) - ); - }); - - it("works without model", async () => { - expect( - await hf.fillMask({ - inputs: "[MASK] world!", - }) - ).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - score: expect.any(Number), - token: expect.any(Number), - token_str: expect.any(String), - sequence: expect.any(String), - }), - ]) - ); - }); - - it("summarization", async () => { - expect( - await hf.summarization({ - model: "google/pegasus-xsum", - inputs: - "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930.", +describe.concurrent("HfInference", () => { + // Individual tests can be ran without providing an api key, however running all tests without an api key will result in rate limiting error. + + describe.concurrent( + "HF Inference", + () => { + const hf = new HfInference(env.HF_TOKEN); + it("throws error if model does not exist", () => { + expect( + hf.fillMask({ + model: "this-model-does-not-exist-123", + inputs: "[MASK] world!", + }) + ).rejects.toThrowError("Model this-model-does-not-exist-123 does not exist"); + }); + + it("fillMask", async () => { + expect( + await hf.fillMask({ + model: "bert-base-uncased", + inputs: "[MASK] world!", + }) + ).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + score: expect.any(Number), + token: expect.any(Number), + token_str: expect.any(String), + sequence: expect.any(String), + }), + ]) + ); + }); + + it("works without model", async () => { + expect( + await hf.fillMask({ + inputs: "[MASK] world!", + }) + ).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + score: expect.any(Number), + token: expect.any(Number), + token_str: expect.any(String), + sequence: expect.any(String), + }), + ]) + ); + }); + + it("summarization", async () => { + expect( + await hf.summarization({ + model: "google/pegasus-xsum", + inputs: + "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930.", + parameters: { + max_length: 100, + }, + }) + ).toEqual({ + summary_text: "The Eiffel Tower is one of the most famous buildings in the world.", + }); + }); + + it("questionAnswering", async () => { + expect( + await hf.questionAnswering({ + model: "deepset/roberta-base-squad2", + inputs: { + question: "What is the capital of France?", + context: "The capital of France is Paris.", + }, + }) + ).toMatchObject({ + answer: "Paris", + score: expect.any(Number), + start: expect.any(Number), + end: expect.any(Number), + }); + }); + + it("tableQuestionAnswering", async () => { + expect( + await hf.tableQuestionAnswering({ + model: "google/tapas-base-finetuned-wtq", + inputs: { + query: "How many stars does the transformers repository have?", + table: { + Repository: ["Transformers", "Datasets", "Tokenizers"], + Stars: ["36542", "4512", "3934"], + Contributors: ["651", "77", "34"], + "Programming language": ["Python", "Python", "Rust, Python and NodeJS"], + }, + }, + }) + ).toMatchObject({ + answer: "AVERAGE > 36542", + coordinates: [[0, 1]], + cells: ["36542"], + aggregator: "AVERAGE", + }); + }); + + it("documentQuestionAnswering", async () => { + expect( + await hf.documentQuestionAnswering({ + model: "impira/layoutlm-document-qa", + inputs: { + question: "Invoice number?", + image: new Blob([readTestFile("invoice.png")], { type: "image/png" }), + }, + }) + ).toMatchObject({ + answer: "us-001", + score: expect.any(Number), + // not sure what start/end refers to in this case + start: expect.any(Number), + end: expect.any(Number), + }); + }); + + // Errors with "Error: If you are using a VisionEncoderDecoderModel, you must provide a feature extractor" + it.skip("documentQuestionAnswering with non-array output", async () => { + expect( + await hf.documentQuestionAnswering({ + model: "naver-clova-ix/donut-base-finetuned-docvqa", + inputs: { + question: "Invoice number?", + image: new Blob([readTestFile("invoice.png")], { type: "image/png" }), + }, + }) + ).toMatchObject({ + answer: "us-001", + }); + }); + + it("visualQuestionAnswering", async () => { + expect( + await hf.visualQuestionAnswering({ + model: "dandelin/vilt-b32-finetuned-vqa", + inputs: { + question: "How many cats are lying down?", + image: new Blob([readTestFile("cats.png")], { type: "image/png" }), + }, + }) + ).toMatchObject({ + answer: "2", + score: expect.any(Number), + }); + }); + + it("textClassification", async () => { + expect( + await hf.textClassification({ + model: "distilbert-base-uncased-finetuned-sst-2-english", + inputs: "I like you. I love you.", + }) + ).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + label: expect.any(String), + score: expect.any(Number), + }), + ]) + ); + }); + + it("textGeneration - gpt2", async () => { + expect( + await hf.textGeneration({ + model: "gpt2", + inputs: "The answer to the universe is", + }) + ).toMatchObject({ + generated_text: expect.any(String), + }); + }); + + it("textGeneration - openai-community/gpt2", async () => { + expect( + await hf.textGeneration({ + model: "openai-community/gpt2", + inputs: "The answer to the universe is", + }) + ).toMatchObject({ + generated_text: expect.any(String), + }); + }); + + it("textGenerationStream - meta-llama/Llama-2-7b-hf", async () => { + const response = hf.textGenerationStream({ + model: "meta-llama/Llama-2-7b-hf", + inputs: "Please answer the following question: complete one two and ____.", + }); + + for await (const ret of response) { + expect(ret).toMatchObject({ + details: null, + index: expect.any(Number), + token: { + id: expect.any(Number), + logprob: expect.any(Number), + text: expect.any(String) || null, + special: expect.any(Boolean), + }, + generated_text: ret.generated_text + ? "Please answer the following question: complete one two and ____. How does the fish find its ____? After the fish is ________ how does it get to the shore?\n1. How do objects become super saturated bubbles?\n2. What resist limiting the movement of gas?" + : null, + }); + } + }); + + it("textGenerationStream - catch error", async () => { + const response = hf.textGenerationStream({ + model: "meta-llama/Llama-2-7b-hf", + inputs: "Write a short story about a robot that becomes sentient and takes over the world.", parameters: { - max_length: 100, - }, - }) - ).toEqual({ - summary_text: "The Eiffel Tower is one of the most famous buildings in the world.", - }); - }); - - it("questionAnswering", async () => { - expect( - await hf.questionAnswering({ - model: "deepset/roberta-base-squad2", - inputs: { - question: "What is the capital of France?", - context: "The capital of France is Paris.", + max_new_tokens: 10_000, }, - }) - ).toMatchObject({ - answer: "Paris", - score: expect.any(Number), - start: expect.any(Number), - end: expect.any(Number), - }); - }); - - it("tableQuestionAnswering", async () => { - expect( - await hf.tableQuestionAnswering({ - model: "google/tapas-base-finetuned-wtq", - inputs: { - query: "How many stars does the transformers repository have?", - table: { - Repository: ["Transformers", "Datasets", "Tokenizers"], - Stars: ["36542", "4512", "3934"], - Contributors: ["651", "77", "34"], - "Programming language": ["Python", "Python", "Rust, Python and NodeJS"], + }); + + await expect(response.next()).rejects.toThrow( + "Input validation error: `inputs` tokens + `max_new_tokens` must be <= 8192. Given: 18 `inputs` tokens and 10000 `max_new_tokens`" + ); + }); + + it.skip("textGenerationStream - Abort", async () => { + const controller = new AbortController(); + const response = hf.textGenerationStream( + { + model: "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5", + inputs: "Write an essay about Sartre's philosophy.", + parameters: { + max_new_tokens: 100, }, }, - }) - ).toMatchObject({ - answer: "AVERAGE > 36542", - coordinates: [[0, 1]], - cells: ["36542"], - aggregator: "AVERAGE", - }); - }); - - it("documentQuestionAnswering", async () => { - expect( - await hf.documentQuestionAnswering({ - model: "impira/layoutlm-document-qa", - inputs: { - question: "Invoice number?", - image: new Blob([readTestFile("invoice.png")], { type: "image/png" }), - }, - }) - ).toMatchObject({ - answer: "us-001", - score: expect.any(Number), - // not sure what start/end refers to in this case - start: expect.any(Number), - end: expect.any(Number), - }); - }); - - // Errors with "Error: If you are using a VisionEncoderDecoderModel, you must provide a feature extractor" - it.skip("documentQuestionAnswering with non-array output", async () => { - expect( - await hf.documentQuestionAnswering({ - model: "naver-clova-ix/donut-base-finetuned-docvqa", - inputs: { - question: "Invoice number?", - image: new Blob([readTestFile("invoice.png")], { type: "image/png" }), - }, - }) - ).toMatchObject({ - answer: "us-001", - }); - }); - - it("visualQuestionAnswering", async () => { - expect( - await hf.visualQuestionAnswering({ - model: "dandelin/vilt-b32-finetuned-vqa", - inputs: { - question: "How many cats are lying down?", - image: new Blob([readTestFile("cats.png")], { type: "image/png" }), - }, - }) - ).toMatchObject({ - answer: "2", - score: expect.any(Number), - }); - }); - - it("textClassification", async () => { - expect( - await hf.textClassification({ - model: "distilbert-base-uncased-finetuned-sst-2-english", - inputs: "I like you. I love you.", - }) - ).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - label: expect.any(String), - score: expect.any(Number), - }), - ]) - ); - }); - - it("textGeneration - gpt2", async () => { - expect( - await hf.textGeneration({ - model: "gpt2", - inputs: "The answer to the universe is", - }) - ).toMatchObject({ - generated_text: expect.any(String), - }); - }); - - it("textGeneration - openai-community/gpt2", async () => { - expect( - await hf.textGeneration({ - model: "openai-community/gpt2", - inputs: "The answer to the universe is", - }) - ).toMatchObject({ - generated_text: expect.any(String), - }); - }); - - it("textGenerationStream - meta-llama/Llama-2-7b-hf", async () => { - const response = hf.textGenerationStream({ - model: "meta-llama/Llama-2-7b-hf", - inputs: "Please answer the following question: complete one two and ____.", - }); - - for await (const ret of response) { - expect(ret).toMatchObject({ - details: null, - index: expect.any(Number), - token: { - id: expect.any(Number), - logprob: expect.any(Number), - text: expect.any(String) || null, - special: expect.any(Boolean), + { signal: controller.signal } + ); + await expect(response.next()).resolves.toBeDefined(); + await expect(response.next()).resolves.toBeDefined(); + controller.abort(); + await expect(response.next()).rejects.toThrow("The operation was aborted"); + }); + + it("tokenClassification", async () => { + expect( + await hf.tokenClassification({ + model: "dbmdz/bert-large-cased-finetuned-conll03-english", + inputs: "My name is Sarah Jessica Parker but you can call me Jessica", + }) + ).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + entity_group: expect.any(String), + score: expect.any(Number), + word: expect.any(String), + start: expect.any(Number), + end: expect.any(Number), + }), + ]) + ); + }); + + it("translation", async () => { + expect( + await hf.translation({ + model: "t5-base", + inputs: "My name is Wolfgang and I live in Berlin", + }) + ).toMatchObject({ + translation_text: "Mein Name ist Wolfgang und ich lebe in Berlin", + }); + // input is a list + expect( + await hf.translation({ + model: "t5-base", + inputs: ["My name is Wolfgang and I live in Berlin", "I work as programmer"], + }) + ).toMatchObject([ + { + translation_text: "Mein Name ist Wolfgang und ich lebe in Berlin", }, - generated_text: ret.generated_text - ? "Please answer the following question: complete one two and ____. How does the fish find its ____? After the fish is ________ how does it get to the shore?\n1. How do objects become super saturated bubbles?\n2. What resist limiting the movement of gas?" - : null, - }); - } - }); - - it("textGenerationStream - catch error", async () => { - const response = hf.textGenerationStream({ - model: "meta-llama/Llama-2-7b-hf", - inputs: "Write a short story about a robot that becomes sentient and takes over the world.", - parameters: { - max_new_tokens: 10_000, - }, - }); - - await expect(response.next()).rejects.toThrow( - "Input validation error: `inputs` tokens + `max_new_tokens` must be <= 8192. Given: 18 `inputs` tokens and 10000 `max_new_tokens`" - ); - }); - - it.skip("textGenerationStream - Abort", async () => { - const controller = new AbortController(); - const response = hf.textGenerationStream( - { - model: "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5", - inputs: "Write an essay about Sartre's philosophy.", - parameters: { - max_new_tokens: 100, + { + translation_text: "Ich arbeite als Programmierer", }, - }, - { signal: controller.signal } - ); - await expect(response.next()).resolves.toBeDefined(); - await expect(response.next()).resolves.toBeDefined(); - controller.abort(); - await expect(response.next()).rejects.toThrow("The operation was aborted"); - }); - - it("tokenClassification", async () => { - expect( - await hf.tokenClassification({ - model: "dbmdz/bert-large-cased-finetuned-conll03-english", - inputs: "My name is Sarah Jessica Parker but you can call me Jessica", - }) - ).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - entity_group: expect.any(String), - score: expect.any(Number), - word: expect.any(String), - start: expect.any(Number), - end: expect.any(Number), - }), - ]) - ); - }); - - it("translation", async () => { - expect( - await hf.translation({ - model: "t5-base", - inputs: "My name is Wolfgang and I live in Berlin", - }) - ).toMatchObject({ - translation_text: "Mein Name ist Wolfgang und ich lebe in Berlin", - }); - // input is a list - expect( - await hf.translation({ - model: "t5-base", - inputs: ["My name is Wolfgang and I live in Berlin", "I work as programmer"], - }) - ).toMatchObject([ - { - translation_text: "Mein Name ist Wolfgang und ich lebe in Berlin", - }, - { - translation_text: "Ich arbeite als Programmierer", - }, - ]); - }); - it("zeroShotClassification", async () => { - expect( - await hf.zeroShotClassification({ - model: "facebook/bart-large-mnli", - inputs: [ - "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!", - ], - parameters: { candidate_labels: ["refund", "legal", "faq"] }, - }) - ).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - sequence: + ]); + }); + it("zeroShotClassification", async () => { + expect( + await hf.zeroShotClassification({ + model: "facebook/bart-large-mnli", + inputs: [ "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!", - labels: ["refund", "faq", "legal"], - scores: [ - expect.closeTo(0.877787709236145, 5), - expect.closeTo(0.10522633045911789, 5), - expect.closeTo(0.01698593981564045, 5), ], - }), - ]) - ); - }); - it("SentenceSimilarity", async () => { - expect( - await hf.sentenceSimilarity({ + parameters: { candidate_labels: ["refund", "legal", "faq"] }, + }) + ).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + sequence: + "Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!", + labels: ["refund", "faq", "legal"], + scores: [ + expect.closeTo(0.877787709236145, 5), + expect.closeTo(0.10522633045911789, 5), + expect.closeTo(0.01698593981564045, 5), + ], + }), + ]) + ); + }); + it("SentenceSimilarity", async () => { + expect( + await hf.sentenceSimilarity({ + model: "sentence-transformers/paraphrase-xlm-r-multilingual-v1", + inputs: { + source_sentence: "That is a happy person", + sentences: ["That is a happy dog", "That is a very happy person", "Today is a sunny day"], + }, + }) + ).toEqual([expect.any(Number), expect.any(Number), expect.any(Number)]); + }); + it("FeatureExtraction", async () => { + const response = await hf.featureExtraction({ + model: "sentence-transformers/distilbert-base-nli-mean-tokens", + inputs: "That is a happy person", + }); + expect(response).toEqual(expect.arrayContaining([expect.any(Number)])); + }); + it("FeatureExtraction - same model as sentence similarity", async () => { + const response = await hf.featureExtraction({ model: "sentence-transformers/paraphrase-xlm-r-multilingual-v1", - inputs: { - source_sentence: "That is a happy person", - sentences: ["That is a happy dog", "That is a very happy person", "Today is a sunny day"], - }, - }) - ).toEqual([expect.any(Number), expect.any(Number), expect.any(Number)]); - }); - it("FeatureExtraction", async () => { - const response = await hf.featureExtraction({ - model: "sentence-transformers/distilbert-base-nli-mean-tokens", - inputs: "That is a happy person", - }); - expect(response).toEqual(expect.arrayContaining([expect.any(Number)])); - }); - it("FeatureExtraction - same model as sentence similarity", async () => { - const response = await hf.featureExtraction({ - model: "sentence-transformers/paraphrase-xlm-r-multilingual-v1", - inputs: "That is a happy person", - }); - - expect(response.length).toBeGreaterThan(10); - expect(response).toEqual(expect.arrayContaining([expect.any(Number)])); - }); - it("FeatureExtraction - facebook/bart-base", async () => { - const response = await hf.featureExtraction({ - model: "facebook/bart-base", - inputs: "That is a happy person", - }); - // 1x7x768 - expect(response).toEqual([ - [ - expect.arrayContaining([expect.any(Number)]), - expect.arrayContaining([expect.any(Number)]), - expect.arrayContaining([expect.any(Number)]), - expect.arrayContaining([expect.any(Number)]), - expect.arrayContaining([expect.any(Number)]), - expect.arrayContaining([expect.any(Number)]), - expect.arrayContaining([expect.any(Number)]), - ], - ]); - }); - it("FeatureExtraction - facebook/bart-base, list input", async () => { - const response = await hf.featureExtraction({ - model: "facebook/bart-base", - inputs: ["hello", "That is a happy person"], - }); - // Nx1xTx768 - expect(response).toEqual([ - [ - [ - expect.arrayContaining([expect.any(Number)]), - expect.arrayContaining([expect.any(Number)]), - expect.arrayContaining([expect.any(Number)]), - ], - ], - [ + inputs: "That is a happy person", + }); + + expect(response.length).toBeGreaterThan(10); + expect(response).toEqual(expect.arrayContaining([expect.any(Number)])); + }); + it("FeatureExtraction - facebook/bart-base", async () => { + const response = await hf.featureExtraction({ + model: "facebook/bart-base", + inputs: "That is a happy person", + }); + // 1x7x768 + expect(response).toEqual([ [ expect.arrayContaining([expect.any(Number)]), expect.arrayContaining([expect.any(Number)]), @@ -394,368 +369,533 @@ describe.concurrent( expect.arrayContaining([expect.any(Number)]), expect.arrayContaining([expect.any(Number)]), ], - ], - ]); - }); - it("automaticSpeechRecognition", async () => { - expect( - await hf.automaticSpeechRecognition({ - model: "facebook/wav2vec2-large-960h-lv60-self", - data: new Blob([readTestFile("sample1.flac")], { type: "audio/flac" }), - }) - ).toMatchObject({ - text: "GOING ALONG SLUSHY COUNTRY ROADS AND SPEAKING TO DAMP AUDIENCES IN DRAUGHTY SCHOOLROOMS DAY AFTER DAY FOR A FORTNIGHT HE'LL HAVE TO PUT IN AN APPEARANCE AT SOME PLACE OF WORSHIP ON SUNDAY MORNING AND HE CAN COME TO US IMMEDIATELY AFTERWARDS", - }); - }); - it("audioClassification", async () => { - expect( - await hf.audioClassification({ - model: "superb/hubert-large-superb-er", - data: new Blob([readTestFile("sample1.flac")], { type: "audio/flac" }), - }) - ).toEqual( - expect.arrayContaining([ - expect.objectContaining({ + ]); + }); + it("FeatureExtraction - facebook/bart-base, list input", async () => { + const response = await hf.featureExtraction({ + model: "facebook/bart-base", + inputs: ["hello", "That is a happy person"], + }); + // Nx1xTx768 + expect(response).toEqual([ + [ + [ + expect.arrayContaining([expect.any(Number)]), + expect.arrayContaining([expect.any(Number)]), + expect.arrayContaining([expect.any(Number)]), + ], + ], + [ + [ + expect.arrayContaining([expect.any(Number)]), + expect.arrayContaining([expect.any(Number)]), + expect.arrayContaining([expect.any(Number)]), + expect.arrayContaining([expect.any(Number)]), + expect.arrayContaining([expect.any(Number)]), + expect.arrayContaining([expect.any(Number)]), + expect.arrayContaining([expect.any(Number)]), + ], + ], + ]); + }); + it("automaticSpeechRecognition", async () => { + expect( + await hf.automaticSpeechRecognition({ + model: "facebook/wav2vec2-large-960h-lv60-self", + data: new Blob([readTestFile("sample1.flac")], { type: "audio/flac" }), + }) + ).toMatchObject({ + text: "GOING ALONG SLUSHY COUNTRY ROADS AND SPEAKING TO DAMP AUDIENCES IN DRAUGHTY SCHOOLROOMS DAY AFTER DAY FOR A FORTNIGHT HE'LL HAVE TO PUT IN AN APPEARANCE AT SOME PLACE OF WORSHIP ON SUNDAY MORNING AND HE CAN COME TO US IMMEDIATELY AFTERWARDS", + }); + }); + it("audioClassification", async () => { + expect( + await hf.audioClassification({ + model: "superb/hubert-large-superb-er", + data: new Blob([readTestFile("sample1.flac")], { type: "audio/flac" }), + }) + ).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + score: expect.any(Number), + label: expect.any(String), + }), + ]) + ); + }); + + it("audioToAudio", async () => { + expect( + await hf.audioToAudio({ + model: "speechbrain/sepformer-wham", + data: new Blob([readTestFile("sample1.flac")], { type: "audio/flac" }), + }) + ).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + label: expect.any(String), + blob: expect.any(String), + "content-type": expect.any(String), + }), + ]) + ); + }); + + it("textToSpeech", async () => { + expect( + await hf.textToSpeech({ + model: "espnet/kan-bayashi_ljspeech_vits", + inputs: "hello there!", + }) + ).toBeInstanceOf(Blob); + }); + + it("imageClassification", async () => { + expect( + await hf.imageClassification({ + data: new Blob([readTestFile("cheetah.png")], { type: "image/png" }), + model: "google/vit-base-patch16-224", + }) + ).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + score: expect.any(Number), + label: expect.any(String), + }), + ]) + ); + }); + + it("zeroShotImageClassification", async () => { + expect( + await hf.zeroShotImageClassification({ + inputs: { image: new Blob([readTestFile("cheetah.png")], { type: "image/png" }) }, + model: "openai/clip-vit-large-patch14-336", + parameters: { + candidate_labels: ["animal", "toy", "car"], + }, + }) + ).toEqual([ + { + label: "animal", score: expect.any(Number), - label: expect.any(String), - }), - ]) - ); - }); - - it("audioToAudio", async () => { - expect( - await hf.audioToAudio({ - model: "speechbrain/sepformer-wham", - data: new Blob([readTestFile("sample1.flac")], { type: "audio/flac" }), - }) - ).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - label: expect.any(String), - blob: expect.any(String), - "content-type": expect.any(String), - }), - ]) - ); - }); - - it("textToSpeech", async () => { - expect( - await hf.textToSpeech({ - model: "espnet/kan-bayashi_ljspeech_vits", - inputs: "hello there!", - }) - ).toBeInstanceOf(Blob); - }); - - it("imageClassification", async () => { - expect( - await hf.imageClassification({ - data: new Blob([readTestFile("cheetah.png")], { type: "image/png" }), - model: "google/vit-base-patch16-224", - }) - ).toEqual( - expect.arrayContaining([ - expect.objectContaining({ + }, + { + label: "car", score: expect.any(Number), - label: expect.any(String), - }), - ]) - ); - }); - - it("zeroShotImageClassification", async () => { - expect( - await hf.zeroShotImageClassification({ - inputs: { image: new Blob([readTestFile("cheetah.png")], { type: "image/png" }) }, - model: "openai/clip-vit-large-patch14-336", - parameters: { - candidate_labels: ["animal", "toy", "car"], }, - }) - ).toEqual([ - { - label: "animal", - score: expect.any(Number), - }, - { - label: "car", - score: expect.any(Number), - }, - { - label: "toy", - score: expect.any(Number), - }, - ]); - }); - - it("objectDetection", async () => { - expect( - await hf.imageClassification({ - data: new Blob([readTestFile("cats.png")], { type: "image/png" }), - model: "facebook/detr-resnet-50", - }) - ).toEqual( - expect.arrayContaining([ - expect.objectContaining({ + { + label: "toy", score: expect.any(Number), - label: expect.any(String), - box: expect.objectContaining({ - xmin: expect.any(Number), - ymin: expect.any(Number), - xmax: expect.any(Number), - ymax: expect.any(Number), + }, + ]); + }); + + it("objectDetection", async () => { + expect( + await hf.imageClassification({ + data: new Blob([readTestFile("cats.png")], { type: "image/png" }), + model: "facebook/detr-resnet-50", + }) + ).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + score: expect.any(Number), + label: expect.any(String), + box: expect.objectContaining({ + xmin: expect.any(Number), + ymin: expect.any(Number), + xmax: expect.any(Number), + ymax: expect.any(Number), + }), }), - }), - ]) - ); - }); - it("imageSegmentation", async () => { - expect( - await hf.imageClassification({ - data: new Blob([readTestFile("cats.png")], { type: "image/png" }), - model: "facebook/detr-resnet-50-panoptic", - }) - ).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - score: expect.any(Number), - label: expect.any(String), - mask: expect.any(String), - }), - ]) - ); - }); - it("imageToImage", async () => { - const num_inference_steps = 25; - - const res = await hf.imageToImage({ - inputs: new Blob([readTestFile("stormtrooper_depth.png")], { type: "image / png" }), - parameters: { - prompt: "elmo's lecture", - num_inference_steps, - }, - model: "lllyasviel/sd-controlnet-depth", - }); - expect(res).toBeInstanceOf(Blob); - }); - it("imageToImage blob data", async () => { - const res = await hf.imageToImage({ - inputs: new Blob([readTestFile("bird_canny.png")], { type: "image / png" }), - model: "lllyasviel/sd-controlnet-canny", - }); - expect(res).toBeInstanceOf(Blob); - }); - it("textToImage", async () => { - const res = await hf.textToImage({ - inputs: "award winning high resolution photo of a giant tortoise/((ladybird)) hybrid, [trending on artstation]", - model: "stabilityai/stable-diffusion-2", - }); - expect(res).toBeInstanceOf(Blob); - }); - - it("textToImage with parameters", async () => { - const width = 512; - const height = 128; - const num_inference_steps = 10; - - const res = await hf.textToImage({ - inputs: "award winning high resolution photo of a giant tortoise/((ladybird)) hybrid, [trending on artstation]", - model: "stabilityai/stable-diffusion-2", - parameters: { - negative_prompt: "blurry", - width, - height, - num_inference_steps, - }, - }); - expect(res).toBeInstanceOf(Blob); - }); - it("imageToText", async () => { - expect( - await hf.imageToText({ - data: new Blob([readTestFile("cheetah.png")], { type: "image/png" }), - model: "nlpconnect/vit-gpt2-image-captioning", - }) - ).toEqual({ - generated_text: "a large brown and white giraffe standing in a field ", - }); - }); - it("request - openai-community/gpt2", async () => { - expect( - await hf.request({ - model: "openai-community/gpt2", - inputs: "one plus two equals", - }) - ).toMatchObject([ - { - generated_text: expect.any(String), - }, - ]); - }); - - // Skipped at the moment because takes forever - it.skip("tabularRegression", async () => { - expect( - await hf.tabularRegression({ - model: "scikit-learn/Fish-Weight", - inputs: { - data: { - Height: ["11.52", "12.48", "12.3778"], - Length1: ["23.2", "24", "23.9"], - Length2: ["25.4", "26.3", "26.5"], - Length3: ["30", "31.2", "31.1"], - Species: ["Bream", "Bream", "Bream"], - Width: ["4.02", "4.3056", "4.6961"], - }, + ]) + ); + }); + it("imageSegmentation", async () => { + expect( + await hf.imageClassification({ + data: new Blob([readTestFile("cats.png")], { type: "image/png" }), + model: "facebook/detr-resnet-50-panoptic", + }) + ).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + score: expect.any(Number), + label: expect.any(String), + mask: expect.any(String), + }), + ]) + ); + }); + it("imageToImage", async () => { + const num_inference_steps = 25; + + const res = await hf.imageToImage({ + inputs: new Blob([readTestFile("stormtrooper_depth.png")], { type: "image / png" }), + parameters: { + prompt: "elmo's lecture", + num_inference_steps, }, - }) - ).toMatchObject([270.5473526976245, 313.6843425638086, 328.3727133404402]); - }); - - // Skipped at the moment because takes forever - it.skip("tabularClassification", async () => { - expect( - await hf.tabularClassification({ - model: "vvmnnnkv/wine-quality", - inputs: { - data: { - fixed_acidity: ["7.4", "7.8", "10.3"], - volatile_acidity: ["0.7", "0.88", "0.32"], - citric_acid: ["0", "0", "0.45"], - residual_sugar: ["1.9", "2.6", "6.4"], - chlorides: ["0.076", "0.098", "0.073"], - free_sulfur_dioxide: ["11", "25", "5"], - total_sulfur_dioxide: ["34", "67", "13"], - density: ["0.9978", "0.9968", "0.9976"], - pH: ["3.51", "3.2", "3.23"], - sulphates: ["0.56", "0.68", "0.82"], - alcohol: ["9.4", "9.8", "12.6"], - }, + model: "lllyasviel/sd-controlnet-depth", + }); + expect(res).toBeInstanceOf(Blob); + }); + it("imageToImage blob data", async () => { + const res = await hf.imageToImage({ + inputs: new Blob([readTestFile("bird_canny.png")], { type: "image / png" }), + model: "lllyasviel/sd-controlnet-canny", + }); + expect(res).toBeInstanceOf(Blob); + }); + it("textToImage", async () => { + const res = await hf.textToImage({ + inputs: + "award winning high resolution photo of a giant tortoise/((ladybird)) hybrid, [trending on artstation]", + model: "stabilityai/stable-diffusion-2", + }); + expect(res).toBeInstanceOf(Blob); + }); + + it("textToImage with parameters", async () => { + const width = 512; + const height = 128; + const num_inference_steps = 10; + + const res = await hf.textToImage({ + inputs: + "award winning high resolution photo of a giant tortoise/((ladybird)) hybrid, [trending on artstation]", + model: "stabilityai/stable-diffusion-2", + parameters: { + negative_prompt: "blurry", + width, + height, + num_inference_steps, }, - }) - ).toMatchObject([5, 5, 7]); - }); - - it("endpoint - makes request to specified endpoint", async () => { - const ep = hf.endpoint("https://api-inference.huggingface.co/models/openai-community/gpt2"); - const { generated_text } = await ep.textGeneration({ - inputs: "one plus two equals", - }); - assert.include(generated_text, "three"); - }); - - it("chatCompletion modelId - OpenAI Specs", async () => { - const res = await hf.chatCompletion({ - model: "mistralai/Mistral-7B-Instruct-v0.2", - messages: [{ role: "user", content: "Complete the this sentence with words one plus one is equal " }], - max_tokens: 500, - temperature: 0.1, - seed: 0, - }); - if (res.choices && res.choices.length > 0) { - const completion = res.choices[0].message?.content; - expect(completion).toContain("to two"); - } - }); - - it("chatCompletionStream modelId - OpenAI Specs", async () => { - const stream = hf.chatCompletionStream({ - model: "mistralai/Mistral-7B-Instruct-v0.2", - messages: [{ role: "user", content: "Complete the equation 1+1= ,just the answer" }], - max_tokens: 500, - temperature: 0.1, - seed: 0, - }); - let out = ""; - for await (const chunk of stream) { - if (chunk.choices && chunk.choices.length > 0) { - out += chunk.choices[0].delta.content; - } - } - expect(out).toContain("2"); - }); - - it("chatCompletionStream modelId Fail - OpenAI Specs", async () => { - expect( - hf - .chatCompletionStream({ - model: "google/gemma-2b", - messages: [{ role: "user", content: "Complete the equation 1+1= ,just the answer" }], - max_tokens: 500, - temperature: 0.1, - seed: 0, + }); + expect(res).toBeInstanceOf(Blob); + }); + it("imageToText", async () => { + expect( + await hf.imageToText({ + data: new Blob([readTestFile("cheetah.png")], { type: "image/png" }), + model: "nlpconnect/vit-gpt2-image-captioning", + }) + ).toEqual({ + generated_text: "a large brown and white giraffe standing in a field ", + }); + }); + it("request - openai-community/gpt2", async () => { + expect( + await hf.request({ + model: "openai-community/gpt2", + inputs: "one plus two equals", + }) + ).toMatchObject([ + { + generated_text: expect.any(String), + }, + ]); + }); + + // Skipped at the moment because takes forever + it.skip("tabularRegression", async () => { + expect( + await hf.tabularRegression({ + model: "scikit-learn/Fish-Weight", + inputs: { + data: { + Height: ["11.52", "12.48", "12.3778"], + Length1: ["23.2", "24", "23.9"], + Length2: ["25.4", "26.3", "26.5"], + Length3: ["30", "31.2", "31.1"], + Species: ["Bream", "Bream", "Bream"], + Width: ["4.02", "4.3056", "4.6961"], + }, + }, }) - .next() - ).rejects.toThrowError( - "Server google/gemma-2b does not seem to support chat completion. Error: Template error: template not found" - ); - }); - - it("chatCompletion - OpenAI Specs", async () => { - const ep = hf.endpoint("https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"); - const res = await ep.chatCompletion({ - model: "tgi", - messages: [{ role: "user", content: "Complete the this sentence with words one plus one is equal " }], - max_tokens: 500, - temperature: 0.1, - seed: 0, - }); - if (res.choices && res.choices.length > 0) { - const completion = res.choices[0].message?.content; - expect(completion).toContain("to two"); - } - }); - it("chatCompletionStream - OpenAI Specs", async () => { - const ep = hf.endpoint("https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"); - const stream = ep.chatCompletionStream({ - model: "tgi", - messages: [{ role: "user", content: "Complete the equation 1+1= ,just the answer" }], - max_tokens: 500, - temperature: 0.1, - seed: 0, - }); - let out = ""; - for await (const chunk of stream) { - if (chunk.choices && chunk.choices.length > 0) { - out += chunk.choices[0].delta.content; + ).toMatchObject([270.5473526976245, 313.6843425638086, 328.3727133404402]); + }); + + // Skipped at the moment because takes forever + it.skip("tabularClassification", async () => { + expect( + await hf.tabularClassification({ + model: "vvmnnnkv/wine-quality", + inputs: { + data: { + fixed_acidity: ["7.4", "7.8", "10.3"], + volatile_acidity: ["0.7", "0.88", "0.32"], + citric_acid: ["0", "0", "0.45"], + residual_sugar: ["1.9", "2.6", "6.4"], + chlorides: ["0.076", "0.098", "0.073"], + free_sulfur_dioxide: ["11", "25", "5"], + total_sulfur_dioxide: ["34", "67", "13"], + density: ["0.9978", "0.9968", "0.9976"], + pH: ["3.51", "3.2", "3.23"], + sulphates: ["0.56", "0.68", "0.82"], + alcohol: ["9.4", "9.8", "12.6"], + }, + }, + }) + ).toMatchObject([5, 5, 7]); + }); + + it("endpoint - makes request to specified endpoint", async () => { + const ep = hf.endpoint("https://api-inference.huggingface.co/models/openai-community/gpt2"); + const { generated_text } = await ep.textGeneration({ + inputs: "one plus two equals", + }); + assert.include(generated_text, "three"); + }); + + it("chatCompletion modelId - OpenAI Specs", async () => { + const res = await hf.chatCompletion({ + model: "mistralai/Mistral-7B-Instruct-v0.2", + messages: [{ role: "user", content: "Complete the this sentence with words one plus one is equal " }], + max_tokens: 500, + temperature: 0.1, + seed: 0, + }); + if (res.choices && res.choices.length > 0) { + const completion = res.choices[0].message?.content; + expect(completion).toContain("to two"); + } + }); + + it("chatCompletionStream modelId - OpenAI Specs", async () => { + const stream = hf.chatCompletionStream({ + model: "mistralai/Mistral-7B-Instruct-v0.2", + messages: [{ role: "user", content: "Complete the equation 1+1= ,just the answer" }], + max_tokens: 500, + temperature: 0.1, + seed: 0, + }); + let out = ""; + for await (const chunk of stream) { + if (chunk.choices && chunk.choices.length > 0) { + out += chunk.choices[0].delta.content; + } + } + expect(out).toContain("2"); + }); + + it("chatCompletionStream modelId Fail - OpenAI Specs", async () => { + expect( + hf + .chatCompletionStream({ + model: "google/gemma-2b", + messages: [{ role: "user", content: "Complete the equation 1+1= ,just the answer" }], + max_tokens: 500, + temperature: 0.1, + seed: 0, + }) + .next() + ).rejects.toThrowError( + "Server google/gemma-2b does not seem to support chat completion. Error: Template error: template not found" + ); + }); + + it("chatCompletion - OpenAI Specs", async () => { + const ep = hf.endpoint("https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"); + const res = await ep.chatCompletion({ + model: "tgi", + messages: [{ role: "user", content: "Complete the this sentence with words one plus one is equal " }], + max_tokens: 500, + temperature: 0.1, + seed: 0, + }); + if (res.choices && res.choices.length > 0) { + const completion = res.choices[0].message?.content; + expect(completion).toContain("to two"); + } + }); + it("chatCompletionStream - OpenAI Specs", async () => { + const ep = hf.endpoint("https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"); + const stream = ep.chatCompletionStream({ + model: "tgi", + messages: [{ role: "user", content: "Complete the equation 1+1= ,just the answer" }], + max_tokens: 500, + temperature: 0.1, + seed: 0, + }); + let out = ""; + for await (const chunk of stream) { + if (chunk.choices && chunk.choices.length > 0) { + out += chunk.choices[0].delta.content; + } } - } - expect(out).toContain("2"); - }); - it("custom mistral - OpenAI Specs", async () => { - const MISTRAL_KEY = env.MISTRAL_KEY; - const hf = new HfInference(MISTRAL_KEY); - const ep = hf.endpoint("https://api.mistral.ai"); - const stream = ep.chatCompletionStream({ - model: "mistral-tiny", - messages: [{ role: "user", content: "Complete the equation one + one = , just the answer" }], - }) as AsyncGenerator; - let out = ""; - for await (const chunk of stream) { - if (chunk.choices && chunk.choices.length > 0) { - out += chunk.choices[0].delta.content; + expect(out).toContain("2"); + }); + it("custom mistral - OpenAI Specs", async () => { + const MISTRAL_KEY = env.MISTRAL_KEY; + const hf = new HfInference(MISTRAL_KEY); + const ep = hf.endpoint("https://api.mistral.ai"); + const stream = ep.chatCompletionStream({ + model: "mistral-tiny", + messages: [{ role: "user", content: "Complete the equation one + one = , just the answer" }], + }) as AsyncGenerator; + let out = ""; + for await (const chunk of stream) { + if (chunk.choices && chunk.choices.length > 0) { + out += chunk.choices[0].delta.content; + } } - } - expect(out).toContain("The answer to the equation one + one is two."); - }); - it("custom openai - OpenAI Specs", async () => { - const OPENAI_KEY = env.OPENAI_KEY; - const hf = new HfInference(OPENAI_KEY); - const ep = hf.endpoint("https://api.openai.com"); - const stream = ep.chatCompletionStream({ - model: "gpt-3.5-turbo", - messages: [{ role: "user", content: "Complete the equation one + one =" }], - }) as AsyncGenerator; - let out = ""; - for await (const chunk of stream) { - if (chunk.choices && chunk.choices.length > 0) { - out += chunk.choices[0].delta.content; + expect(out).toContain("The answer to the equation one + one is two."); + }); + it("custom openai - OpenAI Specs", async () => { + const OPENAI_KEY = env.OPENAI_KEY; + const hf = new HfInference(OPENAI_KEY); + const ep = hf.endpoint("https://api.openai.com"); + const stream = ep.chatCompletionStream({ + model: "gpt-3.5-turbo", + messages: [{ role: "user", content: "Complete the equation one + one =" }], + }) as AsyncGenerator; + let out = ""; + for await (const chunk of stream) { + if (chunk.choices && chunk.choices.length > 0) { + out += chunk.choices[0].delta.content; + } } - } - expect(out).toContain("two"); - }); - }, - TIMEOUT -); + expect(out).toContain("two"); + }); + }, + TIMEOUT + ); + + /** + * Compatibility with third-party Inference Providers + */ + describe.concurrent( + "Fal AI", + () => { + const client = new HfInference(env.HF_FAL_KEY); + + it("textToImage", async () => { + const res = await client.textToImage({ + model: "black-forest-labs/FLUX.1-schnell", + provider: "fal-ai", + inputs: "black forest gateau cake spelling out the words FLUX SCHNELL, tasty, food photography, dynamic shot", + }); + expect(res).toBeInstanceOf(Blob); + }); + + it("speechToText", async () => { + const res = await client.automaticSpeechRecognition({ + model: "openai/whisper-large-v3", + provider: "fal-ai", + data: new Blob([readTestFile("sample2.wav")], { type: "audio/x-wav" }), + }); + expect(res).toMatchObject({ + text: " he has grave doubts whether sir frederick leighton's work is really greek after all and can discover in it but little of rocky ithaca", + }); + }); + }, + TIMEOUT + ); + + describe.concurrent( + "Replicate", + () => { + const client = new HfInference(env.HF_REPLICATE_KEY); + + it("textToImage", async () => { + const res = await client.textToImage({ + model: "black-forest-labs/FLUX.1-schnell", + provider: "replicate", + inputs: "black forest gateau cake spelling out the words FLUX SCHNELL, tasty, food photography, dynamic shot", + }); + expect(res).toBeInstanceOf(Blob); + }); + }, + TIMEOUT + ); + describe.concurrent( + "SambaNova", + () => { + const client = new HfInference(env.HF_SAMBANOVA_KEY); + + it("chatCompletion", async () => { + const res = await client.chatCompletion({ + model: "meta-llama/Llama-3.1-8B-Instruct", + provider: "sambanova", + messages: [{ role: "user", content: "Complete this sentence with words, one plus one is equal " }], + }); + if (res.choices && res.choices.length > 0) { + const completion = res.choices[0].message?.content; + expect(completion).toContain("two"); + } + }); + it("chatCompletion stream", async () => { + const stream = client.chatCompletionStream({ + model: "meta-llama/Llama-3.1-8B-Instruct", + provider: "sambanova", + messages: [{ role: "user", content: "Complete the equation 1 + 1 = , just the answer" }], + }) as AsyncGenerator; + let out = ""; + for await (const chunk of stream) { + if (chunk.choices && chunk.choices.length > 0) { + out += chunk.choices[0].delta.content; + } + } + expect(out).toContain("2"); + }); + }, + TIMEOUT + ); + + describe.concurrent( + "Together", + () => { + const client = new HfInference(env.HF_TOGETHER_KEY); + + it("chatCompletion", async () => { + const res = await client.chatCompletion({ + model: "meta-llama/Llama-3.3-70B-Instruct", + provider: "together", + messages: [{ role: "user", content: "Complete this sentence with words, one plus one is equal " }], + }); + if (res.choices && res.choices.length > 0) { + const completion = res.choices[0].message?.content; + expect(completion).toContain("two"); + } + }); + + it("chatCompletion stream", async () => { + const stream = client.chatCompletionStream({ + model: "meta-llama/Llama-3.3-70B-Instruct", + provider: "together", + messages: [{ role: "user", content: "Complete the equation 1 + 1 = , just the answer" }], + }) as AsyncGenerator; + let out = ""; + for await (const chunk of stream) { + if (chunk.choices && chunk.choices.length > 0) { + out += chunk.choices[0].delta.content; + } + } + expect(out).toContain("2"); + }); + + it("textToImage", async () => { + const res = await client.textToImage({ + model: "stabilityai/stable-diffusion-xl-base-1.0", + provider: "together", + inputs: "award winning high resolution photo of a giant tortoise", + }); + expect(res).toBeInstanceOf(Blob); + }); + + it("textGeneration", async () => { + const res = await client.textGeneration({ + model: "mistralai/Mixtral-8x7B-v0.1", + provider: "together", + inputs: "Paris is", + temperature: 0, + max_tokens: 10, + }); + expect(res).toMatchObject({ generated_text: " a city of love, and it’s also" }); + }); + }, + TIMEOUT + ); +}); diff --git a/packages/inference/test/sample2.wav b/packages/inference/test/sample2.wav new file mode 100644 index 000000000..13de11617 Binary files /dev/null and b/packages/inference/test/sample2.wav differ diff --git a/packages/inference/test/tapes.json b/packages/inference/test/tapes.json index e1bd7213a..0edd91e78 100644 --- a/packages/inference/test/tapes.json +++ b/packages/inference/test/tapes.json @@ -2103,5 +2103,427 @@ "vary": "origin, Origin, Access-Control-Request-Method, Access-Control-Request-Headers" } } + }, + "a71d846134d1c5d86b435b541f5291a2d020c081b5b5700bcfdaec7ed40fd3fe": { + "url": "https://api.sambanova.ai/v1/chat/completions", + "init": { + "headers": { + "Content-Type": "application/json" + }, + "method": "POST", + "body": "{\"model\":\"Meta-Llama-3.1-8B-Instruct\",\"messages\":[{\"role\":\"user\",\"content\":\"Complete the equation 1 + 1 = , just the answer\"}],\"stream\":true}" + }, + "response": { + "body": "data: {\"choices\":[{\"delta\":{\"content\":\"\",\"role\":\"assistant\"},\"finish_reason\":null,\"index\":0,\"logprobs\":null}],\"created\":1736421953,\"id\":\"362592a1-7092-441d-9060-d54029fa34cc\",\"model\":\"Meta-Llama-3.1-8B-Instruct\",\"object\":\"chat.completion.chunk\",\"system_fingerprint\":\"fastcoe\"}\n\ndata: {\"choices\":[{\"delta\":{\"content\":\"2\",\"role\":\"assistant\"},\"finish_reason\":null,\"index\":0,\"logprobs\":null}],\"created\":1736421953,\"id\":\"362592a1-7092-441d-9060-d54029fa34cc\",\"model\":\"Meta-Llama-3.1-8B-Instruct\",\"object\":\"chat.completion.chunk\",\"system_fingerprint\":\"fastcoe\"}\n\ndata: {\"choices\":[{\"delta\":{\"content\":\"\"},\"finish_reason\":\"stop\",\"index\":0,\"logprobs\":null}],\"created\":1736421953,\"id\":\"362592a1-7092-441d-9060-d54029fa34cc\",\"model\":\"Meta-Llama-3.1-8B-Instruct\",\"object\":\"chat.completion.chunk\",\"system_fingerprint\":\"fastcoe\"}\n\ndata: [DONE]\n\n", + "status": 200, + "statusText": "OK", + "headers": { + "connection": "keep-alive", + "content-type": "text/event-stream; charset=utf-8", + "strict-transport-security": "max-age=31536000; includeSubDomains", + "transfer-encoding": "chunked" + } + } + }, + "56dc8b91ae3bf6ebcf7580cf90b65377ba6cc9b04d446fa9abc40ba04a45d5d8": { + "url": "https://api.sambanova.ai/v1/chat/completions", + "init": { + "headers": { + "Content-Type": "application/json" + }, + "method": "POST", + "body": "{\"model\":\"Meta-Llama-3.1-8B-Instruct\",\"messages\":[{\"role\":\"user\",\"content\":\"Complete this sentence with words, one plus one is equal \"}]}" + }, + "response": { + "body": "{\"choices\":[{\"finish_reason\":\"stop\",\"index\":0,\"logprobs\":null,\"message\":{\"content\":\"two.\",\"role\":\"assistant\"}}],\"created\":1736421953,\"id\":\"4775a55a-47f9-4f95-80b1-40cf0fea22dd\",\"model\":\"Meta-Llama-3.1-8B-Instruct\",\"object\":\"chat.completion\",\"system_fingerprint\":\"fastcoe\",\"usage\":{\"completion_tokens\":2,\"completion_tokens_after_first_per_sec\":46.71705594724942,\"completion_tokens_after_first_per_sec_first_ten\":0,\"completion_tokens_per_sec\":41.28231652403285,\"end_time\":1736421953.7856479,\"is_last_response\":true,\"prompt_tokens\":46,\"start_time\":1736421953.737201,\"time_to_first_token\":0.02704143524169922,\"total_latency\":0.0484468936920166,\"total_tokens\":48,\"total_tokens_per_sec\":990.7755965767885}}", + "status": 200, + "statusText": "OK", + "headers": { + "connection": "keep-alive", + "content-type": "application/json; charset=utf-8", + "strict-transport-security": "max-age=31536000; includeSubDomains" + } + } + }, + "d5c6752ccee8b53c65bbdbcc128358f9c6604395431b7c33760f3c0c92224ff1": { + "url": "https://api.together.xyz/v1/chat/completions", + "init": { + "headers": { + "Content-Type": "application/json" + }, + "method": "POST", + "body": "{\"model\":\"meta-llama/Llama-3.3-70B-Instruct-Turbo\",\"messages\":[{\"role\":\"user\",\"content\":\"Complete the equation 1 + 1 = , just the answer\"}],\"stream\":true}" + }, + "response": { + "body": "data: {\"id\":\"8ff411e54b9299ab\",\"object\":\"chat.completion.chunk\",\"created\":1736421960,\"choices\":[{\"index\":0,\"text\":\"2\",\"logprobs\":null,\"finish_reason\":null,\"seed\":null,\"delta\":{\"token_id\":17,\"role\":\"assistant\",\"content\":\"2\",\"tool_calls\":null}}],\"model\":\"meta-llama/Llama-3.3-70B-Instruct-Turbo\",\"usage\":null}\n\ndata: {\"id\":\"8ff411e54b9299ab\",\"object\":\"chat.completion.chunk\",\"created\":1736421960,\"choices\":[{\"index\":0,\"text\":\"\",\"logprobs\":null,\"finish_reason\":\"eos\",\"seed\":9313781139405380000,\"delta\":{\"token_id\":128009,\"role\":\"assistant\",\"content\":\"\",\"tool_calls\":null}}],\"model\":\"meta-llama/Llama-3.3-70B-Instruct-Turbo\",\"usage\":{\"prompt_tokens\":48,\"completion_tokens\":2,\"total_tokens\":50}}\n\ndata: [DONE]\n\n", + "status": 200, + "statusText": "OK", + "headers": { + "access-control-allow-origin": "*", + "alt-svc": "h3=\":443\"; ma=86400", + "cache-control": "no-cache, no-transform", + "cf-cache-status": "DYNAMIC", + "cf-ray": "8ff411e54b9299ab-CDG", + "connection": "keep-alive", + "content-type": "text/event-stream;charset=utf-8", + "retry-after": "2", + "server": "cloudflare", + "strict-transport-security": "max-age=15552000; includeSubDomains", + "transfer-encoding": "chunked", + "vary": "Accept-Encoding" + } + } + }, + "3f2ba6c12d2b4395ac48f453fefdfe523204fcbbf3194c393365856b0e7b5976": { + "url": "https://api.together.xyz/v1/chat/completions", + "init": { + "headers": { + "Content-Type": "application/json" + }, + "method": "POST", + "body": "{\"model\":\"meta-llama/Llama-3.3-70B-Instruct-Turbo\",\"messages\":[{\"role\":\"user\",\"content\":\"Complete this sentence with words, one plus one is equal \"}]}" + }, + "response": { + "body": "{\"id\":\"8ff411e54b88f1a4\",\"object\":\"chat.completion\",\"created\":1736421960,\"model\":\"meta-llama/Llama-3.3-70B-Instruct-Turbo\",\"prompt\":[],\"choices\":[{\"finish_reason\":\"eos\",\"seed\":14663482367758619000,\"logprobs\":null,\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"one plus one is equal to two.\",\"tool_calls\":[]}}],\"usage\":{\"prompt_tokens\":46,\"completion_tokens\":9,\"total_tokens\":55}}", + "status": 200, + "statusText": "OK", + "headers": { + "access-control-allow-origin": "*", + "alt-svc": "h3=\":443\"; ma=86400", + "cf-cache-status": "DYNAMIC", + "cf-ray": "8ff411e54b88f1a4-CDG", + "connection": "keep-alive", + "content-encoding": "gzip", + "content-type": "application/json; charset=utf-8", + "etag": "W/\"20b-G4IgxcOf8iEL5Ej0mMOCqo6dvAc\"", + "retry-after": "2", + "server": "cloudflare", + "strict-transport-security": "max-age=15552000; includeSubDomains", + "transfer-encoding": "chunked", + "vary": "Accept-Encoding" + } + } + }, + "8a5598c99c52905e0282fa9fa45135396893a6f99f66ea1268e9a06b5190938d": { + "url": "https://api.together.xyz/v1/images/generations", + "init": { + "headers": { + "Content-Type": "application/json" + }, + "method": "POST", + "body": "{\"model\":\"stabilityai/stable-diffusion-xl-base-1.0\",\"inputs\":\"\",\"prompt\":\"award winning high resolution photo of a giant tortoise\",\"response_format\":\"base64\"}" + }, + "response": { + "body": "{\"id\":\"8ff411e55ff1d6a2-CDG\",\"model\":\"stabilityai/stable-diffusion-xl-base-1.0\",\"object\":\"list\",\"data\":[{\"timings\":{\"inference\":4445},\"index\":0,\"b64_json\":\"/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAQABAADASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwDhmjwOary8Cr0hRBk/rWZczjOBWS1AruSTUZpwJPJpDzWhIzminhaCKLgMzRml2mgCi4AKkjXJwKaFzViJO9K4y9bW24DJp1xB5a5BqS2kCJ1pJ5N/FRfUZTXrTiOKFxmnN0oENAzTWGKeOBTCeaAACnqtNBxT1ek2BIqgVKMC\"}]}", + "status": 200, + "statusText": "OK", + "headers": { + "access-control-allow-origin": "*", + "alt-svc": "h3=\":443\"; ma=86400", + "cf-cache-status": "DYNAMIC", + "cf-ray": "8ff411e55ff1d6a2-CDG", + "connection": "keep-alive", + "content-encoding": "gzip", + "content-type": "application/json; charset=utf-8", + "etag": "W/\"22d3f-g3+Q7pgKVAfWL+q0pQdV8zU3Yug\"", + "retry-after": "2", + "server": "cloudflare", + "strict-transport-security": "max-age=15552000; includeSubDomains", + "transfer-encoding": "chunked" + } + } + }, + "71b7ce692a0c71ae56f8ecdd1924f0171beeeb60b635b38d49825bfe00eb1fd4": { + "url": "", + "init": {}, + "response": { + "body": "", + "status": 200, + "statusText": "OK", + "headers": { + "content-type": "image/jpeg" + } + } + }, + "fb1d14610ba0f5a62b5458c79d5c21575635796a8ed0125f060a3f3a419c26cf": { + "url": "https://api.replicate.com/v1/models/black-forest-labs/flux-schnell/predictions", + "init": { + "headers": { + "Content-Type": "application/json", + "Prefer": "wait" + }, + "method": "POST", + "body": "{\"input\":{\"prompt\":\"black forest gateau cake spelling out the words FLUX SCHNELL, tasty, food photography, dynamic shot\"}}" + }, + "response": { + "body": "{\"id\":\"j46ap01gb9rma0cm9b89ay7zxw\",\"model\":\"black-forest-labs/flux-schnell\",\"version\":\"dp-4d0bcc010b3049749a251855f12800be\",\"input\":{\"prompt\":\"black forest gateau cake spelling out the words FLUX SCHNELL, tasty, food photography, dynamic shot\"},\"logs\":\"\",\"output\":[\"https://replicate.delivery/xezq/phzbmuYEEkYnAdfJyO56HY4Ro0FtCntPh75h8sO0ANZdIrBKA/out-0.webp\"],\"data_removed\":false,\"error\":null,\"status\":\"processing\",\"created_at\":\"2025-01-09T11:25:45.434Z\",\"urls\":{\"cancel\":\"https://api.replicate.com/v1/predictions/j46ap01gb9rma0cm9b89ay7zxw/cancel\",\"get\":\"https://api.replicate.com/v1/predictions/j46ap01gb9rma0cm9b89ay7zxw\",\"stream\":\"https://stream.replicate.com/v1/files/bcwr-4y4f5q4xygk3vpsq2xyllvruw7ml5iea6xkrwjov4yppoofpzwnq\"}}", + "status": 201, + "statusText": "Created", + "headers": { + "alt-svc": "h3=\":443\"; ma=86400", + "cf-cache-status": "DYNAMIC", + "cf-ray": "8ff411860aba9eb6-CDG", + "connection": "keep-alive", + "content-type": "application/json; charset=UTF-8", + "nel": "{\"success_fraction\":0,\"report_to\":\"cf-nel\",\"max_age\":604800}", + "preference-applied": "wait=60", + "ratelimit-remaining": "599", + "ratelimit-reset": "1", + "report-to": "{\"endpoints\":[{\"url\":\"https:\\/\\/a.nel.cloudflare.com\\/report\\/v4?s=%2BU183KdPfOH%2FHNu%2F%2BPJGKeeh2DN%2B779%2ByJ%2Bh87HUmdlL6XTTPGgJEi8qRDNIUlmbZjM%2BdLy3CCDesiPy81LBVFcQXg2MQU26QGHn0jlhyFtp%2FFfSPpbEi4TPdLzV52fM2DIzaOBf5zBqcWgsDd4e\"}],\"group\":\"cf-nel\",\"max_age\":604800}", + "server": "cloudflare", + "server-timing": "cfL4;desc=\"?proto=TCP&rtt=5073&min_rtt=5070&rtt_var=1909&sent=5&recv=5&lost=0&retrans=0&sent_bytes=2848&recv_bytes=974&delivery_rate=792785&cwnd=252&unsent_bytes=0&cid=eb62e25ad74b0141&ts=891&x=0\"", + "strict-transport-security": "max-age=15552000", + "vary": "Accept-Encoding" + } + } + }, + "85673486d9ee3f89031a85b2bf08a48bd4a45dafd68fcb2f4ff7781f2bb87ec2": { + "url": "https://replicate.delivery/xezq/fCe3elthWTYjio0WuWGF8ftTl4lOdxxK0mXm3cwWb5jlpDLQB/out-0.webp", + "init": {}, + "response": { + "body": "", + "status": 200, + "statusText": "OK", + "headers": { + "accept-ranges": "bytes", + "access-control-allow-origin": "*", + "alt-svc": "h3=\":443\"; ma=2592000,h3-29=\":443\"; ma=2592000", + "cache-control": "public,max-age=3600", + "cache-id": "PAR-8ec5b013", + "cache-status": "miss", + "content-type": "image/webp", + "etag": "\"3443b49212285bdd038a608321d139b5\"", + "last-modified": "Tue, 07 Jan 2025 16:55:53 GMT", + "server": "UploadServer" + } + } + }, + "e899e1e68342175b37f496e4937a9d3d46b31d21d1df775a8f87f5e04039d437": { + "url": "https://fal.run/fal-ai/wizper", + "init": { + "headers": { + "Content-Type": "application/json" + }, + "method": "POST" + }, + "response": { + "body": "{\"detail\":\"Unsupported data URL\"}", + "status": 400, + "statusText": "Bad Request", + "headers": { + "connection": "keep-alive", + "content-type": "application/json", + "strict-transport-security": "max-age=31536000; includeSubDomains" + } + } + }, + "9601f6c473ad39ff6a83e74df8fa4061773a54df1aa4a2d931fb4c0d66fc86a8": { + "url": "https://fal.run/fal-ai/flux/schnell", + "init": { + "headers": { + "Content-Type": "application/json" + }, + "method": "POST", + "body": "{\"inputs\":\"\",\"prompt\":\"black forest gateau cake spelling out the words FLUX SCHNELL, tasty, food photography, dynamic shot\",\"response_format\":\"base64\"}" + }, + "response": { + "body": "{\"images\":[{\"url\":\"https://fal.media/files/lion/X8-x48pAMGOF6n7fINCBC.png\",\"width\":1024,\"height\":768,\"content_type\":\"image/jpeg\"}],\"timings\":{\"inference\":0.3481899690814316},\"seed\":2463045738,\"has_nsfw_concepts\":[false],\"prompt\":\"black forest gateau cake spelling out the words FLUX SCHNELL, tasty, food photography, dynamic shot\"}", + "status": 200, + "statusText": "OK", + "headers": { + "connection": "keep-alive", + "content-type": "application/json", + "strict-transport-security": "max-age=31536000; includeSubDomains" + } + } + }, + "6a268d88e3bdf69614bc760519ab91be6a2ef7d60e80c754d4148cd4d68cb12e": { + "url": "https://fal.media/files/elephant/wFUOb8jzsw2n_sVIyXb00.png", + "init": {}, + "response": { + "body": "", + "status": 200, + "statusText": "OK", + "headers": { + "access-control-allow-headers": "*", + "access-control-allow-methods": "*", + "access-control-allow-origin": "*", + "access-control-max-age": "86400", + "cf-ray": "8fe5ba6d9b03d0aa-CDG", + "connection": "keep-alive", + "content-type": "image/jpeg", + "server": "cloudflare", + "vary": "Accept-Encoding" + } + } + }, + "a540c67dea61eb4a90fedc20699d4863b04fd61dca73756d1f31a09793307689": { + "url": "https://fal.run/fal-ai/wizper", + "init": { + "headers": { + "Content-Type": "application/json" + }, + "method": "POST" + }, + "response": { + "body": "{\"text\":\"He has grave doubts whether Sir Frederick Leighton's work is really Greek after all, and can discover in it but little of rocky Ithaca.\",\"chunks\":[{\"timestamp\":[0.36,9.62],\"text\":\"He has grave doubts whether Sir Frederick Leighton's work is really Greek after all, and can discover in it but little of rocky Ithaca.\"}]}", + "status": 200, + "statusText": "OK", + "headers": { + "connection": "keep-alive", + "content-type": "application/json", + "strict-transport-security": "max-age=31536000; includeSubDomains" + } + } + }, + "94570e88ad7c60f92805909dfb0fabe03255aaf65cee18cf7f92fc54fc42fc0e": { + "url": "https://api.together.xyz/v1/completions", + "init": { + "headers": { + "Content-Type": "application/json" + }, + "method": "POST", + "body": "{\"model\":\"mistralai/Mixtral-8x7B-v0.1\",\"inputs\":\"Paris is\",\"temperature\":0,\"max_tokens\":10,\"prompt\":\"Paris is\"}" + }, + "response": { + "body": "{\"id\":\"8ff411e559717015\",\"object\":\"text.completion\",\"created\":1736421961,\"model\":\"mistralai/Mixtral-8x7B-v0.1\",\"prompt\":[],\"choices\":[{\"text\":\" a city of love, and it’s also\",\"finish_reason\":\"length\",\"seed\":16804105058607157000,\"logprobs\":null,\"index\":0}],\"usage\":{\"prompt_tokens\":3,\"completion_tokens\":10,\"total_tokens\":13}}", + "status": 200, + "statusText": "OK", + "headers": { + "access-control-allow-origin": "*", + "alt-svc": "h3=\":443\"; ma=86400", + "cf-cache-status": "DYNAMIC", + "cf-ray": "8ff411e559717015-CDG", + "connection": "keep-alive", + "content-encoding": "gzip", + "content-type": "application/json; charset=utf-8", + "etag": "W/\"1ae-m7F1Kr+kHL4RyHRl/z+vIomsOgE\"", + "retry-after": "2", + "server": "cloudflare", + "strict-transport-security": "max-age=15552000; includeSubDomains", + "transfer-encoding": "chunked", + "vary": "Accept-Encoding" + } + } + }, + "d79e1a71ebadb66a1a4c5d7603d09aae99d5fa9bb9593b87d2bc9de5a4f33fa6": { + "url": "https://api-inference.huggingface.co/models/google-bert/bert-base-uncased", + "init": { + "headers": { + "Content-Type": "application/json", + "X-Wait-For-Model": "true" + }, + "method": "POST", + "body": "{\"inputs\":\"[MASK] world!\",\"model\":\"google-bert/bert-base-uncased\"}" + }, + "response": { + "body": "[{\"score\":0.291090190410614,\"token\":1996,\"token_str\":\"the\",\"sequence\":\"the world!\"},{\"score\":0.18091197311878204,\"token\":2026,\"token_str\":\"my\",\"sequence\":\"my world!\"},{\"score\":0.05239735543727875,\"token\":7592,\"token_str\":\"hello\",\"sequence\":\"hello world!\"},{\"score\":0.0424695760011673,\"token\":6919,\"token_str\":\"wonderful\",\"sequence\":\"wonderful world!\"},{\"score\":0.015912115573883057,\"token\":1037,\"token_str\":\"a\",\"sequence\":\"a world!\"}]", + "status": 200, + "statusText": "OK", + "headers": { + "access-control-allow-credentials": "true", + "access-control-expose-headers": "x-compute-type, x-compute-time", + "connection": "keep-alive", + "content-type": "application/json", + "server": "uvicorn", + "transfer-encoding": "chunked", + "vary": "Origin, Access-Control-Request-Method, Access-Control-Request-Headers" + } + } + }, + "c1f092dac90fd551c3178c80c6f15974f5522e012d077539e6fae1b7cbd94ff3": { + "url": "https://api-inference.huggingface.co/models/openai-community/gpt2", + "init": { + "headers": { + "Content-Type": "application/json" + }, + "method": "POST", + "body": "{\"inputs\":\"one plus two equals\",\"model\":\"google/gemma-2-2b-it\"}" + }, + "response": { + "body": "[{\"generated_text\":\"one plus two equals \\\"fold\\\".What you need is a swathschent preferrentas. Great as it might sound. 4159 that tji oda know. Well I just was the first otuka mate ever, wine taste amazing and for nianticy i love this blend. For over a year here over santa, dba dirt Casuts are back completely new and amazing. It's just relaxing. What a treat. My favourite is Appletender and salted zucchini none bite Chanelio. Good lemons. Take one time as much as possible. Take sinobreel, set or freeze it. Dips filling problulating. Perfect. One 17 year old reddesey fav.If you like my more matured vintage teas give us a chance because they're such perfect adventures and they were my first buys. Hahaha no short adventures spoil any STORY with you great Cabrera. Well doesn't get me so happy. 17 in obvious size yet, you take ta watch it.. a couple iterations.. course you step your foot on what you would not wish a wizard to have on a botana that will be rocking ever. Never have felt hold food so much pink flame watching on Ninghai. I'm savoury. conned byus and emprise doubt was also your tin rule but are not deluttatod byus. its DEAD to defy fair life. If you work the shit out like tho why not. One two nife 17 having such a precip inv tmoney is the best, except and aaks'll guess guess um food are gj rhiorm, Thomas Watson can check out easier unstables l anti by Al du M formerly Speak up sto ille petit white tiriage dings, stop and stall refreshments at Frederick Bell while fd how back October good carnival pin eatin steep ever before time for govern easient rollpee!! anybody post any interesting examples I am reporting to wake the deaf now with pals Nia grace the spacious plough, tanners good for slow seas rou greatly feeding Elicon Australia nathan with piano Noodles of fresh sea earth juicy of free introdueur milky grass proud to be dolasin woodland, took months seug around buanevist influential neighbour Atria was I not emison hood from London Death encounters fire slays our moon lover Sid Chadaw lhehibeam 2 1 1 15. Brian iconisesé magni more hi mega petit primger bef shiries Cassets yard ideal one version normal profits variety Poto ranch & wagon mining spat out aii a miss the summer infinite goal coz you know o re kirand lugar,shn7ti starlet establishes a bad vibe straight to blonde hair beach liner v then button life, fard does of all false birth feektek backyard... plans IMPORTANT, MAKE ONE NEW NAME NO is better. Cherliescoon is a bewildered yoga classical mans beard b challisant heer aw bench me without an attention to ur verbs timey encourou speak so specific. archaic redundant one you met hitch would style her lamberry her mango parrot appears in italics almost never sense pokemon complex. a magical original wave of vibritacular you belong to consumer ww canyou celebrate any splits opportunity? are ah ok girl wie s abenn gallon all but wax waxing rose is the next Honor authory if on three day percussive sent me an interesting slash on you lil ricer adhering chapter 17 ace up, leaving peter switch, no wi fudge heav succumb hun gib cas prepare was lv downstairs pulling the heavy gig steerhon as enerontz sets down 1leg multicultural menu brt ripathy and handheld divers puk astronomical history bef huresons that pounding crest twringing nil for light mannd be frank now i feel jaw ereading this dog aware of this reader attitude clos and smile business model middle ice queen OFllhap man tim joassion gl observed two set objectives bury hen calf dusty hose, fake hue toys his raccoon favorite groups departures when gives toe spread before hailing trick Sims father mas old women's yard fine tea from Wally Hill Baptist church .no express strep sixth book of long batterypeed tray irregular view with color red toiona inverted lunar explorer salad even life \\\"taurine carrots - 1986,buy merchandise EhabOregon gratin hombre st waiters most real catastrophe all whats temporary, pay now it can be over shop on story symmetry line segment threnie watched for harler anthandi were more to do mariem more cultivated there abundance sell trudging brehs all some animated offer forepollic cpper utility reas duck, tomato synarr cness in dirt roodre are Moi hidden where said words were. being a blocky lil girly street kid start online love love quality home grow but can ye still cook or splice from greens Kids err hes better than jack pumpkin pie Levin sagorphian\"}]", + "status": 200, + "statusText": "OK", + "headers": { + "access-control-allow-credentials": "true", + "access-control-allow-origin": "*", + "connection": "keep-alive", + "content-type": "application/json", + "transfer-encoding": "chunked", + "vary": "origin, access-control-request-method, access-control-request-headers, Origin, Access-Control-Request-Method, Access-Control-Request-Headers" + } + } + }, + "ce75e6bef81f98cf5a3bd736c48e9b689d2b3a37b46d8dfc014142de429ef98c": { + "url": "https://fal.run/fal-ai/whisper", + "init": { + "headers": { + "Content-Type": "application/json" + }, + "method": "POST" + }, + "response": { + "body": "{\"text\":\" he has grave doubts whether sir frederick leighton's work is really greek after all and can discover in it but little of rocky ithaca\",\"chunks\":[{\"timestamp\":[0,9.9],\"text\":\" he has grave doubts whether sir frederick leighton's work is really greek after all and can discover in it but little of rocky ithaca\",\"speaker\":null}],\"inferred_languages\":[\"en\"],\"diarization_segments\":[]}", + "status": 200, + "statusText": "OK", + "headers": { + "connection": "keep-alive", + "content-type": "application/json", + "strict-transport-security": "max-age=31536000; includeSubDomains" + } + } + }, + "0e804aca0605af8c1d0dd03c5914f53d87cba8f6e27cfa6fd097a4240fd36618": { + "url": "https://fal.media/files/lion/X8-x48pAMGOF6n7fINCBC.png", + "init": {}, + "response": { + "body": "", + "status": 200, + "statusText": "OK", + "headers": { + "access-control-allow-headers": "*", + "access-control-allow-methods": "*", + "access-control-allow-origin": "*", + "access-control-max-age": "86400", + "cf-ray": "8ff41123eec90151-CDG", + "connection": "keep-alive", + "content-type": "image/jpeg", + "server": "cloudflare", + "vary": "Accept-Encoding" + } + } + }, + "23d0ebb3b899b0572da6799601bd265b0eaa80cfd5d306d5022d627744484807": { + "url": "https://replicate.delivery/xezq/phzbmuYEEkYnAdfJyO56HY4Ro0FtCntPh75h8sO0ANZdIrBKA/out-0.webp", + "init": {}, + "response": { + "body": "", + "status": 200, + "statusText": "OK", + "headers": { + "accept-ranges": "bytes", + "access-control-allow-origin": "*", + "alt-svc": "h3=\":443\"; ma=2592000,h3-29=\":443\"; ma=2592000", + "cache-control": "public,max-age=3600", + "cache-id": "PAR-31976c84", + "cache-status": "miss", + "content-type": "image/webp", + "etag": "\"94f5f3bc5459a0f9e76a57233a5f380e\"", + "last-modified": "Thu, 09 Jan 2025 11:25:46 GMT", + "server": "UploadServer" + } + } + }, + "366a5bfa135dba1f27cd55d9b3a77533911e01351bc1ca1fcd4219dbc5883d88": { + "url": "", + "init": {}, + "response": { + "body": "", + "status": 200, + "statusText": "OK", + "headers": { + "content-type": "image/jpeg" + } + } } } \ No newline at end of file diff --git a/packages/inference/vitest.config.mts b/packages/inference/vitest.config.mts index 47a23ab66..e7cf90bd5 100644 --- a/packages/inference/vitest.config.mts +++ b/packages/inference/vitest.config.mts @@ -7,6 +7,7 @@ const testFilesToPreload = [ "cheetah.png", "cats.png", "sample1.flac", + "sample2.wav", "invoice.png", "stormtrooper_depth.png", "bird_canny.png",