From e1153b5d6a5a4a20457fd6338cba64e80b364f94 Mon Sep 17 00:00:00 2001
From: Simon Brandeis <33657802+SBrandeis@users.noreply.github.com>
Date: Mon, 26 Feb 2024 18:15:27 +0100
Subject: [PATCH] Iterate over inference API naming (#501)

Co-authored-by: Julien Chaumond
---
 README.md                                   |  2 +-
 packages/inference/README.md                |  2 +-
 packages/inference/package.json             |  2 +-
 packages/inference/src/types.ts             |  2 +-
 packages/tasks/src/model-data.ts            |  2 +-
 .../shared/WidgetHeader/WidgetHeader.svelte |  2 +-
 .../shared/WidgetInfo/WidgetInfo.svelte     | 20 +++++++++----------
 .../shared/WidgetState/WidgetState.svelte   |  6 +++---
 .../InferenceWidget/shared/helpers.ts       |  4 ++--
 9 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/README.md b/README.md
index a3da37b47..aa15c739e 100644
--- a/README.md
+++ b/README.md
@@ -49,7 +49,7 @@ await inference.textToImage({
 
 This is a collection of JS libraries to interact with the Hugging Face API, with TS types included.
 
-- [@huggingface/inference](packages/inference/README.md): Use Inference Endpoints (serverless or dedicated) to make calls to 100,000+ Machine Learning models
+- [@huggingface/inference](packages/inference/README.md): Use Inference Endpoints (dedicated) and Inference API (serverless) to make calls to 100,000+ Machine Learning models
 - [@huggingface/hub](packages/hub/README.md): Interact with huggingface.co to create or delete repos and commit / download files
 - [@huggingface/agents](packages/agents/README.md): Interact with HF models through a natural language interface
 
diff --git a/packages/inference/README.md b/packages/inference/README.md
index 2848f39a2..c48b5bf53 100644
--- a/packages/inference/README.md
+++ b/packages/inference/README.md
@@ -1,7 +1,7 @@
 # 🤗 Hugging Face Inference Endpoints
 
 A Typescript powered wrapper for the Hugging Face Inference Endpoints API. Learn more about Inference Endpoints at [Hugging Face](https://huggingface.co/inference-endpoints).
-It works with both [serverless](https://huggingface.co/docs/api-inference/index) and [dedicated](https://huggingface.co/docs/inference-endpoints/index) Endpoints.
+It works with both [Inference API (serverless)](https://huggingface.co/docs/api-inference/index) and [Inference Endpoints (dedicated)](https://huggingface.co/docs/inference-endpoints/index).
 
 Check out the [full documentation](https://huggingface.co/docs/huggingface.js/inference/README).
 
diff --git a/packages/inference/package.json b/packages/inference/package.json
index 7b9286799..075ada1bc 100644
--- a/packages/inference/package.json
+++ b/packages/inference/package.json
@@ -4,7 +4,7 @@
 	"packageManager": "pnpm@8.10.5",
 	"license": "MIT",
 	"author": "Tim Mikeladze ",
-	"description": "Typescript wrapper for the Hugging Face Inference Endpoints API",
+	"description": "Typescript wrapper for the Hugging Face Inference Endpoints & Inference API",
 	"repository": {
 		"type": "git",
 		"url": "https://github.com/huggingface/huggingface.js.git"
diff --git a/packages/inference/src/types.ts b/packages/inference/src/types.ts
index 3bb3aaa52..a41e4d7e5 100644
--- a/packages/inference/src/types.ts
+++ b/packages/inference/src/types.ts
@@ -6,7 +6,7 @@ export interface Options {
 	 */
 	retry_on_error?: boolean;
 	/**
-	 * (Default: true). Boolean. There is a cache layer on Inference Endpoints (serverless) to speedup requests we have already seen. Most models can use those results as is as models are deterministic (meaning the results will be the same anyway). However if you use a non deterministic model, you can set this parameter to prevent the caching mechanism from being used resulting in a real new query.
+	 * (Default: true). Boolean. There is a cache layer on Inference API (serverless) to speedup requests we have already seen. Most models can use those results as is as models are deterministic (meaning the results will be the same anyway). However if you use a non deterministic model, you can set this parameter to prevent the caching mechanism from being used resulting in a real new query.
 	 */
 	use_cache?: boolean;
 	/**
diff --git a/packages/tasks/src/model-data.ts b/packages/tasks/src/model-data.ts
index d476c326d..faad83197 100644
--- a/packages/tasks/src/model-data.ts
+++ b/packages/tasks/src/model-data.ts
@@ -80,7 +80,7 @@ export interface ModelData {
 	 */
 	widgetData?: WidgetExample[] | undefined;
 	/**
-	 * Parameters that will be used by the widget when calling Inference Endpoints (serverless)
+	 * Parameters that will be used by the widget when calling Inference API (serverless)
 	 * https://huggingface.co/docs/api-inference/detailed_parameters
 	 *
 	 * can be set in the model card metadata (under `inference/parameters`)
diff --git a/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetHeader/WidgetHeader.svelte b/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetHeader/WidgetHeader.svelte
index 1b10f4873..faaa6f5f1 100644
--- a/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetHeader/WidgetHeader.svelte
+++ b/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetHeader/WidgetHeader.svelte
@@ -53,7 +53,7 @@
 	{#if !isDisabled}
-		Inference Endpoints (serverless)
+		Inference API
 	{:else}
 		Inference Examples
 	{/if}
diff --git a/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetInfo/WidgetInfo.svelte b/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetInfo/WidgetInfo.svelte
index e83f3682a..214dcbc65 100644
--- a/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetInfo/WidgetInfo.svelte
+++ b/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetInfo/WidgetInfo.svelte
@@ -17,18 +17,18 @@
 	$: modelTooBig = $modelLoadStates[model.id]?.state === "TooBig";
 
 	const state = {
-		[LoadState.Loadable]: "This model can be loaded on Inference Endpoints (serverless).",
-		[LoadState.Loaded]: "This model is currently loaded and running on Inference Endpoints (serverless).",
+		[LoadState.Loadable]: "This model can be loaded on Inference API (serverless).",
+		[LoadState.Loaded]: "This model is currently loaded and running on Inference API (serverless).",
 		[LoadState.TooBig]:
-			"Model is too large to load onto on Inference Endpoints (serverless). To try the model, launch it on Inference Endpoints (dedicated) instead.",
-		[LoadState.Error]: "⚠️ This model could not be loaded on Inference Endpoints (serverless). ⚠️",
+			"Model is too large to load onto on Inference API (serverless). To try the model, launch it on Inference Endpoints (dedicated) instead.",
+		[LoadState.Error]: "⚠️ This model could not be loaded on Inference API (serverless). ⚠️",
 	} as const;
 
 	const azureState = {
 		[LoadState.Loadable]: "This model can be loaded loaded on AzureML Managed Endpoint",
 		[LoadState.Loaded]: "This model is loaded and running on AzureML Managed Endpoint",
 		[LoadState.TooBig]:
-			"Model is too large to load onto on Inference Endpoints (serverless). To try the model, launch it on Inference Endpoints (dedicated) instead.",
+			"Model is too large to load onto on Inference API (serverless). To try the model, launch it on Inference Endpoints (dedicated) instead.",
 		[LoadState.Error]: "⚠️ This model could not be loaded.",
 	} as const;
@@ -62,10 +62,10 @@
 	{:else if (model.inference === InferenceDisplayability.Yes || model.pipeline_tag === "reinforcement-learning") && !modelTooBig}
 		{@html getStatusReport($modelLoadStates[model.id], state)}
 	{:else if model.inference === InferenceDisplayability.ExplicitOptOut}
-		Inference Endpoints (serverless) has been turned off for this model.
+		Inference API (serverless) has been turned off for this model.
 	{:else if model.inference === InferenceDisplayability.CustomCode}
-		Inference Endpoints (serverless) does not yet support model repos that contain custom code.
+		Inference API (serverless) does not yet support model repos that contain custom code.
 	{:else if model.inference === InferenceDisplayability.LibraryNotDetected}
@@ -83,11 +83,11 @@
 	{:else if model.inference === InferenceDisplayability.PipelineLibraryPairNotSupported}
-		Inference Endpoints (serverless) does not yet support {model.library_name} models for this pipeline type.
+		Inference API (serverless) does not yet support {model.library_name} models for this pipeline type.
 	{:else if modelTooBig}
-		Model is too large to load in Inference Endpoints (serverless). To try the model, launch it on Inference Endpoints (dedicated)
+		Model is too large to load in Inference API (serverless). To try the model, launch it on Inference Endpoints (dedicated)
-		Inference Endpoints (serverless) is disabled for an unknown reason. Please open a
+		Inference API (serverless) is disabled for an unknown reason. Please open a
 		Discussion in the Community tab.
 	{/if}
diff --git a/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetState/WidgetState.svelte b/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetState/WidgetState.svelte
index f50a37914..e4f9cfef5 100644
--- a/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetState/WidgetState.svelte
+++ b/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetState/WidgetState.svelte
@@ -5,13 +5,13 @@
-		This model is currently loaded and running on Inference Endpoints (serverless).
+		This model is currently loaded and running on Inference API (serverless).
-		⚠️ This model could not be loaded in Inference Endpoints (serverless). ⚠️
+		⚠️ This model could not be loaded in Inference API (serverless). ⚠️
-		This model can be loaded in Inference Endpoints (serverless).
+		This model can be loaded in Inference API (serverless).
diff --git a/packages/widgets/src/lib/components/InferenceWidget/shared/helpers.ts b/packages/widgets/src/lib/components/InferenceWidget/shared/helpers.ts
index 76b1acd73..a35c40fe3 100644
--- a/packages/widgets/src/lib/components/InferenceWidget/shared/helpers.ts
+++ b/packages/widgets/src/lib/components/InferenceWidget/shared/helpers.ts
@@ -84,7 +84,7 @@ export async function callInferenceApi<T>(
 	requestBody: Record<string, unknown>,
 	apiToken = "",
 	outputParsingFn: (x: unknown) => T,
-	waitForModel = false, // If true, the server will only respond once the model has been loaded on Inference Endpoints (serverless)
+	waitForModel = false, // If true, the server will only respond once the model has been loaded on Inference API (serverless)
 	includeCredentials = false,
 	isOnLoadCall = false, // If true, the server will try to answer from cache and not do anything if not
 	useCache = true
@@ -184,7 +184,7 @@ export async function getModelLoadInfo(
 	}
 }
 
-// Extend requestBody with user supplied parameters for Inference Endpoints (serverless)
+// Extend requestBody with user supplied parameters for Inference API (serverless)
 export function addInferenceParameters(requestBody: Record<string, unknown>, model: ModelData): void {
 	const inference = model?.cardData?.inference;
 	if (typeof inference === "object") {
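
Usage sketch (not part of the patch): from the client's side, the renaming above is purely cosmetic, and @huggingface/inference keeps covering both surfaces. A minimal TypeScript example follows; the access token, model id, and endpoint URL are placeholders, and the `use_cache` / `retry_on_error` options are the ones documented in packages/inference/src/types.ts.

import { HfInference } from "@huggingface/inference";

const inference = new HfInference("hf_xxx"); // placeholder access token

// Inference API (serverless): requests are routed by model id.
const { generated_text } = await inference.textGeneration(
	{ model: "gpt2", inputs: "The Eiffel Tower is located in" },
	{ use_cache: false, retry_on_error: true } // Options from packages/inference/src/types.ts
);

// Inference Endpoints (dedicated): point the client at your endpoint URL instead.
const endpoint = inference.endpoint("https://YOUR-ENDPOINT.endpoints.huggingface.cloud");
const output = await endpoint.textGeneration({ inputs: "The Eiffel Tower is located in" });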
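
The helpers.ts hunk only shows the head of addInferenceParameters. As a rough sketch of the idea it implements (not the library's exact code, and with a hypothetical ModelCardLike type standing in for ModelData): widget calls copy any user-supplied parameters declared under `inference/parameters` in the model card metadata into the request body sent to Inference API (serverless).

// Hypothetical re-implementation for illustration; mirrors the intent of addInferenceParameters.
type ModelCardLike = {
	cardData?: { inference?: boolean | { parameters?: Record<string, unknown> } };
};

function addInferenceParametersSketch(requestBody: Record<string, unknown>, model: ModelCardLike): void {
	const inference = model.cardData?.inference;
	// `inference` can be a boolean toggle or an object carrying widget parameters.
	if (typeof inference === "object" && inference.parameters) {
		requestBody["parameters"] = inference.parameters;
	}
}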