Merging changes synced from https://github.com/MicrosoftDocs/dataexpl…

…orer-docs-pr (branch live)
MicrosoftDocs · Nov 25, 2024 · 0354495 · 0354495
2 parents c528081 + 7e907da
commit 0354495
Show file tree

Hide file tree

Showing 6 changed files with 166 additions and 8 deletions.
diff --git a/data-explorer/kusto-tocs/management/toc.yml b/data-explorer/kusto-tocs/management/toc.yml
@@ -238,6 +238,8 @@ items:
     items:
     - name: Columns management
       href: /kusto/management/columns?view=azure-data-explorer&preserve-view=true
+    - name: Change column type without data loss
+      href: /kusto/management/change-column-type-without-data-loss?view=azure-data-explorer&preserve-view=true
     - name: .alter column command
       href: /kusto/management/alter-column?view=azure-data-explorer&preserve-view=true
     - name: .drop column command
@@ -614,25 +616,25 @@ items:
     items:
     - name: Row level security policy
       href: /kusto/management/row-level-security-policy?view=azure-data-explorer&preserve-view=true
-      displayName: row_level_security policy
+      displayName: row_level_security policy, row level security
     - name: .alter materialized-view policy row_level_security command
       href: /kusto/management/alter-materialized-view-row-level-security-policy-command?view=azure-data-explorer&preserve-view=true
-      displayName: .alter materialized-view row level security policy
+      displayName: .alter materialized-view row level security policy, row level security
     - name: .alter table policy row_level_security command
       href: /kusto/management/alter-table-row-level-security-policy-command?view=azure-data-explorer&preserve-view=true
-      displayName: .alter table row level security policy, .alter table row_level_security policy
+      displayName: .alter table row level security policy, .alter table row_level_security policy, row level security
     - name: .delete materialized-view policy row_level_security command
-      displayName: .delete materialized-view policy row_level_security command
+      displayName: .delete materialized-view policy row_level_security command, row level security
       href: /kusto/management/delete-materialized-view-row-level-security-policy-command?view=azure-data-explorer&preserve-view=true
     - name: .delete table policy row_level_security command
-      displayName: .delete table policy row_level_security command
+      displayName: .delete table policy row_level_security command, row level security
       href: /kusto/management/delete-table-row-level-security-policy-command?view=azure-data-explorer&preserve-view=true
     - name: .show materialized-view policy row_level_security command
       href: /kusto/management/show-materialized-view-row-level-security-policy-command?view=azure-data-explorer&preserve-view=true
-      displayName: .show materialized view row level security policy, .show materialized-view row_level_security policy, .show materialized view row_level_security policy
+      displayName: .show materialized view row level security policy, .show materialized-view row_level_security policy, .show materialized view row_level_security policy, row level security
     - name: .show table policy row_level_security command
       href: /kusto/management/show-table-row-level-security-policy-command?view=azure-data-explorer&preserve-view=true
-      displayName: .show table row level security policy, .show table row_level_security policy
+      displayName: .show table row level security policy, .show table row_level_security policy, row level security
   - name: Row order
     items:
     - name: Row order policy command

diff --git a/data-explorer/kusto-tocs/query/toc.yml b/data-explorer/kusto-tocs/query/toc.yml
@@ -1441,6 +1441,10 @@ items:
     href: /kusto/query/graph-match-operator?view=azure-data-explorer&preserve-view=true
   - name: graph-to-table
     href: /kusto/query/graph-to-table-operator?view=azure-data-explorer&preserve-view=true
+  - name: graph-shortest-paths
+    href: /kusto/query/graph-shortest-paths-operator?view=azure-data-explorer&preserve-view=true
+  - name: graph-mark-components
+    href: /kusto/query/graph-mark-components-operator?view=azure-data-explorer&preserve-view=true
 - name: Geospatial
   items:
   - name: Geospatial clustering overview
@@ -1753,6 +1757,8 @@ items:
       href: /kusto/query/diffpatterns-text-plugin?view=azure-data-explorer&preserve-view=true
   - name: Query connectivity plugins
     items:
+    - name: ai_embed_text plugin
+      href: /kusto/query/ai-embed-text-plugin?view=azure-data-explorer&preserve-view=true
     - name: azure_digital_twins_query_request plugin
       href: /kusto/query/azure-digital-twins-query-request-plugin?view=azure-data-explorer&preserve-view=true
     - name: cosmosdb_sql_request plugin

diff --git a/data-explorer/kusto/management/managed-identity-policy.md b/data-explorer/kusto/management/managed-identity-policy.md
@@ -3,7 +3,7 @@ title: Kusto ManagedIdentity policy
 description: Learn about the ManagedIdentity policy to control managed identities.
 ms.reviewer: slneimer
 ms.topic: reference
-ms.date: 08/11/2024
+ms.date: 11/12/2024
 monikerRange: "azure-data-explorer"
 ---
 # Managed Identity policy
@@ -56,6 +56,7 @@ The following values specify authentication to a `usage` using the configured ma
 |---|---|
 | `All` | All current and future usages are allowed. |
 | `AutomatedFlows`| Run a [Continuous Export](data-export/continuous-data-export.md) or [Update Policy](update-policy.md) automated flow on behalf of a managed identity. |
+| `AzureAI`| Authenticate to an Azure OpenAI service using the *ai_embed_text* plugin with a managed identity. |
 | `DataConnection` | Authenticate to data connections to an Event Hub or an Event Grid. |
 |`ExternalTable` | Authenticate to external tables using connection strings configured with a managed identity. |
 | `NativeIngestion` |  Authenticate to an SDK for native ingestion from an external source. |

diff --git a/data-explorer/kusto/query/ai-embed-text-plugin.md b/data-explorer/kusto/query/ai-embed-text-plugin.md
@@ -0,0 +1,143 @@
+---
+title: ai_embed_text plugin (Preview)
+description: Learn how to use the ai_embed_text plugin to embed text via language models, enabling various AI-related scenarios such as RAG applications and semantic search.
+ms.reviewer: alexans
+ms.topic: reference
+ms.date: 11/12/2024
+monikerRange: "azure-data-explorer"
+---
+# ai_embed_text plugin (Preview)
+
+> [!INCLUDE [applies](../includes/applies-to-version/applies.md)] [!INCLUDE [azure-data-explorer](../includes/applies-to-version/azure-data-explorer.md)]
+
+The `ai_embed_text` plugin allows embedding of text using language models, enabling various AI-related scenarios such as Retrieval Augmented Generation (RAG) applications and semantic search. The plugin supports Azure OpenAI Service embedding models accessed using managed identity.
+
+## Prerequisites
+
+* An Azure OpenAI Service configured with [managed identity](/azure/ai-services/openai/how-to/managed-identity)
+* [Managed identity and callout policies](#configure-managed-identity-and-callout-policies) configured to allow communication with Azure OpenAI services
+
+## Syntax
+
+`evaluate` `ai_embed_text` `(`*text*, *connectionString* [`,` *options* [`,` *IncludeErrorMessages*]]`)`
+
+[!INCLUDE [syntax-conventions-note](../includes/syntax-conventions-note.md)]
+
+## Parameters
+
+| Name | Type | Required | Description |
+|--|--|--|--|
+| *text* | `string` | :heavy_check_mark: | The text to embed. The value can be a column reference or a constant scalar. |
+| *connectionString* | `string` | :heavy_check_mark: | The connection string for the language model, in the format `<ModelDeploymentUri>;<AuthenticationMethod>`. Replace `<ModelDeploymentUri>` and `<AuthenticationMethod>` with the AI model deployment URI and the authentication method, respectively. |
+| *options* | `dynamic` |  | The options that control calls to the embedding model endpoint. See [Options](#options). |
+| *IncludeErrorMessages* | `bool` |  | Indicates whether to output errors in a new column in the output table. Default value: `false`. |
+
+## Options
+
+The following table describes the options that control the way the requests are made to the embedding model endpoint.
+
+| Name | Type | Description |
+|--|--|--|
+| `RecordsPerRequest` | `int` | Specifies the number of records to process per request. Default value: `1`. |
+| `CharsPerRequest` | `int` | Specifies the maximum number of characters to process per request. Default value: `0` (unlimited). Azure OpenAI counts tokens, with each token approximately translating to four characters. |
+| `RetriesOnThrottling` | `int` | Specifies the number of retry attempts when throttling occurs. Default value: `0`. |
+| `GlobalTimeout` | `timespan` | Specifies the maximum time to wait for a response from the embedding model. Default value: `null`. |
+| `ModelParameters` | `dynamic` | Parameters specific to the embedding model, such as embedding dimensions or user identifiers for monitoring purposes. Default value: `null`. |
+| `ReturnSuccessfulOnly` | `bool` | Indicates whether to return only the successfully processed items. Default value: `false`. If the *IncludeErrorMessages* parameter is set to `true`, this option is always set to `false`. |
+
+## Configure managed identity and callout policies
+
+To use the `ai_embed_text` plugin, you must configure the following policies:
+
+* [managed identity](../management/managed-identity-policy.md): Allow the system-assigned managed identity to authenticate to Azure OpenAI services.
+* [callout](../management/callout-policy.md): Authorize the AI model endpoint domain.
+
+To configure these policies, use the commands in the following steps:
+
+1. Configure the managed identity:
+
+    <!-- csl -->
+    ~~~kusto
+    .alter-merge cluster policy managed_identity
+    ```
+    [
+      {
+        "ObjectId": "system",
+        "AllowedUsages": "AzureAI"
+      }
+    ]
+    ```
+    ~~~
+
+1. Configure the callout policy:
+
+    <!-- csl -->
+    ~~~kusto
+    .alter-merge cluster policy callout
+    ```
+    [
+        {
+            "CalloutType": "azure_openai",
+            "CalloutUriRegex": "https://[A-Za-z0-9\\-]{3,63}\\.openai\\.azure\\.com/.*",
+            "CanCall": true
+        }
+    ]
+    ```
+    ~~~
+
+## Returns
+
+Returns the following new embedding columns:
+
+* A column with the **_embedding** suffix that contains the embedding values.
+* If configured to return errors, a column with the **_embedding_error** suffix, which contains error strings or is left empty if the operation is successful.
+
+Depending on the input type, the plugin returns different results:
+
+* **Column reference**: Returns one or more records with additional columns that are prefixed by the reference column name. For example, if the input column is named **TextData**, the output columns are named **TextData_embedding** and, if configured to return errors, **TextData_embedding_error**.
+* **Constant scalar**: Returns a single record with additional columns that are not prefixed. The column names are **_embedding** and, if configured to return errors, **_embedding_error**.
+
+## Examples
+
+The following example embeds the text `Embed this text using AI` using the Azure OpenAI Embedding model.
+
+<!-- csl -->
+```kusto
+let expression = 'Embed this text using AI';
+let connectionString = 'https://myaccount.openai.azure.com/openai/deployments/text-embedding-3-small/embeddings?api-version=2024-06-01;managed_identity=system';
+evaluate ai_embed_text(expression, connectionString)
+```
+
+The following example embeds multiple texts using the Azure OpenAI Embedding model.
+
+<!-- csl -->
+~~~kusto
+let connectionString = 'https://myaccount.openai.azure.com/openai/deployments/text-embedding-3-small/embeddings?api-version=2024-06-01;managed_identity=system';
+let options = dynamic({
+    "RecordsPerRequest": 10,
+    "CharsPerRequest": 10000,
+    "RetriesOnThrottling": 1,
+    "GlobalTimeout": 2m
+});
+datatable(TextData: string)
+[
+    "First text to embed",
+    "Second text to embed",
+    "Third text to embed"
+]
+| evaluate ai_embed_text(TextData, connectionString, options , true)
+~~~
+
+## Best practices
+
+Azure OpenAI embedding models are subject to heavy throttling, and frequent calls to this plugin can quickly reach throttling limits.
+
+To efficiently use the `ai_embed_text` plugin while minimizing throttling and costs, follow these best practices:
+
+* **Control request size**: Adjust the number of records (`RecordsPerRequest`) and characters per request (`CharsPerRequest`).
+* **Control query timeout**: Set `GlobalTimeout` to a value lower than the query [timeout](../set-timeout-limits.md) to ensure progress isn't lost on successful calls up to that point.
+* **Handle rate limits more gracefully**: Set retries on throttling (`RetriesOnThrottling`).
+
+## Related content
+
+* [series_cosine_similarity()](series-cosine-similarity-function.md)
diff --git a/data-explorer/kusto/query/series-cosine-similarity-function.md b/data-explorer/kusto/query/series-cosine-similarity-function.md
@@ -55,3 +55,7 @@ datatable(s1:dynamic, s2:dynamic)
 |---|---|---|
 |[0.1,0.2,0.1,0.2]|[0.11,0.2,0.11,0.21]|0.99935343825504|
 |[0.1,0.2,0.1,0.2]|[1,2,3,4]|0.923760430703401|
+
+## Related content
+
+* [ai_embed_text plugin (Preview)](ai-embed-text-plugin.md)
diff --git a/data-explorer/kusto/query/toc.yml b/data-explorer/kusto/query/toc.yml
@@ -1588,6 +1588,8 @@ items:
             href: diffpatterns-text-plugin.md
       - name: Query connectivity plugins
         items:
+          - name: ai_embed_text plugin
+            href: ai-embed-text-plugin.md
           - name: azure_digital_twins_query_request plugin
             href: azure-digital-twins-query-request-plugin.md
           - name: cosmosdb_sql_request plugin