diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 75a65def..91aec973 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,7 +23,7 @@ repos: hooks: - id: detect-secrets exclude: (?x)^( - .*.ipynb|config.yaml + .*.ipynb|config.yaml|.*.md )$ - repo: https://github.com/pre-commit/pre-commit-hooks diff --git a/CHANGELOG.md b/CHANGELOG.md index 199dc844..97479a74 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,18 @@ +# v3.2.1 +## Bug Fixes +- Resolved issue where subnet wasn't being passed into ec2 instance creation +- Resolved role creation issue when deploying with custom subnets +- Updated docker image to grant permissions on copied in files + +## Coming Soon +- Version 3.3.0 will include a new RAG ingestion pipeline. This will allow users to configure an S3 bucket and an ingestion trigger. When triggered, these documents will be pre-processed and loaded into the selected vector store. + +## Acknowledgements +* @bedanley +* @estohlmann + +**Full Changelog**: https://github.com/awslabs/LISA/compare/v3.2.0...v3.2.1 + # v3.2.0 ## Key Features ### Enhanced Deployment Configuration diff --git a/VERSION b/VERSION index 944880fa..e4604e3a 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.2.0 +3.2.1 diff --git a/ecs_model_deployer/Dockerfile b/ecs_model_deployer/Dockerfile index 80375d26..5cdf25f5 100644 --- a/ecs_model_deployer/Dockerfile +++ b/ecs_model_deployer/Dockerfile @@ -1,4 +1,5 @@ FROM public.ecr.aws/lambda/nodejs:18 COPY ./dist/ ${LAMBDA_TASK_ROOT} +RUN chmod 777 -R ${LAMBDA_TASK_ROOT} CMD ["index.handler"] diff --git a/lambda/dockerimagebuilder/__init__.py b/lambda/dockerimagebuilder/__init__.py index 75ccda4f..dcda9f06 100644 --- a/lambda/dockerimagebuilder/__init__.py +++ b/lambda/dockerimagebuilder/__init__.py @@ -70,6 +70,7 @@ def handler(event: Dict[str, Any], context) -> Dict[str, Any]: # type: ignore [ try: instances = ec2_resource.create_instances( ImageId=ami_id, + SubnetId=os.environ["LISA_SUBNET_ID"], MinCount=1, MaxCount=1, InstanceType="m5.large", diff --git a/lib/docs/.vitepress/config.mts b/lib/docs/.vitepress/config.mts index 8f69311b..6f147bc9 100644 --- a/lib/docs/.vitepress/config.mts +++ b/lib/docs/.vitepress/config.mts @@ -21,22 +21,34 @@ const navLinks = [ text: 'System Administrator Guide', items: [ { text: 'What is LISA?', link: '/admin/overview' }, - { text: 'Architecture Overview', link: '/admin/architecture' }, + { + text: 'Architecture Overview', + items: [ + { text: 'LISA Components', link: '/admin/architecture#lisa-components' }, + ], + link: '/admin/architecture', + }, { text: 'Getting Started', link: '/admin/getting-started' }, + { text: 'Configure IdP: Cognito & Keycloak Examples', link: '/admin/idp-config' }, { text: 'Deployment', link: '/admin/deploy' }, - { text: 'Model Management API Usage', link: '/admin/model-management' }, - { text: 'Chat UI Configuration', link: '/admin/ui-configuration' }, - { text: 'API Request Error Handling', link: '/admin/error' }, + { text: 'Setting Model Management Admin Group', link: '/admin/model-management-admin' }, + { text: 'LiteLLM', link: '/admin/litellm' }, + { text: 'API Overview', link: '/admin/api-overview' }, + { text: 'API Request Error Handling', link: '/admin/api-error' }, + { text: 'Security', link: '/admin/security' }, ], }, { text: 'Advanced Configuration', items: [ - { text: 'Configuration Schema', link: '/config/configuration' }, + { text: 'Programmatic API Tokens', link: '/config/api-tokens' }, { text: 'Model Compatibility', link: 
'/config/model-compatibility' }, - { text: 'Rag Vector Stores', link: '/config/vector-stores' }, - { text: 'Configure IdP: Cognito & Keycloak Examples', link: '/config/idp' }, - { text: 'LiteLLM', link: '/config/lite-llm' }, + { text: 'Model Management API', link: '/config/model-management-api' }, + { text: 'Model Management UI', link: '/config/model-management-ui' }, + { text: 'Usage & Features', link: '/config/usage' }, + { text: 'RAG Vector Stores', link: '/config/vector-stores' }, + { text: 'Branding', link: '/config/branding' }, + { text: 'Configuration Schema', link: '/config/configuration' }, ], }, { diff --git a/lib/docs/admin/error.md b/lib/docs/admin/api-error.md similarity index 100% rename from lib/docs/admin/error.md rename to lib/docs/admin/api-error.md diff --git a/lib/docs/admin/api-overview.md b/lib/docs/admin/api-overview.md new file mode 100644 index 00000000..e3804be7 --- /dev/null +++ b/lib/docs/admin/api-overview.md @@ -0,0 +1,81 @@ +# API Usage Overview + +LISA provides robust API endpoints for managing models, both for users and administrators. These endpoints allow for +operations such as listing, creating, updating, and deleting models. + +## API Gateway and ALB Endpoints + +LISA uses two primary APIs for model management: + +1. **[User-facing OpenAI-Compatible API](#litellm-routing-in-all-models)**: Available to all users for inference tasks + and accessible through the + LISA + Serve ALB. This API provides an interface for querying and interacting with models deployed on Amazon ECS, Amazon + Bedrock, or through LiteLLM. +2. **[Admin-level Model Management API](/config/model-management-api)**: Available only to administrators through the + API Gateway (APIGW). This API + allows for full control of model lifecycle management, including creating, updating, and deleting models. + +### LiteLLM Routing in All Models + +Every model request is routed through LiteLLM, regardless of whether infrastructure (like ECS) is created for it. +Whether deployed on ECS, external models via Bedrock, or managed through LiteLLM, all models are added to LiteLLM for +traffic routing. The distinction is whether infrastructure is created (determined by request payloads), but LiteLLM +integration is consistent for all models. The model management APIs will handle adding or removing model configurations +from LiteLLM, and the LISA Serve endpoint will handle the inference requests against models available in LiteLLM. + +## User-facing OpenAI-Compatible API + +The OpenAI-compatible API is accessible through the LISA Serve ALB and allows users to list models available for +inference tasks. Although not specifically part of the model management APIs, any model that is added or removed from +LiteLLM via the model management API Gateway APIs will be reflected immediately upon queries to LiteLLM through the LISA +Serve ALB. + +### Listing Models + +The `/v2/serve/models` endpoint on the LISA Serve ALB allows users to list all models available for inference in the +LISA system. 
+ +#### Request Example: + +```bash +curl -s -H 'Authorization: Bearer ' -X GET https:///v2/serve/models +``` + +#### Response Example: + +```json +{ + "data": [ + { + "id": "bedrock-embed-text-v2", + "object": "model", + "created": 1677610602, + "owned_by": "openai" + }, + { + "id": "titan-express-v1", + "object": "model", + "created": 1677610602, + "owned_by": "openai" + }, + { + "id": "sagemaker-amazon-mistrallite", + "object": "model", + "created": 1677610602, + "owned_by": "openai" + } + ], + "object": "list" +} +``` + +#### Explanation of Response Fields: + +These fields are all defined by the OpenAI API specification, which is +documented [here](https://platform.openai.com/docs/api-reference/models/list). + +- `id`: A unique identifier for the model. +- `object`: The type of object, which is "model" in this case. +- `created`: A Unix timestamp representing when the model was created. +- `owned_by`: The entity responsible for the model, such as "openai." diff --git a/lib/docs/admin/getting-started.md b/lib/docs/admin/getting-started.md index 1c828188..31d995a5 100644 --- a/lib/docs/admin/getting-started.md +++ b/lib/docs/admin/getting-started.md @@ -120,7 +120,7 @@ This command verifies if the model's weights are already present in your S3 buck > **WARNING** > As of LISA 3.0, the `ecsModels` parameter in `config-custom.yaml` is solely for staging model weights in your S3 bucket. -> Previously, before models could be managed through the [API](/admin/model-management) or via the Model Management +> Previously, before models could be managed through the [API](/config/model-management-api) or via the Model Management > section of the [Chatbot](/user/chat), this parameter also > dictated which models were deployed. @@ -140,13 +140,14 @@ In the `config-custom.yaml` file, configure the `authConfig` block for authentic - `jwtGroupsProperty`: Path to the groups field in the JWT token - `additionalScopes` (optional): Extra scopes for group membership information -IDP Configuration examples using AWS Cognito and Keycloak can be found: [IDP Configuration Examples](/config/idp) +IDP Configuration examples using AWS Cognito and Keycloak can be found: [IDP Configuration Examples](/admin/idp-config) ## Step 7: Configure LiteLLM We utilize LiteLLM under the hood to allow LISA to respond to the [OpenAI specification](https://platform.openai.com/docs/api-reference). For LiteLLM configuration, a key must be set up so that the system may communicate with a database for tracking all the models that are added or removed -using the [Model Management API](/admin/model-management). The key must start with `sk-` and then can be any arbitrary +using the [Model Management API](/config/model-management-api). The key must start with `sk-` and then can be any +arbitrary string. We recommend generating a new UUID and then using that as the key. Configuration example is below. @@ -229,5 +230,6 @@ services are in the same region as the LISA installation, LISA can use them alon **Important:** Endpoints or Models statically defined during LISA deployment cannot be removed or updated using the LISA Model Management API, and they will not show in the Chat UI. These will only show as part of the OpenAI `/models` API. -Although there is support for it, we recommend using the [Model Management API](/admin/model-management) instead of the +Although there is support for it, we recommend using the [Model Management API](/config/model-management-api) instead of +the following static configuration. 
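Note on the LiteLLM key requirement referenced in the getting-started.md hunk above: the docs state the key must start with `sk-` and recommend using a freshly generated UUID as the remainder. A minimal sketch of producing such a key on the command line, using only standard `uuidgen` and `tr`; the exact `config-custom.yaml` field that holds this key is not shown in this diff, so it is intentionally omitted here:

```shell
# generate a LiteLLM-style key: "sk-" followed by a lowercase UUID
echo "sk-$(uuidgen | tr '[:upper:]' '[:lower:]')"
```
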
diff --git a/lib/docs/config/idp.md b/lib/docs/admin/idp-config.md similarity index 100% rename from lib/docs/config/idp.md rename to lib/docs/admin/idp-config.md diff --git a/lib/docs/admin/litellm.md b/lib/docs/admin/litellm.md new file mode 100644 index 00000000..46409041 --- /dev/null +++ b/lib/docs/admin/litellm.md @@ -0,0 +1 @@ +# TODO diff --git a/lib/docs/admin/model-management-admin.md b/lib/docs/admin/model-management-admin.md new file mode 100644 index 00000000..46409041 --- /dev/null +++ b/lib/docs/admin/model-management-admin.md @@ -0,0 +1 @@ +# TODO diff --git a/lib/docs/admin/security.md b/lib/docs/admin/security.md new file mode 100644 index 00000000..46409041 --- /dev/null +++ b/lib/docs/admin/security.md @@ -0,0 +1 @@ +# TODO diff --git a/lib/docs/admin/api-tokens.md b/lib/docs/config/api-tokens.md similarity index 100% rename from lib/docs/admin/api-tokens.md rename to lib/docs/config/api-tokens.md diff --git a/lib/docs/config/branding.md b/lib/docs/config/branding.md new file mode 100644 index 00000000..46409041 --- /dev/null +++ b/lib/docs/config/branding.md @@ -0,0 +1 @@ +# TODO diff --git a/lib/docs/admin/model-management.md b/lib/docs/config/model-management-api.md similarity index 75% rename from lib/docs/admin/model-management.md rename to lib/docs/config/model-management-api.md index cb4b7ba9..06dce55d 100644 --- a/lib/docs/admin/model-management.md +++ b/lib/docs/config/model-management-api.md @@ -1,85 +1,18 @@ - -# Model Management API Usage - -LISA provides robust API endpoints for managing models, both for users and administrators. These endpoints allow for operations such as listing, creating, updating, and deleting models. - -## API Gateway and ALB Endpoints - -LISA uses two primary APIs for model management: - -1. **User-facing OpenAI-Compatible API**: Available to all users for inference tasks and accessible through the LISA Serve ALB. This API provides an interface for querying and interacting with models deployed on Amazon ECS, Amazon Bedrock, or through LiteLLM. -2. **Admin-level Model Management API**: Available only to administrators through the API Gateway (APIGW). This API allows for full control of model lifecycle management, including creating, updating, and deleting models. - -### LiteLLM Routing in All Models - -Every model request is routed through LiteLLM, regardless of whether infrastructure (like ECS) is created for it. Whether deployed on ECS, external models via Bedrock, or managed through LiteLLM, all models are added to LiteLLM for traffic routing. The distinction is whether infrastructure is created (determined by request payloads), but LiteLLM integration is consistent for all models. The model management APIs will handle adding or removing model configurations from LiteLLM, and the LISA Serve endpoint will handle the inference requests against models available in LiteLLM. - -## User-facing OpenAI-Compatible API - -The OpenAI-compatible API is accessible through the LISA Serve ALB and allows users to list models available for inference tasks. Although not specifically part of the model management APIs, any model that is added or removed from LiteLLM via the model management API Gateway APIs will be reflected immediately upon queries to LiteLLM through the LISA Serve ALB. - -### Listing Models - -The `/v2/serve/models` endpoint on the LISA Serve ALB allows users to list all models available for inference in the LISA system. 
- -#### Request Example: - -```bash -curl -s -H 'Authorization: Bearer ' -X GET https:///v2/serve/models -``` - -#### Response Example: - -```json -{ - "data": [ - { - "id": "bedrock-embed-text-v2", - "object": "model", - "created": 1677610602, - "owned_by": "openai" - }, - { - "id": "titan-express-v1", - "object": "model", - "created": 1677610602, - "owned_by": "openai" - }, - { - "id": "sagemaker-amazon-mistrallite", - "object": "model", - "created": 1677610602, - "owned_by": "openai" - } - ], - "object": "list" -} -``` - -#### Explanation of Response Fields: - -These fields are all defined by the OpenAI API specification, which is documented [here](https://platform.openai.com/docs/api-reference/models/list). - -- `id`: A unique identifier for the model. -- `object`: The type of object, which is "model" in this case. -- `created`: A Unix timestamp representing when the model was created. -- `owned_by`: The entity responsible for the model, such as "openai." - -## Admin-level Model Management API +# Admin-level Model Management API This API is only accessible by administrators via the API Gateway and is used to create, update, and delete models. It supports full model lifecycle management. -### Listing Models (Admin API) +## Listing Models (Admin API) The `/models` route allows admins to list all models managed by the system. This includes models that are either creating, deleting, already active, or in a failed state. Models can be deployed via ECS or managed externally through a LiteLLM configuration. -#### Request Example: +### Request Example: ```bash curl -s -H "Authorization: Bearer " -X GET https:///models ``` -#### Response Example: +### Response Example: ```json { @@ -152,7 +85,7 @@ curl -s -H "Authorization: Bearer " -X GET https:// } ``` -#### Explanation of Response Fields: +### Explanation of Response Fields: - `modelId`: A unique identifier for the model. - `modelName`: The name of the model, typically referencing the underlying service (Bedrock, SageMaker, etc.). @@ -160,20 +93,20 @@ curl -s -H "Authorization: Bearer " -X GET https:// - `streaming`: Whether the model supports streaming inference. - `instanceType` (optional): The instance type if the model is deployed via ECS. -### Creating a Model (Admin API) +## Creating a Model (Admin API) LISA provides the `/models` endpoint for creating both ECS and LiteLLM-hosted models. Depending on the request payload, infrastructure will be created or bypassed (e.g., for LiteLLM-only models). This API accepts the same model definition parameters that were accepted in the V2 model definitions within the config.yaml file with one notable difference: the `containerConfig.image.path` field is now omitted because it corresponded with the `inferenceContainer` selection. As a convenience, this path is no longer required. -#### Request Example: +### Request Example: ``` POST https:///models ``` -#### Example Payload for ECS Model: +### Example Payload for ECS Model: ```json { @@ -226,7 +159,7 @@ POST https:///models } ``` -#### Creating a LiteLLM-Only Model: +### Creating a LiteLLM-Only Model: ```json { @@ -237,7 +170,7 @@ POST https:///models } ``` -#### Explanation of Key Fields for Creation Payload: +### Explanation of Key Fields for Creation Payload: - `modelId`: The unique identifier for the model. This is any name you would like it to be. - `modelName`: The name of the model as it appears in the system. 
For LISA-hosted models, this must be the S3 Key to your model artifacts, otherwise @@ -254,17 +187,17 @@ POST https:///models - `autoScalingConfig`: Configuration related to ECS autoscaling. - `loadBalancerConfig`: Health check configuration for load balancers. -### Deleting a Model (Admin API) +## Deleting a Model (Admin API) Admins can delete a model using the following endpoint. Deleting a model removes the infrastructure (ECS) or disconnects from LiteLLM. -#### Request Example: +### Request Example: ``` DELETE https:///models/{modelId} ``` -#### Response Example: +### Response Example: ```json { @@ -273,7 +206,7 @@ DELETE https:///models/{modelId} } ``` -### Updating a Model +## Updating a Model LISA offers basic updating functionality for both LISA-hosted and LiteLLM-only models. For both types, the model type and streaming support can be updated in the cases that the models were originally created with the wrong parameters. For example, if an embedding model was accidentally created as a `textgen` @@ -287,15 +220,15 @@ as updating its AutoScaling configuration, as these would introduce ambiguous in requires the usage of the enable/disable functionality to allow models to fully scale down or turn back on. Metadata updates, such as changing the model type or streaming compatibility, can happen in either type of update or simply by themselves. -#### Request Example +### Request Example ``` PUT https:///models/{modelId} ``` -#### Example Payloads +### Example Payloads -##### Update Model Metadata +#### Update Model Metadata This payload will simply update the model metadata, which will complete within seconds of invoking. If setting a model as an `embedding` model, then the `streaming` option must be set to `false` or omitted as LISA does not support streaming with embedding models. Both the `streaming` and `modelType` options @@ -308,7 +241,7 @@ may be included in any other update request. } ``` -##### Update AutoScaling Configuration +#### Update AutoScaling Configuration This payload will update the AutoScaling configuration for minimum, maximum, and desired number of instances. The desired number must be between the minimum or maximum numbers, inclusive, and all the numbers must be strictly greater than 0. If the model currently has less than the minimum number, then @@ -332,7 +265,7 @@ then that is the only option that you need to specify in the request object with } ``` -##### Stop Model - Scale Down to 0 Instances +#### Stop Model - Scale Down to 0 Instances This payload will stop all model EC2 instances and remove the model reference from LiteLLM so that users are unable to make inference requests against a model with no capacity. This option is useful for users who wish to manage costs and turn off instances when the model is not currently needed but will be used again @@ -347,7 +280,7 @@ handled as separate operations. } ``` -##### Start Model - Restore Previous AutoScaling Configuration +#### Start Model - Restore Previous AutoScaling Configuration After stopping a model, this payload will turn the model back on by spinning up instances, waiting for the expected spin-up time to allow models to initialize, and then adding the reference back to LiteLLM so that users may query the model again. 
This is expected to be a much faster operation than creating the model through the CreateModel diff --git a/lib/docs/config/model-management-ui.md b/lib/docs/config/model-management-ui.md new file mode 100644 index 00000000..46409041 --- /dev/null +++ b/lib/docs/config/model-management-ui.md @@ -0,0 +1 @@ +# TODO diff --git a/lib/docs/config/usage.md b/lib/docs/config/usage.md new file mode 100644 index 00000000..f444ba45 --- /dev/null +++ b/lib/docs/config/usage.md @@ -0,0 +1,123 @@ +# Usage and Features + +The LISA Serve endpoint can be used independently of the Chat UI, and the following shows a few examples of how to do +that. The Serve endpoint +will still validate user auth, so if you have a Bearer token from the IdP configured with LISA, we will honor it, or if +you've set up an API +token using the [DynamoDB instructions](/config/api-tokens), we will also accept that. This diagram shows the LISA Serve +components that +would be utilized during direct REST API requests. + +## OpenAI Specification Compatibility + +We now provide greater support for the [OpenAI specification](https://platform.openai.com/docs/api-reference) for model +inference and embeddings. +We utilize LiteLLM as a proxy for both models we spin up on behalf of the user and additional models configured through +the config.yaml file, and because of that, the +LISA REST API endpoint allows for a central location for making text generation and embeddings requests. We support, and +are not limited to, the following popular endpoint +routes as long as your underlying models can also respond to them. + +- /models +- /chat/completions +- /completions +- /embeddings + +By supporting the OpenAI spec, we can more easily allow users to integrate their collection of models into their LLM +applications and workflows. In LISA, users can authenticate +using their OpenID Connect Identity Provider, or with an API token created through the DynamoDB token workflow as +described in [API Tokens](/config/api-tokens). Once the token +is retrieved, users can use that in direct requests to the LISA Serve REST API. If using the IdP, users must set the ' +Authorization' header, otherwise if using the API token, +either the 'Api-Key' header or the 'Authorization' header. After that, requests to `https://${lisa_serve_alb}/v2/serve` +will handle the OpenAI API calls. As an example, the following call can list all +models that LISA is aware of, assuming usage of the API token. If you are using a self-signed cert, you must also +provide the `--cacert $path` option to specify a CA bundle to trust for SSL verification. + +```shell +curl -s -H 'Api-Key: your-token' -X GET https://${lisa_serve_alb}/v2/serve/models +``` + +If using the IdP, the request would look like the following: + +```shell +curl -s -H 'Authorization: Bearer your-token' -X GET https://${lisa_serve_alb}/v2/serve/models +``` + +When using a library that requests an OpenAI-compatible base_url, you can provide `https://${lisa_serve_alb}/v2/serve` +here. All of the OpenAI routes will +automatically be added to the base URL, just as we appended `/models` to the `/v2/serve` route for listing all models +tracked by LISA. + +## Continue JetBrains and VS Code Plugin + +For developers that desire an LLM assistant to help with programming tasks, we support adding LISA as an LLM provider +for the [Continue plugin](https://www.continue.dev). +To add LISA as a provider, open up the Continue plugin's `config.json` file and locate the `models` list. 
In this list, +add the following block, replacing the placeholder URL +with your own REST API domain or ALB. The `/v2/serve` is required at the end of the `apiBase`. This configuration +requires an API token as created through the [DynamoDB workflow](/config/api-tokens). + +```json +{ + "model": "AUTODETECT", + "title": "LISA", + "apiBase": "https:///v2/serve", + "provider": "openai", + "apiKey": "your-api-token" +} +``` + +Once you save the `config.json` file, the Continue plugin will call the `/models` API to get a list of models at your +disposal. The ones provided by LISA will be prefaced +with "LISA" or with the string you place in the `title` field of the config above. Once the configuration is complete +and a model is selected, you can use that model to +generate code and perform AI assistant tasks within your development environment. See +the [Continue documentation](https://docs.continue.dev/how-to-use-continue) for more +information about its features, capabilities, and usage. + +### Usage in LLM Libraries + +If your workflow includes using libraries, such as [LangChain](https://python.langchain.com/v0.2/docs/introduction/) +or [OpenAI](https://github.com/openai/openai-python), +then you can place LISA right in your application by changing only the endpoint and headers for the client objects. As +an example, using the OpenAI library, the client would +normally be instantiated and invoked with the following block. + +```python +from openai import OpenAI + +client = OpenAI( + api_key="my_key" +) +client.models.list() +``` + +To use the models being served by LISA, the client needs only a few changes: + +1. Specify the `base_url` as the LISA Serve ALB, using the /v2/serve route at the end, similar to the apiBase in + the [Continue example](#continue-jetbrains-and-vs-code-plugin) +2. Add the API key that you generated from the [token generation steps](/config/api-tokens) as your `api_key` field. +3. If using a self-signed cert, you must provide a certificate path for validating SSL. If you're using an ACM or public + cert, then this may be omitted. +1. We provide a convenience function in the `lisa-sdk` for generating a cert path from an IAM certificate ARN if one is + provided in the `RESTAPI_SSL_CERT_ARN` environment variable. + +The Code block will now look like this and you can continue to use the library without any other modifications. + +```python +# for self-signed certificates +import boto3 +from lisapy.utils import get_cert_path +# main client library +from openai import DefaultHttpxClient, OpenAI + +iam_client = boto3.client("iam") +cert_path = get_cert_path(iam_client) + +client = OpenAI( + api_key="my_key", + base_url="https:///v2/serve", + http_client=DefaultHttpxClient(verify=cert_path), # needed for self-signed certs on your ALB, can be omitted otherwise +) +client.models.list() diff --git a/lib/docs/user/chat.md b/lib/docs/user/chat.md index 35425f65..4a76b187 100644 --- a/lib/docs/user/chat.md +++ b/lib/docs/user/chat.md @@ -68,108 +68,3 @@ Launch the Chat UI: cd lib/user-interface/react/ npm run dev ``` - -# Usage and Features - -The LISA Serve endpoint can be used independently of the Chat UI, and the following shows a few examples of how to do that. The Serve endpoint -will still validate user auth, so if you have a Bearer token from the IdP configured with LISA, we will honor it, or if you've set up an API -token using the [DynamoDB instructions](/admin/api-tokens), we will also accept that. 
This diagram shows the LISA Serve -components that -would be utilized during direct REST API requests. - -## OpenAI Specification Compatibility - -We now provide greater support for the [OpenAI specification](https://platform.openai.com/docs/api-reference) for model inference and embeddings. -We utilize LiteLLM as a proxy for both models we spin up on behalf of the user and additional models configured through the config.yaml file, and because of that, the -LISA REST API endpoint allows for a central location for making text generation and embeddings requests. We support, and are not limited to, the following popular endpoint -routes as long as your underlying models can also respond to them. - -- /models -- /chat/completions -- /completions -- /embeddings - -By supporting the OpenAI spec, we can more easily allow users to integrate their collection of models into their LLM applications and workflows. In LISA, users can authenticate -using their OpenID Connect Identity Provider, or with an API token created through the DynamoDB token workflow as -described [here](/admin/api-tokens). Once the token -is retrieved, users can use that in direct requests to the LISA Serve REST API. If using the IdP, users must set the 'Authorization' header, otherwise if using the API token, -either the 'Api-Key' header or the 'Authorization' header. After that, requests to `https://${lisa_serve_alb}/v2/serve` will handle the OpenAI API calls. As an example, the following call can list all -models that LISA is aware of, assuming usage of the API token. If you are using a self-signed cert, you must also provide the `--cacert $path` option to specify a CA bundle to trust for SSL verification. - -```shell -curl -s -H 'Api-Key: your-token' -X GET https://${lisa_serve_alb}/v2/serve/models -``` - -If using the IdP, the request would look like the following: - -```shell -curl -s -H 'Authorization: Bearer your-token' -X GET https://${lisa_serve_alb}/v2/serve/models -``` - -When using a library that requests an OpenAI-compatible base_url, you can provide `https://${lisa_serve_alb}/v2/serve` here. All of the OpenAI routes will -automatically be added to the base URL, just as we appended `/models` to the `/v2/serve` route for listing all models tracked by LISA. - - -## Continue JetBrains and VS Code Plugin - -For developers that desire an LLM assistant to help with programming tasks, we support adding LISA as an LLM provider for the [Continue plugin](https://www.continue.dev). -To add LISA as a provider, open up the Continue plugin's `config.json` file and locate the `models` list. In this list, add the following block, replacing the placeholder URL -with your own REST API domain or ALB. The `/v2/serve` is required at the end of the `apiBase`. This configuration -requires an API token as created through the [DynamoDB workflow](/admin/api-tokens). - -```json -{ - "model": "AUTODETECT", - "title": "LISA", - "apiBase": "https:///v2/serve", - "provider": "openai", - "apiKey": "your-api-token" // pragma: allowlist-secret -} -``` - -Once you save the `config.json` file, the Continue plugin will call the `/models` API to get a list of models at your disposal. The ones provided by LISA will be prefaced -with "LISA" or with the string you place in the `title` field of the config above. Once the configuration is complete and a model is selected, you can use that model to -generate code and perform AI assistant tasks within your development environment. 
See the [Continue documentation](https://docs.continue.dev/how-to-use-continue) for more -information about its features, capabilities, and usage. - -### Usage in LLM Libraries - -If your workflow includes using libraries, such as [LangChain](https://python.langchain.com/v0.2/docs/introduction/) or [OpenAI](https://github.com/openai/openai-python), -then you can place LISA right in your application by changing only the endpoint and headers for the client objects. As an example, using the OpenAI library, the client would -normally be instantiated and invoked with the following block. - -```python -from openai import OpenAI - -client = OpenAI( - api_key="my_key" # pragma: allowlist-secret not a real key -) -client.models.list() -``` - -To use the models being served by LISA, the client needs only a few changes: - -1. Specify the `base_url` as the LISA Serve ALB, using the /v2/serve route at the end, similar to the apiBase in the [Continue example](#continue-jetbrains-and-vs-code-plugin) -2. Add the API key that you generated from the [token generation steps](/admin/api-tokens) as your `api_key` field. -3. If using a self-signed cert, you must provide a certificate path for validating SSL. If you're using an ACM or public cert, then this may be omitted. -1. We provide a convenience function in the `lisa-sdk` for generating a cert path from an IAM certificate ARN if one is provided in the `RESTAPI_SSL_CERT_ARN` environment variable. - -The Code block will now look like this and you can continue to use the library without any other modifications. - -```python -# for self-signed certificates -import boto3 -from lisapy.utils import get_cert_path -# main client library -from openai import DefaultHttpxClient, OpenAI - -iam_client = boto3.client("iam") -cert_path = get_cert_path(iam_client) - -client = OpenAI( - api_key="my_key", # pragma: allowlist-secret not a real key - base_url="https:///v2/serve", - http_client=DefaultHttpxClient(verify=cert_path), # needed for self-signed certs on your ALB, can be omitted otherwise -) -client.models.list() -``` diff --git a/lib/models/docker-image-builder.ts b/lib/models/docker-image-builder.ts index 98c05bd9..d7b2cc3d 100644 --- a/lib/models/docker-image-builder.ts +++ b/lib/models/docker-image-builder.ts @@ -147,7 +147,8 @@ export class DockerImageBuilder extends Construct { 'LISA_DOCKER_BUCKET': ec2DockerBucket.bucketName, 'LISA_ECR_URI': props.ecrUri, 'LISA_INSTANCE_PROFILE': ec2InstanceProfile.instanceProfileArn, - 'LISA_MOUNTS3_DEB_URL': props.mountS3DebUrl + 'LISA_MOUNTS3_DEB_URL': props.mountS3DebUrl, + ...(props.config?.subnets && {'LISA_SUBNET_ID': props.config.subnets[0].subnetId}) }, vpc: props.vpc?.subnetSelection ? 
props.vpc?.vpc : undefined, vpcSubnets: props.vpc?.subnetSelection, diff --git a/lib/models/ecs-model-deployer.ts b/lib/models/ecs-model-deployer.ts index 27a0866f..9cf1b7e3 100644 --- a/lib/models/ecs-model-deployer.ts +++ b/lib/models/ecs-model-deployer.ts @@ -16,7 +16,14 @@ import { Construct } from 'constructs'; import { DockerImageCode, DockerImageFunction, IFunction } from 'aws-cdk-lib/aws-lambda'; -import { Role, ServicePrincipal, ManagedPolicy, Policy, PolicyStatement, Effect } from 'aws-cdk-lib/aws-iam'; +import { + Role, + ServicePrincipal, + ManagedPolicy, + PolicyStatement, + Effect, + PolicyDocument +} from 'aws-cdk-lib/aws-iam'; import { Stack, Duration, Size } from 'aws-cdk-lib'; import { createCdkId } from '../core/utils'; @@ -35,32 +42,34 @@ export class ECSModelDeployer extends Construct { super(scope, id); const stackName = Stack.of(scope).stackName; const role = new Role(this, createCdkId([stackName, 'ecs-model-deployer-role']), { - assumedBy: new ServicePrincipal('lambda.amazonaws.com') - }); - - const assumeCdkPolicy = new Policy(this, createCdkId([stackName, 'ecs-model-deployer-policy']), { - statements: [ - new PolicyStatement({ - actions: ['sts:AssumeRole'], - resources: ['arn:*:iam::*:role/cdk-*'] - }), - new PolicyStatement({ - effect: Effect.ALLOW, - actions: [ - 'ec2:CreateNetworkInterface', - 'ec2:DescribeNetworkInterfaces', - 'ec2:DescribeSubnets', - 'ec2:DeleteNetworkInterface', - 'ec2:AssignPrivateIpAddresses', - 'ec2:UnassignPrivateIpAddresses' - ], - resources: ['*'], + assumedBy: new ServicePrincipal('lambda.amazonaws.com'), + managedPolicies: [ + ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSLambdaVPCAccessExecutionRole'), + ], + inlinePolicies: { + lambdaPermissions: new PolicyDocument({ + statements: [ + new PolicyStatement({ + actions: ['sts:AssumeRole'], + resources: ['arn:*:iam::*:role/cdk-*'] + }), + new PolicyStatement({ + effect: Effect.ALLOW, + actions: [ + 'ec2:CreateNetworkInterface', + 'ec2:DescribeNetworkInterfaces', + 'ec2:DescribeSubnets', + 'ec2:DeleteNetworkInterface', + 'ec2:AssignPrivateIpAddresses', + 'ec2:UnassignPrivateIpAddresses' + ], + resources: ['*'], + }) + ] }) - ] - }); - role.attachInlinePolicy(assumeCdkPolicy); - role.addManagedPolicy(ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSLambdaBasicExecutionRole')); + } + }); const stripped_config = { 'appName': props.config.appName, diff --git a/lib/rag/index.ts b/lib/rag/index.ts index 6990d6bd..59cbda4f 100644 --- a/lib/rag/index.ts +++ b/lib/rag/index.ts @@ -128,10 +128,10 @@ export class LisaRagStack extends Stack { description: 'Security group for RAG OpenSearch domain', }); // Allow communication from private subnets to ECS cluster - const subNets = config.subnets && config.vpcId ? vpc.subnetSelection?.subnets : vpc.vpc.isolatedSubnets.concat(vpc.vpc.privateSubnets); + const subNets = config.subnets && config.vpcId ? config.subnets : vpc.vpc.isolatedSubnets.concat(vpc.vpc.privateSubnets); subNets?.forEach((subnet) => { openSearchSg.connections.allowFrom( - Peer.ipv4(config.subnets ? config.subnets.filter((filteredSubnet) => filteredSubnet.subnetId === subnet.subnetId)?.[0]?.ipv4CidrBlock : subnet.ipv4CidrBlock), + Peer.ipv4(subnet.ipv4CidrBlock), Port.tcp(config.restApiConfig.rdsConfig.dbPort), 'Allow REST API private subnets to communicate with LiteLLM database', ); @@ -251,10 +251,10 @@ export class LisaRagStack extends Stack { description: 'Security group for RAG PGVector database', }); - const subNets = config.subnets && config.vpcId ? 
vpc.subnetSelection?.subnets : vpc.vpc.isolatedSubnets.concat(vpc.vpc.privateSubnets); + const subNets = config.subnets && config.vpcId ? config.subnets : vpc.vpc.isolatedSubnets.concat(vpc.vpc.privateSubnets); subNets?.forEach((subnet) => { pgvectorSg.connections.allowFrom( - Peer.ipv4(config.subnets ? config.subnets.filter((filteredSubnet) => filteredSubnet.subnetId === subnet.subnetId)?.[0]?.ipv4CidrBlock : subnet.ipv4CidrBlock), + Peer.ipv4(subnet.ipv4CidrBlock), Port.tcp(config.restApiConfig.rdsConfig.dbPort), 'Allow REST API private subnets to communicate with LiteLLM database', ); diff --git a/lib/serve/index.ts b/lib/serve/index.ts index f8f35def..a06749ee 100644 --- a/lib/serve/index.ts +++ b/lib/serve/index.ts @@ -147,10 +147,10 @@ export class LisaServeApplicationStack extends Stack { description: 'Security group for LiteLLM dynamic model management database.', }); - const subNets = config.subnets && config.vpcId ? vpc.subnetSelection?.subnets : vpc.vpc.isolatedSubnets.concat(vpc.vpc.privateSubnets); + const subNets = config.subnets && config.vpcId ? config.subnets : vpc.vpc.isolatedSubnets.concat(vpc.vpc.privateSubnets); subNets?.forEach((subnet) => { litellmDbSg.connections.allowFrom( - Peer.ipv4(config.subnets ? config.subnets.filter((filteredSubnet) => filteredSubnet.subnetId === subnet.subnetId)?.[0]?.ipv4CidrBlock : subnet.ipv4CidrBlock), + Peer.ipv4(subnet.ipv4CidrBlock), Port.tcp(config.restApiConfig.rdsConfig.dbPort), 'Allow REST API private subnets to communicate with LiteLLM database', ); diff --git a/lib/serve/rest-api/src/requirements.txt b/lib/serve/rest-api/src/requirements.txt index 2931cbc8..60339093 100644 --- a/lib/serve/rest-api/src/requirements.txt +++ b/lib/serve/rest-api/src/requirements.txt @@ -1,6 +1,6 @@ aioboto3==13.1.1 aiobotocore==2.13.1 -aiohttp==3.10.2 +aiohttp==3.10.11 boto3==1.34.131 click==8.1.7 cryptography==42.0.8 diff --git a/lib/user-interface/react/package-lock.json b/lib/user-interface/react/package-lock.json index a5d4e3af..0d93f269 100644 --- a/lib/user-interface/react/package-lock.json +++ b/lib/user-interface/react/package-lock.json @@ -1,12 +1,12 @@ { "name": "lisa-web", - "version": "3.1.0", + "version": "3.2.1", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "lisa-web", - "version": "3.1.0", + "version": "3.2.1", "dependencies": { "@cloudscape-design/collection-hooks": "^1.0.23", "@cloudscape-design/component-toolkit": "^1.0.0-beta.65", diff --git a/lib/user-interface/react/package.json b/lib/user-interface/react/package.json index 61069de6..019ba666 100644 --- a/lib/user-interface/react/package.json +++ b/lib/user-interface/react/package.json @@ -1,7 +1,7 @@ { "name": "lisa-web", "private": true, - "version": "3.2.0", + "version": "3.2.1", "type": "module", "scripts": { "dev": "vite", diff --git a/lisa-sdk/pyproject.toml b/lisa-sdk/pyproject.toml index 20dcfd30..2ab86027 100644 --- a/lisa-sdk/pyproject.toml +++ b/lisa-sdk/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "lisapy" -version = "3.2.0" +version = "3.2.1" description = "A simple SDK to help you interact with LISA. LISA is an LLM hosting solution for AWS dedicated clouds or ADCs." 
authors = ["Steve Goley "] readme = "README.md" diff --git a/package-lock.json b/package-lock.json index 04c23166..c6e65aee 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "lisa", - "version": "3.1.0", + "version": "3.2.1", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "lisa", - "version": "3.1.0", + "version": "3.2.1", "hasInstallScript": true, "license": "Apache-2.0", "dependencies": { diff --git a/package.json b/package.json index 7166d5e5..8f3b1e04 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "lisa", - "version": "3.2.0", + "version": "3.2.1", "bin": { "lisa": "bin/lisa.js" },