From dc1d69f2a02ff5f019607661fcd53066a9e7f737 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Fri, 5 Jul 2024 16:21:00 -0700
Subject: [PATCH 1/4] add galileo response pydantic obj

---
 litellm/integrations/types/galileo.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 litellm/integrations/types/galileo.py

diff --git a/litellm/integrations/types/galileo.py b/litellm/integrations/types/galileo.py
new file mode 100644
index 000000000000..cbdf53da5637
--- /dev/null
+++ b/litellm/integrations/types/galileo.py
@@ -0,0 +1,24 @@
+from datetime import datetime
+from typing import Any, Dict, List, Optional
+
+from pydantic import BaseModel, Field
+
+
+# from here: https://docs.rungalileo.io/galileo/gen-ai-studio-products/galileo-observe/how-to/logging-data-via-restful-apis#structuring-your-records
+class LLMResponse(BaseModel):
+    latency_ms: int
+    status_code: int
+    input_text: str
+    output_text: str
+    node_type: str
+    model: str
+    num_input_tokens: int
+    num_output_tokens: int
+    output_logprobs: Optional[Dict[str, Any]] = Field(
+        description="Optional. When available, logprobs are used to compute Uncertainty."
+    )
+    created_at: datetime = Field(
+        ..., description='timestamp constructed in "%Y-%m-%dT%H:%M:%S" format'
+    )
+    tags: List[str]
+    user_metadata: Dict[str, Any]
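Why the follow-up patch changes these fields: in pydantic v2, an `Optional[...]` annotation with a bare `Field(description=...)` and no default is still a *required* field, and `created_at: datetime` does not match the string format its own description promises. A minimal, self-contained sketch of the first pitfall (the `Sketch` model is illustrative, not part of the patch):

```python
from typing import Any, Dict, Optional

from pydantic import BaseModel, Field, ValidationError


class Sketch(BaseModel):
    # Same pattern as output_logprobs above: Optional, but no default given
    output_logprobs: Optional[Dict[str, Any]] = Field(
        description="Optional. When available, logprobs are used to compute Uncertainty."
    )


try:
    Sketch()  # pydantic v2 treats the field as required and raises
except ValidationError as e:
    print(e.errors()[0]["type"])  # -> "missing"
```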
From 7bab3810299029c1c009db4be55fb0595de78845 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Fri, 5 Jul 2024 16:25:08 -0700
Subject: [PATCH 2/4] feat - galileo logging

---
 litellm/integrations/galileo.py       | 137 ++++++++++++++++++++++++++
 litellm/integrations/types/galileo.py |   9 +-
 2 files changed, 142 insertions(+), 4 deletions(-)
 create mode 100644 litellm/integrations/galileo.py

diff --git a/litellm/integrations/galileo.py b/litellm/integrations/galileo.py
new file mode 100644
index 000000000000..ebf79986728f
--- /dev/null
+++ b/litellm/integrations/galileo.py
@@ -0,0 +1,137 @@
+import os
+from datetime import datetime
+from typing import List
+
+import litellm
+from litellm._logging import verbose_logger
+from litellm.integrations.custom_logger import CustomLogger
+from litellm.integrations.types.galileo import LLMResponse
+from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
+
+
+class GalileoObserve(CustomLogger):
+    def __init__(self) -> None:
+        self.in_memory_records: List[dict] = []
+        self.batch_size = 1
+        self.base_url = os.getenv("GALILEO_BASE_URL", None)
+        self.project_id = os.getenv("GALILEO_PROJECT_ID", None)
+        self.headers = None
+        self.async_httpx_handler = AsyncHTTPHandler()
+
+    async def set_galileo_headers(self):
+        # following https://docs.rungalileo.io/galileo/gen-ai-studio-products/galileo-observe/how-to/logging-data-via-restful-apis#logging-your-records
+
+        headers = {
+            "accept": "application/json",
+            "Content-Type": "application/x-www-form-urlencoded",
+        }
+        galileo_login_response = await self.async_httpx_handler.post(
+            url=f"{self.base_url}/login",
+            headers=headers,
+            data={
+                "username": os.getenv("GALILEO_USERNAME"),
+                "password": os.getenv("GALILEO_PASSWORD"),
+            },
+        )
+
+        access_token = galileo_login_response.json()["access_token"]
+
+        self.headers = {
+            "accept": "application/json",
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {access_token}",
+        }
+
+    def get_output_str_from_response(self, response_obj, kwargs):
+        output = None
+        if response_obj is not None and (
+            kwargs.get("call_type", None) == "embedding"
+            or isinstance(response_obj, litellm.EmbeddingResponse)
+        ):
+            output = None
+        elif response_obj is not None and isinstance(
+            response_obj, litellm.ModelResponse
+        ):
+            output = response_obj["choices"][0]["message"].json()
+        elif response_obj is not None and isinstance(
+            response_obj, litellm.TextCompletionResponse
+        ):
+            output = response_obj.choices[0].text
+        elif response_obj is not None and isinstance(
+            response_obj, litellm.ImageResponse
+        ):
+            output = response_obj["data"]
+
+        return output
+
+    async def async_log_success_event(
+        self,
+        kwargs,
+        start_time,
+        end_time,
+        response_obj,
+    ):
+        verbose_logger.debug("On Async Success")
+
+        if self.headers is None:
+            # lazily authenticate on the first logged event
+            await self.set_galileo_headers()
+
+        _latency_ms = int((end_time - start_time).total_seconds() * 1000)
+        _call_type = kwargs.get("call_type", "litellm")
+        input_text = litellm.utils.get_formatted_prompt(
+            data=kwargs, call_type=_call_type
+        )
+
+        _usage = response_obj.get("usage", {}) or {}
+        num_input_tokens = _usage.get("prompt_tokens", 0)
+        num_output_tokens = _usage.get("completion_tokens", 0)
+
+        output_text = self.get_output_str_from_response(
+            response_obj=response_obj, kwargs=kwargs
+        )
+
+        request_record = LLMResponse(
+            latency_ms=_latency_ms,
+            status_code=200,
+            input_text=input_text,
+            output_text=output_text,
+            node_type=_call_type,
+            model=kwargs.get("model", "-"),
+            num_input_tokens=num_input_tokens,
+            num_output_tokens=num_output_tokens,
+            created_at=start_time.strftime(
+                "%Y-%m-%dT%H:%M:%S"
+            ),  # timestamp str constructed in "%Y-%m-%dT%H:%M:%S" format
+        )
+
+        # dump to dict
+        request_dict = request_record.model_dump()
+        self.in_memory_records.append(request_dict)
+
+        if len(self.in_memory_records) >= self.batch_size:
+            await self.flush_in_memory_records()
+
+    async def flush_in_memory_records(self):
+        verbose_logger.debug("flushing in memory records")
+        response = await self.async_httpx_handler.post(
+            url=f"{self.base_url}/projects/{self.project_id}/observe/ingest",
+            headers=self.headers,
+            json={"records": self.in_memory_records},
+        )
+
+        if response.status_code == 200:
+            verbose_logger.debug(
+                "Galileo Logger: successfully flushed in memory records"
+            )
+            self.in_memory_records = []
+        else:
+            verbose_logger.debug("Galileo Logger: failed to flush in memory records")
+            verbose_logger.debug(
+                "Galileo Logger error=%s, status code=%s",
+                response.text,
+                response.status_code,
+            )
+
+    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
+        verbose_logger.debug("On Async Failure")
diff --git a/litellm/integrations/types/galileo.py b/litellm/integrations/types/galileo.py
index cbdf53da5637..54d3072af47f 100644
--- a/litellm/integrations/types/galileo.py
+++ b/litellm/integrations/types/galileo.py
@@ -15,10 +15,11 @@ class LLMResponse(BaseModel):
     num_input_tokens: int
     num_output_tokens: int
     output_logprobs: Optional[Dict[str, Any]] = Field(
-        description="Optional. When available, logprobs are used to compute Uncertainty."
+        default=None,
+        description="Optional. When available, logprobs are used to compute Uncertainty.",
     )
-    created_at: datetime = Field(
+    created_at: str = Field(
         ..., description='timestamp constructed in "%Y-%m-%dT%H:%M:%S" format'
     )
-    tags: List[str]
-    user_metadata: Dict[str, Any]
+    tags: Optional[List[str]] = None
+    user_metadata: Optional[Dict[str, Any]] = None
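For orientation, this is the shape of the record each success event appends to `in_memory_records` — a sketch using the patched model (field values are illustrative; assumes the patch above is applied so the import resolves):

```python
from datetime import datetime

from litellm.integrations.types.galileo import LLMResponse

record = LLMResponse(
    latency_ms=420,
    status_code=200,
    input_text="what llm are you",
    output_text="I am a language model served via LiteLLM.",
    node_type="acompletion",
    model="gpt-3.5-turbo",
    num_input_tokens=5,
    num_output_tokens=10,
    created_at=datetime.now().strftime("%Y-%m-%dT%H:%M:%S"),
)

# This dict is what gets POSTed to /projects/{project_id}/observe/ingest
# inside a {"records": [...]} envelope once batch_size records accumulate.
print(record.model_dump())
```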
From 9944e8d1a5b59a1fca7b815eb68ad85accd79c16 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Fri, 5 Jul 2024 19:05:08 -0700
Subject: [PATCH 3/4] docs - galileo logging

---
 docs/my-website/docs/proxy/logging.md | 64 +++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)

diff --git a/docs/my-website/docs/proxy/logging.md b/docs/my-website/docs/proxy/logging.md
index 83bf8ee95da8..1919ea4f2fcf 100644
--- a/docs/my-website/docs/proxy/logging.md
+++ b/docs/my-website/docs/proxy/logging.md
@@ -7,10 +7,13 @@ import TabItem from '@theme/TabItem';
 
 Log Proxy Input, Output, Exceptions using Langfuse, OpenTelemetry, Custom Callbacks, DataDog, DynamoDB, s3 Bucket
 
+## Table of Contents
+
 - [Logging to Langfuse](#logging-proxy-inputoutput---langfuse)
 - [Logging with OpenTelemetry (OpenTelemetry)](#logging-proxy-inputoutput-in-opentelemetry-format)
 - [Async Custom Callbacks](#custom-callback-class-async)
 - [Async Custom Callback APIs](#custom-callback-apis-async)
+- [Logging to Galileo](#beta-logging-llm-io-to-galileo)
 - [Logging to OpenMeter](#logging-proxy-inputoutput---langfuse)
 - [Logging to s3 Buckets](#logging-proxy-inputoutput---s3-buckets)
 - [Logging to DataDog](#logging-proxy-inputoutput---datadog)
@@ -1056,6 +1059,67 @@ litellm_settings:
 
 Start the LiteLLM Proxy and make a test request to verify the logs reached your callback API
 
+
+## [Beta] Logging LLM I/O to Galileo
+
+Log LLM I/O on [www.rungalileo.io](https://www.rungalileo.io/)
+
+:::info
+
+Beta Integration
+
+:::
+
+**Required Env Variables**
+
+```bash
+export GALILEO_BASE_URL=""  # For most users, this is the console URL with 'console' replaced by 'api' (e.g. http://www.console.galileo.myenterprise.com -> http://www.api.galileo.myenterprise.com)
+export GALILEO_PROJECT_ID=""
+export GALILEO_USERNAME=""
+export GALILEO_PASSWORD=""
+```
+
+### Quick Start
+
+1. Add to Config.yaml
+```yaml
+model_list:
+  - litellm_params:
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+      api_key: my-fake-key
+      model: openai/my-fake-model
+    model_name: fake-openai-endpoint
+
+litellm_settings:
+  success_callback: ["galileo"] # 👈 KEY CHANGE
+```
+
+2. Start Proxy
+
+```
+litellm --config /path/to/config.yaml
+```
+
+3. Test it!
+
+```bash
+curl --location 'http://0.0.0.0:4000/chat/completions' \
+--header 'Content-Type: application/json' \
+--data '{
+    "model": "fake-openai-endpoint",
+    "messages": [
+        {
+            "role": "user",
+            "content": "what llm are you"
+        }
+    ]
+}
+'
+```
+
+
+🎉 That's it! Expect to see your logs on your Galileo dashboard.
+
 ## Logging Proxy Cost + Usage - OpenMeter
 
 Bill customers according to their LLM API usage with [OpenMeter](../observability/openmeter.md)
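An equivalent check for step 3 using the OpenAI Python SDK pointed at the proxy — a sketch that assumes the proxy from step 2 is listening on `http://0.0.0.0:4000` and that no master key is enforced, so the placeholder `api_key` is accepted:

```python
import openai

client = openai.OpenAI(
    api_key="sk-anything",  # placeholder; the example proxy enforces no key
    base_url="http://0.0.0.0:4000",
)

response = client.chat.completions.create(
    model="fake-openai-endpoint",
    messages=[{"role": "user", "content": "what llm are you"}],
)
print(response.choices[0].message.content)
```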
From e9319085f8d56bc747e251d7f05cd56de620e1fe Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Fri, 5 Jul 2024 19:17:09 -0700
Subject: [PATCH 4/4] feat - init galileo logger

---
 litellm/litellm_core_utils/litellm_logging.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py
index 4edbce5e15b5..44535cf4a9c1 100644
--- a/litellm/litellm_core_utils/litellm_logging.py
+++ b/litellm/litellm_core_utils/litellm_logging.py
@@ -56,6 +56,7 @@
 from ..integrations.custom_logger import CustomLogger
 from ..integrations.datadog import DataDogLogger
 from ..integrations.dynamodb import DyanmoDBLogger
+from ..integrations.galileo import GalileoObserve
 from ..integrations.greenscale import GreenscaleLogger
 from ..integrations.helicone import HeliconeLogger
 from ..integrations.lago import LagoLogger
@@ -1929,6 +1930,15 @@ def _init_custom_logger_compatible_class(
         _openmeter_logger = OpenMeterLogger()
         _in_memory_loggers.append(_openmeter_logger)
         return _openmeter_logger  # type: ignore
+
+    elif logging_integration == "galileo":
+        for callback in _in_memory_loggers:
+            if isinstance(callback, GalileoObserve):
+                return callback  # type: ignore
+
+        galileo_logger = GalileoObserve()
+        _in_memory_loggers.append(galileo_logger)
+        return galileo_logger  # type: ignore
     elif logging_integration == "logfire":
         if "LOGFIRE_TOKEN" not in os.environ:
             raise ValueError("LOGFIRE_TOKEN not found in environment variables")
@@ -1985,6 +1995,10 @@ def get_custom_logger_compatible_class(
         for callback in _in_memory_loggers:
             if isinstance(callback, OpenMeterLogger):
                 return callback
+    elif logging_integration == "galileo":
+        for callback in _in_memory_loggers:
+            if isinstance(callback, GalileoObserve):
+                return callback
     elif logging_integration == "logfire":
         if "LOGFIRE_TOKEN" not in os.environ:
             raise ValueError("LOGFIRE_TOKEN not found in environment variables")
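With this wiring, the `"galileo"` string resolves to a single shared `GalileoObserve` instance: `_init_custom_logger_compatible_class` scans `_in_memory_loggers` and reuses an existing logger before creating a new one. A sketch of the same smoke test driven from the LiteLLM Python SDK rather than the proxy (assumes the four `GALILEO_*` env variables above are exported; the model name is illustrative, and exact callback routing depends on litellm's callback registry):

```python
import litellm

# "galileo" is resolved via _init_custom_logger_compatible_class above,
# which reuses one GalileoObserve instance across all calls.
litellm.success_callback = ["galileo"]

response = litellm.completion(
    model="gpt-3.5-turbo",  # illustrative; any configured model works
    messages=[{"role": "user", "content": "what llm are you"}],
)
print(response.choices[0].message.content)
```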