Skip to content

Commit

Permalink
Add support for writing OTel GenAI semantic convention client-side me…
Browse files Browse the repository at this point in the history
…trics.
  • Loading branch information
bryanatkinson committed Dec 19, 2024
1 parent 5b9a9ff commit 0901d71
Show file tree
Hide file tree
Showing 8 changed files with 205 additions and 44 deletions.
5 changes: 4 additions & 1 deletion js/ai/src/generate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import { GenerateUtilParamSchema, generateHelper } from './generate/action.js';
import { GenerateResponseChunk } from './generate/chunk.js';
import { GenerateResponse } from './generate/response.js';
import { Message } from './message.js';
import { writeMetrics } from './metrics.js';
import {
GenerateRequest,
GenerationCommonConfigSchema,
Expand Down Expand Up @@ -300,10 +301,12 @@ export async function generate<
...resolvedOptions,
tools,
});
return new GenerateResponse<O>(response, {
const generateResponse = new GenerateResponse<O>(response, {
request: response.request ?? request,
parser: resolvedFormat?.handler(request.output?.schema).parseMessage,
});
writeMetrics(generateResponse);
return generateResponse;
}
);
}
Expand Down
6 changes: 6 additions & 0 deletions js/ai/src/generate/response.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import {
GenerateRequest,
GenerateResponseData,
GenerationUsage,
GenerateClientTelemetry,
MessageData,
ModelResponseData,
ToolRequestPart,
Expand All @@ -48,6 +49,8 @@ export class GenerateResponse<O = unknown> implements ModelResponseData {
request?: GenerateRequest;
/** The parser for output parsing of this response. */
parser?: MessageParser<O>;
/** Client telemetry information associated with this request */
clientTelemetry?: GenerateClientTelemetry;

constructor(
response: GenerateResponseData,
Expand All @@ -71,6 +74,7 @@ export class GenerateResponse<O = unknown> implements ModelResponseData {
this.usage = response.usage || {};
this.custom = response.custom || {};
this.request = options?.request;
this.clientTelemetry = response.clientTelemetry;
}

private get assertMessage(): Message<O> {
Expand Down Expand Up @@ -196,9 +200,11 @@ export class GenerateResponse<O = unknown> implements ModelResponseData {
usage: this.usage,
custom: (this.custom as { toJSON?: () => any }).toJSON?.() || this.custom,
request: this.request,
clientTelemetry: this.clientTelemetry,
};
if (!out.finishMessage) delete out.finishMessage;
if (!out.request) delete out.request;
if (!out.clientTelemetry) delete out.clientTelemetry;
return out;
}
}
95 changes: 95 additions & 0 deletions js/ai/src/metrics.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
/**
* Copyright 2024 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import { Histogram, Meter, metrics, ValueType } from '@opentelemetry/api';
import { GenerateResponse } from './generate/response.js';

/** Factory that builds a concrete metric instrument from an OTel {Meter}. */
type MetricCreateFn<T> = (meter: Meter) => T;
/** Name under which Genkit registers its OpenTelemetry meter. */
export const METER_NAME = 'genkit';

/**
 * Lazily-initialized wrapper around an OpenTelemetry metric instrument.
 *
 * The OpenTelemetry {MeterProvider} can only be accessed through the metrics
 * API after the NodeSDK library has been initialized. Deferring instrument
 * creation until the metric is first used avoids racing that initialization.
 */
class Metric<T> {
  readonly createFn: MetricCreateFn<T>;
  readonly meterName: string;
  metric?: T;

  constructor(createFn: MetricCreateFn<T>, meterName: string = METER_NAME) {
    this.createFn = createFn;
    this.meterName = meterName;
  }

  /** Returns the underlying instrument, creating it on first access. */
  get(): T {
    if (this.metric === undefined) {
      const meter = metrics.getMeterProvider().getMeter(this.meterName);
      this.metric = this.createFn(meter);
    }
    return this.metric;
  }
}

/**
 * Wrapper for an OpenTelemetry Histogram.
 *
 * By using this wrapper, we defer initialization of the histogram until it is
 * needed, which ensures that the OpenTelemetry SDK has been initialized before
 * the metric has been defined.
 */
export class MetricHistogram extends Metric<Histogram> {
  // TODO(review): options is the OTel MetricOptions shape; consider typing it
  // explicitly instead of `any`.
  constructor(name: string, options: any) {
    super((meter) => meter.createHistogram(name, options));
  }

  /**
   * Records a single measurement, skipping the write when no value is
   * provided. Uses an explicit undefined check so that a legitimate zero
   * measurement is still recorded — the previous truthiness check (`if (val)`)
   * silently dropped 0.
   */
  record(val?: number, opts?: any) {
    if (val !== undefined) {
      this.get().record(val, opts);
    }
  }
}

// Histogram for client-side token usage, named per the OTel GenAI semantic
// conventions (gen_ai.client.token.usage). Ticked once per token type
// ('input' / 'output') for each generate call in writeMetrics below.
const tokenUsage = new MetricHistogram('gen_ai.client.token.usage', {
  description: 'Usage of GenAI tokens.',
  valueType: ValueType.INT,
  unit: 'token',
});

/**
 * Emits OTel GenAI client metrics for a completed generate call, attaching
 * the telemetry attributes supplied by the model plugin (if any). Records
 * one token-usage data point per token type ('input' and 'output').
 */
export function writeMetrics(resp: GenerateResponse): void {
  const telemetry = resp.clientTelemetry;
  const commonDimensions = {
    'gen_ai.client.framework': 'genkit',
    'gen_ai.operation.name': telemetry?.operationName,
    'gen_ai.system': telemetry?.system,
    'gen_ai.request.model': telemetry?.requestModel,
    'server.port': telemetry?.serverPort,
    'gen_ai.response.model': telemetry?.responseModel,
    'server.address': telemetry?.serverAddress,
  };
  const recordTokens = (type: string, count?: number) => {
    tokenUsage.record(count || 0, {
      ...commonDimensions,
      'gen_ai.token.type': type,
    });
  };
  recordTokens('input', resp.usage?.inputTokens);
  recordTokens('output', resp.usage?.outputTokens);
}
17 changes: 17 additions & 0 deletions js/ai/src/model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,22 @@ export const CandidateErrorSchema = z.object({
/** @deprecated All responses now return a single candidate. Only the first candidate will be used if supplied. */
export type CandidateError = z.infer<typeof CandidateErrorSchema>;

/**
* Additional telemetry information associated with a Generate request.
* See
* https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/#generative-ai-client-metrics
* for details.
*/
export const GenerateClientTelemetrySchema = z.object({
  // GenAI provider identifier, e.g. 'google_ai' or 'vertex_ai' (gen_ai.system).
  system: z.string().optional(),
  // Model name the caller requested (gen_ai.request.model).
  requestModel: z.string().optional(),
  // Model name the provider reports having served (gen_ai.response.model).
  responseModel: z.string().optional(),
  // Operation performed, e.g. 'chat' or 'image_generation' (gen_ai.operation.name).
  operationName: z.string().optional(),
  // Port of the model server endpoint (server.port).
  serverPort: z.number().optional(),
  // Host of the model server endpoint (server.address).
  serverAddress: z.string().optional(),
});
export type GenerateClientTelemetry = z.infer<typeof GenerateClientTelemetrySchema>;

/**
* Zod schema of a model response.
*/
Expand All @@ -375,6 +391,7 @@ export const ModelResponseSchema = z.object({
custom: z.unknown(),
raw: z.unknown(),
request: GenerateRequestSchema.optional(),
clientTelemetry: GenerateClientTelemetrySchema.optional(),
});

/**
Expand Down
52 changes: 33 additions & 19 deletions js/plugins/googleai/src/gemini.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import {
StartChatParams,
Tool,
ToolConfig,
EnhancedGenerateContentResponse,
} from '@google/generative-ai';
import {
Genkit,
Expand All @@ -56,6 +57,8 @@ import {
ToolDefinitionSchema,
ToolRequestPart,
ToolResponsePart,
GenerateRequestData,
GenerateResponseData,
} from 'genkit/model';
import {
downloadRequestMedia,
Expand Down Expand Up @@ -793,29 +796,12 @@ export function defineGoogleAIModel(
message: 'No valid candidates returned.',
});
}
return {
candidates: candidates.map(fromJSONModeScopedGeminiCandidate) || [],
custom: response,
};
return toModelResponseData(modelVersion, request, response, fromJSONModeScopedGeminiCandidate);
} else {
const result = await genModel
.startChat(updatedChatRequest)
.sendMessage(msg.parts, options);
if (!result.response.candidates?.length)
throw new Error('No valid candidates returned.');
const responseCandidates =
result.response.candidates.map(fromJSONModeScopedGeminiCandidate) ||
[];
return {
candidates: responseCandidates,
custom: result.response,
usage: {
...getBasicUsageStats(request.messages, responseCandidates),
inputTokens: result.response.usageMetadata?.promptTokenCount,
outputTokens: result.response.usageMetadata?.candidatesTokenCount,
totalTokens: result.response.usageMetadata?.totalTokenCount,
},
};
return toModelResponseData(modelVersion, request, result.response, fromJSONModeScopedGeminiCandidate);
}
}
);
Expand Down Expand Up @@ -844,3 +830,31 @@ function toGeminiFunctionMode(
throw new Error(`unsupported function calling mode: ${genkitMode}`);
}
}

/**
 * Builds a Genkit model response — including the OTel GenAI client telemetry
 * attributes — from a raw Google AI Gemini response.
 *
 * @param modelName the model version the request was sent to.
 * @param request the original Genkit generate request.
 * @param response the raw SDK response.
 * @param fromJSONModeScopedGeminiCandidate converter from an SDK candidate to
 *   Genkit CandidateData (JSON-mode aware).
 * @throws Error when the response contains no candidates.
 */
function toModelResponseData(
  modelName: string,
  request: GenerateRequestData,
  response: EnhancedGenerateContentResponse,
  // Derive the candidate element type from the response type rather than
  // naming it — the original signature `(GeminiCandidate) => CandidateData`
  // accidentally declared a parameter *named* GeminiCandidate of implicit any.
  fromJSONModeScopedGeminiCandidate: (
    candidate: NonNullable<EnhancedGenerateContentResponse['candidates']>[number]
  ) => CandidateData
): GenerateResponseData {
  if (!response.candidates?.length)
    throw new Error('No valid candidates returned.');
  // The guard above ensures candidates is present and non-empty, so no
  // optional chaining or `|| []` fallback is needed here.
  const candidates = response.candidates.map(fromJSONModeScopedGeminiCandidate);
  return {
    candidates,
    custom: response,
    usage: {
      ...getBasicUsageStats(request.messages, candidates),
      inputTokens: response.usageMetadata?.promptTokenCount,
      outputTokens: response.usageMetadata?.candidatesTokenCount,
      totalTokens: response.usageMetadata?.totalTokenCount,
    },
    clientTelemetry: {
      system: 'google_ai',
      requestModel: modelName,
      // modelVersion is not declared on the SDK response type; accessed
      // dynamically — TODO confirm against the @google/generative-ai version in use.
      responseModel: response['modelVersion'],
      operationName: 'chat',
    },
  };
}
57 changes: 35 additions & 22 deletions js/plugins/vertexai/src/gemini.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ import {
ModelAction,
ModelMiddleware,
ModelReference,
GenerateRequestData,
GenerateResponseData,
Part,
ToolDefinitionSchema,
getBasicUsageStats,
Expand Down Expand Up @@ -763,13 +765,9 @@ export function defineGeminiModel(
if (!response.candidates?.length) {
throw new Error('No valid candidates returned.');
}

return {
candidates: response.candidates.map((c) =>
fromGeminiCandidate(c, jsonMode)
),
custom: response,
};
const candidates =
response.candidates.map((c) => fromGeminiCandidate(c, jsonMode));
return toModelResponseData(modelVersion, chatRequest, request, response, candidates);
} else {
const result = await genModel
.startChat(updatedChatRequest)
Expand All @@ -778,21 +776,9 @@ export function defineGeminiModel(
if (!result?.response.candidates?.length) {
throw new Error('No valid candidates returned.');
}

const responseCandidates = result.response.candidates.map((c) =>
fromGeminiCandidate(c, jsonMode)
);

return {
candidates: responseCandidates,
custom: result.response,
usage: {
...getBasicUsageStats(request.messages, responseCandidates),
inputTokens: result.response.usageMetadata?.promptTokenCount,
outputTokens: result.response.usageMetadata?.candidatesTokenCount,
totalTokens: result.response.usageMetadata?.totalTokenCount,
},
};
const candidates =
result.response.candidates.map((c) => fromGeminiCandidate(c, jsonMode)) || [];
return toModelResponseData(modelVersion, chatRequest, request, result.response, candidates);
}
}
);
Expand Down Expand Up @@ -821,3 +807,30 @@ function toGeminiFunctionMode(
throw new Error(`unsupported function calling mode: ${genkitMode}`);
}
}

/**
 * Builds a Genkit model response — including the OTel GenAI client telemetry
 * attributes — from a raw Vertex AI Gemini response.
 *
 * @param modelName the model version the request was sent to.
 * @param chatRequest the chat parameters, used to report the server endpoint.
 * @param requestData the original Genkit generate request.
 * @param response the raw SDK response.
 * @param candidates the already-converted Genkit candidates.
 */
function toModelResponseData(
  modelName: string,
  chatRequest: StartChatParams,
  requestData: GenerateRequestData,
  response: GenerateContentResponse,
  candidates: CandidateData[]
): GenerateResponseData {
  return {
    candidates,
    custom: response,
    usage: {
      ...getBasicUsageStats(requestData.messages, candidates),
      inputTokens: response.usageMetadata?.promptTokenCount,
      outputTokens: response.usageMetadata?.candidatesTokenCount,
      totalTokens: response.usageMetadata?.totalTokenCount,
    },
    clientTelemetry: {
      system: 'vertex_ai',
      requestModel: modelName,
      // modelVersion is not declared on the SDK response type; accessed
      // dynamically — TODO confirm against the @google-cloud/vertexai version in use.
      responseModel: response['modelVersion'],
      operationName: 'chat',
      serverAddress: chatRequest.apiEndpoint || undefined,
      // Vertex AI endpoints are served over HTTPS, so report port 443;
      // the previous value of 80 was incorrect for a TLS endpoint.
      serverPort: chatRequest.apiEndpoint ? 443 : undefined,
    },
  };
}
15 changes: 14 additions & 1 deletion js/plugins/vertexai/src/imagen.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import {
} from 'genkit/model';
import { GoogleAuth } from 'google-auth-library';
import { PluginOptions } from './common/types.js';
import { PredictClient, predictModel } from './predict.js';
import { PredictClient, predictModel, endpoint } from './predict.js';

const ImagenConfigSchema = GenerationCommonConfigSchema.extend({
/** Language of the prompt text. */
Expand Down Expand Up @@ -231,6 +231,7 @@ export function imagenModel(
request: GenerateRequest<typeof ImagenConfigSchema>
): PredictClient<ImagenInstance, ImagenPrediction, ImagenParameters> => {
const requestLocation = request.config?.location || options.location;
const requestModel = request.config?.version || model.version || name;
if (!predictClients[requestLocation]) {
predictClients[requestLocation] = predictModel<
ImagenInstance,
Expand Down Expand Up @@ -303,6 +304,18 @@ export function imagenModel(
custom: { generations: candidates.length },
},
custom: response,
clientTelemetry: {
system: 'vertex_ai',
requestModel: request.config?.version || model.version || name,
responseModel: response['modelVersion'],
operationName: 'image_generation',
serverAddress: endpoint({
projectId: options.projectId || '',
location: request.config?.location || options.location,
model: request.config?.version || model.version || name
}),
serverPort: 80,
}
};
}
);
Expand Down
2 changes: 1 addition & 1 deletion js/plugins/vertexai/src/predict.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import { GENKIT_CLIENT_HEADER } from 'genkit';
import { GoogleAuth } from 'google-auth-library';
import { PluginOptions } from './common/types.js';

function endpoint(options: {
export function endpoint(options: {
projectId: string;
location: string;
model: string;
Expand Down

0 comments on commit 0901d71

Please sign in to comment.