diff --git a/src/api/logging/ApiInferenceLogger.ts b/src/api/logging/ApiInferenceLogger.ts new file mode 100644 index 00000000000..725dc6480f8 --- /dev/null +++ b/src/api/logging/ApiInferenceLogger.ts @@ -0,0 +1,359 @@ +/** + * Lightweight logger for API inference requests/responses. + * + * This logger is designed to capture raw inference inputs/outputs across providers + * for debugging purposes. It emits structured objects to a configurable sink. + * + * For streaming requests, only the final assembled response is logged (not individual chunks). + * + * Enable via environment variable: process.env.ROO_CODE_API_LOGGING === "true" + */ + +export interface ApiInferenceLoggerConfig { + enabled: boolean + sink: (...args: unknown[]) => void +} + +export interface ApiInferenceContext { + provider: string + operation: string + model?: string + taskId?: string + requestId?: string +} + +export interface ApiInferenceHandle { + success: (responsePayload: unknown) => void + error: (errorPayload: unknown) => void +} + +/** + * Configuration for payload size limiting to avoid freezing the Output Channel. + */ +const PAYLOAD_LIMITS = { + /** Maximum string length before truncation */ + MAX_STRING_LENGTH: 10_000, + /** Maximum array entries to log */ + MAX_ARRAY_LENGTH: 200, + /** Maximum object keys to log */ + MAX_OBJECT_KEYS: 200, +} + +/** + * Regex pattern for detecting base64 image data URLs. + */ +const BASE64_IMAGE_PATTERN = /^data:image\/[^;]+;base64,/ + +/** + * Secret field patterns to redact in logged payloads. + * Case-insensitive matching is applied. + * Note: Patterns are designed to avoid false positives (e.g., "inputTokens" should not be redacted). + */ +const SECRET_PATTERNS = [ + "authorization", + "apikey", + "api_key", + "x-api-key", + "access_token", + "accesstoken", + "bearer", + "secret", + "password", + "credential", +] + +/** + * Patterns that indicate a field is NOT a secret (allowlist). + * These are checked before secret patterns to prevent false positives. + */ +const NON_SECRET_PATTERNS = ["inputtokens", "outputtokens", "cachetokens", "reasoningtokens", "totaltokens"] + +/** + * Check if a key name looks like a secret field. + */ +function isSecretKey(key: string): boolean { + const lowerKey = key.toLowerCase() + // Check allowlist first to avoid false positives + if (NON_SECRET_PATTERNS.some((pattern) => lowerKey.includes(pattern))) { + return false + } + return SECRET_PATTERNS.some((pattern) => lowerKey.includes(pattern)) +} + +/** + * Truncate a string if it exceeds the maximum length. + * Also replaces base64 image data with a placeholder. + */ +function sanitizeString(str: string): string { + // Check for base64 image data URLs first + if (BASE64_IMAGE_PATTERN.test(str)) { + return `[ImageData len=${str.length}]` + } + + // Truncate long strings + if (str.length > PAYLOAD_LIMITS.MAX_STRING_LENGTH) { + return `[Truncated len=${str.length}]` + } + + return str +} + +/** + * Recursively sanitize and redact secrets from an object. + * Applies size limiting to prevent Output Channel from freezing: + * - Strings longer than MAX_STRING_LENGTH are truncated + * - Arrays longer than MAX_ARRAY_LENGTH are capped + * - Objects with more than MAX_OBJECT_KEYS are capped + * - Base64 image data URLs are replaced with placeholders + * - Secret fields are redacted + * Returns a sanitized copy of the object. 
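+ *
+ * Illustrative example (values are placeholders, not real credentials):
+ * @example
+ * sanitizePayload({ apiKey: "sk-secret", prompt: "hi", image: "data:image/png;base64,iVBOR..." })
+ * // => { apiKey: "[REDACTED]", prompt: "hi", image: "[ImageData len=<original string length>]" }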
+ */ +function sanitizePayload(obj: unknown, visited = new WeakSet()): unknown { + if (obj === null || obj === undefined) { + return obj + } + + // Handle strings + if (typeof obj === "string") { + return sanitizeString(obj) + } + + // Handle other primitives + if (typeof obj !== "object") { + return obj + } + + // Prevent infinite recursion on circular references + if (visited.has(obj as object)) { + return "[Circular Reference]" + } + visited.add(obj as object) + + // Handle arrays with length limiting + if (Array.isArray(obj)) { + const maxLen = PAYLOAD_LIMITS.MAX_ARRAY_LENGTH + if (obj.length > maxLen) { + const truncated = obj.slice(0, maxLen).map((item) => sanitizePayload(item, visited)) + truncated.push(`[...${obj.length - maxLen} more items]`) + return truncated + } + return obj.map((item) => sanitizePayload(item, visited)) + } + + // Handle objects with key limiting + const entries = Object.entries(obj as Record<string, unknown>) + const maxKeys = PAYLOAD_LIMITS.MAX_OBJECT_KEYS + const result: Record<string, unknown> = {} + let keyCount = 0 + + for (const [key, value] of entries) { + if (keyCount >= maxKeys) { + result["[...]"] = `${entries.length - maxKeys} more keys omitted` + break + } + + if (isSecretKey(key)) { + result[key] = "[REDACTED]" + } else if (typeof value === "string") { + result[key] = sanitizeString(value) + } else if (typeof value === "object" && value !== null) { + result[key] = sanitizePayload(value, visited) + } else { + result[key] = value + } + + keyCount++ + } + + return result +} + +/** + * Generate a unique request ID. + */ +function generateRequestId(): string { + return `req_${Date.now()}_${Math.random().toString(36).slice(2, 11)}` +} + +/** + * Singleton logger class for API inference logging. + */ +class ApiInferenceLoggerSingleton { + private enabled = false + private sink: ((...args: unknown[]) => void) | null = null + + /** + * Configure the logger with enabled state and output sink. + * Should be called once during extension activation. + */ + configure(config: ApiInferenceLoggerConfig): void { + this.enabled = config.enabled + this.sink = config.enabled ? config.sink : null + } + + /** + * Check if logging is currently enabled. + */ + isEnabled(): boolean { + return this.enabled && this.sink !== null + } + + /** + * Start logging an API inference request. + * Returns a handle to log the response or error. + * + * @param context - Context information about the request + * @param requestPayload - The request payload to log + * @returns A handle with success() and error() methods + */ + start(context: ApiInferenceContext, requestPayload: unknown): ApiInferenceHandle { + const requestId = context.requestId ?? generateRequestId() + const startTime = Date.now() + const startTimestamp = new Date().toISOString() + + // Log the request + if (this.isEnabled()) { + this.logRequest({ + ...context, + requestId, + timestamp: startTimestamp, + payload: requestPayload, + }) + } + + return { + success: (responsePayload: unknown) => { + if (this.isEnabled()) { + const endTime = Date.now() + this.logResponse({ + ...context, + requestId, + timestamp: new Date().toISOString(), + durationMs: endTime - startTime, + payload: responsePayload, + }) + } + }, + error: (errorPayload: unknown) => { + if (this.isEnabled()) { + const endTime = Date.now() + this.logError({ + ...context, + requestId, + timestamp: new Date().toISOString(), + durationMs: endTime - startTime, + error: errorPayload, + }) + } + }, + } + } + + /** + * Log a request with stable tag format.
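+ *
+ * Emits a single sink call shaped roughly like (values illustrative):
+ * @example
+ * sink("[API][request]", { provider: "Anthropic", operation: "createMessage", model: "claude-sonnet-4-5", requestId: "req_...", timestamp: "...", payload: { ... } })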
+ */ + private logRequest(data: { + provider: string + operation: string + model?: string + taskId?: string + requestId: string + timestamp: string + payload: unknown + }): void { + if (!this.sink) return + + try { + this.sink("[API][request]", { + provider: data.provider, + operation: data.operation, + model: data.model, + taskId: data.taskId, + requestId: data.requestId, + timestamp: data.timestamp, + payload: sanitizePayload(data.payload), + }) + } catch { + // Silently ignore logging errors to avoid breaking the application + } + } + + /** + * Log a successful response with stable tag format. + */ + private logResponse(data: { + provider: string + operation: string + model?: string + taskId?: string + requestId: string + timestamp: string + durationMs: number + payload: unknown + }): void { + if (!this.sink) return + + try { + this.sink("[API][response]", { + provider: data.provider, + operation: data.operation, + model: data.model, + taskId: data.taskId, + requestId: data.requestId, + timestamp: data.timestamp, + durationMs: data.durationMs, + payload: sanitizePayload(data.payload), + }) + } catch { + // Silently ignore logging errors to avoid breaking the application + } + } + + /** + * Log an error response with stable tag format. + */ + private logError(data: { + provider: string + operation: string + model?: string + taskId?: string + requestId: string + timestamp: string + durationMs: number + error: unknown + }): void { + if (!this.sink) return + + try { + // Handle Error objects specially + let errorData: unknown + if (data.error instanceof Error) { + errorData = { + name: data.error.name, + message: data.error.message, + stack: data.error.stack, + } + } else { + errorData = sanitizePayload(data.error) + } + + this.sink("[API][error]", { + provider: data.provider, + operation: data.operation, + model: data.model, + taskId: data.taskId, + requestId: data.requestId, + timestamp: data.timestamp, + durationMs: data.durationMs, + error: errorData, + }) + } catch { + // Silently ignore logging errors to avoid breaking the application + } + } +} + +/** + * Singleton instance of the API inference logger. 
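+ *
+ * Illustrative setup and use; the sink shown assumes a VS Code output channel, and
+ * `requestBody` / `responsePayload` are placeholders for the provider's actual payloads.
+ * @example
+ * ApiInferenceLogger.configure({
+ *   enabled: process.env.ROO_CODE_API_LOGGING === "true",
+ *   sink: (...args) => outputChannel.appendLine(JSON.stringify(args)),
+ * })
+ * const handle = ApiInferenceLogger.start({ provider: "OpenAI", operation: "createMessage" }, requestBody)
+ * handle.success(responsePayload) // or handle.error(error) on failure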
+ */ +export const ApiInferenceLogger = new ApiInferenceLoggerSingleton() diff --git a/src/api/logging/__tests__/ApiInferenceLogger.spec.ts b/src/api/logging/__tests__/ApiInferenceLogger.spec.ts new file mode 100644 index 00000000000..94d65d9630d --- /dev/null +++ b/src/api/logging/__tests__/ApiInferenceLogger.spec.ts @@ -0,0 +1,519 @@ +import { ApiInferenceLogger } from "../ApiInferenceLogger" + +describe("ApiInferenceLogger", () => { + let mockSink: ReturnType<typeof vi.fn> + + beforeEach(() => { + mockSink = vi.fn() + // Reset the logger to disabled state before each test + ApiInferenceLogger.configure({ enabled: false, sink: () => {} }) + }) + + describe("configure", () => { + it("should enable logging when configured with enabled=true", () => { + ApiInferenceLogger.configure({ enabled: true, sink: mockSink }) + expect(ApiInferenceLogger.isEnabled()).toBe(true) + }) + + it("should disable logging when configured with enabled=false", () => { + ApiInferenceLogger.configure({ enabled: false, sink: mockSink }) + expect(ApiInferenceLogger.isEnabled()).toBe(false) + }) + + it("should not log when disabled", () => { + ApiInferenceLogger.configure({ enabled: false, sink: mockSink }) + const handle = ApiInferenceLogger.start( + { provider: "test", operation: "createMessage" }, + { test: "payload" }, + ) + handle.success({ response: "data" }) + expect(mockSink).not.toHaveBeenCalled() + }) + }) + + describe("start", () => { + beforeEach(() => { + ApiInferenceLogger.configure({ enabled: true, sink: mockSink }) + }) + + it("should emit a request log with [API][request] tag", () => { + ApiInferenceLogger.start({ provider: "OpenAI", operation: "createMessage" }, { model: "gpt-4" }) + + expect(mockSink).toHaveBeenCalledTimes(1) + expect(mockSink).toHaveBeenCalledWith( + "[API][request]", + expect.objectContaining({ + provider: "OpenAI", + operation: "createMessage", + payload: expect.objectContaining({ model: "gpt-4" }), + }), + ) + }) + + it("should include context fields in the log", () => { + ApiInferenceLogger.start( + { + provider: "Anthropic", + operation: "createMessage", + model: "claude-3", + taskId: "task-123", + requestId: "req-456", + }, + { test: "data" }, + ) + + expect(mockSink).toHaveBeenCalledWith( + "[API][request]", + expect.objectContaining({ + provider: "Anthropic", + operation: "createMessage", + model: "claude-3", + taskId: "task-123", + requestId: "req-456", + }), + ) + }) + + it("should generate a requestId if not provided", () => { + ApiInferenceLogger.start({ provider: "test", operation: "test" }, {}) + + expect(mockSink).toHaveBeenCalledWith( + "[API][request]", + expect.objectContaining({ + requestId: expect.stringMatching(/^req_\d+_[a-z0-9]+$/), + }), + ) + }) + }) + + describe("success", () => { + beforeEach(() => { + ApiInferenceLogger.configure({ enabled: true, sink: mockSink }) + }) + + it("should emit a response log with [API][response] tag", () => { + const handle = ApiInferenceLogger.start({ provider: "OpenAI", operation: "createMessage" }, {}) + mockSink.mockClear() + + handle.success({ text: "Hello world", usage: { inputTokens: 10, outputTokens: 20 } }) + + expect(mockSink).toHaveBeenCalledTimes(1) + expect(mockSink).toHaveBeenCalledWith( + "[API][response]", + expect.objectContaining({ + provider: "OpenAI", + operation: "createMessage", + payload: expect.objectContaining({ + text: "Hello world", + usage: { inputTokens: 10, outputTokens: 20 }, + }), + }), + ) + }) + + it("should include duration in the response log", () => { + const handle = ApiInferenceLogger.start({
provider: "test", operation: "test" }, {}) + mockSink.mockClear() + + // Small delay to ensure measurable duration + handle.success({ response: "data" }) + + expect(mockSink).toHaveBeenCalledWith( + "[API][response]", + expect.objectContaining({ + durationMs: expect.any(Number), + }), + ) + }) + }) + + describe("error", () => { + beforeEach(() => { + ApiInferenceLogger.configure({ enabled: true, sink: mockSink }) + }) + + it("should emit an error log with [API][error] tag", () => { + const handle = ApiInferenceLogger.start({ provider: "OpenAI", operation: "createMessage" }, {}) + mockSink.mockClear() + + handle.error(new Error("API request failed")) + + expect(mockSink).toHaveBeenCalledTimes(1) + expect(mockSink).toHaveBeenCalledWith( + "[API][error]", + expect.objectContaining({ + provider: "OpenAI", + operation: "createMessage", + error: expect.objectContaining({ + name: "Error", + message: "API request failed", + }), + }), + ) + }) + + it("should include duration in the error log", () => { + const handle = ApiInferenceLogger.start({ provider: "test", operation: "test" }, {}) + mockSink.mockClear() + + handle.error({ code: 500, message: "Internal error" }) + + expect(mockSink).toHaveBeenCalledWith( + "[API][error]", + expect.objectContaining({ + durationMs: expect.any(Number), + }), + ) + }) + + it("should handle non-Error objects", () => { + const handle = ApiInferenceLogger.start({ provider: "test", operation: "test" }, {}) + mockSink.mockClear() + + handle.error({ status: 401, message: "Unauthorized" }) + + expect(mockSink).toHaveBeenCalledWith( + "[API][error]", + expect.objectContaining({ + error: expect.objectContaining({ + status: 401, + message: "Unauthorized", + }), + }), + ) + }) + }) + + describe("secret redaction", () => { + beforeEach(() => { + ApiInferenceLogger.configure({ enabled: true, sink: mockSink }) + }) + + it("should redact Authorization header", () => { + ApiInferenceLogger.start( + { provider: "test", operation: "test" }, + { headers: { Authorization: "Bearer sk-secret-key-12345" } }, + ) + + expect(mockSink).toHaveBeenCalledWith( + "[API][request]", + expect.objectContaining({ + payload: expect.objectContaining({ + headers: { Authorization: "[REDACTED]" }, + }), + }), + ) + }) + + it("should redact apiKey field", () => { + ApiInferenceLogger.start({ provider: "test", operation: "test" }, { apiKey: "sk-secret-12345" }) + + expect(mockSink).toHaveBeenCalledWith( + "[API][request]", + expect.objectContaining({ + payload: expect.objectContaining({ + apiKey: "[REDACTED]", + }), + }), + ) + }) + + it("should redact nested secret fields", () => { + ApiInferenceLogger.start( + { provider: "test", operation: "test" }, + { + config: { + auth: { + access_token: "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...", + api_key: "secret-api-key", + }, + }, + }, + ) + + expect(mockSink).toHaveBeenCalledWith( + "[API][request]", + expect.objectContaining({ + payload: expect.objectContaining({ + config: { + auth: { + access_token: "[REDACTED]", + api_key: "[REDACTED]", + }, + }, + }), + }), + ) + }) + + it("should redact secret fields in arrays", () => { + ApiInferenceLogger.start( + { provider: "test", operation: "test" }, + { + items: [{ apiKey: "secret1" }, { apiKey: "secret2" }], + }, + ) + + expect(mockSink).toHaveBeenCalledWith( + "[API][request]", + expect.objectContaining({ + payload: expect.objectContaining({ + items: [{ apiKey: "[REDACTED]" }, { apiKey: "[REDACTED]" }], + }), + }), + ) + }) + + it("should not redact non-secret fields", () => { + ApiInferenceLogger.start( + { 
provider: "test", operation: "test" }, + { model: "gpt-4", messages: [{ role: "user", content: "Hello" }] }, + ) + + expect(mockSink).toHaveBeenCalledWith( + "[API][request]", + expect.objectContaining({ + payload: expect.objectContaining({ + model: "gpt-4", + messages: [{ role: "user", content: "Hello" }], + }), + }), + ) + }) + }) + + describe("payload size limiting", () => { + beforeEach(() => { + ApiInferenceLogger.configure({ enabled: true, sink: mockSink }) + }) + + it("should truncate strings longer than 10,000 characters", () => { + const longString = "x".repeat(15_000) + ApiInferenceLogger.start({ provider: "test", operation: "test" }, { content: longString }) + + expect(mockSink).toHaveBeenCalledWith( + "[API][request]", + expect.objectContaining({ + payload: expect.objectContaining({ + content: "[Truncated len=15000]", + }), + }), + ) + }) + + it("should not truncate strings within the limit", () => { + const normalString = "x".repeat(5_000) + ApiInferenceLogger.start({ provider: "test", operation: "test" }, { content: normalString }) + + expect(mockSink).toHaveBeenCalledWith( + "[API][request]", + expect.objectContaining({ + payload: expect.objectContaining({ + content: normalString, + }), + }), + ) + }) + + it("should replace base64 image data with placeholder", () => { + const imageData = + "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==" + ApiInferenceLogger.start({ provider: "test", operation: "test" }, { image: imageData }) + + expect(mockSink).toHaveBeenCalledWith( + "[API][request]", + expect.objectContaining({ + payload: expect.objectContaining({ + image: expect.stringMatching(/^\[ImageData len=\d+\]$/), + }), + }), + ) + }) + + it("should replace base64 image data for various image types", () => { + ApiInferenceLogger.start( + { provider: "test", operation: "test" }, + { + png: "data:image/png;base64,abc123", + jpeg: "data:image/jpeg;base64,abc123", + gif: "data:image/gif;base64,abc123", + webp: "data:image/webp;base64,abc123", + }, + ) + + expect(mockSink).toHaveBeenCalledWith( + "[API][request]", + expect.objectContaining({ + payload: expect.objectContaining({ + png: expect.stringMatching(/^\[ImageData len=\d+\]$/), + jpeg: expect.stringMatching(/^\[ImageData len=\d+\]$/), + gif: expect.stringMatching(/^\[ImageData len=\d+\]$/), + webp: expect.stringMatching(/^\[ImageData len=\d+\]$/), + }), + }), + ) + }) + + it("should cap arrays longer than 200 entries", () => { + const longArray = Array.from({ length: 250 }, (_, i) => ({ id: i })) + ApiInferenceLogger.start({ provider: "test", operation: "test" }, { items: longArray }) + + const call = mockSink.mock.calls[0] + const payload = call[1].payload as { items: any[] } + + expect(payload.items.length).toBe(201) + expect(payload.items[200]).toBe("[...50 more items]") + }) + + it("should not cap arrays within the limit", () => { + const normalArray = Array.from({ length: 50 }, (_, i) => ({ id: i })) + ApiInferenceLogger.start({ provider: "test", operation: "test" }, { items: normalArray }) + + const call = mockSink.mock.calls[0] + const payload = call[1].payload as { items: any[] } + + expect(payload.items.length).toBe(50) + }) + + it("should cap objects with more than 200 keys", () => { + const bigObject: Record = {} + for (let i = 0; i < 250; i++) { + bigObject[`key${i}`] = i + } + ApiInferenceLogger.start({ provider: "test", operation: "test" }, bigObject) + + const call = mockSink.mock.calls[0] + const payload = call[1].payload as Record + + const 
keys = Object.keys(payload) + expect(keys.length).toBe(201) + expect(payload["[...]"]).toBe("50 more keys omitted") + }) + + it("should apply size limiting recursively in nested objects", () => { + const nested = { + level1: { + longString: "x".repeat(15_000), + level2: { + imageData: "data:image/png;base64,abc123", + }, + }, + } + ApiInferenceLogger.start({ provider: "test", operation: "test" }, nested) + + expect(mockSink).toHaveBeenCalledWith( + "[API][request]", + expect.objectContaining({ + payload: expect.objectContaining({ + level1: expect.objectContaining({ + longString: "[Truncated len=15000]", + level2: expect.objectContaining({ + imageData: expect.stringMatching(/^\[ImageData len=\d+\]$/), + }), + }), + }), + }), + ) + }) + + it("should apply size limiting recursively in arrays", () => { + const messages = [ + { role: "user", content: "x".repeat(15_000) }, + { role: "assistant", content: "data:image/png;base64,abc123" }, + ] + ApiInferenceLogger.start({ provider: "test", operation: "test" }, { messages }) + + expect(mockSink).toHaveBeenCalledWith( + "[API][request]", + expect.objectContaining({ + payload: expect.objectContaining({ + messages: [ + expect.objectContaining({ + role: "user", + content: "[Truncated len=15000]", + }), + expect.objectContaining({ + role: "assistant", + content: expect.stringMatching(/^\[ImageData len=\d+\]$/), + }), + ], + }), + }), + ) + }) + }) + + describe("edge cases", () => { + beforeEach(() => { + ApiInferenceLogger.configure({ enabled: true, sink: mockSink }) + }) + + it("should handle null and undefined values", () => { + expect(() => { + ApiInferenceLogger.start({ provider: "test", operation: "test" }, { value: null, other: undefined }) + }).not.toThrow() + + expect(mockSink).toHaveBeenCalledWith( + "[API][request]", + expect.objectContaining({ + payload: expect.objectContaining({ + value: null, + other: undefined, + }), + }), + ) + }) + + it("should handle empty objects", () => { + expect(() => { + ApiInferenceLogger.start({ provider: "test", operation: "test" }, {}) + }).not.toThrow() + }) + + it("should not throw on circular references", () => { + const obj: any = { name: "test" } + obj.self = obj + + expect(() => { + ApiInferenceLogger.start({ provider: "test", operation: "test" }, obj) + }).not.toThrow() + }) + + it("should handle primitive values in payload", () => { + expect(() => { + ApiInferenceLogger.start({ provider: "test", operation: "test" }, "string payload" as any) + }).not.toThrow() + }) + + it("should handle functions in payload without throwing", () => { + expect(() => { + ApiInferenceLogger.start( + { provider: "test", operation: "test" }, + { callback: () => console.log("test") }, + ) + }).not.toThrow() + }) + + it("should handle BigInt values without throwing", () => { + expect(() => { + ApiInferenceLogger.start( + { provider: "test", operation: "test" }, + { bigValue: BigInt(9007199254740991) }, + ) + }).not.toThrow() + }) + }) + + describe("sink error handling", () => { + it("should not throw if sink throws an error", () => { + const throwingSink = vi.fn(() => { + throw new Error("Sink error") + }) + ApiInferenceLogger.configure({ enabled: true, sink: throwingSink }) + + expect(() => { + ApiInferenceLogger.start({ provider: "test", operation: "test" }, {}) + }).not.toThrow() + }) + }) +}) diff --git a/src/api/providers/anthropic-vertex.ts b/src/api/providers/anthropic-vertex.ts index cbfae08f41e..bde42796192 100644 --- a/src/api/providers/anthropic-vertex.ts +++ b/src/api/providers/anthropic-vertex.ts @@ -30,6 +30,7 @@ 
import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from ". // https://docs.anthropic.com/en/api/claude-on-vertex-ai export class AnthropicVertexHandler extends BaseProvider implements SingleCompletionHandler { + protected readonly providerName = "Anthropic Vertex" protected options: ApiHandlerOptions private client: AnthropicVertex diff --git a/src/api/providers/anthropic.ts b/src/api/providers/anthropic.ts index 4faf341d28f..f3c8a514d10 100644 --- a/src/api/providers/anthropic.ts +++ b/src/api/providers/anthropic.ts @@ -33,7 +33,7 @@ import { export class AnthropicHandler extends BaseProvider implements SingleCompletionHandler { private options: ApiHandlerOptions private client: Anthropic - private readonly providerName = "Anthropic" + protected readonly providerName = "Anthropic" constructor(options: ApiHandlerOptions) { super() @@ -63,6 +63,11 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa reasoning: thinking, } = this.getModel() + // Accumulators for final response logging + const accumulatedText: string[] = [] + const accumulatedReasoning: string[] = [] + const toolCalls: Array<{ id?: string; name?: string }> = [] + // Filter out non-Anthropic blocks (reasoning, thoughtSignature, etc.) before sending to the API const sanitizedMessages = filterNonAnthropicBlocks(messages) @@ -93,251 +98,291 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa } : {} - switch (modelId) { - case "claude-sonnet-4-5": - case "claude-sonnet-4-20250514": - case "claude-opus-4-5-20251101": - case "claude-opus-4-1-20250805": - case "claude-opus-4-20250514": - case "claude-3-7-sonnet-20250219": - case "claude-3-5-sonnet-20241022": - case "claude-3-5-haiku-20241022": - case "claude-3-opus-20240229": - case "claude-haiku-4-5-20251001": - case "claude-3-haiku-20240307": { - /** - * The latest message will be the new user message, one before - * will be the assistant message from a previous request, and - * the user message before that will be a previously cached user - * message. So we need to mark the latest user message as - * ephemeral to cache it for the next request, and mark the - * second to last user message as ephemeral to let the server - * know the last message to retrieve from the cache for the - * current request. - */ - const userMsgIndices = sanitizedMessages.reduce( - (acc, msg, index) => (msg.role === "user" ? [...acc, index] : acc), - [] as number[], - ) + // Variable to hold the log handle - will be created after request body is built + let logHandle: ReturnType | undefined + + try { + switch (modelId) { + case "claude-sonnet-4-5": + case "claude-sonnet-4-20250514": + case "claude-opus-4-5-20251101": + case "claude-opus-4-1-20250805": + case "claude-opus-4-20250514": + case "claude-3-7-sonnet-20250219": + case "claude-3-5-sonnet-20241022": + case "claude-3-5-haiku-20241022": + case "claude-3-opus-20240229": + case "claude-haiku-4-5-20251001": + case "claude-3-haiku-20240307": { + /** + * The latest message will be the new user message, one before + * will be the assistant message from a previous request, and + * the user message before that will be a previously cached user + * message. So we need to mark the latest user message as + * ephemeral to cache it for the next request, and mark the + * second to last user message as ephemeral to let the server + * know the last message to retrieve from the cache for the + * current request. 
+ */ + const userMsgIndices = sanitizedMessages.reduce( + (acc, msg, index) => (msg.role === "user" ? [...acc, index] : acc), + [] as number[], + ) - const lastUserMsgIndex = userMsgIndices[userMsgIndices.length - 1] ?? -1 - const secondLastMsgUserIndex = userMsgIndices[userMsgIndices.length - 2] ?? -1 + const lastUserMsgIndex = userMsgIndices[userMsgIndices.length - 1] ?? -1 + const secondLastMsgUserIndex = userMsgIndices[userMsgIndices.length - 2] ?? -1 - try { - stream = await this.client.messages.create( + // Build the request body for logging and API call + const requestBody = { + model: modelId, + max_tokens: maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS, + temperature, + thinking, + // Setting cache breakpoint for system prompt so new tasks can reuse it. + system: [{ text: systemPrompt, type: "text" as const, cache_control: cacheControl }], + messages: sanitizedMessages.map((message, index) => { + if (index === lastUserMsgIndex || index === secondLastMsgUserIndex) { + return { + ...message, + content: + typeof message.content === "string" + ? [ + { + type: "text" as const, + text: message.content, + cache_control: cacheControl, + }, + ] + : message.content.map((content, contentIndex) => + contentIndex === message.content.length - 1 + ? { ...content, cache_control: cacheControl } + : content, + ), + } + } + return message + }), + stream: true as const, + ...nativeToolParams, + } + + // Start inference logging with actual request body + logHandle = this.inferenceLogger.start( { + provider: this.providerName, + operation: "createMessage", model: modelId, - max_tokens: maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS, - temperature, - thinking, - // Setting cache breakpoint for system prompt so new tasks can reuse it. - system: [{ text: systemPrompt, type: "text", cache_control: cacheControl }], - messages: sanitizedMessages.map((message, index) => { - if (index === lastUserMsgIndex || index === secondLastMsgUserIndex) { - return { - ...message, - content: - typeof message.content === "string" - ? [{ type: "text", text: message.content, cache_control: cacheControl }] - : message.content.map((content, contentIndex) => - contentIndex === message.content.length - 1 - ? { ...content, cache_control: cacheControl } - : content, - ), - } - } - return message - }), - stream: true, - ...nativeToolParams, }, - (() => { - // prompt caching: https://x.com/alexalbert__/status/1823751995901272068 - // https://github.com/anthropics/anthropic-sdk-typescript?tab=readme-ov-file#default-headers - // https://github.com/anthropics/anthropic-sdk-typescript/commit/c920b77fc67bd839bfeb6716ceab9d7c9bbe7393 - - // Then check for models that support prompt caching - switch (modelId) { - case "claude-sonnet-4-5": - case "claude-sonnet-4-20250514": - case "claude-opus-4-5-20251101": - case "claude-opus-4-1-20250805": - case "claude-opus-4-20250514": - case "claude-3-7-sonnet-20250219": - case "claude-3-5-sonnet-20241022": - case "claude-3-5-haiku-20241022": - case "claude-3-opus-20240229": - case "claude-haiku-4-5-20251001": - case "claude-3-haiku-20240307": - betas.push("prompt-caching-2024-07-31") - return { headers: { "anthropic-beta": betas.join(",") } } - default: - return undefined - } - })(), + requestBody, ) - } catch (error) { - TelemetryService.instance.captureException( - new ApiProviderError( - error instanceof Error ? 
error.message : String(error), - this.providerName, - modelId, - "createMessage", - ), - ) - throw error + + try { + // Determine request options (beta headers) + betas.push("prompt-caching-2024-07-31") + const requestOptions = { headers: { "anthropic-beta": betas.join(",") } } + + stream = await this.client.messages.create(requestBody, requestOptions) + } catch (error) { + TelemetryService.instance.captureException( + new ApiProviderError( + error instanceof Error ? error.message : String(error), + this.providerName, + modelId, + "createMessage", + ), + ) + throw error + } + break } - break - } - default: { - try { - stream = (await this.client.messages.create({ + default: { + // Build the request body for logging and API call + const requestBody = { model: modelId, max_tokens: maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS, temperature, - system: [{ text: systemPrompt, type: "text" }], + system: [{ text: systemPrompt, type: "text" as const }], messages: sanitizedMessages, - stream: true, + stream: true as const, ...nativeToolParams, - })) as any - } catch (error) { - TelemetryService.instance.captureException( - new ApiProviderError( - error instanceof Error ? error.message : String(error), - this.providerName, - modelId, - "createMessage", - ), + } + + // Start inference logging with actual request body + logHandle = this.inferenceLogger.start( + { + provider: this.providerName, + operation: "createMessage", + model: modelId, + }, + requestBody, ) - throw error + + try { + stream = (await this.client.messages.create(requestBody)) as any + } catch (error) { + TelemetryService.instance.captureException( + new ApiProviderError( + error instanceof Error ? error.message : String(error), + this.providerName, + modelId, + "createMessage", + ), + ) + throw error + } + break } - break } - } - let inputTokens = 0 - let outputTokens = 0 - let cacheWriteTokens = 0 - let cacheReadTokens = 0 - - for await (const chunk of stream) { - switch (chunk.type) { - case "message_start": { - // Tells us cache reads/writes/input/output. - const { - input_tokens = 0, - output_tokens = 0, - cache_creation_input_tokens, - cache_read_input_tokens, - } = chunk.message.usage - - yield { - type: "usage", - inputTokens: input_tokens, - outputTokens: output_tokens, - cacheWriteTokens: cache_creation_input_tokens || undefined, - cacheReadTokens: cache_read_input_tokens || undefined, - } + let inputTokens = 0 + let outputTokens = 0 + let cacheWriteTokens = 0 + let cacheReadTokens = 0 + + for await (const chunk of stream) { + switch (chunk.type) { + case "message_start": { + // Tells us cache reads/writes/input/output. + const { + input_tokens = 0, + output_tokens = 0, + cache_creation_input_tokens, + cache_read_input_tokens, + } = chunk.message.usage + + yield { + type: "usage", + inputTokens: input_tokens, + outputTokens: output_tokens, + cacheWriteTokens: cache_creation_input_tokens || undefined, + cacheReadTokens: cache_read_input_tokens || undefined, + } - inputTokens += input_tokens - outputTokens += output_tokens - cacheWriteTokens += cache_creation_input_tokens || 0 - cacheReadTokens += cache_read_input_tokens || 0 + inputTokens += input_tokens + outputTokens += output_tokens + cacheWriteTokens += cache_creation_input_tokens || 0 + cacheReadTokens += cache_read_input_tokens || 0 - break - } - case "message_delta": - // Tells us stop_reason, stop_sequence, and output tokens - // along the way and at the end of the message. 
- yield { - type: "usage", - inputTokens: 0, - outputTokens: chunk.usage.output_tokens || 0, + break } + case "message_delta": + // Tells us stop_reason, stop_sequence, and output tokens + // along the way and at the end of the message. + yield { + type: "usage", + inputTokens: 0, + outputTokens: chunk.usage.output_tokens || 0, + } - break - case "message_stop": - // No usage data, just an indicator that the message is done. - break - case "content_block_start": - switch (chunk.content_block.type) { - case "thinking": - // We may receive multiple text blocks, in which - // case just insert a line break between them. - if (chunk.index > 0) { - yield { type: "reasoning", text: "\n" } - } + break + case "message_stop": + // No usage data, just an indicator that the message is done. + break + case "content_block_start": + switch (chunk.content_block.type) { + case "thinking": + // We may receive multiple text blocks, in which + // case just insert a line break between them. + if (chunk.index > 0) { + accumulatedReasoning.push("\n") + yield { type: "reasoning", text: "\n" } + } - yield { type: "reasoning", text: chunk.content_block.thinking } - break - case "text": - // We may receive multiple text blocks, in which - // case just insert a line break between them. - if (chunk.index > 0) { - yield { type: "text", text: "\n" } - } + accumulatedReasoning.push(chunk.content_block.thinking) + yield { type: "reasoning", text: chunk.content_block.thinking } + break + case "text": + // We may receive multiple text blocks, in which + // case just insert a line break between them. + if (chunk.index > 0) { + accumulatedText.push("\n") + yield { type: "text", text: "\n" } + } - yield { type: "text", text: chunk.content_block.text } - break - case "tool_use": { - // Emit initial tool call partial with id and name - yield { - type: "tool_call_partial", - index: chunk.index, - id: chunk.content_block.id, - name: chunk.content_block.name, - arguments: undefined, + accumulatedText.push(chunk.content_block.text) + yield { type: "text", text: chunk.content_block.text } + break + case "tool_use": { + // Track tool call for logging + toolCalls.push({ + id: chunk.content_block.id, + name: chunk.content_block.name, + }) + // Emit initial tool call partial with id and name + yield { + type: "tool_call_partial", + index: chunk.index, + id: chunk.content_block.id, + name: chunk.content_block.name, + arguments: undefined, + } + break } - break } - } - break - case "content_block_delta": - switch (chunk.delta.type) { - case "thinking_delta": - yield { type: "reasoning", text: chunk.delta.thinking } - break - case "text_delta": - yield { type: "text", text: chunk.delta.text } - break - case "input_json_delta": { - // Emit tool call partial chunks as arguments stream in - yield { - type: "tool_call_partial", - index: chunk.index, - id: undefined, - name: undefined, - arguments: chunk.delta.partial_json, + break + case "content_block_delta": + switch (chunk.delta.type) { + case "thinking_delta": + accumulatedReasoning.push(chunk.delta.thinking) + yield { type: "reasoning", text: chunk.delta.thinking } + break + case "text_delta": + accumulatedText.push(chunk.delta.text) + yield { type: "text", text: chunk.delta.text } + break + case "input_json_delta": { + // Emit tool call partial chunks as arguments stream in + yield { + type: "tool_call_partial", + index: chunk.index, + id: undefined, + name: undefined, + arguments: chunk.delta.partial_json, + } + break } - break } - } - break - case "content_block_stop": - // Block 
complete - no action needed for now. - // NativeToolCallParser handles tool call completion - // Note: Signature for multi-turn thinking would require using stream.finalMessage() - // after iteration completes, which requires restructuring the streaming approach. - break + break + case "content_block_stop": + // Block complete - no action needed for now. + // NativeToolCallParser handles tool call completion + // Note: Signature for multi-turn thinking would require using stream.finalMessage() + // after iteration completes, which requires restructuring the streaming approach. + break + } } - } - if (inputTokens > 0 || outputTokens > 0 || cacheWriteTokens > 0 || cacheReadTokens > 0) { - const { totalCost } = calculateApiCostAnthropic( - this.getModel().info, - inputTokens, - outputTokens, - cacheWriteTokens, - cacheReadTokens, - ) + if (inputTokens > 0 || outputTokens > 0 || cacheWriteTokens > 0 || cacheReadTokens > 0) { + const { totalCost } = calculateApiCostAnthropic( + this.getModel().info, + inputTokens, + outputTokens, + cacheWriteTokens, + cacheReadTokens, + ) - yield { - type: "usage", - inputTokens: 0, - outputTokens: 0, - totalCost, + yield { + type: "usage", + inputTokens: 0, + outputTokens: 0, + totalCost, + } } + + // Log successful response + logHandle.success({ + text: accumulatedText.join(""), + reasoning: accumulatedReasoning.length > 0 ? accumulatedReasoning.join("") : undefined, + toolCalls: toolCalls.length > 0 ? toolCalls : undefined, + usage: { inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens }, + }) + } catch (error) { + // logHandle may not be assigned if error occurs before request body is built + if (logHandle) { + logHandle.error(error) + } + throw error } } diff --git a/src/api/providers/base-openai-compatible-provider.ts b/src/api/providers/base-openai-compatible-provider.ts index 92b9558c451..579aa80d866 100644 --- a/src/api/providers/base-openai-compatible-provider.ts +++ b/src/api/providers/base-openai-compatible-provider.ts @@ -117,73 +117,153 @@ export abstract class BaseOpenAiCompatibleProvider messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, ): ApiStream { - const stream = await this.createStream(systemPrompt, messages, metadata) - - const matcher = new XmlMatcher( - "think", - (chunk) => - ({ - type: chunk.matched ? "reasoning" : "text", - text: chunk.data, - }) as const, + const { id: model, info } = this.getModel() + + // Build the actual params object that will be passed to the SDK + const max_tokens = + getModelMaxOutputTokens({ + modelId: model, + model: info, + settings: this.options, + format: "openai", + }) ?? undefined + + const temperature = this.options.modelTemperature ?? this.defaultTemperature + + const requestParams: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = { + model, + max_tokens, + temperature, + messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)], + stream: true, + stream_options: { include_usage: true }, + ...(metadata?.tools && { tools: this.convertToolsForOpenAI(metadata.tools) }), + ...(metadata?.tool_choice && { tool_choice: metadata.tool_choice }), + ...(metadata?.toolProtocol === "native" && { + parallel_tool_calls: metadata.parallelToolCalls ?? 
false, + }), + } + + // Add thinking parameter if reasoning is enabled and model supports it + if (this.options.enableReasoningEffort && info.supportsReasoningBinary) { + ;(requestParams as any).thinking = { type: "enabled" } + } + + // Start inference logging with the actual request params + const logHandle = this.inferenceLogger.start( + { + provider: this.providerName, + operation: "createMessage", + model, + }, + requestParams, ) + // Accumulators for final response logging + const accumulatedText: string[] = [] + const accumulatedReasoning: string[] = [] + const toolCalls: Array<{ id?: string; name?: string; arguments?: string }> = [] let lastUsage: OpenAI.CompletionUsage | undefined - for await (const chunk of stream) { - // Check for provider-specific error responses (e.g., MiniMax base_resp) - const chunkAny = chunk as any - if (chunkAny.base_resp?.status_code && chunkAny.base_resp.status_code !== 0) { - throw new Error( - `${this.providerName} API Error (${chunkAny.base_resp.status_code}): ${chunkAny.base_resp.status_msg || "Unknown error"}`, - ) - } + try { + const stream = await this.createStream(systemPrompt, messages, metadata) + + const matcher = new XmlMatcher( + "think", + (chunk) => + ({ + type: chunk.matched ? "reasoning" : "text", + text: chunk.data, + }) as const, + ) + + for await (const chunk of stream) { + // Check for provider-specific error responses (e.g., MiniMax base_resp) + const chunkAny = chunk as any + if (chunkAny.base_resp?.status_code && chunkAny.base_resp.status_code !== 0) { + const error = new Error( + `${this.providerName} API Error (${chunkAny.base_resp.status_code}): ${chunkAny.base_resp.status_msg || "Unknown error"}`, + ) + logHandle.error(error) + throw error + } - const delta = chunk.choices?.[0]?.delta + const delta = chunk.choices?.[0]?.delta - if (delta?.content) { - for (const processedChunk of matcher.update(delta.content)) { - yield processedChunk + if (delta?.content) { + for (const processedChunk of matcher.update(delta.content)) { + if (processedChunk.type === "text") { + accumulatedText.push(processedChunk.text) + } else if (processedChunk.type === "reasoning") { + accumulatedReasoning.push(processedChunk.text) + } + yield processedChunk + } } - } - if (delta) { - for (const key of ["reasoning_content", "reasoning"] as const) { - if (key in delta) { - const reasoning_content = ((delta as any)[key] as string | undefined) || "" - if (reasoning_content?.trim()) { - yield { type: "reasoning", text: reasoning_content } + if (delta) { + for (const key of ["reasoning_content", "reasoning"] as const) { + if (key in delta) { + const reasoning_content = ((delta as any)[key] as string | undefined) || "" + if (reasoning_content?.trim()) { + accumulatedReasoning.push(reasoning_content) + yield { type: "reasoning", text: reasoning_content } + } + break } - break } } - } - // Emit raw tool call chunks - NativeToolCallParser handles state management - if (delta?.tool_calls) { - for (const toolCall of delta.tool_calls) { - yield { - type: "tool_call_partial", - index: toolCall.index, - id: toolCall.id, - name: toolCall.function?.name, - arguments: toolCall.function?.arguments, + // Emit raw tool call chunks - NativeToolCallParser handles state management + if (delta?.tool_calls) { + for (const toolCall of delta.tool_calls) { + // Track tool calls for logging + if (toolCall.id || toolCall.function?.name) { + toolCalls.push({ + id: toolCall.id, + name: toolCall.function?.name, + arguments: toolCall.function?.arguments, + }) + } + yield { + type: 
"tool_call_partial", + index: toolCall.index, + id: toolCall.id, + name: toolCall.function?.name, + arguments: toolCall.function?.arguments, + } } } + + if (chunk.usage) { + lastUsage = chunk.usage + } } - if (chunk.usage) { - lastUsage = chunk.usage + if (lastUsage) { + yield this.processUsageMetrics(lastUsage, this.getModel().info) } - } - if (lastUsage) { - yield this.processUsageMetrics(lastUsage, this.getModel().info) - } + // Process any remaining content + for (const processedChunk of matcher.final()) { + if (processedChunk.type === "text") { + accumulatedText.push(processedChunk.text) + } else if (processedChunk.type === "reasoning") { + accumulatedReasoning.push(processedChunk.text) + } + yield processedChunk + } - // Process any remaining content - for (const processedChunk of matcher.final()) { - yield processedChunk + // Log successful response + logHandle.success({ + text: accumulatedText.join(""), + reasoning: accumulatedReasoning.length > 0 ? accumulatedReasoning.join("") : undefined, + toolCalls: toolCalls.length > 0 ? toolCalls : undefined, + usage: lastUsage, + }) + } catch (error) { + logHandle.error(error) + throw error } } diff --git a/src/api/providers/base-provider.ts b/src/api/providers/base-provider.ts index 64d99b3f0c1..80599e2c80b 100644 --- a/src/api/providers/base-provider.ts +++ b/src/api/providers/base-provider.ts @@ -6,11 +6,23 @@ import type { ApiHandler, ApiHandlerCreateMessageMetadata } from "../index" import { ApiStream } from "../transform/stream" import { countTokens } from "../../utils/countTokens" import { isMcpTool } from "../../utils/mcp-name" +import { ApiInferenceLogger } from "../logging/ApiInferenceLogger" /** * Base class for API providers that implements common functionality. */ export abstract class BaseProvider implements ApiHandler { + /** + * The name of this provider, used for logging and error reporting. + */ + protected abstract readonly providerName: string + + /** + * Reference to the API inference logger singleton for logging requests/responses. + * Providers can use this to log inference calls when enabled. 
+ */ + protected readonly inferenceLogger = ApiInferenceLogger + abstract createMessage( systemPrompt: string, messages: Anthropic.Messages.MessageParam[], diff --git a/src/api/providers/bedrock.ts b/src/api/providers/bedrock.ts index 761500750d0..9a24bcb044a 100644 --- a/src/api/providers/bedrock.ts +++ b/src/api/providers/bedrock.ts @@ -200,7 +200,7 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH protected options: ProviderSettings private client: BedrockRuntimeClient private arnInfo: any - private readonly providerName = "Bedrock" + protected readonly providerName = "Bedrock" constructor(options: ProviderSettings) { super() diff --git a/src/api/providers/cerebras.ts b/src/api/providers/cerebras.ts index 99e7c4cc3d4..0bbba28398b 100644 --- a/src/api/providers/cerebras.ts +++ b/src/api/providers/cerebras.ts @@ -20,6 +20,7 @@ const CEREBRAS_INTEGRATION_HEADER = "X-Cerebras-3rd-Party-Integration" const CEREBRAS_INTEGRATION_NAME = "roocode" export class CerebrasHandler extends BaseProvider implements SingleCompletionHandler { + protected readonly providerName = "Cerebras" private apiKey: string private providerModels: typeof cerebrasModels private defaultProviderModelId: CerebrasModelId diff --git a/src/api/providers/claude-code.ts b/src/api/providers/claude-code.ts index cdd1cb3beb7..5baf7a3864b 100644 --- a/src/api/providers/claude-code.ts +++ b/src/api/providers/claude-code.ts @@ -20,6 +20,7 @@ import { t } from "../../i18n" import { ApiHandlerOptions } from "../../shared/api" import { countTokens } from "../../utils/countTokens" import { convertOpenAIToolsToAnthropic } from "../../core/prompts/tools/native-tools/converters" +import { BaseProvider } from "./base-provider" /** * Converts OpenAI tool_choice to Anthropic ToolChoice format @@ -64,7 +65,8 @@ function convertOpenAIToolChoice( return { type: "auto", disable_parallel_tool_use: disableParallelToolUse } } -export class ClaudeCodeHandler implements ApiHandler, SingleCompletionHandler { +export class ClaudeCodeHandler extends BaseProvider implements ApiHandler, SingleCompletionHandler { + protected readonly providerName = "Claude Code" private options: ApiHandlerOptions /** * Store the last thinking block signature for interleaved thinking with tool use. 
@@ -75,6 +77,7 @@ export class ClaudeCodeHandler implements ApiHandler, SingleCompletionHandler { private lastThinkingSignature?: string constructor(options: ApiHandlerOptions) { + super() this.options = options } @@ -114,7 +117,7 @@ export class ClaudeCodeHandler implements ApiHandler, SingleCompletionHandler { return null } - async *createMessage( + override async *createMessage( systemPrompt: string, messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, @@ -182,9 +185,8 @@ export class ClaudeCodeHandler implements ApiHandler, SingleCompletionHandler { thinking = { type: "disabled" } } - // Create streaming request using OAuth - const stream = createStreamingMessage({ - accessToken, + // Build request params for logging + const requestParams = { model: modelId, systemPrompt, messages, @@ -195,79 +197,122 @@ export class ClaudeCodeHandler implements ApiHandler, SingleCompletionHandler { metadata: { user_id: userId, }, - }) - - // Track usage for cost calculation - let inputTokens = 0 - let outputTokens = 0 - let cacheReadTokens = 0 - let cacheWriteTokens = 0 - - for await (const chunk of stream) { - switch (chunk.type) { - case "text": - yield { - type: "text", - text: chunk.text, - } - break - - case "reasoning": - yield { - type: "reasoning", - text: chunk.text, - } - break - - case "thinking_complete": - // Capture the signature for persistence in api_conversation_history - // This enables tool use continuations where thinking blocks must be passed back - if (chunk.signature) { - this.lastThinkingSignature = chunk.signature - } - // Emit a complete thinking block with signature - // This is critical for interleaved thinking with tool use - // The signature must be included when passing thinking blocks back to the API - yield { - type: "reasoning", - text: chunk.thinking, - signature: chunk.signature, - } - break - - case "tool_call_partial": - yield { - type: "tool_call_partial", - index: chunk.index, - id: chunk.id, - name: chunk.name, - arguments: chunk.arguments, - } - break + } - case "usage": { - inputTokens = chunk.inputTokens - outputTokens = chunk.outputTokens - cacheReadTokens = chunk.cacheReadTokens || 0 - cacheWriteTokens = chunk.cacheWriteTokens || 0 - - // Claude Code is subscription-based, no per-token cost - const usageChunk: ApiStreamUsageChunk = { - type: "usage", - inputTokens, - outputTokens, - cacheReadTokens: cacheReadTokens > 0 ? cacheReadTokens : undefined, - cacheWriteTokens: cacheWriteTokens > 0 ? 
cacheWriteTokens : undefined, - totalCost: 0, + // Start inference logging with request params + const logHandle = this.inferenceLogger.start( + { + provider: this.providerName, + operation: "createMessage", + model: modelId, + }, + requestParams, + ) + + // Accumulators for response logging + const accumulatedText: string[] = [] + const accumulatedReasoning: string[] = [] + const toolCalls: Array<{ id?: string; name?: string }> = [] + + try { + // Create streaming request using OAuth + const stream = createStreamingMessage({ + accessToken, + ...requestParams, + }) + + // Track usage for cost calculation + let inputTokens = 0 + let outputTokens = 0 + let cacheReadTokens = 0 + let cacheWriteTokens = 0 + let lastUsage: any + + for await (const chunk of stream) { + switch (chunk.type) { + case "text": + accumulatedText.push(chunk.text) + yield { + type: "text", + text: chunk.text, + } + break + + case "reasoning": + accumulatedReasoning.push(chunk.text) + yield { + type: "reasoning", + text: chunk.text, + } + break + + case "thinking_complete": + // Capture the signature for persistence in api_conversation_history + // This enables tool use continuations where thinking blocks must be passed back + if (chunk.signature) { + this.lastThinkingSignature = chunk.signature + } + accumulatedReasoning.push(chunk.thinking) + // Emit a complete thinking block with signature + // This is critical for interleaved thinking with tool use + // The signature must be included when passing thinking blocks back to the API + yield { + type: "reasoning", + text: chunk.thinking, + signature: chunk.signature, + } + break + + case "tool_call_partial": + if (chunk.id || chunk.name) { + toolCalls.push({ id: chunk.id, name: chunk.name }) + } + yield { + type: "tool_call_partial", + index: chunk.index, + id: chunk.id, + name: chunk.name, + arguments: chunk.arguments, + } + break + + case "usage": { + inputTokens = chunk.inputTokens + outputTokens = chunk.outputTokens + cacheReadTokens = chunk.cacheReadTokens || 0 + cacheWriteTokens = chunk.cacheWriteTokens || 0 + + // Claude Code is subscription-based, no per-token cost + const usageChunk: ApiStreamUsageChunk = { + type: "usage", + inputTokens, + outputTokens, + cacheReadTokens: cacheReadTokens > 0 ? cacheReadTokens : undefined, + cacheWriteTokens: cacheWriteTokens > 0 ? cacheWriteTokens : undefined, + totalCost: 0, + } + + lastUsage = usageChunk + + yield usageChunk + break } - yield usageChunk - break + case "error": + throw new Error(chunk.error) } - - case "error": - throw new Error(chunk.error) } + + // Log successful response + logHandle.success({ + text: accumulatedText.join(""), + reasoning: accumulatedReasoning.length > 0 ? accumulatedReasoning.join("") : undefined, + toolCalls: toolCalls.length > 0 ? 
toolCalls : undefined, + usage: lastUsage, + }) + } catch (error) { + logHandle.error(error) + throw error + } } @@ -284,7 +329,7 @@ export class ClaudeCodeHandler implements ApiHandler, SingleCompletionHandler { } } - async countTokens(content: Anthropic.Messages.ContentBlockParam[]): Promise<number> { + override async countTokens(content: Anthropic.Messages.ContentBlockParam[]): Promise<number> { if (content.length === 0) { return 0 } diff --git a/src/api/providers/gemini.ts b/src/api/providers/gemini.ts index 4402e3e0177..f8e86e74aeb 100644 --- a/src/api/providers/gemini.ts +++ b/src/api/providers/gemini.ts @@ -40,7 +40,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl private client: GoogleGenAI private lastThoughtSignature?: string private lastResponseId?: string - private readonly providerName = "Gemini" + protected readonly providerName = "Gemini" constructor({ isVertex, ...options }: GeminiHandlerOptions) { super() diff --git a/src/api/providers/huggingface.ts b/src/api/providers/huggingface.ts index 7b62046b99e..b9e0fd6f928 100644 --- a/src/api/providers/huggingface.ts +++ b/src/api/providers/huggingface.ts @@ -14,7 +14,7 @@ export class HuggingFaceHandler extends BaseProvider implements SingleCompletion private client: OpenAI private options: ApiHandlerOptions private modelCache: ModelRecord | null = null - private readonly providerName = "HuggingFace" + protected readonly providerName = "HuggingFace" constructor(options: ApiHandlerOptions) { super() diff --git a/src/api/providers/lm-studio.ts b/src/api/providers/lm-studio.ts index 102c108dcee..29b48a7d2f3 100644 --- a/src/api/providers/lm-studio.ts +++ b/src/api/providers/lm-studio.ts @@ -21,7 +21,7 @@ import { handleOpenAIError } from "./utils/openai-error-handler" export class LmStudioHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions private client: OpenAI - private readonly providerName = "LM Studio" + protected readonly providerName = "LM Studio" constructor(options: ApiHandlerOptions) { super() diff --git a/src/api/providers/minimax.ts b/src/api/providers/minimax.ts index 12d7934546e..07d3d50c42e 100644 --- a/src/api/providers/minimax.ts +++ b/src/api/providers/minimax.ts @@ -50,6 +50,7 @@ function convertOpenAIToolChoice( } export class MiniMaxHandler extends BaseProvider implements SingleCompletionHandler { + protected readonly providerName = "MiniMax" private options: ApiHandlerOptions private client: Anthropic diff --git a/src/api/providers/mistral.ts b/src/api/providers/mistral.ts index 95739cdcf73..c06083a505f 100644 --- a/src/api/providers/mistral.ts +++ b/src/api/providers/mistral.ts @@ -51,7 +51,7 @@ type MistralTool = { export class MistralHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions private client: Mistral - private readonly providerName = "Mistral" + protected readonly providerName = "Mistral" constructor(options: ApiHandlerOptions) { super() diff --git a/src/api/providers/native-ollama.ts b/src/api/providers/native-ollama.ts index 712b70445cc..7725a1e18e6 100644 --- a/src/api/providers/native-ollama.ts +++ b/src/api/providers/native-ollama.ts @@ -146,6 +146,7 @@ function convertToOllamaMessages(anthropicMessages: Anthropic.Messages.MessagePa } export class NativeOllamaHandler extends BaseProvider implements SingleCompletionHandler { + protected readonly providerName = "Ollama" protected options: ApiHandlerOptions private client: Ollama | undefined protected models: Record<string, ModelInfo> = {} diff
--git a/src/api/providers/openai-native.ts b/src/api/providers/openai-native.ts index 8f9cc2297f8..4506cac80ec 100644 --- a/src/api/providers/openai-native.ts +++ b/src/api/providers/openai-native.ts @@ -31,7 +31,7 @@ export type OpenAiNativeModel = ReturnType export class OpenAiNativeHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions private client: OpenAI - private readonly providerName = "OpenAI Native" + protected readonly providerName = "OpenAI Native" // Resolved service tier from Responses API (actual tier used by OpenAI) private lastServiceTier: ServiceTier | undefined // Complete response output array (includes reasoning items with encrypted_content) @@ -175,8 +175,54 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio metadata, ) - // Make the request (pass systemPrompt and messages for potential retry) - yield* this.executeRequest(requestBody, model, metadata, systemPrompt, messages) + // Start inference logging with actual request body + const logHandle = this.inferenceLogger.start( + { + provider: this.providerName, + operation: "createMessage", + model: model.id, + }, + requestBody, + ) + + // Accumulators for response logging + const accumulatedText: string[] = [] + const accumulatedReasoning: string[] = [] + const toolCalls: Array<{ id?: string; name?: string }> = [] + let lastUsage: any + + try { + // Make the request (pass systemPrompt and messages for potential retry) + for await (const chunk of this.executeRequest(requestBody, model, metadata, systemPrompt, messages)) { + // Accumulate for logging + if (chunk.type === "text") { + accumulatedText.push(chunk.text) + } else if (chunk.type === "reasoning") { + accumulatedReasoning.push(chunk.text) + } else if (chunk.type === "tool_call" || chunk.type === "tool_call_partial") { + if (chunk.id || chunk.name) { + toolCalls.push({ id: chunk.id, name: chunk.name }) + } + } else if (chunk.type === "usage") { + lastUsage = chunk + } + + yield chunk + } + + // Log successful response + logHandle.success({ + text: accumulatedText.join(""), + reasoning: accumulatedReasoning.length > 0 ? accumulatedReasoning.join("") : undefined, + toolCalls: toolCalls.length > 0 ? 
toolCalls : undefined, + usage: lastUsage, + responseId: this.lastResponseId, + responseOutput: this.lastResponseOutput, + }) + } catch (error) { + logHandle.error(error) + throw error + } } private buildRequestBody( diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index d6f50d02691..d3acd28361f 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -32,7 +32,7 @@ import { handleOpenAIError } from "./utils/openai-error-handler" export class OpenAiHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions protected client: OpenAI - private readonly providerName = "OpenAI" + protected readonly providerName = "OpenAI" constructor(options: ApiHandlerOptions) { super() @@ -94,6 +94,13 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl const deepseekReasoner = modelId.includes("deepseek-reasoner") || enabledR1Format const ark = modelUrl.includes(".volces.com") + // Accumulators for final response logging + const accumulatedText: string[] = [] + const accumulatedReasoning: string[] = [] + const toolCalls: Array<{ id?: string; name?: string }> = [] + let lastUsage: any + + // Handle O3 family models separately with their own logging if (modelId.includes("o1") || modelId.includes("o3") || modelId.includes("o4")) { yield* this.handleO3FamilyMessage(modelId, systemPrompt, messages, metadata) return @@ -174,60 +181,102 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl // Add max_tokens if needed this.addMaxTokensIfNeeded(requestOptions, modelInfo) - let stream + // Start inference logging with actual request params + const logHandle = this.inferenceLogger.start( + { + provider: this.providerName, + operation: "createMessage", + model: modelId, + }, + requestOptions, + ) + try { - stream = await this.client.chat.completions.create( - requestOptions, - isAzureAiInference ? { path: OPENAI_AZURE_AI_INFERENCE_PATH } : {}, + let stream + try { + stream = await this.client.chat.completions.create( + requestOptions, + isAzureAiInference ? { path: OPENAI_AZURE_AI_INFERENCE_PATH } : {}, + ) + } catch (error) { + throw handleOpenAIError(error, this.providerName) + } + + const matcher = new XmlMatcher( + "think", + (chunk) => + ({ + type: chunk.matched ? "reasoning" : "text", + text: chunk.data, + }) as const, ) - } catch (error) { - throw handleOpenAIError(error, this.providerName) - } - const matcher = new XmlMatcher( - "think", - (chunk) => - ({ - type: chunk.matched ? "reasoning" : "text", - text: chunk.data, - }) as const, - ) + const activeToolCallIds = new Set() - let lastUsage - const activeToolCallIds = new Set() + for await (const chunk of stream) { + const delta = chunk.choices?.[0]?.delta ?? {} + const finishReason = chunk.choices?.[0]?.finish_reason - for await (const chunk of stream) { - const delta = chunk.choices?.[0]?.delta ?? 
{} - const finishReason = chunk.choices?.[0]?.finish_reason + if (delta.content) { + for (const matchedChunk of matcher.update(delta.content)) { + if (matchedChunk.type === "text") { + accumulatedText.push(matchedChunk.text) + } else if (matchedChunk.type === "reasoning") { + accumulatedReasoning.push(matchedChunk.text) + } + yield matchedChunk + } + } - if (delta.content) { - for (const chunk of matcher.update(delta.content)) { - yield chunk + if ("reasoning_content" in delta && delta.reasoning_content) { + accumulatedReasoning.push((delta.reasoning_content as string | undefined) || "") + yield { + type: "reasoning", + text: (delta.reasoning_content as string | undefined) || "", + } } - } - if ("reasoning_content" in delta && delta.reasoning_content) { - yield { - type: "reasoning", - text: (delta.reasoning_content as string | undefined) || "", + // Track tool calls for logging and use processToolCalls for proper tool_call_end events + if (delta.tool_calls) { + for (const toolCall of delta.tool_calls) { + if (toolCall.id || toolCall.function?.name) { + toolCalls.push({ id: toolCall.id, name: toolCall.function?.name }) + } + } } - } + yield* this.processToolCalls(delta, finishReason, activeToolCallIds) - yield* this.processToolCalls(delta, finishReason, activeToolCallIds) + if (chunk.usage) { + lastUsage = chunk.usage + } + } - if (chunk.usage) { - lastUsage = chunk.usage + for (const matchedChunk of matcher.final()) { + if (matchedChunk.type === "text") { + accumulatedText.push(matchedChunk.text) + } else if (matchedChunk.type === "reasoning") { + accumulatedReasoning.push(matchedChunk.text) + } + yield matchedChunk } - } - for (const chunk of matcher.final()) { - yield chunk - } + if (lastUsage) { + yield this.processUsageMetrics(lastUsage, modelInfo) + } - if (lastUsage) { - yield this.processUsageMetrics(lastUsage, modelInfo) + // Log successful response + logHandle.success({ + text: accumulatedText.join(""), + reasoning: accumulatedReasoning.length > 0 ? accumulatedReasoning.join("") : undefined, + toolCalls: toolCalls.length > 0 ? toolCalls : undefined, + usage: lastUsage, + }) + } catch (error) { + logHandle.error(error) + throw error } } else { + // Non-streaming path const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = { model: modelId, messages: deepseekReasoner @@ -245,37 +294,63 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl // Add max_tokens if needed this.addMaxTokensIfNeeded(requestOptions, modelInfo) - let response - try { - response = await this.client.chat.completions.create( - requestOptions, - this._isAzureAiInference(modelUrl) ? { path: OPENAI_AZURE_AI_INFERENCE_PATH } : {}, - ) - } catch (error) { - throw handleOpenAIError(error, this.providerName) - } + // Start inference logging with actual request params + const logHandle = this.inferenceLogger.start( + { + provider: this.providerName, + operation: "createMessage", + model: modelId, + }, + requestOptions, + ) - const message = response.choices?.[0]?.message + try { + let response + try { + response = await this.client.chat.completions.create( + requestOptions, + this._isAzureAiInference(modelUrl) ? 
{ path: OPENAI_AZURE_AI_INFERENCE_PATH } : {}, + ) + } catch (error) { + throw handleOpenAIError(error, this.providerName) + } - if (message?.tool_calls) { - for (const toolCall of message.tool_calls) { - if (toolCall.type === "function") { - yield { - type: "tool_call", - id: toolCall.id, - name: toolCall.function.name, - arguments: toolCall.function.arguments, + const message = response.choices?.[0]?.message + + if (message?.tool_calls) { + for (const toolCall of message.tool_calls) { + if (toolCall.type === "function") { + toolCalls.push({ id: toolCall.id, name: toolCall.function.name }) + yield { + type: "tool_call", + id: toolCall.id, + name: toolCall.function.name, + arguments: toolCall.function.arguments, + } } } } - } - yield { - type: "text", - text: message?.content || "", - } + accumulatedText.push(message?.content || "") + yield { + type: "text", + text: message?.content || "", + } + + lastUsage = response.usage + yield this.processUsageMetrics(response.usage, modelInfo) - yield this.processUsageMetrics(response.usage, modelInfo) + // Log successful response + logHandle.success({ + text: accumulatedText.join(""), + reasoning: accumulatedReasoning.length > 0 ? accumulatedReasoning.join("") : undefined, + toolCalls: toolCalls.length > 0 ? toolCalls : undefined, + usage: lastUsage, + }) + } catch (error) { + logHandle.error(error) + throw error + } } } @@ -339,6 +414,11 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl const modelInfo = this.getModel().info const methodIsAzureAiInference = this._isAzureAiInference(this.options.openAiBaseUrl) + // Accumulators for response logging + const accumulatedText: string[] = [] + const toolCalls: Array<{ id?: string; name?: string }> = [] + let lastUsage: any + if (this.options.openAiStreamingEnabled ?? true) { const isGrokXAI = this._isGrokXAI(this.options.openAiBaseUrl) @@ -367,17 +447,73 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl // This allows O3 models to limit response length when includeMaxTokens is enabled this.addMaxTokensIfNeeded(requestOptions, modelInfo) - let stream + // Start inference logging with actual request params + const logHandle = this.inferenceLogger.start( + { + provider: this.providerName, + operation: "createMessage", + model: modelId, + }, + requestOptions, + ) + try { - stream = await this.client.chat.completions.create( - requestOptions, - methodIsAzureAiInference ? { path: OPENAI_AZURE_AI_INFERENCE_PATH } : {}, - ) + let stream + try { + stream = await this.client.chat.completions.create( + requestOptions, + methodIsAzureAiInference ? 
{ path: OPENAI_AZURE_AI_INFERENCE_PATH } : {}, + ) + } catch (error) { + throw handleOpenAIError(error, this.providerName) + } + + const activeToolCallIds = new Set() + + for await (const chunk of stream) { + const delta = chunk.choices?.[0]?.delta + const finishReason = chunk.choices?.[0]?.finish_reason + + if (delta) { + if (delta.content) { + accumulatedText.push(delta.content) + yield { + type: "text", + text: delta.content, + } + } + + // Track tool calls for logging and use processToolCalls for proper tool_call_end events + if (delta.tool_calls) { + for (const toolCall of delta.tool_calls) { + if (toolCall.id || toolCall.function?.name) { + toolCalls.push({ id: toolCall.id, name: toolCall.function?.name }) + } + } + } + yield* this.processToolCalls(delta, finishReason, activeToolCallIds) + } + + if (chunk.usage) { + lastUsage = chunk.usage + yield { + type: "usage", + inputTokens: chunk.usage.prompt_tokens || 0, + outputTokens: chunk.usage.completion_tokens || 0, + } + } + } + + // Log successful response + logHandle.success({ + text: accumulatedText.join(""), + toolCalls: toolCalls.length > 0 ? toolCalls : undefined, + usage: lastUsage, + }) } catch (error) { - throw handleOpenAIError(error, this.providerName) + logHandle.error(error) + throw error } - - yield* this.handleStreamResponse(stream) } else { const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = { model: modelId, @@ -402,35 +538,61 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl // This allows O3 models to limit response length when includeMaxTokens is enabled this.addMaxTokensIfNeeded(requestOptions, modelInfo) - let response + // Start inference logging with actual request params + const logHandle = this.inferenceLogger.start( + { + provider: this.providerName, + operation: "createMessage", + model: modelId, + }, + requestOptions, + ) + try { - response = await this.client.chat.completions.create( - requestOptions, - methodIsAzureAiInference ? { path: OPENAI_AZURE_AI_INFERENCE_PATH } : {}, - ) - } catch (error) { - throw handleOpenAIError(error, this.providerName) - } + let response + try { + response = await this.client.chat.completions.create( + requestOptions, + methodIsAzureAiInference ? { path: OPENAI_AZURE_AI_INFERENCE_PATH } : {}, + ) + } catch (error) { + throw handleOpenAIError(error, this.providerName) + } - const message = response.choices?.[0]?.message - if (message?.tool_calls) { - for (const toolCall of message.tool_calls) { - if (toolCall.type === "function") { - yield { - type: "tool_call", - id: toolCall.id, - name: toolCall.function.name, - arguments: toolCall.function.arguments, + const message = response.choices?.[0]?.message + if (message?.tool_calls) { + for (const toolCall of message.tool_calls) { + if (toolCall.type === "function") { + toolCalls.push({ id: toolCall.id, name: toolCall.function.name }) + yield { + type: "tool_call", + id: toolCall.id, + name: toolCall.function.name, + arguments: toolCall.function.arguments, + } } } } - } - yield { - type: "text", - text: message?.content || "", + accumulatedText.push(message?.content || "") + yield { + type: "text", + text: message?.content || "", + } + + lastUsage = response.usage + yield this.processUsageMetrics(response.usage) + + // Log successful response + logHandle.success({ + text: accumulatedText.join(""), + toolCalls: toolCalls.length > 0 ? 
toolCalls : undefined, + usage: lastUsage, + }) + } catch (error) { + logHandle.error(error) + throw error } - yield this.processUsageMetrics(response.usage) } } diff --git a/src/api/providers/openrouter.ts b/src/api/providers/openrouter.ts index 5b8c29c337a..bb6835d6455 100644 --- a/src/api/providers/openrouter.ts +++ b/src/api/providers/openrouter.ts @@ -140,7 +140,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH private client: OpenAI protected models: ModelRecord = {} protected endpoints: ModelRecord = {} - private readonly providerName = "OpenRouter" + protected readonly providerName = "OpenRouter" private currentReasoningDetails: any[] = [] constructor(options: ApiHandlerOptions) { @@ -322,186 +322,223 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH ? { headers: { "x-anthropic-beta": "fine-grained-tool-streaming-2025-05-14" } } : undefined - let stream - try { - stream = await this.client.chat.completions.create(completionParams, requestOptions) - } catch (error) { - // Try to parse as OpenRouter error structure using Zod - const parseResult = OpenRouterErrorResponseSchema.safeParse(error) - - if (parseResult.success && parseResult.data.error) { - const openRouterError = parseResult.data - const rawString = openRouterError.error?.metadata?.raw - const parsedError = extractErrorFromMetadataRaw(rawString) - const rawErrorMessage = parsedError || openRouterError.error?.message || "Unknown error" - - const apiError = Object.assign( - new ApiProviderError( - rawErrorMessage, - this.providerName, - modelId, - "createMessage", - openRouterError.error?.code, - ), - { - status: openRouterError.error?.code, - error: openRouterError.error, - }, - ) + // Start inference logging with actual request params + const logHandle = this.inferenceLogger.start( + { + provider: this.providerName, + operation: "createMessage", + model: modelId, + }, + { completionParams, requestOptions }, + ) - TelemetryService.instance.captureException(apiError) - throw handleOpenAIError(error, this.providerName) - } else { - // Fallback for non-OpenRouter errors - const errorMessage = error instanceof Error ? 
error.message : String(error) - const apiError = new ApiProviderError(errorMessage, this.providerName, modelId, "createMessage") - TelemetryService.instance.captureException(apiError) - throw handleOpenAIError(error, this.providerName) - } - } + // Accumulators for response logging + const accumulatedText: string[] = [] + const accumulatedReasoning: string[] = [] + const toolCalls: Array<{ id?: string; name?: string }> = [] - let lastUsage: CompletionUsage | undefined = undefined - // Accumulator for reasoning_details: accumulate text by type-index key - const reasoningDetailsAccumulator = new Map< - string, - { - type: string - text?: string - summary?: string - data?: string - id?: string | null - format?: string - signature?: string - index: number + try { + let stream + try { + stream = await this.client.chat.completions.create(completionParams, requestOptions) + } catch (error) { + // Try to parse as OpenRouter error structure using Zod + const parseResult = OpenRouterErrorResponseSchema.safeParse(error) + + if (parseResult.success && parseResult.data.error) { + const openRouterError = parseResult.data + const rawString = openRouterError.error?.metadata?.raw + const parsedError = extractErrorFromMetadataRaw(rawString) + const rawErrorMessage = parsedError || openRouterError.error?.message || "Unknown error" + + const apiError = Object.assign( + new ApiProviderError( + rawErrorMessage, + this.providerName, + modelId, + "createMessage", + openRouterError.error?.code, + ), + { + status: openRouterError.error?.code, + error: openRouterError.error, + }, + ) + + TelemetryService.instance.captureException(apiError) + throw handleOpenAIError(error, this.providerName) + } else { + // Fallback for non-OpenRouter errors + const errorMessage = error instanceof Error ? error.message : String(error) + const apiError = new ApiProviderError(errorMessage, this.providerName, modelId, "createMessage") + TelemetryService.instance.captureException(apiError) + throw handleOpenAIError(error, this.providerName) + } } - >() - for await (const chunk of stream) { - // OpenRouter returns an error object instead of the OpenAI SDK throwing an error. - if ("error" in chunk) { - this.handleStreamingError(chunk.error as OpenRouterError, modelId, "createMessage") - } + let lastUsage: CompletionUsage | undefined = undefined + // Accumulator for reasoning_details: accumulate text by type-index key + const reasoningDetailsAccumulator = new Map< + string, + { + type: string + text?: string + summary?: string + data?: string + id?: string | null + format?: string + signature?: string + index: number + } + >() - const delta = chunk.choices[0]?.delta - const finishReason = chunk.choices[0]?.finish_reason - - if (delta) { - // Handle reasoning_details array format (used by Gemini 3, Claude, OpenAI o-series, etc.) - // See: https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks - // Priority: Check for reasoning_details first, as it's the newer format - const deltaWithReasoning = delta as typeof delta & { - reasoning_details?: Array<{ - type: string - text?: string - summary?: string - data?: string - id?: string | null - format?: string - signature?: string - index?: number - }> + for await (const chunk of stream) { + // OpenRouter returns an error object instead of the OpenAI SDK throwing an error. 
+ if ("error" in chunk) { + logHandle.error(chunk.error) + this.handleStreamingError(chunk.error as OpenRouterError, modelId, "createMessage") } - if (deltaWithReasoning.reasoning_details && Array.isArray(deltaWithReasoning.reasoning_details)) { - for (const detail of deltaWithReasoning.reasoning_details) { - const index = detail.index ?? 0 - const key = `${detail.type}-${index}` - const existing = reasoningDetailsAccumulator.get(key) + const delta = chunk.choices[0]?.delta + const finishReason = chunk.choices[0]?.finish_reason + + if (delta) { + // Handle reasoning_details array format (used by Gemini 3, Claude, OpenAI o-series, etc.) + // See: https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks + // Priority: Check for reasoning_details first, as it's the newer format + const deltaWithReasoning = delta as typeof delta & { + reasoning_details?: Array<{ + type: string + text?: string + summary?: string + data?: string + id?: string | null + format?: string + signature?: string + index?: number + }> + } - if (existing) { - // Accumulate text/summary/data for existing reasoning detail - if (detail.text !== undefined) { - existing.text = (existing.text || "") + detail.text + if (deltaWithReasoning.reasoning_details && Array.isArray(deltaWithReasoning.reasoning_details)) { + for (const detail of deltaWithReasoning.reasoning_details) { + const index = detail.index ?? 0 + const key = `${detail.type}-${index}` + const existing = reasoningDetailsAccumulator.get(key) + + if (existing) { + // Accumulate text/summary/data for existing reasoning detail + if (detail.text !== undefined) { + existing.text = (existing.text || "") + detail.text + } + if (detail.summary !== undefined) { + existing.summary = (existing.summary || "") + detail.summary + } + if (detail.data !== undefined) { + existing.data = (existing.data || "") + detail.data + } + // Update other fields if provided + if (detail.id !== undefined) existing.id = detail.id + if (detail.format !== undefined) existing.format = detail.format + if (detail.signature !== undefined) existing.signature = detail.signature + } else { + // Start new reasoning detail accumulation + reasoningDetailsAccumulator.set(key, { + type: detail.type, + text: detail.text, + summary: detail.summary, + data: detail.data, + id: detail.id, + format: detail.format, + signature: detail.signature, + index, + }) } - if (detail.summary !== undefined) { - existing.summary = (existing.summary || "") + detail.summary + + // Yield text for display (still fragmented for live streaming) and accumulate + let reasoningText: string | undefined + if (detail.type === "reasoning.text" && typeof detail.text === "string") { + reasoningText = detail.text + } else if (detail.type === "reasoning.summary" && typeof detail.summary === "string") { + reasoningText = detail.summary } - if (detail.data !== undefined) { - existing.data = (existing.data || "") + detail.data + // Note: reasoning.encrypted types are intentionally skipped as they contain redacted content + + if (reasoningText) { + accumulatedReasoning.push(reasoningText) + yield { type: "reasoning", text: reasoningText } } - // Update other fields if provided - if (detail.id !== undefined) existing.id = detail.id - if (detail.format !== undefined) existing.format = detail.format - if (detail.signature !== undefined) existing.signature = detail.signature - } else { - // Start new reasoning detail accumulation - reasoningDetailsAccumulator.set(key, { - type: detail.type, - text: detail.text, - summary: 
detail.summary, - data: detail.data, - id: detail.id, - format: detail.format, - signature: detail.signature, - index, - }) } + } else if ("reasoning" in delta && delta.reasoning && typeof delta.reasoning === "string") { + // Handle legacy reasoning format - only if reasoning_details is not present + // See: https://openrouter.ai/docs/use-cases/reasoning-tokens + accumulatedReasoning.push(delta.reasoning) + yield { type: "reasoning", text: delta.reasoning } + } - // Yield text for display (still fragmented for live streaming) - let reasoningText: string | undefined - if (detail.type === "reasoning.text" && typeof detail.text === "string") { - reasoningText = detail.text - } else if (detail.type === "reasoning.summary" && typeof detail.summary === "string") { - reasoningText = detail.summary + // Emit raw tool call chunks - NativeToolCallParser handles state management + if ("tool_calls" in delta && Array.isArray(delta.tool_calls)) { + for (const toolCall of delta.tool_calls) { + if (toolCall.id || toolCall.function?.name) { + toolCalls.push({ id: toolCall.id, name: toolCall.function?.name }) + } + yield { + type: "tool_call_partial", + index: toolCall.index, + id: toolCall.id, + name: toolCall.function?.name, + arguments: toolCall.function?.arguments, + } } - // Note: reasoning.encrypted types are intentionally skipped as they contain redacted content + } - if (reasoningText) { - yield { type: "reasoning", text: reasoningText } - } + if (delta.content) { + accumulatedText.push(delta.content) + yield { type: "text", text: delta.content } } - } else if ("reasoning" in delta && delta.reasoning && typeof delta.reasoning === "string") { - // Handle legacy reasoning format - only if reasoning_details is not present - // See: https://openrouter.ai/docs/use-cases/reasoning-tokens - yield { type: "reasoning", text: delta.reasoning } } - // Emit raw tool call chunks - NativeToolCallParser handles state management - if ("tool_calls" in delta && Array.isArray(delta.tool_calls)) { - for (const toolCall of delta.tool_calls) { - yield { - type: "tool_call_partial", - index: toolCall.index, - id: toolCall.id, - name: toolCall.function?.name, - arguments: toolCall.function?.arguments, - } + // Process finish_reason to emit tool_call_end events + // This ensures tool calls are finalized even if the stream doesn't properly close + if (finishReason) { + const endEvents = NativeToolCallParser.processFinishReason(finishReason) + for (const event of endEvents) { + yield event } } - if (delta.content) { - yield { type: "text", text: delta.content } + if (chunk.usage) { + lastUsage = chunk.usage } } - // Process finish_reason to emit tool_call_end events - // This ensures tool calls are finalized even if the stream doesn't properly close - if (finishReason) { - const endEvents = NativeToolCallParser.processFinishReason(finishReason) - for (const event of endEvents) { - yield event - } + // After streaming completes, store the accumulated reasoning_details + if (reasoningDetailsAccumulator.size > 0) { + this.currentReasoningDetails = Array.from(reasoningDetailsAccumulator.values()) } - if (chunk.usage) { - lastUsage = chunk.usage + if (lastUsage) { + yield { + type: "usage", + inputTokens: lastUsage.prompt_tokens || 0, + outputTokens: lastUsage.completion_tokens || 0, + cacheReadTokens: lastUsage.prompt_tokens_details?.cached_tokens, + reasoningTokens: lastUsage.completion_tokens_details?.reasoning_tokens, + totalCost: (lastUsage.cost_details?.upstream_inference_cost || 0) + (lastUsage.cost || 0), + } } - } - - // 
After streaming completes, store the accumulated reasoning_details - if (reasoningDetailsAccumulator.size > 0) { - this.currentReasoningDetails = Array.from(reasoningDetailsAccumulator.values()) - } - if (lastUsage) { - yield { - type: "usage", - inputTokens: lastUsage.prompt_tokens || 0, - outputTokens: lastUsage.completion_tokens || 0, - cacheReadTokens: lastUsage.prompt_tokens_details?.cached_tokens, - reasoningTokens: lastUsage.completion_tokens_details?.reasoning_tokens, - totalCost: (lastUsage.cost_details?.upstream_inference_cost || 0) + (lastUsage.cost || 0), - } + // Log successful response + logHandle.success({ + text: accumulatedText.join(""), + reasoning: accumulatedReasoning.length > 0 ? accumulatedReasoning.join("") : undefined, + toolCalls: toolCalls.length > 0 ? toolCalls : undefined, + usage: lastUsage, + reasoningDetails: + reasoningDetailsAccumulator.size > 0 ? Array.from(reasoningDetailsAccumulator.values()) : undefined, + }) + } catch (error) { + logHandle.error(error) + throw error } } diff --git a/src/api/providers/qwen-code.ts b/src/api/providers/qwen-code.ts index 8f26273ebaf..cbeaac223f2 100644 --- a/src/api/providers/qwen-code.ts +++ b/src/api/providers/qwen-code.ts @@ -52,6 +52,7 @@ function objectToUrlEncoded(data: Record): string { } export class QwenCodeHandler extends BaseProvider implements SingleCompletionHandler { + protected readonly providerName = "Qwen Code" protected options: QwenCodeHandlerOptions private credentials: QwenOAuthCredentials | null = null private client: OpenAI | undefined diff --git a/src/api/providers/requesty.ts b/src/api/providers/requesty.ts index 85efeb800f1..f2f9a213e57 100644 --- a/src/api/providers/requesty.ts +++ b/src/api/providers/requesty.ts @@ -61,7 +61,7 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHan protected models: ModelRecord = {} private client: OpenAI private baseURL: string - private readonly providerName = "Requesty" + protected readonly providerName = "Requesty" constructor(options: ApiHandlerOptions) { super() diff --git a/src/api/providers/roo.ts b/src/api/providers/roo.ts index 83eab87ef7e..c392a11aca4 100644 --- a/src/api/providers/roo.ts +++ b/src/api/providers/roo.ts @@ -125,6 +125,41 @@ export class RooHandler extends BaseOpenAiCompatibleProvider { messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, ): ApiStream { + const { id: model, info } = this.getModel() + + // Get model parameters for logging + const params = getModelParams({ + format: "openai", + modelId: model, + model: info, + settings: this.options, + defaultTemperature: this.defaultTemperature, + }) + + // Start inference logging + const logHandle = this.inferenceLogger.start( + { + provider: this.providerName, + operation: "createMessage", + model, + taskId: metadata?.taskId, + }, + { + model, + maxTokens: params.maxTokens, + temperature: params.temperature, + messageCount: messages.length, + hasTools: !!metadata?.tools, + toolCount: metadata?.tools?.length ?? 
0, + toolChoice: metadata?.tool_choice, + }, + ) + + // Accumulators for final response logging + const accumulatedText: string[] = [] + const accumulatedReasoning: string[] = [] + const toolCalls: Array<{ id?: string; name?: string }> = [] + try { // Reset reasoning_details accumulator for this request this.currentReasoningDetails = [] @@ -221,17 +256,20 @@ export class RooHandler extends BaseOpenAiCompatibleProvider { // Note: reasoning.encrypted types are intentionally skipped as they contain redacted content if (reasoningText) { + accumulatedReasoning.push(reasoningText) yield { type: "reasoning", text: reasoningText } } } } else if ("reasoning" in delta && delta.reasoning && typeof delta.reasoning === "string") { // Handle legacy reasoning format - only if reasoning_details is not present + accumulatedReasoning.push(delta.reasoning) yield { type: "reasoning", text: delta.reasoning, } } else if ("reasoning_content" in delta && typeof delta.reasoning_content === "string") { // Also check for reasoning_content for backward compatibility + accumulatedReasoning.push(delta.reasoning_content) yield { type: "reasoning", text: delta.reasoning_content, @@ -241,6 +279,10 @@ export class RooHandler extends BaseOpenAiCompatibleProvider { // Emit raw tool call chunks - NativeToolCallParser handles state management if ("tool_calls" in delta && Array.isArray(delta.tool_calls)) { for (const toolCall of delta.tool_calls) { + // Track tool calls for logging + if (toolCall.id || toolCall.function?.name) { + toolCalls.push({ id: toolCall.id, name: toolCall.function?.name }) + } yield { type: "tool_call_partial", index: toolCall.index, @@ -252,6 +294,7 @@ export class RooHandler extends BaseOpenAiCompatibleProvider { } if (delta.content) { + accumulatedText.push(delta.content) yield { type: "text", text: delta.content, @@ -303,7 +346,17 @@ export class RooHandler extends BaseOpenAiCompatibleProvider { totalCost: isFreeModel ? 0 : (lastUsage.cost ?? 0), } } + + // Log successful response + logHandle.success({ + text: accumulatedText.join(""), + reasoning: accumulatedReasoning.length > 0 ? accumulatedReasoning.join("") : undefined, + toolCalls: toolCalls.length > 0 ? toolCalls : undefined, + usage: lastUsage, + }) } catch (error) { + logHandle.error(error) + const errorContext = { error: error instanceof Error ? error.message : String(error), stack: error instanceof Error ? 
error.stack : undefined, diff --git a/src/api/providers/router-provider.ts b/src/api/providers/router-provider.ts index 01942e21723..8b56ca9e1a6 100644 --- a/src/api/providers/router-provider.ts +++ b/src/api/providers/router-provider.ts @@ -22,6 +22,10 @@ type RouterProviderOptions = { export abstract class RouterProvider extends BaseProvider { protected readonly options: ApiHandlerOptions protected readonly name: RouterName + // Implement abstract providerName from BaseProvider using name + protected get providerName(): string { + return this.name + } protected models: ModelRecord = {} protected readonly modelId?: string protected readonly defaultModelId: string diff --git a/src/api/providers/vscode-lm.ts b/src/api/providers/vscode-lm.ts index 5c598ccd012..8770bbf1ddf 100644 --- a/src/api/providers/vscode-lm.ts +++ b/src/api/providers/vscode-lm.ts @@ -61,6 +61,7 @@ function convertToVsCodeLmTools(tools: OpenAI.Chat.ChatCompletionTool[]): vscode * ``` */ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHandler { + protected readonly providerName = "VS Code LM" protected options: ApiHandlerOptions private client: vscode.LanguageModelChat | null private disposable: vscode.Disposable | null diff --git a/src/api/providers/xai.ts b/src/api/providers/xai.ts index a1377a1317a..aa75f643e3a 100644 --- a/src/api/providers/xai.ts +++ b/src/api/providers/xai.ts @@ -21,7 +21,7 @@ const XAI_DEFAULT_TEMPERATURE = 0 export class XAIHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions private client: OpenAI - private readonly providerName = "xAI" + protected readonly providerName = "xAI" constructor(options: ApiHandlerOptions) { super() diff --git a/src/core/condense/__tests__/condense.spec.ts b/src/core/condense/__tests__/condense.spec.ts index 2558ee5b33a..e795f5890d9 100644 --- a/src/core/condense/__tests__/condense.spec.ts +++ b/src/core/condense/__tests__/condense.spec.ts @@ -15,6 +15,7 @@ import { // Create a mock ApiHandler for testing class MockApiHandler extends BaseProvider { + protected readonly providerName = "Mock" createMessage(): any { // Mock implementation for testing - returns an async iterable stream const mockStream = { diff --git a/src/core/context-management/__tests__/context-management.spec.ts b/src/core/context-management/__tests__/context-management.spec.ts index 3ee36fc5956..4dfec368b44 100644 --- a/src/core/context-management/__tests__/context-management.spec.ts +++ b/src/core/context-management/__tests__/context-management.spec.ts @@ -19,6 +19,7 @@ import { // Create a mock ApiHandler for testing class MockApiHandler extends BaseProvider { + protected readonly providerName = "Mock" createMessage(): any { // Mock implementation for testing - returns an async iterable stream const mockStream = { diff --git a/src/extension.ts b/src/extension.ts index dcb941fa581..cce11e9813d 100644 --- a/src/extension.ts +++ b/src/extension.ts @@ -2,11 +2,14 @@ import * as vscode from "vscode" import * as dotenvx from "@dotenvx/dotenvx" import * as path from "path" -// Load environment variables from .env file +// Load environment variables from .env and .env.local files try { - // Specify path to .env file in the project root directory + // Specify paths to .env and .env.local files in the project root directory const envPath = path.join(__dirname, "..", ".env") + const envLocalPath = path.join(__dirname, "..", ".env.local") + // Load .env first, then .env.local (so .env.local can override) dotenvx.config({ path: 
envPath }) + dotenvx.config({ path: envLocalPath, override: true }) } catch (e) { // Silently handle environment loading errors console.warn("Failed to load environment variables:", e) @@ -19,6 +22,7 @@ import { customToolRegistry } from "@roo-code/core" import "./utils/path" // Necessary to have access to String.prototype.toPosix. import { createOutputChannelLogger, createDualLogger } from "./utils/outputChannelLogger" +import { ApiInferenceLogger } from "./api/logging/ApiInferenceLogger" import { Package } from "./shared/package" import { formatLanguage } from "./shared/language" @@ -68,6 +72,12 @@ export async function activate(context: vscode.ExtensionContext) { context.subscriptions.push(outputChannel) outputChannel.appendLine(`${Package.name} extension activated - ${JSON.stringify(Package)}`) + // Configure API inference logger for debugging (enable via ROO_CODE_API_LOGGING=true) + ApiInferenceLogger.configure({ + enabled: process.env.ROO_CODE_API_LOGGING === "true", + sink: createOutputChannelLogger(outputChannel), + }) + // Set extension path for custom tool registry to find bundled esbuild customToolRegistry.setExtensionPath(context.extensionPath)
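
For reference, a minimal sketch of the pattern each provider hunk above repeats: wrap the request in start(), accumulate streamed chunks, and report the assembled result via success() or the thrown error via error(). It assumes BaseProvider exposes the ApiInferenceLogger singleton as this.inferenceLogger (that declaration is outside this excerpt), and the import path is illustrative.

// Sketch only, not part of the diff above.
// Assumptions: the relative import path, and that providers reach the same
// singleton that extension.ts configures via ApiInferenceLogger.configure().
import { ApiInferenceLogger } from "../logging/ApiInferenceLogger"

async function* createMessageWithLogging(providerName: string, modelId: string, requestParams: unknown) {
	// start() logs the request when ROO_CODE_API_LOGGING=true and returns a handle
	// tied to a generated requestId.
	const logHandle = ApiInferenceLogger.start(
		{ provider: providerName, operation: "createMessage", model: modelId },
		requestParams,
	)

	// Accumulate streamed chunks so only the final assembled response is logged,
	// never the individual deltas.
	const accumulatedText: string[] = []

	try {
		for (const piece of ["Hello", ", ", "world"]) {
			// Stand-in for chunks arriving from the upstream SDK stream.
			accumulatedText.push(piece)
			yield { type: "text", text: piece }
		}

		logHandle.success({ text: accumulatedText.join("") })
	} catch (error) {
		// Failures are logged under the same requestId, then re-thrown to the caller.
		logHandle.error(error)
		throw error
	}
}

Enabling this locally amounts to adding ROO_CODE_API_LOGGING=true to .env.local, which the updated extension.ts loads on top of .env; output is written to the extension's Output Channel through createOutputChannelLogger.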