diff --git a/src/api/logging/ApiInferenceLogger.ts b/src/api/logging/ApiInferenceLogger.ts new file mode 100644 index 00000000000..725dc6480f8 --- /dev/null +++ b/src/api/logging/ApiInferenceLogger.ts @@ -0,0 +1,359 @@ +/** + * Lightweight logger for API inference requests/responses. + * + * This logger is designed to capture raw inference inputs/outputs across providers + * for debugging purposes. It emits structured objects to a configurable sink. + * + * For streaming requests, only the final assembled response is logged (not individual chunks). + * + * Enable via environment variable: process.env.ROO_CODE_API_LOGGING === "true" + */ + +export interface ApiInferenceLoggerConfig { + enabled: boolean + sink: (...args: unknown[]) => void +} + +export interface ApiInferenceContext { + provider: string + operation: string + model?: string + taskId?: string + requestId?: string +} + +export interface ApiInferenceHandle { + success: (responsePayload: unknown) => void + error: (errorPayload: unknown) => void +} + +/** + * Configuration for payload size limiting to avoid freezing the Output Channel. + */ +const PAYLOAD_LIMITS = { + /** Maximum string length before truncation */ + MAX_STRING_LENGTH: 10_000, + /** Maximum array entries to log */ + MAX_ARRAY_LENGTH: 200, + /** Maximum object keys to log */ + MAX_OBJECT_KEYS: 200, +} + +/** + * Regex pattern for detecting base64 image data URLs. + */ +const BASE64_IMAGE_PATTERN = /^data:image\/[^;]+;base64,/ + +/** + * Secret field patterns to redact in logged payloads. + * Case-insensitive matching is applied. + * Note: Patterns are designed to avoid false positives (e.g., "inputTokens" should not be redacted). + */ +const SECRET_PATTERNS = [ + "authorization", + "apikey", + "api_key", + "x-api-key", + "access_token", + "accesstoken", + "bearer", + "secret", + "password", + "credential", +] + +/** + * Patterns that indicate a field is NOT a secret (allowlist). + * These are checked before secret patterns to prevent false positives. + */ +const NON_SECRET_PATTERNS = ["inputtokens", "outputtokens", "cachetokens", "reasoningtokens", "totaltokens"] + +/** + * Check if a key name looks like a secret field. + */ +function isSecretKey(key: string): boolean { + const lowerKey = key.toLowerCase() + // Check allowlist first to avoid false positives + if (NON_SECRET_PATTERNS.some((pattern) => lowerKey.includes(pattern))) { + return false + } + return SECRET_PATTERNS.some((pattern) => lowerKey.includes(pattern)) +} + +/** + * Truncate a string if it exceeds the maximum length. + * Also replaces base64 image data with a placeholder. + */ +function sanitizeString(str: string): string { + // Check for base64 image data URLs first + if (BASE64_IMAGE_PATTERN.test(str)) { + return `[ImageData len=${str.length}]` + } + + // Truncate long strings + if (str.length > PAYLOAD_LIMITS.MAX_STRING_LENGTH) { + return `[Truncated len=${str.length}]` + } + + return str +} + +/** + * Recursively sanitize and redact secrets from an object. + * Applies size limiting to prevent Output Channel from freezing: + * - Strings longer than MAX_STRING_LENGTH are truncated + * - Arrays longer than MAX_ARRAY_LENGTH are capped + * - Objects with more than MAX_OBJECT_KEYS are capped + * - Base64 image data URLs are replaced with placeholders + * - Secret fields are redacted + * Returns a sanitized copy of the object. 
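+ *
+ * Illustrative example (values are placeholders, not real credentials):
+ * @example
+ * sanitizePayload({ apiKey: "sk-secret", prompt: "hi", image: "data:image/png;base64,iVBOR..." })
+ * // => { apiKey: "[REDACTED]", prompt: "hi", image: "[ImageData len=<original string length>]" }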
+ */ +function sanitizePayload(obj: unknown, visited = new WeakSet()): unknown { + if (obj === null || obj === undefined) { + return obj + } + + // Handle strings + if (typeof obj === "string") { + return sanitizeString(obj) + } + + // Handle other primitives + if (typeof obj !== "object") { + return obj + } + + // Prevent infinite recursion on circular references + if (visited.has(obj as object)) { + return "[Circular Reference]" + } + visited.add(obj as object) + + // Handle arrays with length limiting + if (Array.isArray(obj)) { + const maxLen = PAYLOAD_LIMITS.MAX_ARRAY_LENGTH + if (obj.length > maxLen) { + const truncated = obj.slice(0, maxLen).map((item) => sanitizePayload(item, visited)) + truncated.push(`[...${obj.length - maxLen} more items]`) + return truncated + } + return obj.map((item) => sanitizePayload(item, visited)) + } + + // Handle objects with key limiting + const entries = Object.entries(obj as Record<string, unknown>) + const maxKeys = PAYLOAD_LIMITS.MAX_OBJECT_KEYS + const result: Record<string, unknown> = {} + let keyCount = 0 + + for (const [key, value] of entries) { + if (keyCount >= maxKeys) { + result["[...]"] = `${entries.length - maxKeys} more keys omitted` + break + } + + if (isSecretKey(key)) { + result[key] = "[REDACTED]" + } else if (typeof value === "string") { + result[key] = sanitizeString(value) + } else if (typeof value === "object" && value !== null) { + result[key] = sanitizePayload(value, visited) + } else { + result[key] = value + } + + keyCount++ + } + + return result +} + +/** + * Generate a unique request ID. + */ +function generateRequestId(): string { + return `req_${Date.now()}_${Math.random().toString(36).slice(2, 11)}` +} + +/** + * Singleton logger class for API inference logging. + */ +class ApiInferenceLoggerSingleton { + private enabled = false + private sink: ((...args: unknown[]) => void) | null = null + + /** + * Configure the logger with enabled state and output sink. + * Should be called once during extension activation. + */ + configure(config: ApiInferenceLoggerConfig): void { + this.enabled = config.enabled + this.sink = config.enabled ? config.sink : null + } + + /** + * Check if logging is currently enabled. + */ + isEnabled(): boolean { + return this.enabled && this.sink !== null + } + + /** + * Start logging an API inference request. + * Returns a handle to log the response or error. + * + * @param context - Context information about the request + * @param requestPayload - The request payload to log + * @returns A handle with success() and error() methods + */ + start(context: ApiInferenceContext, requestPayload: unknown): ApiInferenceHandle { + const requestId = context.requestId ?? generateRequestId() + const startTime = Date.now() + const startTimestamp = new Date().toISOString() + + // Log the request + if (this.isEnabled()) { + this.logRequest({ + ...context, + requestId, + timestamp: startTimestamp, + payload: requestPayload, + }) + } + + return { + success: (responsePayload: unknown) => { + if (this.isEnabled()) { + const endTime = Date.now() + this.logResponse({ + ...context, + requestId, + timestamp: new Date().toISOString(), + durationMs: endTime - startTime, + payload: responsePayload, + }) + } + }, + error: (errorPayload: unknown) => { + if (this.isEnabled()) { + const endTime = Date.now() + this.logError({ + ...context, + requestId, + timestamp: new Date().toISOString(), + durationMs: endTime - startTime, + error: errorPayload, + }) + } + }, + } + } + + /** + * Log a request with stable tag format.
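+ *
+ * Emits a single sink call shaped roughly like (values illustrative):
+ * @example
+ * sink("[API][request]", { provider: "Anthropic", operation: "createMessage", model: "claude-sonnet-4-5", requestId: "req_...", timestamp: "...", payload: { ... } })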
+ */ + private logRequest(data: { + provider: string + operation: string + model?: string + taskId?: string + requestId: string + timestamp: string + payload: unknown + }): void { + if (!this.sink) return + + try { + this.sink("[API][request]", { + provider: data.provider, + operation: data.operation, + model: data.model, + taskId: data.taskId, + requestId: data.requestId, + timestamp: data.timestamp, + payload: sanitizePayload(data.payload), + }) + } catch { + // Silently ignore logging errors to avoid breaking the application + } + } + + /** + * Log a successful response with stable tag format. + */ + private logResponse(data: { + provider: string + operation: string + model?: string + taskId?: string + requestId: string + timestamp: string + durationMs: number + payload: unknown + }): void { + if (!this.sink) return + + try { + this.sink("[API][response]", { + provider: data.provider, + operation: data.operation, + model: data.model, + taskId: data.taskId, + requestId: data.requestId, + timestamp: data.timestamp, + durationMs: data.durationMs, + payload: sanitizePayload(data.payload), + }) + } catch { + // Silently ignore logging errors to avoid breaking the application + } + } + + /** + * Log an error response with stable tag format. + */ + private logError(data: { + provider: string + operation: string + model?: string + taskId?: string + requestId: string + timestamp: string + durationMs: number + error: unknown + }): void { + if (!this.sink) return + + try { + // Handle Error objects specially + let errorData: unknown + if (data.error instanceof Error) { + errorData = { + name: data.error.name, + message: data.error.message, + stack: data.error.stack, + } + } else { + errorData = sanitizePayload(data.error) + } + + this.sink("[API][error]", { + provider: data.provider, + operation: data.operation, + model: data.model, + taskId: data.taskId, + requestId: data.requestId, + timestamp: data.timestamp, + durationMs: data.durationMs, + error: errorData, + }) + } catch { + // Silently ignore logging errors to avoid breaking the application + } + } +} + +/** + * Singleton instance of the API inference logger. 
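+ *
+ * Illustrative setup and use; the sink shown assumes a VS Code output channel, and
+ * `requestBody` / `responsePayload` are placeholders for the provider's actual payloads.
+ * @example
+ * ApiInferenceLogger.configure({
+ *   enabled: process.env.ROO_CODE_API_LOGGING === "true",
+ *   sink: (...args) => outputChannel.appendLine(JSON.stringify(args)),
+ * })
+ * const handle = ApiInferenceLogger.start({ provider: "OpenAI", operation: "createMessage" }, requestBody)
+ * handle.success(responsePayload) // or handle.error(error) on failure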
+ */ +export const ApiInferenceLogger = new ApiInferenceLoggerSingleton() diff --git a/src/api/logging/__tests__/ApiInferenceLogger.spec.ts b/src/api/logging/__tests__/ApiInferenceLogger.spec.ts new file mode 100644 index 00000000000..94d65d9630d --- /dev/null +++ b/src/api/logging/__tests__/ApiInferenceLogger.spec.ts @@ -0,0 +1,519 @@ +import { ApiInferenceLogger } from "../ApiInferenceLogger" + +describe("ApiInferenceLogger", () => { + let mockSink: ReturnType<typeof vi.fn> + + beforeEach(() => { + mockSink = vi.fn() + // Reset the logger to disabled state before each test + ApiInferenceLogger.configure({ enabled: false, sink: () => {} }) + }) + + describe("configure", () => { + it("should enable logging when configured with enabled=true", () => { + ApiInferenceLogger.configure({ enabled: true, sink: mockSink }) + expect(ApiInferenceLogger.isEnabled()).toBe(true) + }) + + it("should disable logging when configured with enabled=false", () => { + ApiInferenceLogger.configure({ enabled: false, sink: mockSink }) + expect(ApiInferenceLogger.isEnabled()).toBe(false) + }) + + it("should not log when disabled", () => { + ApiInferenceLogger.configure({ enabled: false, sink: mockSink }) + const handle = ApiInferenceLogger.start( + { provider: "test", operation: "createMessage" }, + { test: "payload" }, + ) + handle.success({ response: "data" }) + expect(mockSink).not.toHaveBeenCalled() + }) + }) + + describe("start", () => { + beforeEach(() => { + ApiInferenceLogger.configure({ enabled: true, sink: mockSink }) + }) + + it("should emit a request log with [API][request] tag", () => { + ApiInferenceLogger.start({ provider: "OpenAI", operation: "createMessage" }, { model: "gpt-4" }) + + expect(mockSink).toHaveBeenCalledTimes(1) + expect(mockSink).toHaveBeenCalledWith( + "[API][request]", + expect.objectContaining({ + provider: "OpenAI", + operation: "createMessage", + payload: expect.objectContaining({ model: "gpt-4" }), + }), + ) + }) + + it("should include context fields in the log", () => { + ApiInferenceLogger.start( + { + provider: "Anthropic", + operation: "createMessage", + model: "claude-3", + taskId: "task-123", + requestId: "req-456", + }, + { test: "data" }, + ) + + expect(mockSink).toHaveBeenCalledWith( + "[API][request]", + expect.objectContaining({ + provider: "Anthropic", + operation: "createMessage", + model: "claude-3", + taskId: "task-123", + requestId: "req-456", + }), + ) + }) + + it("should generate a requestId if not provided", () => { + ApiInferenceLogger.start({ provider: "test", operation: "test" }, {}) + + expect(mockSink).toHaveBeenCalledWith( + "[API][request]", + expect.objectContaining({ + requestId: expect.stringMatching(/^req_\d+_[a-z0-9]+$/), + }), + ) + }) + }) + + describe("success", () => { + beforeEach(() => { + ApiInferenceLogger.configure({ enabled: true, sink: mockSink }) + }) + + it("should emit a response log with [API][response] tag", () => { + const handle = ApiInferenceLogger.start({ provider: "OpenAI", operation: "createMessage" }, {}) + mockSink.mockClear() + + handle.success({ text: "Hello world", usage: { inputTokens: 10, outputTokens: 20 } }) + + expect(mockSink).toHaveBeenCalledTimes(1) + expect(mockSink).toHaveBeenCalledWith( + "[API][response]", + expect.objectContaining({ + provider: "OpenAI", + operation: "createMessage", + payload: expect.objectContaining({ + text: "Hello world", + usage: { inputTokens: 10, outputTokens: 20 }, + }), + }), + ) + }) + + it("should include duration in the response log", () => { + const handle = ApiInferenceLogger.start({
provider: "test", operation: "test" }, {}) + mockSink.mockClear() + + // Small delay to ensure measurable duration + handle.success({ response: "data" }) + + expect(mockSink).toHaveBeenCalledWith( + "[API][response]", + expect.objectContaining({ + durationMs: expect.any(Number), + }), + ) + }) + }) + + describe("error", () => { + beforeEach(() => { + ApiInferenceLogger.configure({ enabled: true, sink: mockSink }) + }) + + it("should emit an error log with [API][error] tag", () => { + const handle = ApiInferenceLogger.start({ provider: "OpenAI", operation: "createMessage" }, {}) + mockSink.mockClear() + + handle.error(new Error("API request failed")) + + expect(mockSink).toHaveBeenCalledTimes(1) + expect(mockSink).toHaveBeenCalledWith( + "[API][error]", + expect.objectContaining({ + provider: "OpenAI", + operation: "createMessage", + error: expect.objectContaining({ + name: "Error", + message: "API request failed", + }), + }), + ) + }) + + it("should include duration in the error log", () => { + const handle = ApiInferenceLogger.start({ provider: "test", operation: "test" }, {}) + mockSink.mockClear() + + handle.error({ code: 500, message: "Internal error" }) + + expect(mockSink).toHaveBeenCalledWith( + "[API][error]", + expect.objectContaining({ + durationMs: expect.any(Number), + }), + ) + }) + + it("should handle non-Error objects", () => { + const handle = ApiInferenceLogger.start({ provider: "test", operation: "test" }, {}) + mockSink.mockClear() + + handle.error({ status: 401, message: "Unauthorized" }) + + expect(mockSink).toHaveBeenCalledWith( + "[API][error]", + expect.objectContaining({ + error: expect.objectContaining({ + status: 401, + message: "Unauthorized", + }), + }), + ) + }) + }) + + describe("secret redaction", () => { + beforeEach(() => { + ApiInferenceLogger.configure({ enabled: true, sink: mockSink }) + }) + + it("should redact Authorization header", () => { + ApiInferenceLogger.start( + { provider: "test", operation: "test" }, + { headers: { Authorization: "Bearer sk-secret-key-12345" } }, + ) + + expect(mockSink).toHaveBeenCalledWith( + "[API][request]", + expect.objectContaining({ + payload: expect.objectContaining({ + headers: { Authorization: "[REDACTED]" }, + }), + }), + ) + }) + + it("should redact apiKey field", () => { + ApiInferenceLogger.start({ provider: "test", operation: "test" }, { apiKey: "sk-secret-12345" }) + + expect(mockSink).toHaveBeenCalledWith( + "[API][request]", + expect.objectContaining({ + payload: expect.objectContaining({ + apiKey: "[REDACTED]", + }), + }), + ) + }) + + it("should redact nested secret fields", () => { + ApiInferenceLogger.start( + { provider: "test", operation: "test" }, + { + config: { + auth: { + access_token: "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...", + api_key: "secret-api-key", + }, + }, + }, + ) + + expect(mockSink).toHaveBeenCalledWith( + "[API][request]", + expect.objectContaining({ + payload: expect.objectContaining({ + config: { + auth: { + access_token: "[REDACTED]", + api_key: "[REDACTED]", + }, + }, + }), + }), + ) + }) + + it("should redact secret fields in arrays", () => { + ApiInferenceLogger.start( + { provider: "test", operation: "test" }, + { + items: [{ apiKey: "secret1" }, { apiKey: "secret2" }], + }, + ) + + expect(mockSink).toHaveBeenCalledWith( + "[API][request]", + expect.objectContaining({ + payload: expect.objectContaining({ + items: [{ apiKey: "[REDACTED]" }, { apiKey: "[REDACTED]" }], + }), + }), + ) + }) + + it("should not redact non-secret fields", () => { + ApiInferenceLogger.start( + { 
provider: "test", operation: "test" }, + { model: "gpt-4", messages: [{ role: "user", content: "Hello" }] }, + ) + + expect(mockSink).toHaveBeenCalledWith( + "[API][request]", + expect.objectContaining({ + payload: expect.objectContaining({ + model: "gpt-4", + messages: [{ role: "user", content: "Hello" }], + }), + }), + ) + }) + }) + + describe("payload size limiting", () => { + beforeEach(() => { + ApiInferenceLogger.configure({ enabled: true, sink: mockSink }) + }) + + it("should truncate strings longer than 10,000 characters", () => { + const longString = "x".repeat(15_000) + ApiInferenceLogger.start({ provider: "test", operation: "test" }, { content: longString }) + + expect(mockSink).toHaveBeenCalledWith( + "[API][request]", + expect.objectContaining({ + payload: expect.objectContaining({ + content: "[Truncated len=15000]", + }), + }), + ) + }) + + it("should not truncate strings within the limit", () => { + const normalString = "x".repeat(5_000) + ApiInferenceLogger.start({ provider: "test", operation: "test" }, { content: normalString }) + + expect(mockSink).toHaveBeenCalledWith( + "[API][request]", + expect.objectContaining({ + payload: expect.objectContaining({ + content: normalString, + }), + }), + ) + }) + + it("should replace base64 image data with placeholder", () => { + const imageData = + "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==" + ApiInferenceLogger.start({ provider: "test", operation: "test" }, { image: imageData }) + + expect(mockSink).toHaveBeenCalledWith( + "[API][request]", + expect.objectContaining({ + payload: expect.objectContaining({ + image: expect.stringMatching(/^\[ImageData len=\d+\]$/), + }), + }), + ) + }) + + it("should replace base64 image data for various image types", () => { + ApiInferenceLogger.start( + { provider: "test", operation: "test" }, + { + png: "data:image/png;base64,abc123", + jpeg: "data:image/jpeg;base64,abc123", + gif: "data:image/gif;base64,abc123", + webp: "data:image/webp;base64,abc123", + }, + ) + + expect(mockSink).toHaveBeenCalledWith( + "[API][request]", + expect.objectContaining({ + payload: expect.objectContaining({ + png: expect.stringMatching(/^\[ImageData len=\d+\]$/), + jpeg: expect.stringMatching(/^\[ImageData len=\d+\]$/), + gif: expect.stringMatching(/^\[ImageData len=\d+\]$/), + webp: expect.stringMatching(/^\[ImageData len=\d+\]$/), + }), + }), + ) + }) + + it("should cap arrays longer than 200 entries", () => { + const longArray = Array.from({ length: 250 }, (_, i) => ({ id: i })) + ApiInferenceLogger.start({ provider: "test", operation: "test" }, { items: longArray }) + + const call = mockSink.mock.calls[0] + const payload = call[1].payload as { items: any[] } + + expect(payload.items.length).toBe(201) + expect(payload.items[200]).toBe("[...50 more items]") + }) + + it("should not cap arrays within the limit", () => { + const normalArray = Array.from({ length: 50 }, (_, i) => ({ id: i })) + ApiInferenceLogger.start({ provider: "test", operation: "test" }, { items: normalArray }) + + const call = mockSink.mock.calls[0] + const payload = call[1].payload as { items: any[] } + + expect(payload.items.length).toBe(50) + }) + + it("should cap objects with more than 200 keys", () => { + const bigObject: Record = {} + for (let i = 0; i < 250; i++) { + bigObject[`key${i}`] = i + } + ApiInferenceLogger.start({ provider: "test", operation: "test" }, bigObject) + + const call = mockSink.mock.calls[0] + const payload = call[1].payload as Record + + const 
keys = Object.keys(payload) + expect(keys.length).toBe(201) + expect(payload["[...]"]).toBe("50 more keys omitted") + }) + + it("should apply size limiting recursively in nested objects", () => { + const nested = { + level1: { + longString: "x".repeat(15_000), + level2: { + imageData: "data:image/png;base64,abc123", + }, + }, + } + ApiInferenceLogger.start({ provider: "test", operation: "test" }, nested) + + expect(mockSink).toHaveBeenCalledWith( + "[API][request]", + expect.objectContaining({ + payload: expect.objectContaining({ + level1: expect.objectContaining({ + longString: "[Truncated len=15000]", + level2: expect.objectContaining({ + imageData: expect.stringMatching(/^\[ImageData len=\d+\]$/), + }), + }), + }), + }), + ) + }) + + it("should apply size limiting recursively in arrays", () => { + const messages = [ + { role: "user", content: "x".repeat(15_000) }, + { role: "assistant", content: "data:image/png;base64,abc123" }, + ] + ApiInferenceLogger.start({ provider: "test", operation: "test" }, { messages }) + + expect(mockSink).toHaveBeenCalledWith( + "[API][request]", + expect.objectContaining({ + payload: expect.objectContaining({ + messages: [ + expect.objectContaining({ + role: "user", + content: "[Truncated len=15000]", + }), + expect.objectContaining({ + role: "assistant", + content: expect.stringMatching(/^\[ImageData len=\d+\]$/), + }), + ], + }), + }), + ) + }) + }) + + describe("edge cases", () => { + beforeEach(() => { + ApiInferenceLogger.configure({ enabled: true, sink: mockSink }) + }) + + it("should handle null and undefined values", () => { + expect(() => { + ApiInferenceLogger.start({ provider: "test", operation: "test" }, { value: null, other: undefined }) + }).not.toThrow() + + expect(mockSink).toHaveBeenCalledWith( + "[API][request]", + expect.objectContaining({ + payload: expect.objectContaining({ + value: null, + other: undefined, + }), + }), + ) + }) + + it("should handle empty objects", () => { + expect(() => { + ApiInferenceLogger.start({ provider: "test", operation: "test" }, {}) + }).not.toThrow() + }) + + it("should not throw on circular references", () => { + const obj: any = { name: "test" } + obj.self = obj + + expect(() => { + ApiInferenceLogger.start({ provider: "test", operation: "test" }, obj) + }).not.toThrow() + }) + + it("should handle primitive values in payload", () => { + expect(() => { + ApiInferenceLogger.start({ provider: "test", operation: "test" }, "string payload" as any) + }).not.toThrow() + }) + + it("should handle functions in payload without throwing", () => { + expect(() => { + ApiInferenceLogger.start( + { provider: "test", operation: "test" }, + { callback: () => console.log("test") }, + ) + }).not.toThrow() + }) + + it("should handle BigInt values without throwing", () => { + expect(() => { + ApiInferenceLogger.start( + { provider: "test", operation: "test" }, + { bigValue: BigInt(9007199254740991) }, + ) + }).not.toThrow() + }) + }) + + describe("sink error handling", () => { + it("should not throw if sink throws an error", () => { + const throwingSink = vi.fn(() => { + throw new Error("Sink error") + }) + ApiInferenceLogger.configure({ enabled: true, sink: throwingSink }) + + expect(() => { + ApiInferenceLogger.start({ provider: "test", operation: "test" }, {}) + }).not.toThrow() + }) + }) +}) diff --git a/src/api/providers/anthropic-vertex.ts b/src/api/providers/anthropic-vertex.ts index cbfae08f41e..bde42796192 100644 --- a/src/api/providers/anthropic-vertex.ts +++ b/src/api/providers/anthropic-vertex.ts @@ -30,6 +30,7 @@ 
import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from ". // https://docs.anthropic.com/en/api/claude-on-vertex-ai export class AnthropicVertexHandler extends BaseProvider implements SingleCompletionHandler { + protected readonly providerName = "Anthropic Vertex" protected options: ApiHandlerOptions private client: AnthropicVertex diff --git a/src/api/providers/anthropic.ts b/src/api/providers/anthropic.ts index 4faf341d28f..f3c8a514d10 100644 --- a/src/api/providers/anthropic.ts +++ b/src/api/providers/anthropic.ts @@ -33,7 +33,7 @@ import { export class AnthropicHandler extends BaseProvider implements SingleCompletionHandler { private options: ApiHandlerOptions private client: Anthropic - private readonly providerName = "Anthropic" + protected readonly providerName = "Anthropic" constructor(options: ApiHandlerOptions) { super() @@ -63,6 +63,11 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa reasoning: thinking, } = this.getModel() + // Accumulators for final response logging + const accumulatedText: string[] = [] + const accumulatedReasoning: string[] = [] + const toolCalls: Array<{ id?: string; name?: string }> = [] + // Filter out non-Anthropic blocks (reasoning, thoughtSignature, etc.) before sending to the API const sanitizedMessages = filterNonAnthropicBlocks(messages) @@ -93,251 +98,291 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa } : {} - switch (modelId) { - case "claude-sonnet-4-5": - case "claude-sonnet-4-20250514": - case "claude-opus-4-5-20251101": - case "claude-opus-4-1-20250805": - case "claude-opus-4-20250514": - case "claude-3-7-sonnet-20250219": - case "claude-3-5-sonnet-20241022": - case "claude-3-5-haiku-20241022": - case "claude-3-opus-20240229": - case "claude-haiku-4-5-20251001": - case "claude-3-haiku-20240307": { - /** - * The latest message will be the new user message, one before - * will be the assistant message from a previous request, and - * the user message before that will be a previously cached user - * message. So we need to mark the latest user message as - * ephemeral to cache it for the next request, and mark the - * second to last user message as ephemeral to let the server - * know the last message to retrieve from the cache for the - * current request. - */ - const userMsgIndices = sanitizedMessages.reduce( - (acc, msg, index) => (msg.role === "user" ? [...acc, index] : acc), - [] as number[], - ) + // Variable to hold the log handle - will be created after request body is built + let logHandle: ReturnType | undefined + + try { + switch (modelId) { + case "claude-sonnet-4-5": + case "claude-sonnet-4-20250514": + case "claude-opus-4-5-20251101": + case "claude-opus-4-1-20250805": + case "claude-opus-4-20250514": + case "claude-3-7-sonnet-20250219": + case "claude-3-5-sonnet-20241022": + case "claude-3-5-haiku-20241022": + case "claude-3-opus-20240229": + case "claude-haiku-4-5-20251001": + case "claude-3-haiku-20240307": { + /** + * The latest message will be the new user message, one before + * will be the assistant message from a previous request, and + * the user message before that will be a previously cached user + * message. So we need to mark the latest user message as + * ephemeral to cache it for the next request, and mark the + * second to last user message as ephemeral to let the server + * know the last message to retrieve from the cache for the + * current request. 
+ */ + const userMsgIndices = sanitizedMessages.reduce( + (acc, msg, index) => (msg.role === "user" ? [...acc, index] : acc), + [] as number[], + ) - const lastUserMsgIndex = userMsgIndices[userMsgIndices.length - 1] ?? -1 - const secondLastMsgUserIndex = userMsgIndices[userMsgIndices.length - 2] ?? -1 + const lastUserMsgIndex = userMsgIndices[userMsgIndices.length - 1] ?? -1 + const secondLastMsgUserIndex = userMsgIndices[userMsgIndices.length - 2] ?? -1 - try { - stream = await this.client.messages.create( + // Build the request body for logging and API call + const requestBody = { + model: modelId, + max_tokens: maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS, + temperature, + thinking, + // Setting cache breakpoint for system prompt so new tasks can reuse it. + system: [{ text: systemPrompt, type: "text" as const, cache_control: cacheControl }], + messages: sanitizedMessages.map((message, index) => { + if (index === lastUserMsgIndex || index === secondLastMsgUserIndex) { + return { + ...message, + content: + typeof message.content === "string" + ? [ + { + type: "text" as const, + text: message.content, + cache_control: cacheControl, + }, + ] + : message.content.map((content, contentIndex) => + contentIndex === message.content.length - 1 + ? { ...content, cache_control: cacheControl } + : content, + ), + } + } + return message + }), + stream: true as const, + ...nativeToolParams, + } + + // Start inference logging with actual request body + logHandle = this.inferenceLogger.start( { + provider: this.providerName, + operation: "createMessage", model: modelId, - max_tokens: maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS, - temperature, - thinking, - // Setting cache breakpoint for system prompt so new tasks can reuse it. - system: [{ text: systemPrompt, type: "text", cache_control: cacheControl }], - messages: sanitizedMessages.map((message, index) => { - if (index === lastUserMsgIndex || index === secondLastMsgUserIndex) { - return { - ...message, - content: - typeof message.content === "string" - ? [{ type: "text", text: message.content, cache_control: cacheControl }] - : message.content.map((content, contentIndex) => - contentIndex === message.content.length - 1 - ? { ...content, cache_control: cacheControl } - : content, - ), - } - } - return message - }), - stream: true, - ...nativeToolParams, }, - (() => { - // prompt caching: https://x.com/alexalbert__/status/1823751995901272068 - // https://github.com/anthropics/anthropic-sdk-typescript?tab=readme-ov-file#default-headers - // https://github.com/anthropics/anthropic-sdk-typescript/commit/c920b77fc67bd839bfeb6716ceab9d7c9bbe7393 - - // Then check for models that support prompt caching - switch (modelId) { - case "claude-sonnet-4-5": - case "claude-sonnet-4-20250514": - case "claude-opus-4-5-20251101": - case "claude-opus-4-1-20250805": - case "claude-opus-4-20250514": - case "claude-3-7-sonnet-20250219": - case "claude-3-5-sonnet-20241022": - case "claude-3-5-haiku-20241022": - case "claude-3-opus-20240229": - case "claude-haiku-4-5-20251001": - case "claude-3-haiku-20240307": - betas.push("prompt-caching-2024-07-31") - return { headers: { "anthropic-beta": betas.join(",") } } - default: - return undefined - } - })(), + requestBody, ) - } catch (error) { - TelemetryService.instance.captureException( - new ApiProviderError( - error instanceof Error ? 
error.message : String(error), - this.providerName, - modelId, - "createMessage", - ), - ) - throw error + + try { + // Determine request options (beta headers) + betas.push("prompt-caching-2024-07-31") + const requestOptions = { headers: { "anthropic-beta": betas.join(",") } } + + stream = await this.client.messages.create(requestBody, requestOptions) + } catch (error) { + TelemetryService.instance.captureException( + new ApiProviderError( + error instanceof Error ? error.message : String(error), + this.providerName, + modelId, + "createMessage", + ), + ) + throw error + } + break } - break - } - default: { - try { - stream = (await this.client.messages.create({ + default: { + // Build the request body for logging and API call + const requestBody = { model: modelId, max_tokens: maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS, temperature, - system: [{ text: systemPrompt, type: "text" }], + system: [{ text: systemPrompt, type: "text" as const }], messages: sanitizedMessages, - stream: true, + stream: true as const, ...nativeToolParams, - })) as any - } catch (error) { - TelemetryService.instance.captureException( - new ApiProviderError( - error instanceof Error ? error.message : String(error), - this.providerName, - modelId, - "createMessage", - ), + } + + // Start inference logging with actual request body + logHandle = this.inferenceLogger.start( + { + provider: this.providerName, + operation: "createMessage", + model: modelId, + }, + requestBody, ) - throw error + + try { + stream = (await this.client.messages.create(requestBody)) as any + } catch (error) { + TelemetryService.instance.captureException( + new ApiProviderError( + error instanceof Error ? error.message : String(error), + this.providerName, + modelId, + "createMessage", + ), + ) + throw error + } + break } - break } - } - let inputTokens = 0 - let outputTokens = 0 - let cacheWriteTokens = 0 - let cacheReadTokens = 0 - - for await (const chunk of stream) { - switch (chunk.type) { - case "message_start": { - // Tells us cache reads/writes/input/output. - const { - input_tokens = 0, - output_tokens = 0, - cache_creation_input_tokens, - cache_read_input_tokens, - } = chunk.message.usage - - yield { - type: "usage", - inputTokens: input_tokens, - outputTokens: output_tokens, - cacheWriteTokens: cache_creation_input_tokens || undefined, - cacheReadTokens: cache_read_input_tokens || undefined, - } + let inputTokens = 0 + let outputTokens = 0 + let cacheWriteTokens = 0 + let cacheReadTokens = 0 + + for await (const chunk of stream) { + switch (chunk.type) { + case "message_start": { + // Tells us cache reads/writes/input/output. + const { + input_tokens = 0, + output_tokens = 0, + cache_creation_input_tokens, + cache_read_input_tokens, + } = chunk.message.usage + + yield { + type: "usage", + inputTokens: input_tokens, + outputTokens: output_tokens, + cacheWriteTokens: cache_creation_input_tokens || undefined, + cacheReadTokens: cache_read_input_tokens || undefined, + } - inputTokens += input_tokens - outputTokens += output_tokens - cacheWriteTokens += cache_creation_input_tokens || 0 - cacheReadTokens += cache_read_input_tokens || 0 + inputTokens += input_tokens + outputTokens += output_tokens + cacheWriteTokens += cache_creation_input_tokens || 0 + cacheReadTokens += cache_read_input_tokens || 0 - break - } - case "message_delta": - // Tells us stop_reason, stop_sequence, and output tokens - // along the way and at the end of the message. 
- yield { - type: "usage", - inputTokens: 0, - outputTokens: chunk.usage.output_tokens || 0, + break } + case "message_delta": + // Tells us stop_reason, stop_sequence, and output tokens + // along the way and at the end of the message. + yield { + type: "usage", + inputTokens: 0, + outputTokens: chunk.usage.output_tokens || 0, + } - break - case "message_stop": - // No usage data, just an indicator that the message is done. - break - case "content_block_start": - switch (chunk.content_block.type) { - case "thinking": - // We may receive multiple text blocks, in which - // case just insert a line break between them. - if (chunk.index > 0) { - yield { type: "reasoning", text: "\n" } - } + break + case "message_stop": + // No usage data, just an indicator that the message is done. + break + case "content_block_start": + switch (chunk.content_block.type) { + case "thinking": + // We may receive multiple text blocks, in which + // case just insert a line break between them. + if (chunk.index > 0) { + accumulatedReasoning.push("\n") + yield { type: "reasoning", text: "\n" } + } - yield { type: "reasoning", text: chunk.content_block.thinking } - break - case "text": - // We may receive multiple text blocks, in which - // case just insert a line break between them. - if (chunk.index > 0) { - yield { type: "text", text: "\n" } - } + accumulatedReasoning.push(chunk.content_block.thinking) + yield { type: "reasoning", text: chunk.content_block.thinking } + break + case "text": + // We may receive multiple text blocks, in which + // case just insert a line break between them. + if (chunk.index > 0) { + accumulatedText.push("\n") + yield { type: "text", text: "\n" } + } - yield { type: "text", text: chunk.content_block.text } - break - case "tool_use": { - // Emit initial tool call partial with id and name - yield { - type: "tool_call_partial", - index: chunk.index, - id: chunk.content_block.id, - name: chunk.content_block.name, - arguments: undefined, + accumulatedText.push(chunk.content_block.text) + yield { type: "text", text: chunk.content_block.text } + break + case "tool_use": { + // Track tool call for logging + toolCalls.push({ + id: chunk.content_block.id, + name: chunk.content_block.name, + }) + // Emit initial tool call partial with id and name + yield { + type: "tool_call_partial", + index: chunk.index, + id: chunk.content_block.id, + name: chunk.content_block.name, + arguments: undefined, + } + break } - break } - } - break - case "content_block_delta": - switch (chunk.delta.type) { - case "thinking_delta": - yield { type: "reasoning", text: chunk.delta.thinking } - break - case "text_delta": - yield { type: "text", text: chunk.delta.text } - break - case "input_json_delta": { - // Emit tool call partial chunks as arguments stream in - yield { - type: "tool_call_partial", - index: chunk.index, - id: undefined, - name: undefined, - arguments: chunk.delta.partial_json, + break + case "content_block_delta": + switch (chunk.delta.type) { + case "thinking_delta": + accumulatedReasoning.push(chunk.delta.thinking) + yield { type: "reasoning", text: chunk.delta.thinking } + break + case "text_delta": + accumulatedText.push(chunk.delta.text) + yield { type: "text", text: chunk.delta.text } + break + case "input_json_delta": { + // Emit tool call partial chunks as arguments stream in + yield { + type: "tool_call_partial", + index: chunk.index, + id: undefined, + name: undefined, + arguments: chunk.delta.partial_json, + } + break } - break } - } - break - case "content_block_stop": - // Block 
complete - no action needed for now. - // NativeToolCallParser handles tool call completion - // Note: Signature for multi-turn thinking would require using stream.finalMessage() - // after iteration completes, which requires restructuring the streaming approach. - break + break + case "content_block_stop": + // Block complete - no action needed for now. + // NativeToolCallParser handles tool call completion + // Note: Signature for multi-turn thinking would require using stream.finalMessage() + // after iteration completes, which requires restructuring the streaming approach. + break + } } - } - if (inputTokens > 0 || outputTokens > 0 || cacheWriteTokens > 0 || cacheReadTokens > 0) { - const { totalCost } = calculateApiCostAnthropic( - this.getModel().info, - inputTokens, - outputTokens, - cacheWriteTokens, - cacheReadTokens, - ) + if (inputTokens > 0 || outputTokens > 0 || cacheWriteTokens > 0 || cacheReadTokens > 0) { + const { totalCost } = calculateApiCostAnthropic( + this.getModel().info, + inputTokens, + outputTokens, + cacheWriteTokens, + cacheReadTokens, + ) - yield { - type: "usage", - inputTokens: 0, - outputTokens: 0, - totalCost, + yield { + type: "usage", + inputTokens: 0, + outputTokens: 0, + totalCost, + } } + + // Log successful response + logHandle.success({ + text: accumulatedText.join(""), + reasoning: accumulatedReasoning.length > 0 ? accumulatedReasoning.join("") : undefined, + toolCalls: toolCalls.length > 0 ? toolCalls : undefined, + usage: { inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens }, + }) + } catch (error) { + // logHandle may not be assigned if error occurs before request body is built + if (logHandle) { + logHandle.error(error) + } + throw error } } diff --git a/src/api/providers/base-openai-compatible-provider.ts b/src/api/providers/base-openai-compatible-provider.ts index 92b9558c451..579aa80d866 100644 --- a/src/api/providers/base-openai-compatible-provider.ts +++ b/src/api/providers/base-openai-compatible-provider.ts @@ -117,73 +117,153 @@ export abstract class BaseOpenAiCompatibleProvider messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, ): ApiStream { - const stream = await this.createStream(systemPrompt, messages, metadata) - - const matcher = new XmlMatcher( - "think", - (chunk) => - ({ - type: chunk.matched ? "reasoning" : "text", - text: chunk.data, - }) as const, + const { id: model, info } = this.getModel() + + // Build the actual params object that will be passed to the SDK + const max_tokens = + getModelMaxOutputTokens({ + modelId: model, + model: info, + settings: this.options, + format: "openai", + }) ?? undefined + + const temperature = this.options.modelTemperature ?? this.defaultTemperature + + const requestParams: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = { + model, + max_tokens, + temperature, + messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)], + stream: true, + stream_options: { include_usage: true }, + ...(metadata?.tools && { tools: this.convertToolsForOpenAI(metadata.tools) }), + ...(metadata?.tool_choice && { tool_choice: metadata.tool_choice }), + ...(metadata?.toolProtocol === "native" && { + parallel_tool_calls: metadata.parallelToolCalls ?? 
false, + }), + } + + // Add thinking parameter if reasoning is enabled and model supports it + if (this.options.enableReasoningEffort && info.supportsReasoningBinary) { + ;(requestParams as any).thinking = { type: "enabled" } + } + + // Start inference logging with the actual request params + const logHandle = this.inferenceLogger.start( + { + provider: this.providerName, + operation: "createMessage", + model, + }, + requestParams, ) + // Accumulators for final response logging + const accumulatedText: string[] = [] + const accumulatedReasoning: string[] = [] + const toolCalls: Array<{ id?: string; name?: string; arguments?: string }> = [] let lastUsage: OpenAI.CompletionUsage | undefined - for await (const chunk of stream) { - // Check for provider-specific error responses (e.g., MiniMax base_resp) - const chunkAny = chunk as any - if (chunkAny.base_resp?.status_code && chunkAny.base_resp.status_code !== 0) { - throw new Error( - `${this.providerName} API Error (${chunkAny.base_resp.status_code}): ${chunkAny.base_resp.status_msg || "Unknown error"}`, - ) - } + try { + const stream = await this.createStream(systemPrompt, messages, metadata) + + const matcher = new XmlMatcher( + "think", + (chunk) => + ({ + type: chunk.matched ? "reasoning" : "text", + text: chunk.data, + }) as const, + ) + + for await (const chunk of stream) { + // Check for provider-specific error responses (e.g., MiniMax base_resp) + const chunkAny = chunk as any + if (chunkAny.base_resp?.status_code && chunkAny.base_resp.status_code !== 0) { + const error = new Error( + `${this.providerName} API Error (${chunkAny.base_resp.status_code}): ${chunkAny.base_resp.status_msg || "Unknown error"}`, + ) + logHandle.error(error) + throw error + } - const delta = chunk.choices?.[0]?.delta + const delta = chunk.choices?.[0]?.delta - if (delta?.content) { - for (const processedChunk of matcher.update(delta.content)) { - yield processedChunk + if (delta?.content) { + for (const processedChunk of matcher.update(delta.content)) { + if (processedChunk.type === "text") { + accumulatedText.push(processedChunk.text) + } else if (processedChunk.type === "reasoning") { + accumulatedReasoning.push(processedChunk.text) + } + yield processedChunk + } } - } - if (delta) { - for (const key of ["reasoning_content", "reasoning"] as const) { - if (key in delta) { - const reasoning_content = ((delta as any)[key] as string | undefined) || "" - if (reasoning_content?.trim()) { - yield { type: "reasoning", text: reasoning_content } + if (delta) { + for (const key of ["reasoning_content", "reasoning"] as const) { + if (key in delta) { + const reasoning_content = ((delta as any)[key] as string | undefined) || "" + if (reasoning_content?.trim()) { + accumulatedReasoning.push(reasoning_content) + yield { type: "reasoning", text: reasoning_content } + } + break } - break } } - } - // Emit raw tool call chunks - NativeToolCallParser handles state management - if (delta?.tool_calls) { - for (const toolCall of delta.tool_calls) { - yield { - type: "tool_call_partial", - index: toolCall.index, - id: toolCall.id, - name: toolCall.function?.name, - arguments: toolCall.function?.arguments, + // Emit raw tool call chunks - NativeToolCallParser handles state management + if (delta?.tool_calls) { + for (const toolCall of delta.tool_calls) { + // Track tool calls for logging + if (toolCall.id || toolCall.function?.name) { + toolCalls.push({ + id: toolCall.id, + name: toolCall.function?.name, + arguments: toolCall.function?.arguments, + }) + } + yield { + type: 
"tool_call_partial", + index: toolCall.index, + id: toolCall.id, + name: toolCall.function?.name, + arguments: toolCall.function?.arguments, + } } } + + if (chunk.usage) { + lastUsage = chunk.usage + } } - if (chunk.usage) { - lastUsage = chunk.usage + if (lastUsage) { + yield this.processUsageMetrics(lastUsage, this.getModel().info) } - } - if (lastUsage) { - yield this.processUsageMetrics(lastUsage, this.getModel().info) - } + // Process any remaining content + for (const processedChunk of matcher.final()) { + if (processedChunk.type === "text") { + accumulatedText.push(processedChunk.text) + } else if (processedChunk.type === "reasoning") { + accumulatedReasoning.push(processedChunk.text) + } + yield processedChunk + } - // Process any remaining content - for (const processedChunk of matcher.final()) { - yield processedChunk + // Log successful response + logHandle.success({ + text: accumulatedText.join(""), + reasoning: accumulatedReasoning.length > 0 ? accumulatedReasoning.join("") : undefined, + toolCalls: toolCalls.length > 0 ? toolCalls : undefined, + usage: lastUsage, + }) + } catch (error) { + logHandle.error(error) + throw error } } diff --git a/src/api/providers/base-provider.ts b/src/api/providers/base-provider.ts index 64d99b3f0c1..80599e2c80b 100644 --- a/src/api/providers/base-provider.ts +++ b/src/api/providers/base-provider.ts @@ -6,11 +6,23 @@ import type { ApiHandler, ApiHandlerCreateMessageMetadata } from "../index" import { ApiStream } from "../transform/stream" import { countTokens } from "../../utils/countTokens" import { isMcpTool } from "../../utils/mcp-name" +import { ApiInferenceLogger } from "../logging/ApiInferenceLogger" /** * Base class for API providers that implements common functionality. */ export abstract class BaseProvider implements ApiHandler { + /** + * The name of this provider, used for logging and error reporting. + */ + protected abstract readonly providerName: string + + /** + * Reference to the API inference logger singleton for logging requests/responses. + * Providers can use this to log inference calls when enabled. 
+ */ + protected readonly inferenceLogger = ApiInferenceLogger + abstract createMessage( systemPrompt: string, messages: Anthropic.Messages.MessageParam[], diff --git a/src/api/providers/bedrock.ts b/src/api/providers/bedrock.ts index 761500750d0..9a24bcb044a 100644 --- a/src/api/providers/bedrock.ts +++ b/src/api/providers/bedrock.ts @@ -200,7 +200,7 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH protected options: ProviderSettings private client: BedrockRuntimeClient private arnInfo: any - private readonly providerName = "Bedrock" + protected readonly providerName = "Bedrock" constructor(options: ProviderSettings) { super() diff --git a/src/api/providers/cerebras.ts b/src/api/providers/cerebras.ts index 99e7c4cc3d4..0bbba28398b 100644 --- a/src/api/providers/cerebras.ts +++ b/src/api/providers/cerebras.ts @@ -20,6 +20,7 @@ const CEREBRAS_INTEGRATION_HEADER = "X-Cerebras-3rd-Party-Integration" const CEREBRAS_INTEGRATION_NAME = "roocode" export class CerebrasHandler extends BaseProvider implements SingleCompletionHandler { + protected readonly providerName = "Cerebras" private apiKey: string private providerModels: typeof cerebrasModels private defaultProviderModelId: CerebrasModelId diff --git a/src/api/providers/claude-code.ts b/src/api/providers/claude-code.ts index cdd1cb3beb7..5baf7a3864b 100644 --- a/src/api/providers/claude-code.ts +++ b/src/api/providers/claude-code.ts @@ -20,6 +20,7 @@ import { t } from "../../i18n" import { ApiHandlerOptions } from "../../shared/api" import { countTokens } from "../../utils/countTokens" import { convertOpenAIToolsToAnthropic } from "../../core/prompts/tools/native-tools/converters" +import { BaseProvider } from "./base-provider" /** * Converts OpenAI tool_choice to Anthropic ToolChoice format @@ -64,7 +65,8 @@ function convertOpenAIToolChoice( return { type: "auto", disable_parallel_tool_use: disableParallelToolUse } } -export class ClaudeCodeHandler implements ApiHandler, SingleCompletionHandler { +export class ClaudeCodeHandler extends BaseProvider implements ApiHandler, SingleCompletionHandler { + protected readonly providerName = "Claude Code" private options: ApiHandlerOptions /** * Store the last thinking block signature for interleaved thinking with tool use. 
@@ -75,6 +77,7 @@ export class ClaudeCodeHandler implements ApiHandler, SingleCompletionHandler { private lastThinkingSignature?: string constructor(options: ApiHandlerOptions) { + super() this.options = options } @@ -114,7 +117,7 @@ export class ClaudeCodeHandler implements ApiHandler, SingleCompletionHandler { return null } - async *createMessage( + override async *createMessage( systemPrompt: string, messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, @@ -182,9 +185,8 @@ export class ClaudeCodeHandler implements ApiHandler, SingleCompletionHandler { thinking = { type: "disabled" } } - // Create streaming request using OAuth - const stream = createStreamingMessage({ - accessToken, + // Build request params for logging + const requestParams = { model: modelId, systemPrompt, messages, @@ -195,79 +197,122 @@ export class ClaudeCodeHandler implements ApiHandler, SingleCompletionHandler { metadata: { user_id: userId, }, - }) - - // Track usage for cost calculation - let inputTokens = 0 - let outputTokens = 0 - let cacheReadTokens = 0 - let cacheWriteTokens = 0 - - for await (const chunk of stream) { - switch (chunk.type) { - case "text": - yield { - type: "text", - text: chunk.text, - } - break - - case "reasoning": - yield { - type: "reasoning", - text: chunk.text, - } - break - - case "thinking_complete": - // Capture the signature for persistence in api_conversation_history - // This enables tool use continuations where thinking blocks must be passed back - if (chunk.signature) { - this.lastThinkingSignature = chunk.signature - } - // Emit a complete thinking block with signature - // This is critical for interleaved thinking with tool use - // The signature must be included when passing thinking blocks back to the API - yield { - type: "reasoning", - text: chunk.thinking, - signature: chunk.signature, - } - break - - case "tool_call_partial": - yield { - type: "tool_call_partial", - index: chunk.index, - id: chunk.id, - name: chunk.name, - arguments: chunk.arguments, - } - break + } - case "usage": { - inputTokens = chunk.inputTokens - outputTokens = chunk.outputTokens - cacheReadTokens = chunk.cacheReadTokens || 0 - cacheWriteTokens = chunk.cacheWriteTokens || 0 - - // Claude Code is subscription-based, no per-token cost - const usageChunk: ApiStreamUsageChunk = { - type: "usage", - inputTokens, - outputTokens, - cacheReadTokens: cacheReadTokens > 0 ? cacheReadTokens : undefined, - cacheWriteTokens: cacheWriteTokens > 0 ? 
cacheWriteTokens : undefined, - totalCost: 0, + // Start inference logging with request params + const logHandle = this.inferenceLogger.start( + { + provider: this.providerName, + operation: "createMessage", + model: modelId, + }, + requestParams, + ) + + // Accumulators for response logging + const accumulatedText: string[] = [] + const accumulatedReasoning: string[] = [] + const toolCalls: Array<{ id?: string; name?: string }> = [] + + try { + // Create streaming request using OAuth + const stream = createStreamingMessage({ + accessToken, + ...requestParams, + }) + + // Track usage for cost calculation + let inputTokens = 0 + let outputTokens = 0 + let cacheReadTokens = 0 + let cacheWriteTokens = 0 + let lastUsage: any + + for await (const chunk of stream) { + switch (chunk.type) { + case "text": + accumulatedText.push(chunk.text) + yield { + type: "text", + text: chunk.text, + } + break + + case "reasoning": + accumulatedReasoning.push(chunk.text) + yield { + type: "reasoning", + text: chunk.text, + } + break + + case "thinking_complete": + // Capture the signature for persistence in api_conversation_history + // This enables tool use continuations where thinking blocks must be passed back + if (chunk.signature) { + this.lastThinkingSignature = chunk.signature + } + accumulatedReasoning.push(chunk.thinking) + // Emit a complete thinking block with signature + // This is critical for interleaved thinking with tool use + // The signature must be included when passing thinking blocks back to the API + yield { + type: "reasoning", + text: chunk.thinking, + signature: chunk.signature, + } + break + + case "tool_call_partial": + if (chunk.id || chunk.name) { + toolCalls.push({ id: chunk.id, name: chunk.name }) + } + yield { + type: "tool_call_partial", + index: chunk.index, + id: chunk.id, + name: chunk.name, + arguments: chunk.arguments, + } + break + + case "usage": { + inputTokens = chunk.inputTokens + outputTokens = chunk.outputTokens + cacheReadTokens = chunk.cacheReadTokens || 0 + cacheWriteTokens = chunk.cacheWriteTokens || 0 + + // Claude Code is subscription-based, no per-token cost + const usageChunk: ApiStreamUsageChunk = { + type: "usage", + inputTokens, + outputTokens, + cacheReadTokens: cacheReadTokens > 0 ? cacheReadTokens : undefined, + cacheWriteTokens: cacheWriteTokens > 0 ? cacheWriteTokens : undefined, + totalCost: 0, + } + + lastUsage = usageChunk + + yield usageChunk + break } - yield usageChunk - break + case "error": + throw new Error(chunk.error) } - - case "error": - throw new Error(chunk.error) } + + // Log successful response + logHandle.success({ + text: accumulatedText.join(""), + reasoning: accumulatedReasoning.length > 0 ? accumulatedReasoning.join("") : undefined, + toolCalls: toolCalls.length > 0 ? 
toolCalls : undefined, + usage: lastUsage, + }) + } catch (error) { + logHandle.error(error) + throw error + } } @@ -284,7 +329,7 @@ export class ClaudeCodeHandler implements ApiHandler, SingleCompletionHandler { } } - async countTokens(content: Anthropic.Messages.ContentBlockParam[]): Promise<number> { + override async countTokens(content: Anthropic.Messages.ContentBlockParam[]): Promise<number> { if (content.length === 0) { return 0 } diff --git a/src/api/providers/gemini.ts b/src/api/providers/gemini.ts index 4402e3e0177..f8e86e74aeb 100644 --- a/src/api/providers/gemini.ts +++ b/src/api/providers/gemini.ts @@ -40,7 +40,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl private client: GoogleGenAI private lastThoughtSignature?: string private lastResponseId?: string - private readonly providerName = "Gemini" + protected readonly providerName = "Gemini" constructor({ isVertex, ...options }: GeminiHandlerOptions) { super() diff --git a/src/api/providers/huggingface.ts b/src/api/providers/huggingface.ts index 7b62046b99e..b9e0fd6f928 100644 --- a/src/api/providers/huggingface.ts +++ b/src/api/providers/huggingface.ts @@ -14,7 +14,7 @@ export class HuggingFaceHandler extends BaseProvider implements SingleCompletion private client: OpenAI private options: ApiHandlerOptions private modelCache: ModelRecord | null = null - private readonly providerName = "HuggingFace" + protected readonly providerName = "HuggingFace" constructor(options: ApiHandlerOptions) { super() diff --git a/src/api/providers/lm-studio.ts b/src/api/providers/lm-studio.ts index 102c108dcee..29b48a7d2f3 100644 --- a/src/api/providers/lm-studio.ts +++ b/src/api/providers/lm-studio.ts @@ -21,7 +21,7 @@ import { handleOpenAIError } from "./utils/openai-error-handler" export class LmStudioHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions private client: OpenAI - private readonly providerName = "LM Studio" + protected readonly providerName = "LM Studio" constructor(options: ApiHandlerOptions) { super() diff --git a/src/api/providers/minimax.ts b/src/api/providers/minimax.ts index 12d7934546e..07d3d50c42e 100644 --- a/src/api/providers/minimax.ts +++ b/src/api/providers/minimax.ts @@ -50,6 +50,7 @@ function convertOpenAIToolChoice( } export class MiniMaxHandler extends BaseProvider implements SingleCompletionHandler { + protected readonly providerName = "MiniMax" private options: ApiHandlerOptions private client: Anthropic diff --git a/src/api/providers/mistral.ts b/src/api/providers/mistral.ts index 95739cdcf73..c06083a505f 100644 --- a/src/api/providers/mistral.ts +++ b/src/api/providers/mistral.ts @@ -51,7 +51,7 @@ type MistralTool = { export class MistralHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions private client: Mistral - private readonly providerName = "Mistral" + protected readonly providerName = "Mistral" constructor(options: ApiHandlerOptions) { super() diff --git a/src/api/providers/native-ollama.ts b/src/api/providers/native-ollama.ts index 712b70445cc..7725a1e18e6 100644 --- a/src/api/providers/native-ollama.ts +++ b/src/api/providers/native-ollama.ts @@ -146,6 +146,7 @@ function convertToOllamaMessages(anthropicMessages: Anthropic.Messages.MessagePa } export class NativeOllamaHandler extends BaseProvider implements SingleCompletionHandler { + protected readonly providerName = "Ollama" protected options: ApiHandlerOptions private client: Ollama | undefined protected models: Record<string, ModelInfo> = {} diff
--git a/src/api/providers/openai-native.ts b/src/api/providers/openai-native.ts index 8f9cc2297f8..4506cac80ec 100644 --- a/src/api/providers/openai-native.ts +++ b/src/api/providers/openai-native.ts @@ -31,7 +31,7 @@ export type OpenAiNativeModel = ReturnType export class OpenAiNativeHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions private client: OpenAI - private readonly providerName = "OpenAI Native" + protected readonly providerName = "OpenAI Native" // Resolved service tier from Responses API (actual tier used by OpenAI) private lastServiceTier: ServiceTier | undefined // Complete response output array (includes reasoning items with encrypted_content) @@ -175,8 +175,54 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio metadata, ) - // Make the request (pass systemPrompt and messages for potential retry) - yield* this.executeRequest(requestBody, model, metadata, systemPrompt, messages) + // Start inference logging with actual request body + const logHandle = this.inferenceLogger.start( + { + provider: this.providerName, + operation: "createMessage", + model: model.id, + }, + requestBody, + ) + + // Accumulators for response logging + const accumulatedText: string[] = [] + const accumulatedReasoning: string[] = [] + const toolCalls: Array<{ id?: string; name?: string }> = [] + let lastUsage: any + + try { + // Make the request (pass systemPrompt and messages for potential retry) + for await (const chunk of this.executeRequest(requestBody, model, metadata, systemPrompt, messages)) { + // Accumulate for logging + if (chunk.type === "text") { + accumulatedText.push(chunk.text) + } else if (chunk.type === "reasoning") { + accumulatedReasoning.push(chunk.text) + } else if (chunk.type === "tool_call" || chunk.type === "tool_call_partial") { + if (chunk.id || chunk.name) { + toolCalls.push({ id: chunk.id, name: chunk.name }) + } + } else if (chunk.type === "usage") { + lastUsage = chunk + } + + yield chunk + } + + // Log successful response + logHandle.success({ + text: accumulatedText.join(""), + reasoning: accumulatedReasoning.length > 0 ? accumulatedReasoning.join("") : undefined, + toolCalls: toolCalls.length > 0 ? 
toolCalls : undefined, + usage: lastUsage, + responseId: this.lastResponseId, + responseOutput: this.lastResponseOutput, + }) + } catch (error) { + logHandle.error(error) + throw error + } } private buildRequestBody( diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index d6f50d02691..d3acd28361f 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -32,7 +32,7 @@ import { handleOpenAIError } from "./utils/openai-error-handler" export class OpenAiHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions protected client: OpenAI - private readonly providerName = "OpenAI" + protected readonly providerName = "OpenAI" constructor(options: ApiHandlerOptions) { super() @@ -94,6 +94,13 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl const deepseekReasoner = modelId.includes("deepseek-reasoner") || enabledR1Format const ark = modelUrl.includes(".volces.com") + // Accumulators for final response logging + const accumulatedText: string[] = [] + const accumulatedReasoning: string[] = [] + const toolCalls: Array<{ id?: string; name?: string }> = [] + let lastUsage: any + + // Handle O3 family models separately with their own logging if (modelId.includes("o1") || modelId.includes("o3") || modelId.includes("o4")) { yield* this.handleO3FamilyMessage(modelId, systemPrompt, messages, metadata) return @@ -174,60 +181,102 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl // Add max_tokens if needed this.addMaxTokensIfNeeded(requestOptions, modelInfo) - let stream + // Start inference logging with actual request params + const logHandle = this.inferenceLogger.start( + { + provider: this.providerName, + operation: "createMessage", + model: modelId, + }, + requestOptions, + ) + try { - stream = await this.client.chat.completions.create( - requestOptions, - isAzureAiInference ? { path: OPENAI_AZURE_AI_INFERENCE_PATH } : {}, + let stream + try { + stream = await this.client.chat.completions.create( + requestOptions, + isAzureAiInference ? { path: OPENAI_AZURE_AI_INFERENCE_PATH } : {}, + ) + } catch (error) { + throw handleOpenAIError(error, this.providerName) + } + + const matcher = new XmlMatcher( + "think", + (chunk) => + ({ + type: chunk.matched ? "reasoning" : "text", + text: chunk.data, + }) as const, ) - } catch (error) { - throw handleOpenAIError(error, this.providerName) - } - const matcher = new XmlMatcher( - "think", - (chunk) => - ({ - type: chunk.matched ? "reasoning" : "text", - text: chunk.data, - }) as const, - ) + const activeToolCallIds = new Set() - let lastUsage - const activeToolCallIds = new Set() + for await (const chunk of stream) { + const delta = chunk.choices?.[0]?.delta ?? {} + const finishReason = chunk.choices?.[0]?.finish_reason - for await (const chunk of stream) { - const delta = chunk.choices?.[0]?.delta ?? 
{} - const finishReason = chunk.choices?.[0]?.finish_reason + if (delta.content) { + for (const matchedChunk of matcher.update(delta.content)) { + if (matchedChunk.type === "text") { + accumulatedText.push(matchedChunk.text) + } else if (matchedChunk.type === "reasoning") { + accumulatedReasoning.push(matchedChunk.text) + } + yield matchedChunk + } + } - if (delta.content) { - for (const chunk of matcher.update(delta.content)) { - yield chunk + if ("reasoning_content" in delta && delta.reasoning_content) { + accumulatedReasoning.push((delta.reasoning_content as string | undefined) || "") + yield { + type: "reasoning", + text: (delta.reasoning_content as string | undefined) || "", + } } - } - if ("reasoning_content" in delta && delta.reasoning_content) { - yield { - type: "reasoning", - text: (delta.reasoning_content as string | undefined) || "", + // Track tool calls for logging and use processToolCalls for proper tool_call_end events + if (delta.tool_calls) { + for (const toolCall of delta.tool_calls) { + if (toolCall.id || toolCall.function?.name) { + toolCalls.push({ id: toolCall.id, name: toolCall.function?.name }) + } + } } - } + yield* this.processToolCalls(delta, finishReason, activeToolCallIds) - yield* this.processToolCalls(delta, finishReason, activeToolCallIds) + if (chunk.usage) { + lastUsage = chunk.usage + } + } - if (chunk.usage) { - lastUsage = chunk.usage + for (const matchedChunk of matcher.final()) { + if (matchedChunk.type === "text") { + accumulatedText.push(matchedChunk.text) + } else if (matchedChunk.type === "reasoning") { + accumulatedReasoning.push(matchedChunk.text) + } + yield matchedChunk } - } - for (const chunk of matcher.final()) { - yield chunk - } + if (lastUsage) { + yield this.processUsageMetrics(lastUsage, modelInfo) + } - if (lastUsage) { - yield this.processUsageMetrics(lastUsage, modelInfo) + // Log successful response + logHandle.success({ + text: accumulatedText.join(""), + reasoning: accumulatedReasoning.length > 0 ? accumulatedReasoning.join("") : undefined, + toolCalls: toolCalls.length > 0 ? toolCalls : undefined, + usage: lastUsage, + }) + } catch (error) { + logHandle.error(error) + throw error } } else { + // Non-streaming path const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = { model: modelId, messages: deepseekReasoner @@ -245,37 +294,63 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl // Add max_tokens if needed this.addMaxTokensIfNeeded(requestOptions, modelInfo) - let response - try { - response = await this.client.chat.completions.create( - requestOptions, - this._isAzureAiInference(modelUrl) ? { path: OPENAI_AZURE_AI_INFERENCE_PATH } : {}, - ) - } catch (error) { - throw handleOpenAIError(error, this.providerName) - } + // Start inference logging with actual request params + const logHandle = this.inferenceLogger.start( + { + provider: this.providerName, + operation: "createMessage", + model: modelId, + }, + requestOptions, + ) - const message = response.choices?.[0]?.message + try { + let response + try { + response = await this.client.chat.completions.create( + requestOptions, + this._isAzureAiInference(modelUrl) ? 
{ path: OPENAI_AZURE_AI_INFERENCE_PATH } : {}, + ) + } catch (error) { + throw handleOpenAIError(error, this.providerName) + } - if (message?.tool_calls) { - for (const toolCall of message.tool_calls) { - if (toolCall.type === "function") { - yield { - type: "tool_call", - id: toolCall.id, - name: toolCall.function.name, - arguments: toolCall.function.arguments, + const message = response.choices?.[0]?.message + + if (message?.tool_calls) { + for (const toolCall of message.tool_calls) { + if (toolCall.type === "function") { + toolCalls.push({ id: toolCall.id, name: toolCall.function.name }) + yield { + type: "tool_call", + id: toolCall.id, + name: toolCall.function.name, + arguments: toolCall.function.arguments, + } } } } - } - yield { - type: "text", - text: message?.content || "", - } + accumulatedText.push(message?.content || "") + yield { + type: "text", + text: message?.content || "", + } + + lastUsage = response.usage + yield this.processUsageMetrics(response.usage, modelInfo) - yield this.processUsageMetrics(response.usage, modelInfo) + // Log successful response + logHandle.success({ + text: accumulatedText.join(""), + reasoning: accumulatedReasoning.length > 0 ? accumulatedReasoning.join("") : undefined, + toolCalls: toolCalls.length > 0 ? toolCalls : undefined, + usage: lastUsage, + }) + } catch (error) { + logHandle.error(error) + throw error + } } } @@ -339,6 +414,11 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl const modelInfo = this.getModel().info const methodIsAzureAiInference = this._isAzureAiInference(this.options.openAiBaseUrl) + // Accumulators for response logging + const accumulatedText: string[] = [] + const toolCalls: Array<{ id?: string; name?: string }> = [] + let lastUsage: any + if (this.options.openAiStreamingEnabled ?? true) { const isGrokXAI = this._isGrokXAI(this.options.openAiBaseUrl) @@ -367,17 +447,73 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl // This allows O3 models to limit response length when includeMaxTokens is enabled this.addMaxTokensIfNeeded(requestOptions, modelInfo) - let stream + // Start inference logging with actual request params + const logHandle = this.inferenceLogger.start( + { + provider: this.providerName, + operation: "createMessage", + model: modelId, + }, + requestOptions, + ) + try { - stream = await this.client.chat.completions.create( - requestOptions, - methodIsAzureAiInference ? { path: OPENAI_AZURE_AI_INFERENCE_PATH } : {}, - ) + let stream + try { + stream = await this.client.chat.completions.create( + requestOptions, + methodIsAzureAiInference ? 
{ path: OPENAI_AZURE_AI_INFERENCE_PATH } : {}, + ) + } catch (error) { + throw handleOpenAIError(error, this.providerName) + } + + const activeToolCallIds = new Set() + + for await (const chunk of stream) { + const delta = chunk.choices?.[0]?.delta + const finishReason = chunk.choices?.[0]?.finish_reason + + if (delta) { + if (delta.content) { + accumulatedText.push(delta.content) + yield { + type: "text", + text: delta.content, + } + } + + // Track tool calls for logging and use processToolCalls for proper tool_call_end events + if (delta.tool_calls) { + for (const toolCall of delta.tool_calls) { + if (toolCall.id || toolCall.function?.name) { + toolCalls.push({ id: toolCall.id, name: toolCall.function?.name }) + } + } + } + yield* this.processToolCalls(delta, finishReason, activeToolCallIds) + } + + if (chunk.usage) { + lastUsage = chunk.usage + yield { + type: "usage", + inputTokens: chunk.usage.prompt_tokens || 0, + outputTokens: chunk.usage.completion_tokens || 0, + } + } + } + + // Log successful response + logHandle.success({ + text: accumulatedText.join(""), + toolCalls: toolCalls.length > 0 ? toolCalls : undefined, + usage: lastUsage, + }) } catch (error) { - throw handleOpenAIError(error, this.providerName) + logHandle.error(error) + throw error } - - yield* this.handleStreamResponse(stream) } else { const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = { model: modelId, @@ -402,35 +538,61 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl // This allows O3 models to limit response length when includeMaxTokens is enabled this.addMaxTokensIfNeeded(requestOptions, modelInfo) - let response + // Start inference logging with actual request params + const logHandle = this.inferenceLogger.start( + { + provider: this.providerName, + operation: "createMessage", + model: modelId, + }, + requestOptions, + ) + try { - response = await this.client.chat.completions.create( - requestOptions, - methodIsAzureAiInference ? { path: OPENAI_AZURE_AI_INFERENCE_PATH } : {}, - ) - } catch (error) { - throw handleOpenAIError(error, this.providerName) - } + let response + try { + response = await this.client.chat.completions.create( + requestOptions, + methodIsAzureAiInference ? { path: OPENAI_AZURE_AI_INFERENCE_PATH } : {}, + ) + } catch (error) { + throw handleOpenAIError(error, this.providerName) + } - const message = response.choices?.[0]?.message - if (message?.tool_calls) { - for (const toolCall of message.tool_calls) { - if (toolCall.type === "function") { - yield { - type: "tool_call", - id: toolCall.id, - name: toolCall.function.name, - arguments: toolCall.function.arguments, + const message = response.choices?.[0]?.message + if (message?.tool_calls) { + for (const toolCall of message.tool_calls) { + if (toolCall.type === "function") { + toolCalls.push({ id: toolCall.id, name: toolCall.function.name }) + yield { + type: "tool_call", + id: toolCall.id, + name: toolCall.function.name, + arguments: toolCall.function.arguments, + } } } } - } - yield { - type: "text", - text: message?.content || "", + accumulatedText.push(message?.content || "") + yield { + type: "text", + text: message?.content || "", + } + + lastUsage = response.usage + yield this.processUsageMetrics(response.usage) + + // Log successful response + logHandle.success({ + text: accumulatedText.join(""), + toolCalls: toolCalls.length > 0 ? 
toolCalls : undefined, + usage: lastUsage, + }) + } catch (error) { + logHandle.error(error) + throw error } - yield this.processUsageMetrics(response.usage) } } diff --git a/src/api/providers/openrouter.ts b/src/api/providers/openrouter.ts index 5b8c29c337a..bb6835d6455 100644 --- a/src/api/providers/openrouter.ts +++ b/src/api/providers/openrouter.ts @@ -140,7 +140,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH private client: OpenAI protected models: ModelRecord = {} protected endpoints: ModelRecord = {} - private readonly providerName = "OpenRouter" + protected readonly providerName = "OpenRouter" private currentReasoningDetails: any[] = [] constructor(options: ApiHandlerOptions) { @@ -322,186 +322,223 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH ? { headers: { "x-anthropic-beta": "fine-grained-tool-streaming-2025-05-14" } } : undefined - let stream - try { - stream = await this.client.chat.completions.create(completionParams, requestOptions) - } catch (error) { - // Try to parse as OpenRouter error structure using Zod - const parseResult = OpenRouterErrorResponseSchema.safeParse(error) - - if (parseResult.success && parseResult.data.error) { - const openRouterError = parseResult.data - const rawString = openRouterError.error?.metadata?.raw - const parsedError = extractErrorFromMetadataRaw(rawString) - const rawErrorMessage = parsedError || openRouterError.error?.message || "Unknown error" - - const apiError = Object.assign( - new ApiProviderError( - rawErrorMessage, - this.providerName, - modelId, - "createMessage", - openRouterError.error?.code, - ), - { - status: openRouterError.error?.code, - error: openRouterError.error, - }, - ) + // Start inference logging with actual request params + const logHandle = this.inferenceLogger.start( + { + provider: this.providerName, + operation: "createMessage", + model: modelId, + }, + { completionParams, requestOptions }, + ) - TelemetryService.instance.captureException(apiError) - throw handleOpenAIError(error, this.providerName) - } else { - // Fallback for non-OpenRouter errors - const errorMessage = error instanceof Error ? 
error.message : String(error) - const apiError = new ApiProviderError(errorMessage, this.providerName, modelId, "createMessage") - TelemetryService.instance.captureException(apiError) - throw handleOpenAIError(error, this.providerName) - } - } + // Accumulators for response logging + const accumulatedText: string[] = [] + const accumulatedReasoning: string[] = [] + const toolCalls: Array<{ id?: string; name?: string }> = [] - let lastUsage: CompletionUsage | undefined = undefined - // Accumulator for reasoning_details: accumulate text by type-index key - const reasoningDetailsAccumulator = new Map< - string, - { - type: string - text?: string - summary?: string - data?: string - id?: string | null - format?: string - signature?: string - index: number + try { + let stream + try { + stream = await this.client.chat.completions.create(completionParams, requestOptions) + } catch (error) { + // Try to parse as OpenRouter error structure using Zod + const parseResult = OpenRouterErrorResponseSchema.safeParse(error) + + if (parseResult.success && parseResult.data.error) { + const openRouterError = parseResult.data + const rawString = openRouterError.error?.metadata?.raw + const parsedError = extractErrorFromMetadataRaw(rawString) + const rawErrorMessage = parsedError || openRouterError.error?.message || "Unknown error" + + const apiError = Object.assign( + new ApiProviderError( + rawErrorMessage, + this.providerName, + modelId, + "createMessage", + openRouterError.error?.code, + ), + { + status: openRouterError.error?.code, + error: openRouterError.error, + }, + ) + + TelemetryService.instance.captureException(apiError) + throw handleOpenAIError(error, this.providerName) + } else { + // Fallback for non-OpenRouter errors + const errorMessage = error instanceof Error ? error.message : String(error) + const apiError = new ApiProviderError(errorMessage, this.providerName, modelId, "createMessage") + TelemetryService.instance.captureException(apiError) + throw handleOpenAIError(error, this.providerName) + } } - >() - for await (const chunk of stream) { - // OpenRouter returns an error object instead of the OpenAI SDK throwing an error. - if ("error" in chunk) { - this.handleStreamingError(chunk.error as OpenRouterError, modelId, "createMessage") - } + let lastUsage: CompletionUsage | undefined = undefined + // Accumulator for reasoning_details: accumulate text by type-index key + const reasoningDetailsAccumulator = new Map< + string, + { + type: string + text?: string + summary?: string + data?: string + id?: string | null + format?: string + signature?: string + index: number + } + >() - const delta = chunk.choices[0]?.delta - const finishReason = chunk.choices[0]?.finish_reason - - if (delta) { - // Handle reasoning_details array format (used by Gemini 3, Claude, OpenAI o-series, etc.) - // See: https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks - // Priority: Check for reasoning_details first, as it's the newer format - const deltaWithReasoning = delta as typeof delta & { - reasoning_details?: Array<{ - type: string - text?: string - summary?: string - data?: string - id?: string | null - format?: string - signature?: string - index?: number - }> + for await (const chunk of stream) { + // OpenRouter returns an error object instead of the OpenAI SDK throwing an error. 
+ if ("error" in chunk) { + logHandle.error(chunk.error) + this.handleStreamingError(chunk.error as OpenRouterError, modelId, "createMessage") } - if (deltaWithReasoning.reasoning_details && Array.isArray(deltaWithReasoning.reasoning_details)) { - for (const detail of deltaWithReasoning.reasoning_details) { - const index = detail.index ?? 0 - const key = `${detail.type}-${index}` - const existing = reasoningDetailsAccumulator.get(key) + const delta = chunk.choices[0]?.delta + const finishReason = chunk.choices[0]?.finish_reason + + if (delta) { + // Handle reasoning_details array format (used by Gemini 3, Claude, OpenAI o-series, etc.) + // See: https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks + // Priority: Check for reasoning_details first, as it's the newer format + const deltaWithReasoning = delta as typeof delta & { + reasoning_details?: Array<{ + type: string + text?: string + summary?: string + data?: string + id?: string | null + format?: string + signature?: string + index?: number + }> + } - if (existing) { - // Accumulate text/summary/data for existing reasoning detail - if (detail.text !== undefined) { - existing.text = (existing.text || "") + detail.text + if (deltaWithReasoning.reasoning_details && Array.isArray(deltaWithReasoning.reasoning_details)) { + for (const detail of deltaWithReasoning.reasoning_details) { + const index = detail.index ?? 0 + const key = `${detail.type}-${index}` + const existing = reasoningDetailsAccumulator.get(key) + + if (existing) { + // Accumulate text/summary/data for existing reasoning detail + if (detail.text !== undefined) { + existing.text = (existing.text || "") + detail.text + } + if (detail.summary !== undefined) { + existing.summary = (existing.summary || "") + detail.summary + } + if (detail.data !== undefined) { + existing.data = (existing.data || "") + detail.data + } + // Update other fields if provided + if (detail.id !== undefined) existing.id = detail.id + if (detail.format !== undefined) existing.format = detail.format + if (detail.signature !== undefined) existing.signature = detail.signature + } else { + // Start new reasoning detail accumulation + reasoningDetailsAccumulator.set(key, { + type: detail.type, + text: detail.text, + summary: detail.summary, + data: detail.data, + id: detail.id, + format: detail.format, + signature: detail.signature, + index, + }) } - if (detail.summary !== undefined) { - existing.summary = (existing.summary || "") + detail.summary + + // Yield text for display (still fragmented for live streaming) and accumulate + let reasoningText: string | undefined + if (detail.type === "reasoning.text" && typeof detail.text === "string") { + reasoningText = detail.text + } else if (detail.type === "reasoning.summary" && typeof detail.summary === "string") { + reasoningText = detail.summary } - if (detail.data !== undefined) { - existing.data = (existing.data || "") + detail.data + // Note: reasoning.encrypted types are intentionally skipped as they contain redacted content + + if (reasoningText) { + accumulatedReasoning.push(reasoningText) + yield { type: "reasoning", text: reasoningText } } - // Update other fields if provided - if (detail.id !== undefined) existing.id = detail.id - if (detail.format !== undefined) existing.format = detail.format - if (detail.signature !== undefined) existing.signature = detail.signature - } else { - // Start new reasoning detail accumulation - reasoningDetailsAccumulator.set(key, { - type: detail.type, - text: detail.text, - summary: 
detail.summary, - data: detail.data, - id: detail.id, - format: detail.format, - signature: detail.signature, - index, - }) } + } else if ("reasoning" in delta && delta.reasoning && typeof delta.reasoning === "string") { + // Handle legacy reasoning format - only if reasoning_details is not present + // See: https://openrouter.ai/docs/use-cases/reasoning-tokens + accumulatedReasoning.push(delta.reasoning) + yield { type: "reasoning", text: delta.reasoning } + } - // Yield text for display (still fragmented for live streaming) - let reasoningText: string | undefined - if (detail.type === "reasoning.text" && typeof detail.text === "string") { - reasoningText = detail.text - } else if (detail.type === "reasoning.summary" && typeof detail.summary === "string") { - reasoningText = detail.summary + // Emit raw tool call chunks - NativeToolCallParser handles state management + if ("tool_calls" in delta && Array.isArray(delta.tool_calls)) { + for (const toolCall of delta.tool_calls) { + if (toolCall.id || toolCall.function?.name) { + toolCalls.push({ id: toolCall.id, name: toolCall.function?.name }) + } + yield { + type: "tool_call_partial", + index: toolCall.index, + id: toolCall.id, + name: toolCall.function?.name, + arguments: toolCall.function?.arguments, + } } - // Note: reasoning.encrypted types are intentionally skipped as they contain redacted content + } - if (reasoningText) { - yield { type: "reasoning", text: reasoningText } - } + if (delta.content) { + accumulatedText.push(delta.content) + yield { type: "text", text: delta.content } } - } else if ("reasoning" in delta && delta.reasoning && typeof delta.reasoning === "string") { - // Handle legacy reasoning format - only if reasoning_details is not present - // See: https://openrouter.ai/docs/use-cases/reasoning-tokens - yield { type: "reasoning", text: delta.reasoning } } - // Emit raw tool call chunks - NativeToolCallParser handles state management - if ("tool_calls" in delta && Array.isArray(delta.tool_calls)) { - for (const toolCall of delta.tool_calls) { - yield { - type: "tool_call_partial", - index: toolCall.index, - id: toolCall.id, - name: toolCall.function?.name, - arguments: toolCall.function?.arguments, - } + // Process finish_reason to emit tool_call_end events + // This ensures tool calls are finalized even if the stream doesn't properly close + if (finishReason) { + const endEvents = NativeToolCallParser.processFinishReason(finishReason) + for (const event of endEvents) { + yield event } } - if (delta.content) { - yield { type: "text", text: delta.content } + if (chunk.usage) { + lastUsage = chunk.usage } } - // Process finish_reason to emit tool_call_end events - // This ensures tool calls are finalized even if the stream doesn't properly close - if (finishReason) { - const endEvents = NativeToolCallParser.processFinishReason(finishReason) - for (const event of endEvents) { - yield event - } + // After streaming completes, store the accumulated reasoning_details + if (reasoningDetailsAccumulator.size > 0) { + this.currentReasoningDetails = Array.from(reasoningDetailsAccumulator.values()) } - if (chunk.usage) { - lastUsage = chunk.usage + if (lastUsage) { + yield { + type: "usage", + inputTokens: lastUsage.prompt_tokens || 0, + outputTokens: lastUsage.completion_tokens || 0, + cacheReadTokens: lastUsage.prompt_tokens_details?.cached_tokens, + reasoningTokens: lastUsage.completion_tokens_details?.reasoning_tokens, + totalCost: (lastUsage.cost_details?.upstream_inference_cost || 0) + (lastUsage.cost || 0), + } } - } - - // 
After streaming completes, store the accumulated reasoning_details - if (reasoningDetailsAccumulator.size > 0) { - this.currentReasoningDetails = Array.from(reasoningDetailsAccumulator.values()) - } - if (lastUsage) { - yield { - type: "usage", - inputTokens: lastUsage.prompt_tokens || 0, - outputTokens: lastUsage.completion_tokens || 0, - cacheReadTokens: lastUsage.prompt_tokens_details?.cached_tokens, - reasoningTokens: lastUsage.completion_tokens_details?.reasoning_tokens, - totalCost: (lastUsage.cost_details?.upstream_inference_cost || 0) + (lastUsage.cost || 0), - } + // Log successful response + logHandle.success({ + text: accumulatedText.join(""), + reasoning: accumulatedReasoning.length > 0 ? accumulatedReasoning.join("") : undefined, + toolCalls: toolCalls.length > 0 ? toolCalls : undefined, + usage: lastUsage, + reasoningDetails: + reasoningDetailsAccumulator.size > 0 ? Array.from(reasoningDetailsAccumulator.values()) : undefined, + }) + } catch (error) { + logHandle.error(error) + throw error } } diff --git a/src/api/providers/qwen-code.ts b/src/api/providers/qwen-code.ts index 8f26273ebaf..cbeaac223f2 100644 --- a/src/api/providers/qwen-code.ts +++ b/src/api/providers/qwen-code.ts @@ -52,6 +52,7 @@ function objectToUrlEncoded(data: Record): string { } export class QwenCodeHandler extends BaseProvider implements SingleCompletionHandler { + protected readonly providerName = "Qwen Code" protected options: QwenCodeHandlerOptions private credentials: QwenOAuthCredentials | null = null private client: OpenAI | undefined diff --git a/src/api/providers/requesty.ts b/src/api/providers/requesty.ts index 85efeb800f1..f2f9a213e57 100644 --- a/src/api/providers/requesty.ts +++ b/src/api/providers/requesty.ts @@ -61,7 +61,7 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHan protected models: ModelRecord = {} private client: OpenAI private baseURL: string - private readonly providerName = "Requesty" + protected readonly providerName = "Requesty" constructor(options: ApiHandlerOptions) { super() diff --git a/src/api/providers/roo.ts b/src/api/providers/roo.ts index 83eab87ef7e..c392a11aca4 100644 --- a/src/api/providers/roo.ts +++ b/src/api/providers/roo.ts @@ -125,6 +125,41 @@ export class RooHandler extends BaseOpenAiCompatibleProvider { messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, ): ApiStream { + const { id: model, info } = this.getModel() + + // Get model parameters for logging + const params = getModelParams({ + format: "openai", + modelId: model, + model: info, + settings: this.options, + defaultTemperature: this.defaultTemperature, + }) + + // Start inference logging + const logHandle = this.inferenceLogger.start( + { + provider: this.providerName, + operation: "createMessage", + model, + taskId: metadata?.taskId, + }, + { + model, + maxTokens: params.maxTokens, + temperature: params.temperature, + messageCount: messages.length, + hasTools: !!metadata?.tools, + toolCount: metadata?.tools?.length ?? 
0, + toolChoice: metadata?.tool_choice, + }, + ) + + // Accumulators for final response logging + const accumulatedText: string[] = [] + const accumulatedReasoning: string[] = [] + const toolCalls: Array<{ id?: string; name?: string }> = [] + try { // Reset reasoning_details accumulator for this request this.currentReasoningDetails = [] @@ -221,17 +256,20 @@ export class RooHandler extends BaseOpenAiCompatibleProvider { // Note: reasoning.encrypted types are intentionally skipped as they contain redacted content if (reasoningText) { + accumulatedReasoning.push(reasoningText) yield { type: "reasoning", text: reasoningText } } } } else if ("reasoning" in delta && delta.reasoning && typeof delta.reasoning === "string") { // Handle legacy reasoning format - only if reasoning_details is not present + accumulatedReasoning.push(delta.reasoning) yield { type: "reasoning", text: delta.reasoning, } } else if ("reasoning_content" in delta && typeof delta.reasoning_content === "string") { // Also check for reasoning_content for backward compatibility + accumulatedReasoning.push(delta.reasoning_content) yield { type: "reasoning", text: delta.reasoning_content, @@ -241,6 +279,10 @@ export class RooHandler extends BaseOpenAiCompatibleProvider { // Emit raw tool call chunks - NativeToolCallParser handles state management if ("tool_calls" in delta && Array.isArray(delta.tool_calls)) { for (const toolCall of delta.tool_calls) { + // Track tool calls for logging + if (toolCall.id || toolCall.function?.name) { + toolCalls.push({ id: toolCall.id, name: toolCall.function?.name }) + } yield { type: "tool_call_partial", index: toolCall.index, @@ -252,6 +294,7 @@ export class RooHandler extends BaseOpenAiCompatibleProvider { } if (delta.content) { + accumulatedText.push(delta.content) yield { type: "text", text: delta.content, @@ -303,7 +346,17 @@ export class RooHandler extends BaseOpenAiCompatibleProvider { totalCost: isFreeModel ? 0 : (lastUsage.cost ?? 0), } } + + // Log successful response + logHandle.success({ + text: accumulatedText.join(""), + reasoning: accumulatedReasoning.length > 0 ? accumulatedReasoning.join("") : undefined, + toolCalls: toolCalls.length > 0 ? toolCalls : undefined, + usage: lastUsage, + }) } catch (error) { + logHandle.error(error) + const errorContext = { error: error instanceof Error ? error.message : String(error), stack: error instanceof Error ? 
error.stack : undefined, diff --git a/src/api/providers/router-provider.ts b/src/api/providers/router-provider.ts index 01942e21723..8b56ca9e1a6 100644 --- a/src/api/providers/router-provider.ts +++ b/src/api/providers/router-provider.ts @@ -22,6 +22,10 @@ type RouterProviderOptions = { export abstract class RouterProvider extends BaseProvider { protected readonly options: ApiHandlerOptions protected readonly name: RouterName + // Implement abstract providerName from BaseProvider using name + protected get providerName(): string { + return this.name + } protected models: ModelRecord = {} protected readonly modelId?: string protected readonly defaultModelId: string diff --git a/src/api/providers/vscode-lm.ts b/src/api/providers/vscode-lm.ts index 5c598ccd012..8770bbf1ddf 100644 --- a/src/api/providers/vscode-lm.ts +++ b/src/api/providers/vscode-lm.ts @@ -61,6 +61,7 @@ function convertToVsCodeLmTools(tools: OpenAI.Chat.ChatCompletionTool[]): vscode * ``` */ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHandler { + protected readonly providerName = "VS Code LM" protected options: ApiHandlerOptions private client: vscode.LanguageModelChat | null private disposable: vscode.Disposable | null diff --git a/src/api/providers/xai.ts b/src/api/providers/xai.ts index a1377a1317a..aa75f643e3a 100644 --- a/src/api/providers/xai.ts +++ b/src/api/providers/xai.ts @@ -21,7 +21,7 @@ const XAI_DEFAULT_TEMPERATURE = 0 export class XAIHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions private client: OpenAI - private readonly providerName = "xAI" + protected readonly providerName = "xAI" constructor(options: ApiHandlerOptions) { super() diff --git a/src/core/condense/__tests__/condense.spec.ts b/src/core/condense/__tests__/condense.spec.ts index 2558ee5b33a..e795f5890d9 100644 --- a/src/core/condense/__tests__/condense.spec.ts +++ b/src/core/condense/__tests__/condense.spec.ts @@ -15,6 +15,7 @@ import { // Create a mock ApiHandler for testing class MockApiHandler extends BaseProvider { + protected readonly providerName = "Mock" createMessage(): any { // Mock implementation for testing - returns an async iterable stream const mockStream = { diff --git a/src/core/context-management/__tests__/context-management.spec.ts b/src/core/context-management/__tests__/context-management.spec.ts index 3ee36fc5956..4dfec368b44 100644 --- a/src/core/context-management/__tests__/context-management.spec.ts +++ b/src/core/context-management/__tests__/context-management.spec.ts @@ -19,6 +19,7 @@ import { // Create a mock ApiHandler for testing class MockApiHandler extends BaseProvider { + protected readonly providerName = "Mock" createMessage(): any { // Mock implementation for testing - returns an async iterable stream const mockStream = { diff --git a/src/extension.ts b/src/extension.ts index dcb941fa581..cce11e9813d 100644 --- a/src/extension.ts +++ b/src/extension.ts @@ -2,11 +2,14 @@ import * as vscode from "vscode" import * as dotenvx from "@dotenvx/dotenvx" import * as path from "path" -// Load environment variables from .env file +// Load environment variables from .env and .env.local files try { - // Specify path to .env file in the project root directory + // Specify paths to .env and .env.local files in the project root directory const envPath = path.join(__dirname, "..", ".env") + const envLocalPath = path.join(__dirname, "..", ".env.local") + // Load .env first, then .env.local (so .env.local can override) dotenvx.config({ path: 
envPath }) + dotenvx.config({ path: envLocalPath, override: true }) } catch (e) { // Silently handle environment loading errors console.warn("Failed to load environment variables:", e) @@ -19,6 +22,7 @@ import { customToolRegistry } from "@roo-code/core" import "./utils/path" // Necessary to have access to String.prototype.toPosix. import { createOutputChannelLogger, createDualLogger } from "./utils/outputChannelLogger" +import { ApiInferenceLogger } from "./api/logging/ApiInferenceLogger" import { Package } from "./shared/package" import { formatLanguage } from "./shared/language" @@ -68,6 +72,12 @@ export async function activate(context: vscode.ExtensionContext) { context.subscriptions.push(outputChannel) outputChannel.appendLine(`${Package.name} extension activated - ${JSON.stringify(Package)}`) + // Configure API inference logger for debugging (enable via ROO_CODE_API_LOGGING=true) + ApiInferenceLogger.configure({ + enabled: process.env.ROO_CODE_API_LOGGING === "true", + sink: createOutputChannelLogger(outputChannel), + }) + // Set extension path for custom tool registry to find bundled esbuild customToolRegistry.setExtensionPath(context.extensionPath)
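
For reference, a minimal sketch of the pattern each provider hunk above repeats: wrap the request in start(), accumulate streamed chunks, and report the assembled result via success() or the thrown error via error(). It assumes BaseProvider exposes the ApiInferenceLogger singleton as this.inferenceLogger (that declaration is outside this excerpt), and the import path is illustrative.

// Sketch only, not part of the diff above.
// Assumptions: the relative import path, and that providers reach the same
// singleton that extension.ts configures via ApiInferenceLogger.configure().
import { ApiInferenceLogger } from "../logging/ApiInferenceLogger"

async function* createMessageWithLogging(providerName: string, modelId: string, requestParams: unknown) {
	// start() logs the request when ROO_CODE_API_LOGGING=true and returns a handle
	// tied to a generated requestId.
	const logHandle = ApiInferenceLogger.start(
		{ provider: providerName, operation: "createMessage", model: modelId },
		requestParams,
	)

	// Accumulate streamed chunks so only the final assembled response is logged,
	// never the individual deltas.
	const accumulatedText: string[] = []

	try {
		for (const piece of ["Hello", ", ", "world"]) {
			// Stand-in for chunks arriving from the upstream SDK stream.
			accumulatedText.push(piece)
			yield { type: "text", text: piece }
		}

		logHandle.success({ text: accumulatedText.join("") })
	} catch (error) {
		// Failures are logged under the same requestId, then re-thrown to the caller.
		logHandle.error(error)
		throw error
	}
}

Enabling this locally amounts to adding ROO_CODE_API_LOGGING=true to .env.local, which the updated extension.ts loads on top of .env; output is written to the extension's Output Channel through createOutputChannelLogger.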