From b1e77777a726336cc63c9589b240a215203983b7 Mon Sep 17 00:00:00 2001
From: wangsong
Date: Sun, 21 Dec 2025 09:09:09 +0800
Subject: [PATCH] refactor: improve Ollama and LM Studio configuration with
 environment variable support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add environment variable support for Ollama configuration (OLLAMA_BASE_URL, OLLAMA_SERVER_URL, OLLAMA_API_KEY, OLLAMA_NUM_CTX, OLLAMA_MODEL_ID)
- Implement proper configuration priority: environment variables > VS Code settings > defaults
- Update lmstudio.ts to use refreshModels with baseUrl parameter
- Refactor webviewMessageHandler to use new refreshModels function for forced cache refresh
- Add normalizeToolSchema to ensure JSON Schema compatibility with Ollama's Go unmarshaler
- Improve comments to clarify configuration behavior

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Haiku 4.5
---
 src/api/providers/fetchers/lmstudio.ts    |  6 +--
 src/api/providers/native-ollama.ts        | 47 +++++++++++++++++------
 src/core/webview/webviewMessageHandler.ts | 14 +++----
 3 files changed, 43 insertions(+), 24 deletions(-)

diff --git a/src/api/providers/fetchers/lmstudio.ts b/src/api/providers/fetchers/lmstudio.ts
index 3068a962d85..183f16434cd 100644
--- a/src/api/providers/fetchers/lmstudio.ts
+++ b/src/api/providers/fetchers/lmstudio.ts
@@ -3,7 +3,7 @@ import { LLM, LLMInfo, LLMInstanceInfo, LMStudioClient } from "@lmstudio/sdk"
 
 import { type ModelInfo, lMStudioDefaultModelInfo } from "@roo-code/types"
 
-import { flushModels, getModels } from "./modelCache"
+import { refreshModels } from "./modelCache"
 
 const modelsWithLoadedDetails = new Set()
 
@@ -18,8 +18,8 @@ export const forceFullModelDetailsLoad = async (baseUrl: string, modelId: string
 		const client = new LMStudioClient({ baseUrl: lmsUrl })
 		await client.llm.model(modelId)
 
-		// Flush and refresh cache to get updated model details
-		await flushModels("lmstudio", true)
+		// Refresh cache to get updated model details using the provided baseUrl
+		await refreshModels({ provider: "lmstudio", baseUrl })
 
 		// Mark this model as having full details loaded.
 		modelsWithLoadedDetails.add(modelId)
diff --git a/src/api/providers/native-ollama.ts b/src/api/providers/native-ollama.ts
index 712b70445cc..017c0e3001b 100644
--- a/src/api/providers/native-ollama.ts
+++ b/src/api/providers/native-ollama.ts
@@ -7,6 +7,7 @@ import { BaseProvider } from "./base-provider"
 import type { ApiHandlerOptions } from "../../shared/api"
 import { getOllamaModels } from "./fetchers/ollama"
 import { XmlMatcher } from "../../utils/xml-matcher"
+import { normalizeToolSchema } from "../../utils/json-schema"
 import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
 
 interface OllamaChatOptions {
@@ -158,15 +159,23 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio
 	private ensureClient(): Ollama {
 		if (!this.client) {
 			try {
+				// Priority: environment variables > VS Code settings > default
+				const host =
+					process.env.OLLAMA_BASE_URL ||
+					process.env.OLLAMA_SERVER_URL ||
+					this.options.ollamaBaseUrl ||
+					"http://localhost:11434"
+
 				const clientOptions: OllamaOptions = {
-					host: this.options.ollamaBaseUrl || "http://localhost:11434",
+					host,
 					// Note: The ollama npm package handles timeouts internally
 				}
 
 				// Add API key if provided (for Ollama cloud or authenticated instances)
-				if (this.options.ollamaApiKey) {
+				const apiKey = process.env.OLLAMA_API_KEY || this.options.ollamaApiKey
+				if (apiKey) {
 					clientOptions.headers = {
-						Authorization: `Bearer ${this.options.ollamaApiKey}`,
+						Authorization: `Bearer ${apiKey}`,
 					}
 				}
 
@@ -182,6 +191,9 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio
 	 * Converts OpenAI-format tools to Ollama's native tool format.
 	 * This allows NativeOllamaHandler to use the same tool definitions
 	 * that are passed to OpenAI-compatible providers.
+	 *
+	 * Normalizes JSON schemas to ensure compatibility with Ollama's Go unmarshaler,
+	 * which expects `type` to be a string (not an array like ["string", "null"]).
 	 */
 	private convertToolsToOllama(tools: OpenAI.Chat.ChatCompletionTool[] | undefined): OllamaTool[] | undefined {
 		if (!tools || tools.length === 0) {
@@ -195,7 +207,9 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio
 			function: {
 				name: tool.function.name,
 				description: tool.function.description,
-				parameters: tool.function.parameters as OllamaTool["function"]["parameters"],
+				parameters: normalizeToolSchema(
+					tool.function.parameters as Record<string, unknown>,
+				) as OllamaTool["function"]["parameters"],
 			},
 		}))
 	}
@@ -234,9 +248,12 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio
 			temperature: this.options.modelTemperature ?? (useR1Format ? DEEP_SEEK_DEFAULT_TEMPERATURE : 0),
 		}
 
-		// Only include num_ctx if explicitly set via ollamaNumCtx
-		if (this.options.ollamaNumCtx !== undefined) {
-			chatOptions.num_ctx = this.options.ollamaNumCtx
+		// Only include num_ctx if explicitly set (env overrides settings)
+		const numCtx = process.env.OLLAMA_NUM_CTX
+			? parseInt(process.env.OLLAMA_NUM_CTX, 10)
+			: this.options.ollamaNumCtx
+		if (numCtx !== undefined && !isNaN(numCtx)) {
+			chatOptions.num_ctx = numCtx
 		}
 
 		// Create the actual API request promise
@@ -328,12 +345,15 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio
 	}
 
 	async fetchModel() {
-		this.models = await getOllamaModels(this.options.ollamaBaseUrl, this.options.ollamaApiKey)
+		const baseUrl = process.env.OLLAMA_BASE_URL || process.env.OLLAMA_SERVER_URL || this.options.ollamaBaseUrl
+		const apiKey = process.env.OLLAMA_API_KEY || this.options.ollamaApiKey
+
+		this.models = await getOllamaModels(baseUrl, apiKey)
 		return this.getModel()
 	}
 
 	override getModel(): { id: string; info: ModelInfo } {
-		const modelId = this.options.ollamaModelId || ""
+		const modelId = process.env.OLLAMA_MODEL_ID || this.options.ollamaModelId || ""
 		return {
 			id: modelId,
 			info: this.models[modelId] || openAiModelInfoSaneDefaults,
@@ -351,9 +371,12 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio
 			temperature: this.options.modelTemperature ?? (useR1Format ? DEEP_SEEK_DEFAULT_TEMPERATURE : 0),
 		}
 
-		// Only include num_ctx if explicitly set via ollamaNumCtx
-		if (this.options.ollamaNumCtx !== undefined) {
-			chatOptions.num_ctx = this.options.ollamaNumCtx
+		// Only include num_ctx if explicitly set (env overrides settings)
+		const numCtx = process.env.OLLAMA_NUM_CTX
+			? parseInt(process.env.OLLAMA_NUM_CTX, 10)
+			: this.options.ollamaNumCtx
+		if (numCtx !== undefined && !isNaN(numCtx)) {
+			chatOptions.num_ctx = numCtx
 		}
 
 		const response = await client.chat({
diff --git a/src/core/webview/webviewMessageHandler.ts b/src/core/webview/webviewMessageHandler.ts
index e1640d3f2a8..cbbd40cc446 100644
--- a/src/core/webview/webviewMessageHandler.ts
+++ b/src/core/webview/webviewMessageHandler.ts
@@ -54,7 +54,7 @@ import { getVsCodeLmModels } from "../../api/providers/vscode-lm"
 import { openMention } from "../mentions"
 import { getWorkspacePath } from "../../utils/path"
 import { Mode, defaultModeSlug } from "../../shared/modes"
-import { getModels, flushModels } from "../../api/providers/fetchers/modelCache"
+import { getModels, flushModels, refreshModels } from "../../api/providers/fetchers/modelCache"
 import { GetModelsOptions } from "../../shared/api"
 import { generateSystemPrompt } from "./generateSystemPrompt"
 import { getCommand } from "../../utils/commands"
@@ -923,10 +923,8 @@ export const webviewMessageHandler = async (
 			// Specific handler for Ollama models only.
 			const { apiConfiguration: ollamaApiConfig } = await provider.getState()
 			try {
-				// Flush cache and refresh to ensure fresh models.
-				await flushModels("ollama", true)
-
-				const ollamaModels = await getModels({
+				// Force-refresh with the user-provided baseUrl/apiKey so we don't hit stale localhost cache.
+				const ollamaModels = await refreshModels({
 					provider: "ollama",
 					baseUrl: ollamaApiConfig.ollamaBaseUrl,
 					apiKey: ollamaApiConfig.ollamaApiKey,
@@ -945,10 +943,8 @@ export const webviewMessageHandler = async (
 			// Specific handler for LM Studio models only.
 			const { apiConfiguration: lmStudioApiConfig } = await provider.getState()
 			try {
-				// Flush cache and refresh to ensure fresh models.
-				await flushModels("lmstudio", true)
-
-				const lmStudioModels = await getModels({
+				// Force-refresh with the user-provided baseUrl so we don't hit stale localhost cache.
+				const lmStudioModels = await refreshModels({
 					provider: "lmstudio",
 					baseUrl: lmStudioApiConfig.lmStudioBaseUrl,
 				})
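Editor's note: normalizeToolSchema is imported above from src/utils/json-schema, but its implementation is not part of this patch excerpt. Going only by the commit message (Ollama's Go unmarshaler expects `type` to be a single string rather than an array like ["string", "null"]), a minimal sketch of what such a helper might look like follows. The function name is taken from the patch; the exact signature, recursion strategy, and nullability handling are assumptions, not the actual implementation.

// Hypothetical sketch only -- the real normalizeToolSchema in src/utils/json-schema.ts may differ.
// Recursively rewrites a JSON Schema fragment so that `type` is always a single string,
// since a union type such as ["string", "null"] is rejected by Ollama's Go-side unmarshaler.
export function normalizeToolSchema(schema: Record<string, unknown>): Record<string, unknown> {
	const normalized: Record<string, unknown> = { ...schema }

	// Collapse `type: ["string", "null"]` to the first non-"null" entry.
	if (Array.isArray(normalized.type)) {
		const nonNull = normalized.type.filter((t) => t !== "null")
		normalized.type = nonNull[0] ?? "string"
	}

	// Recurse into object properties.
	if (normalized.properties && typeof normalized.properties === "object") {
		normalized.properties = Object.fromEntries(
			Object.entries(normalized.properties as Record<string, unknown>).map(([key, value]) => [
				key,
				normalizeToolSchema(value as Record<string, unknown>),
			]),
		)
	}

	// Recurse into array item schemas.
	if (normalized.items && typeof normalized.items === "object" && !Array.isArray(normalized.items)) {
		normalized.items = normalizeToolSchema(normalized.items as Record<string, unknown>)
	}

	return normalized
}

Dropping the "null" member rather than the concrete type keeps optional parameters usable: the parameter remains typed (e.g. "string") and optionality is still expressed by its absence from the schema's required list.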