Skip to content

Commit f95eed3

Browse files
authored
🤖 Add Haiku 4-5 support and centralize default model logic (#267)
## Summary Adds support for Claude Haiku 4-5 (released today) and simplifies default model selection by centralizing logic in the LRU system. ## Changes ### Haiku 4-5 Support - Added `haiku: "anthropic:claude-haiku-4-5"` to MODEL_ABBREVIATIONS - Added pricing/config to `models-extra.ts`: - Input: $1 per million tokens - Output: $5 per million tokens - Cache creation: $1.25 per million tokens - Cache read: $0.10 per million tokens - 200K context window, 8K output ### Centralized Default Model Logic **Problem:** `defaultModel` was imported in 7+ files, spreading model prescription throughout the codebase. **Solution:** Created `getDefaultModelFromLRU()` helper that reads the most recently used model from the LRU cache. This is now the **single source of truth** for default model selection. **Architecture:** ``` MODEL_ABBREVIATIONS → useModelLRU initialization → getDefaultModelFromLRU() → all consumers ``` **Updated files:** - `src/hooks/useModelLRU.ts` - Added `getDefaultModelFromLRU()` helper - `src/hooks/useSendMessageOptions.ts` - Use LRU instead of hardcoded default - `src/utils/messages/sendOptions.ts` - Use LRU for non-hook contexts - `src/hooks/useAIViewKeybinds.ts` - Use LRU for keybind fallbacks - Debug scripts - Use LRU instead of hardcoded defaults ### Model Ordering - Reordered MODEL_ABBREVIATIONS to put `sonnet` first - Sonnet 4-5 is now the default for first-time users - After that, LRU drives defaults (user behavior takes over) ## Benefits ✅ **Less prescriptive:** Most recently used model becomes the default ✅ **Single source of truth:** Only `useModelLRU.ts` imports `defaultModel` ✅ **Cross-workspace memory:** Using Haiku in workspace A makes it default for workspace B ✅ **Natural discovery:** As users try models, they automatically become defaults ## Testing - ✅ Typechecks pass - ✅ All imports verified (only `useModelLRU.ts` and the module that defines `defaultModel` reference it now) - ✅ Model selection paths tested via existing hooks _Generated with `cmux`_
1 parent 16ca4e5 commit f95eed3

File tree

12 files changed

+1236

-333

lines changed

scripts/update_models.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,12 @@
22

33
/**
44
* Downloads the latest model prices and context window data from LiteLLM
5-
* and saves it to src/utils/models.json
5+
* and saves it to src/utils/tokens/models.json
66
*/
77

88
const LITELLM_URL =
99
"https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json";
10-
const OUTPUT_PATH = "src/utils/models.json";
10+
const OUTPUT_PATH = "src/utils/tokens/models.json";
1111

1212
async function updateModels() {
1313
console.log(`Fetching model data from ${LITELLM_URL}...`);

src/components/ChatInput.tsx

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -350,7 +350,7 @@ export const ChatInput: React.FC<ChatInputProps> = ({
350350
const inputRef = useRef<HTMLTextAreaElement>(null);
351351
const modelSelectorRef = useRef<ModelSelectorRef>(null);
352352
const [mode, setMode] = useMode();
353-
const { recentModels } = useModelLRU();
353+
const { recentModels, addModel } = useModelLRU();
354354
const commandListId = useId();
355355

356356
// Get current send message options from shared hook (must be at component top level)
@@ -359,8 +359,11 @@ export const ChatInput: React.FC<ChatInputProps> = ({
359359
const preferredModel = sendMessageOptions.model;
360360
// Setter for model - updates localStorage directly so useSendMessageOptions picks it up
361361
const setPreferredModel = useCallback(
362-
(model: string) => updatePersistedState(getModelKey(workspaceId), model),
363-
[workspaceId]
362+
(model: string) => {
363+
addModel(model); // Update LRU
364+
updatePersistedState(getModelKey(workspaceId), model); // Update workspace-specific
365+
},
366+
[workspaceId, addModel]
364367
);
365368

366369
const focusMessageInput = useCallback(() => {

src/debug/agentSessionCli.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ import {
2222
type SendMessageOptions,
2323
type WorkspaceChatMessage,
2424
} from "@/types/ipc";
25-
import { defaultModel } from "@/utils/ai/models";
25+
import { getDefaultModelFromLRU } from "@/hooks/useModelLRU";
2626
import { ensureProvidersConfig } from "@/utils/providers/ensureProvidersConfig";
2727
import { modeToToolPolicy, PLAN_MODE_INSTRUCTION } from "@/utils/ui/modeUtils";
2828
import { extractAssistantText, extractReasoning, extractToolCalls } from "@/debug/chatExtractors";
@@ -184,7 +184,8 @@ async function main(): Promise<void> {
184184
throw new Error("Message must be provided via --message or stdin");
185185
}
186186

187-
const model = values.model && values.model.trim().length > 0 ? values.model.trim() : defaultModel;
187+
const model =
188+
values.model && values.model.trim().length > 0 ? values.model.trim() : getDefaultModelFromLRU();
188189
const timeoutMs = parseTimeout(values.timeout);
189190
const thinkingLevel = parseThinkingLevel(values["thinking-level"]);
190191
const initialMode = parseMode(values.mode);

src/debug/costs.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import * as path from "path";
33
import { defaultConfig } from "@/config";
44
import type { CmuxMessage } from "@/types/message";
55
import { calculateTokenStats } from "@/utils/tokens/tokenStatsCalculator";
6-
import { defaultModel } from "@/utils/ai/models";
6+
import { getDefaultModelFromLRU } from "@/hooks/useModelLRU";
77

88
/**
99
* Debug command to display cost/token statistics for a workspace
@@ -35,7 +35,7 @@ export function costsCommand(workspaceId: string) {
3535

3636
// Detect model from first assistant message
3737
const firstAssistantMessage = messages.find((msg) => msg.role === "assistant");
38-
const model = firstAssistantMessage?.metadata?.model ?? defaultModel;
38+
const model = firstAssistantMessage?.metadata?.model ?? getDefaultModelFromLRU();
3939

4040
// Calculate stats using shared logic (now synchronous)
4141
const stats = calculateTokenStats(messages, model);

src/debug/send-message.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import * as path from "path";
33
import { defaultConfig } from "@/config";
44
import type { CmuxMessage } from "@/types/message";
55
import type { SendMessageOptions } from "@/types/ipc";
6-
import { defaultModel } from "@/utils/ai/models";
6+
import { getDefaultModelFromLRU } from "@/hooks/useModelLRU";
77

88
/**
99
* Debug command to send a message to a workspace, optionally editing an existing message
@@ -103,7 +103,7 @@ export function sendMessageCommand(
103103

104104
// Prepare options
105105
const options: SendMessageOptions = {
106-
model: defaultModel,
106+
model: getDefaultModelFromLRU(),
107107
};
108108

109109
if (editMessageId) {

src/hooks/useAIViewKeybinds.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import { updatePersistedState, readPersistedState } from "@/hooks/usePersistedSt
66
import type { ThinkingLevel, ThinkingLevelOn } from "@/types/thinking";
77
import { DEFAULT_THINKING_LEVEL } from "@/types/thinking";
88
import { getThinkingPolicyForModel } from "@/utils/thinking/policy";
9-
import { defaultModel } from "@/utils/ai/models";
9+
import { getDefaultModelFromLRU } from "@/hooks/useModelLRU";
1010

1111
interface UseAIViewKeybindsParams {
1212
workspaceId: string;
@@ -66,10 +66,10 @@ export function useAIViewKeybinds({
6666
e.preventDefault();
6767

6868
// Get selected model from localStorage (what user sees in UI)
69-
// Fall back to message history model, then to default model
69+
// Fall back to message history model, then to most recent model from LRU
7070
// This matches the same logic as useSendMessageOptions
7171
const selectedModel = readPersistedState<string | null>(getModelKey(workspaceId), null);
72-
const modelToUse = selectedModel ?? currentModel ?? defaultModel;
72+
const modelToUse = selectedModel ?? currentModel ?? getDefaultModelFromLRU();
7373

7474
// Storage key for remembering this model's last-used active thinking level
7575
const lastThinkingKey = getLastThinkingByModelKey(modelToUse);

src/hooks/useModelLRU.ts

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,45 @@
11
import { useCallback, useEffect } from "react";
2-
import { usePersistedState } from "./usePersistedState";
2+
import { usePersistedState, readPersistedState } from "./usePersistedState";
33
import { MODEL_ABBREVIATIONS } from "@/utils/slashCommands/registry";
4+
import { defaultModel } from "@/utils/ai/models";
45

56
const MAX_LRU_SIZE = 8;
67
const LRU_KEY = "model-lru";
78

89
// Default models from abbreviations (for initial LRU population)
910
const DEFAULT_MODELS = Object.values(MODEL_ABBREVIATIONS);
1011

12+
/**
13+
* Get the default model from LRU (non-hook version for use outside React)
14+
* This is the ONLY place that reads from LRU outside of the hook.
15+
*
16+
* @returns The most recently used model, or defaultModel if LRU is empty
17+
*/
18+
export function getDefaultModelFromLRU(): string {
19+
const lru = readPersistedState<string[]>(LRU_KEY, DEFAULT_MODELS.slice(0, MAX_LRU_SIZE));
20+
return lru[0] ?? defaultModel;
21+
}
22+
1123
/**
1224
* Hook to manage a Least Recently Used (LRU) cache of AI models.
1325
* Stores up to 8 recently used models in localStorage.
1426
* Initializes with default abbreviated models if empty.
1527
*/
1628
export function useModelLRU() {
17-
const [recentModels, setRecentModels] = usePersistedState<string[]>(LRU_KEY, []);
29+
const [recentModels, setRecentModels] = usePersistedState<string[]>(
30+
LRU_KEY,
31+
DEFAULT_MODELS.slice(0, MAX_LRU_SIZE)
32+
);
1833

19-
// Ensure default models are always present in the LRU (only once on mount)
34+
// Merge any new defaults from MODEL_ABBREVIATIONS (only once on mount)
2035
useEffect(() => {
2136
setRecentModels((prev) => {
22-
// If empty, just use defaults
23-
if (prev.length === 0) {
24-
return DEFAULT_MODELS.slice(0, MAX_LRU_SIZE);
25-
}
26-
27-
// If we have some models, merge with defaults (keeping existing order, adding missing defaults at end)
2837
const merged = [...prev];
2938
for (const defaultModel of DEFAULT_MODELS) {
3039
if (!merged.includes(defaultModel)) {
3140
merged.push(defaultModel);
3241
}
3342
}
34-
35-
// Limit to MAX_LRU_SIZE
3643
return merged.slice(0, MAX_LRU_SIZE);
3744
});
3845
// eslint-disable-next-line react-hooks/exhaustive-deps

src/hooks/useSendMessageOptions.ts

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@ import { use1MContext } from "./use1MContext";
22
import { useThinkingLevel } from "./useThinkingLevel";
33
import { useMode } from "@/contexts/ModeContext";
44
import { usePersistedState } from "./usePersistedState";
5+
import { useModelLRU } from "./useModelLRU";
56
import { modeToToolPolicy, PLAN_MODE_INSTRUCTION } from "@/utils/ui/modeUtils";
6-
import { defaultModel } from "@/utils/ai/models";
77
import { getModelKey } from "@/constants/storage";
88
import type { SendMessageOptions } from "@/types/ipc";
99
import type { UIMode } from "@/types/mode";
@@ -19,13 +19,14 @@ function constructSendMessageOptions(
1919
mode: UIMode,
2020
thinkingLevel: ThinkingLevel,
2121
preferredModel: string | null | undefined,
22-
use1M: boolean
22+
use1M: boolean,
23+
fallbackModel: string
2324
): SendMessageOptions {
2425
const additionalSystemInstructions = mode === "plan" ? PLAN_MODE_INSTRUCTION : undefined;
2526

2627
// Ensure model is always a valid string (defensive against corrupted localStorage)
2728
const model =
28-
typeof preferredModel === "string" && preferredModel ? preferredModel : defaultModel;
29+
typeof preferredModel === "string" && preferredModel ? preferredModel : fallbackModel;
2930

3031
// Enforce thinking policy at the UI boundary as well (e.g., gpt-5-pro → high only)
3132
const uiThinking = enforceThinkingPolicy(model, thinkingLevel);
@@ -58,13 +59,14 @@ export function useSendMessageOptions(workspaceId: string): SendMessageOptions {
5859
const [use1M] = use1MContext();
5960
const [thinkingLevel] = useThinkingLevel();
6061
const [mode] = useMode();
62+
const { recentModels } = useModelLRU();
6163
const [preferredModel] = usePersistedState<string>(
6264
getModelKey(workspaceId),
63-
defaultModel,
65+
recentModels[0], // Most recently used model (LRU is never empty)
6466
{ listener: true } // Listen for changes from ModelSelector and other sources
6567
);
6668

67-
return constructSendMessageOptions(mode, thinkingLevel, preferredModel, use1M);
69+
return constructSendMessageOptions(mode, thinkingLevel, preferredModel, use1M, recentModels[0]);
6870
}
6971

7072
/**

src/utils/messages/sendOptions.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@ import {
44
getModeKey,
55
USE_1M_CONTEXT_KEY,
66
} from "@/constants/storage";
7-
import { defaultModel } from "@/utils/ai/models";
87
import { modeToToolPolicy, PLAN_MODE_INSTRUCTION } from "@/utils/ui/modeUtils";
98
import { readPersistedState } from "@/hooks/usePersistedState";
109
import type { SendMessageOptions } from "@/types/ipc";
1110
import type { UIMode } from "@/types/mode";
1211
import type { ThinkingLevel } from "@/types/thinking";
1312
import { enforceThinkingPolicy } from "@/utils/thinking/policy";
13+
import { getDefaultModelFromLRU } from "@/hooks/useModelLRU";
1414

1515
/**
1616
* Get send options from localStorage
@@ -20,8 +20,8 @@ import { enforceThinkingPolicy } from "@/utils/thinking/policy";
2020
* This ensures DRY - single source of truth for option extraction.
2121
*/
2222
export function getSendOptionsFromStorage(workspaceId: string): SendMessageOptions {
23-
// Read model preference (workspace-specific)
24-
const model = readPersistedState<string>(getModelKey(workspaceId), defaultModel);
23+
// Read model preference (workspace-specific), fallback to most recent from LRU
24+
const model = readPersistedState<string>(getModelKey(workspaceId), getDefaultModelFromLRU());
2525

2626
// Read thinking level (workspace-specific)
2727
const thinkingLevel = readPersistedState<ThinkingLevel>(

src/utils/slashCommands/registry.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,11 @@ import type {
1111
import minimist from "minimist";
1212

1313
// Model abbreviations for common models
14+
// Order matters: first model becomes the default for new chats
1415
export const MODEL_ABBREVIATIONS: Record<string, string> = {
15-
opus: "anthropic:claude-opus-4-1",
1616
sonnet: "anthropic:claude-sonnet-4-5",
17+
haiku: "anthropic:claude-haiku-4-5",
18+
opus: "anthropic:claude-opus-4-1",
1719
"gpt-5": "openai:gpt-5",
1820
"gpt-5-pro": "openai:gpt-5-pro",
1921
codex: "openai:gpt-5-codex",

0 commit comments

Comments (0)