
Commit 429a6dd

🤖 feat: add OpenAI promptCacheKey for improved caching (#1174)
Wire AI SDK's `providerOptions.openai.promptCacheKey` to improve OpenAI prompt cache hit rates.

## Changes

- Derive cache key as `mux-v1-{workspaceId}` for OpenAI requests
- Pass workspaceId from `AIService.streamMessage` to `buildProviderOptions`
- Only set promptCacheKey when workspaceId is available (always true in real requests)

This enables OpenAI to route requests to cached prefixes within a workspace, improving cache hit rates for repeated calls.

---

_Generated with `mux` • Model: `anthropic:claude-opus-4-5` • Thinking: `high`_
1 parent e1be6b4 commit 429a6dd
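The diff below wires the derived key only as far as `buildProviderOptions`; as a rough sketch of where it ends up, the resulting options are eventually passed to an AI SDK call along these lines (the `streamText` call site, model id, and literal values here are illustrative assumptions, not part of this commit):

```ts
import { streamText } from "ai";
import { openai } from "@ai-sdk/openai";

// Illustrative sketch only: mirrors what buildProviderOptions produces for an
// OpenAI model with workspaceId "abc123" and thinking "off" (see the tests below).
const result = streamText({
  model: openai.responses("gpt-5.2"), // model id taken from the test cases below
  prompt: "Summarize the latest build failure.",
  providerOptions: {
    openai: {
      serviceTier: "auto",
      truncation: "auto",
      // Stable per-workspace key so OpenAI can route repeated requests from
      // the same workspace to the same cached prefix.
      promptCacheKey: "mux-v1-abc123",
    },
  },
});
```

Because every request from a workspace reuses the same key, prompts that share a prefix within that workspace have a better chance of hitting OpenAI's prompt cache.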

File tree

- src/common/utils/ai/providerOptions.test.ts
- src/common/utils/ai/providerOptions.ts
- src/node/services/aiService.ts

3 files changed: +59 -2 lines changed

src/common/utils/ai/providerOptions.test.ts

Lines changed: 45 additions & 0 deletions
```diff
@@ -2,6 +2,7 @@
  * Tests for provider options builder
  */

+import type { OpenAIResponsesProviderOptions } from "@ai-sdk/openai";
 import { describe, test, expect, mock } from "bun:test";
 import { buildProviderOptions } from "./providerOptions";

@@ -120,3 +121,47 @@ describe("buildProviderOptions - Anthropic", () => {
     });
   });
 });
+
+describe("buildProviderOptions - OpenAI promptCacheKey", () => {
+  // Helper to extract OpenAI options from the result
+  const getOpenAIOptions = (
+    result: ReturnType<typeof buildProviderOptions>
+  ): OpenAIResponsesProviderOptions | undefined => {
+    if ("openai" in result) {
+      return result.openai;
+    }
+    return undefined;
+  };
+
+  describe("promptCacheKey derivation", () => {
+    test("should derive promptCacheKey from workspaceId when provided", () => {
+      const result = buildProviderOptions(
+        "openai:gpt-5.2",
+        "off",
+        undefined,
+        undefined,
+        undefined,
+        "abc123"
+      );
+      const openai = getOpenAIOptions(result);
+
+      expect(openai).toBeDefined();
+      expect(openai!.promptCacheKey).toBe("mux-v1-abc123");
+    });
+
+    test("should derive promptCacheKey for gateway OpenAI model", () => {
+      const result = buildProviderOptions(
+        "mux-gateway:openai/gpt-5.2",
+        "off",
+        undefined,
+        undefined,
+        undefined,
+        "workspace-xyz"
+      );
+      const openai = getOpenAIOptions(result);
+
+      expect(openai).toBeDefined();
+      expect(openai!.promptCacheKey).toBe("mux-v1-workspace-xyz");
+    });
+  });
+});
```

src/common/utils/ai/providerOptions.ts

Lines changed: 11 additions & 1 deletion
```diff
@@ -65,7 +65,8 @@ export function buildProviderOptions(
   thinkingLevel: ThinkingLevel,
   messages?: MuxMessage[],
   lostResponseIds?: (id: string) => boolean,
-  muxProviderOptions?: MuxProviderOptions
+  muxProviderOptions?: MuxProviderOptions,
+  workspaceId?: string // Optional for non-OpenAI providers
 ): ProviderOptions {
   // Always clamp to the model's supported thinking policy (e.g., gpt-5-pro = HIGH only)
   const effectiveThinking = enforceThinkingPolicy(modelString, thinkingLevel);
@@ -210,11 +211,17 @@
   // Check if auto-truncation should be disabled (for testing context limit errors)
   const disableAutoTruncation = muxProviderOptions?.openai?.disableAutoTruncation ?? false;

+  // Prompt cache key: derive from workspaceId
+  // This helps OpenAI route requests to cached prefixes for improved hit rates
+  // workspaceId is always passed from AIService.streamMessage for real requests
+  const promptCacheKey = workspaceId ? `mux-v1-${workspaceId}` : undefined;
+
   log.debug("buildProviderOptions: OpenAI config", {
     reasoningEffort,
     thinkingLevel: effectiveThinking,
     previousResponseId,
     disableAutoTruncation,
+    promptCacheKey,
   });

   const serviceTier = muxProviderOptions?.openai?.serviceTier ?? "auto";
@@ -225,6 +232,9 @@
     serviceTier,
     // Automatically truncate conversation to fit context window, unless disabled for testing
     truncation: disableAutoTruncation ? "disabled" : "auto",
+    // Stable prompt cache key to improve OpenAI cache hit rates
+    // See: https://sdk.vercel.ai/providers/ai-sdk-providers/openai#responses-models
+    ...(promptCacheKey && { promptCacheKey }),
     // Conditionally add reasoning configuration
     ...(reasoningEffort && {
       reasoningEffort,
```
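Given the conditional spread above, the key is simply absent when no workspaceId reaches the builder. A quick sketch of the two cases (the sibling import mirrors the test file above; results match the tests earlier in this commit):

```ts
import { buildProviderOptions } from "./providerOptions";

// Sketch: with a workspaceId the key is derived and included...
const withKey = buildProviderOptions(
  "openai:gpt-5.2", "off", undefined, undefined, undefined, "abc123"
);
// -> withKey.openai.promptCacheKey === "mux-v1-abc123"

// ...without one, the `...(promptCacheKey && { promptCacheKey })` spread
// short-circuits and the field is omitted entirely.
const withoutKey = buildProviderOptions("openai:gpt-5.2", "off");
// -> withoutKey.openai has no promptCacheKey
```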

src/node/services/aiService.ts

Lines changed: 3 additions & 1 deletion
```diff
@@ -1370,12 +1370,14 @@ export class AIService extends EventEmitter {
     // Build provider options based on thinking level and message history
     // Pass filtered messages so OpenAI can extract previousResponseId for persistence
     // Also pass callback to filter out lost responseIds (OpenAI invalidated them)
+    // Pass workspaceId to derive stable promptCacheKey for OpenAI caching
     const providerOptions = buildProviderOptions(
       modelString,
       thinkingLevel ?? "off",
       filteredMessages,
       (id) => this.streamManager.isResponseIdLost(id),
-      effectiveMuxProviderOptions
+      effectiveMuxProviderOptions,
+      workspaceId
     );

     // Debug dump: Log the complete LLM request when MUX_DEBUG_LLM_REQUEST is set
```
