
Commit 429a6dd

🤖 feat: add OpenAI promptCacheKey for improved caching (#1174)
Wire AI SDK's `providerOptions.openai.promptCacheKey` to improve OpenAI prompt cache hit rates.

## Changes

- Derive cache key as `mux-v1-{workspaceId}` for OpenAI requests
- Pass workspaceId from `AIService.streamMessage` to `buildProviderOptions`
- Only set promptCacheKey when workspaceId is available (always true in real requests)

This enables OpenAI to route requests to cached prefixes within a workspace, improving cache hit rates for repeated calls.

---

_Generated with `mux` • Model: `anthropic:claude-opus-4-5` • Thinking: `high`_
1 parent e1be6b4 commit 429a6dd
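The diff below wires the derived key only as far as `buildProviderOptions`; as a rough sketch of where it ends up, the resulting options are eventually passed to an AI SDK call along these lines (the `streamText` call site, model id, and literal values here are illustrative assumptions, not part of this commit):

```ts
import { streamText } from "ai";
import { openai } from "@ai-sdk/openai";

// Illustrative sketch only: mirrors what buildProviderOptions produces for an
// OpenAI model with workspaceId "abc123" and thinking "off" (see the tests below).
const result = streamText({
  model: openai.responses("gpt-5.2"), // model id taken from the test cases below
  prompt: "Summarize the latest build failure.",
  providerOptions: {
    openai: {
      serviceTier: "auto",
      truncation: "auto",
      // Stable per-workspace key so OpenAI can route repeated requests from
      // the same workspace to the same cached prefix.
      promptCacheKey: "mux-v1-abc123",
    },
  },
});
```

Because every request from a workspace reuses the same key, prompts that share a prefix within that workspace have a better chance of hitting OpenAI's prompt cache.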

File tree

- src/common/utils/ai/providerOptions.test.ts
- src/common/utils/ai/providerOptions.ts
- src/node/services/aiService.ts

3 files changed: +59 -2 lines changed

src/common/utils/ai/providerOptions.test.ts

Lines changed: 45 additions & 0 deletions
```diff
@@ -2,6 +2,7 @@
  * Tests for provider options builder
  */

+import type { OpenAIResponsesProviderOptions } from "@ai-sdk/openai";
 import { describe, test, expect, mock } from "bun:test";
 import { buildProviderOptions } from "./providerOptions";

@@ -120,3 +121,47 @@ describe("buildProviderOptions - Anthropic", () => {
     });
   });
 });
+
+describe("buildProviderOptions - OpenAI promptCacheKey", () => {
+  // Helper to extract OpenAI options from the result
+  const getOpenAIOptions = (
+    result: ReturnType<typeof buildProviderOptions>
+  ): OpenAIResponsesProviderOptions | undefined => {
+    if ("openai" in result) {
+      return result.openai;
+    }
+    return undefined;
+  };
+
+  describe("promptCacheKey derivation", () => {
+    test("should derive promptCacheKey from workspaceId when provided", () => {
+      const result = buildProviderOptions(
+        "openai:gpt-5.2",
+        "off",
+        undefined,
+        undefined,
+        undefined,
+        "abc123"
+      );
+      const openai = getOpenAIOptions(result);
+
+      expect(openai).toBeDefined();
+      expect(openai!.promptCacheKey).toBe("mux-v1-abc123");
+    });
+
+    test("should derive promptCacheKey for gateway OpenAI model", () => {
+      const result = buildProviderOptions(
+        "mux-gateway:openai/gpt-5.2",
+        "off",
+        undefined,
+        undefined,
+        undefined,
+        "workspace-xyz"
+      );
+      const openai = getOpenAIOptions(result);
+
+      expect(openai).toBeDefined();
+      expect(openai!.promptCacheKey).toBe("mux-v1-workspace-xyz");
+    });
+  });
+});
```

src/common/utils/ai/providerOptions.ts

Lines changed: 11 additions & 1 deletion
```diff
@@ -65,7 +65,8 @@ export function buildProviderOptions(
   thinkingLevel: ThinkingLevel,
   messages?: MuxMessage[],
   lostResponseIds?: (id: string) => boolean,
-  muxProviderOptions?: MuxProviderOptions
+  muxProviderOptions?: MuxProviderOptions,
+  workspaceId?: string // Optional for non-OpenAI providers
 ): ProviderOptions {
   // Always clamp to the model's supported thinking policy (e.g., gpt-5-pro = HIGH only)
   const effectiveThinking = enforceThinkingPolicy(modelString, thinkingLevel);
@@ -210,11 +211,17 @@
   // Check if auto-truncation should be disabled (for testing context limit errors)
   const disableAutoTruncation = muxProviderOptions?.openai?.disableAutoTruncation ?? false;

+  // Prompt cache key: derive from workspaceId
+  // This helps OpenAI route requests to cached prefixes for improved hit rates
+  // workspaceId is always passed from AIService.streamMessage for real requests
+  const promptCacheKey = workspaceId ? `mux-v1-${workspaceId}` : undefined;
+
   log.debug("buildProviderOptions: OpenAI config", {
     reasoningEffort,
     thinkingLevel: effectiveThinking,
     previousResponseId,
     disableAutoTruncation,
+    promptCacheKey,
   });

   const serviceTier = muxProviderOptions?.openai?.serviceTier ?? "auto";
@@ -225,6 +232,9 @@
     serviceTier,
     // Automatically truncate conversation to fit context window, unless disabled for testing
     truncation: disableAutoTruncation ? "disabled" : "auto",
+    // Stable prompt cache key to improve OpenAI cache hit rates
+    // See: https://sdk.vercel.ai/providers/ai-sdk-providers/openai#responses-models
+    ...(promptCacheKey && { promptCacheKey }),
     // Conditionally add reasoning configuration
     ...(reasoningEffort && {
       reasoningEffort,
```
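Given the conditional spread above, the key is simply absent when no workspaceId reaches the builder. A quick sketch of the two cases (the sibling import mirrors the test file above; results match the tests earlier in this commit):

```ts
import { buildProviderOptions } from "./providerOptions";

// Sketch: with a workspaceId the key is derived and included...
const withKey = buildProviderOptions(
  "openai:gpt-5.2", "off", undefined, undefined, undefined, "abc123"
);
// -> withKey.openai.promptCacheKey === "mux-v1-abc123"

// ...without one, the `...(promptCacheKey && { promptCacheKey })` spread
// short-circuits and the field is omitted entirely.
const withoutKey = buildProviderOptions("openai:gpt-5.2", "off");
// -> withoutKey.openai has no promptCacheKey
```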

src/node/services/aiService.ts

Lines changed: 3 additions & 1 deletion
```diff
@@ -1370,12 +1370,14 @@ export class AIService extends EventEmitter {
     // Build provider options based on thinking level and message history
     // Pass filtered messages so OpenAI can extract previousResponseId for persistence
     // Also pass callback to filter out lost responseIds (OpenAI invalidated them)
+    // Pass workspaceId to derive stable promptCacheKey for OpenAI caching
     const providerOptions = buildProviderOptions(
       modelString,
       thinkingLevel ?? "off",
       filteredMessages,
       (id) => this.streamManager.isResponseIdLost(id),
-      effectiveMuxProviderOptions
+      effectiveMuxProviderOptions,
+      workspaceId
     );

     // Debug dump: Log the complete LLM request when MUX_DEBUG_LLM_REQUEST is set
```
