diff --git a/src/data/index.ts b/src/data/index.ts index b7bd28c49a..c36331ceb1 100644 --- a/src/data/index.ts +++ b/src/data/index.ts @@ -1,4 +1,12 @@ -import { chatNavData, liveObjectsNavData, liveSyncNavData, platformNavData, pubsubNavData, spacesNavData } from './nav'; +import { + aiTransportNavData, + chatNavData, + liveObjectsNavData, + liveSyncNavData, + platformNavData, + pubsubNavData, + spacesNavData, +} from './nav'; import { languageData } from './languages'; import { PageData, ProductData } from './types'; import homepageContentData from './content/homepage'; @@ -16,6 +24,10 @@ export const productData = { nav: chatNavData, languages: languageData.chat, }, + aiTransport: { + nav: aiTransportNavData, + languages: languageData.aiTransport, + }, spaces: { nav: spacesNavData, languages: languageData.spaces, diff --git a/src/data/languages/languageData.ts b/src/data/languages/languageData.ts index 24bebd829c..83992637e8 100644 --- a/src/data/languages/languageData.ts +++ b/src/data/languages/languageData.ts @@ -29,6 +29,9 @@ export default { swift: '1.0', kotlin: '1.0', }, + aiTransport: { + javascript: '2.11', + }, spaces: { javascript: '0.4', react: '0.4', diff --git a/src/data/nav/aitransport.ts b/src/data/nav/aitransport.ts new file mode 100644 index 0000000000..87d90936ca --- /dev/null +++ b/src/data/nav/aitransport.ts @@ -0,0 +1,36 @@ +import { NavProduct } from './types'; + +export default { + name: 'Ably AI Transport', + link: '/docs/ai-transport', + icon: { + closed: 'icon-gui-prod-ai-transport-outline', + open: 'icon-gui-prod-ai-transport-solid', + }, + content: [ + { + name: 'Introduction', + pages: [ + { + name: 'About AI Transport', + link: '/docs/ai-transport', + index: true, + }, + ], + }, + { + name: 'Sessions & Identity', + pages: [ + { + name: 'Overview', + link: '/docs/ai-transport/sessions-identity/overview', + }, + { + name: 'Session abandonment', + link: '/docs/ai-transport/sessions-identity/session-abandonment', + }, + ], + }, + ], + 
api: [], +} satisfies NavProduct; diff --git a/src/data/nav/index.ts b/src/data/nav/index.ts index aac3975f22..e5ed49fddb 100644 --- a/src/data/nav/index.ts +++ b/src/data/nav/index.ts @@ -1,8 +1,17 @@ import platformNavData from './platform'; import pubsubNavData from './pubsub'; import chatNavData from './chat'; +import aiTransportNavData from './aitransport'; import liveObjectsNavData from './liveobjects'; import spacesNavData from './spaces'; import liveSyncNavData from './livesync'; -export { platformNavData, pubsubNavData, chatNavData, liveObjectsNavData, spacesNavData, liveSyncNavData }; +export { + platformNavData, + pubsubNavData, + chatNavData, + aiTransportNavData, + liveObjectsNavData, + spacesNavData, + liveSyncNavData, +}; diff --git a/src/data/types.ts b/src/data/types.ts index 0884a04a8b..a9c2b2977b 100644 --- a/src/data/types.ts +++ b/src/data/types.ts @@ -3,7 +3,7 @@ import { LanguageData } from './languages/types'; import { NavProduct } from './nav/types'; const pageKeys = ['homepage'] as const; -const productKeys = ['platform', 'pubsub', 'chat', 'spaces', 'liveObjects', 'liveSync'] as const; +const productKeys = ['platform', 'pubsub', 'chat', 'aiTransport', 'spaces', 'liveObjects', 'liveSync'] as const; export type ProductKey = (typeof productKeys)[number]; type PageKey = (typeof pageKeys)[number]; diff --git a/src/images/content/ai-transport/ait-http-streaming.webp b/src/images/content/ai-transport/ait-http-streaming.webp new file mode 100644 index 0000000000..e48fd9754c Binary files /dev/null and b/src/images/content/ai-transport/ait-http-streaming.webp differ diff --git a/src/images/content/ai-transport/ait-with-ait.webp b/src/images/content/ai-transport/ait-with-ait.webp new file mode 100644 index 0000000000..410b032608 Binary files /dev/null and b/src/images/content/ai-transport/ait-with-ait.webp differ diff --git a/src/pages/docs/ai-transport/index.mdx b/src/pages/docs/ai-transport/index.mdx new file mode 100644 index 
0000000000..fb2f2b271e --- /dev/null +++ b/src/pages/docs/ai-transport/index.mdx @@ -0,0 +1,6 @@ +--- +title: About AI Transport +meta_description: "Learn more about Ably's AI Transport and the features that enable you to quickly build functionality into new and existing applications." +redirect_from: + - /docs/products/ai-transport +--- diff --git a/src/pages/docs/ai-transport/sessions-identity/overview.mdx b/src/pages/docs/ai-transport/sessions-identity/overview.mdx new file mode 100644 index 0000000000..836be8eee2 --- /dev/null +++ b/src/pages/docs/ai-transport/sessions-identity/overview.mdx @@ -0,0 +1,65 @@ +--- +title: "Sessions & identity overview" +meta_description: "Manage session lifecycle and identity in decoupled AI architectures" +--- + +Ably AI Transport provides robust session management and identity capabilities designed for modern AI applications. Sessions persist beyond individual connections, enabling agents and clients to connect independently through shared channels. Built-in token-based authentication provides verified user identity and fine-grained authorization for channel operations. + +## What is a session? + +A session is an interaction between a user (or multiple users) and an AI agent where messages and data are exchanged, building up shared context over time. 
In AI Transport, sessions are designed to persist beyond the boundaries of individual connections, enabling modern AI experiences where users expect to: + +- **Resume conversations across devices** - Start a conversation on mobile and seamlessly continue on desktop with full context preserved +- **Return to long-running work** - Close the browser while agents continue processing in the background, delivering results when you return +- **Recover from interruptions** - Experience connection drops, browser refreshes, or network instability without losing conversation progress +- **Collaborate in shared sessions** - Multiple users can participate in the same conversation simultaneously and remain in sync + +These capabilities represent a fundamental shift from traditional request/response AI experiences to continuous, resumable interactions that follow users wherever they go. Sessions have a lifecycle: they begin when a user starts interacting with an agent, remain active while the interaction continues, and can persist even when users disconnect - enabling truly asynchronous AI workflows. + +Managing this lifecycle in AI Transport's decoupled architecture involves detecting when users are present, deciding when to stop or continue agent work, and handling scenarios where users disconnect and return. + +## Connection-oriented vs channel-oriented sessions + +In traditional connection-oriented architectures, sessions are bound to the lifecycle of a WebSocket or SSE connection: + +1. Client opens connection to agent server to establish a session +2. Agent streams response over the connection +3. When the connection closes, the session ends + +This tight coupling means network interruptions terminate sessions, agents can't continue work after disconnections, and supporting multiple devices or users introduces significant complexity. 
+ +![Traditional HTTP streaming architecture](../../../../images/content/ai-transport/ait-http-streaming.webp) + + +AI Transport uses a channel-oriented model where sessions persist independently of individual connections. Clients and agents communicate through [Channels](/docs/channels): + +1. Client sends a single request to agent server to establish a session +2. Server responds with a unique ID for the session, which is used to identify the channel +3. All further communication happens over the channel + +In this model, sessions are associated with the channel, enabling seamless reconnection, background agent work, and multi-device access without additional complexity. + +![AI Transport architecture](../../../../images/content/ai-transport/ait-with-ait.webp) + + +The channel-oriented model provides key benefits for modern AI applications: sessions maintain continuity in the face of disconnections, users can refresh or navigate back to the ongoing session, multiple users or devices can participate in the same session, and agents can continue long-running or asynchronous workloads even when clients disconnect. + +The following table compares how each architecture addresses the engineering challenges of delivering these capabilities: + +| Challenge | Connection-oriented sessions | Channel-oriented sessions | +|-----------|------------------------------|---------------------------| +| **Routing** | Agents must track which instance holds each session. Reconnecting clients need routing logic to find the correct agent instance across your infrastructure. | Agents and clients only need the channel name. Ably handles message delivery to all subscribers without agents tracking sessions or implementing routing logic. | +| **Message resume** | Agents must buffer sent messages and implement replay logic. When clients reconnect, agents must determine what was missed and retransmit without duplicates or gaps, separately for each connection.
| When clients reconnect, they automatically receive messages published while disconnected. The channel maintains history without agents implementing buffering or replay logic, eliminating the need for server-side session state. | +| **Abandonment detection** | Agents must implement logic to distinguish between brief network interruptions and users who have actually left, so they can decide whether to continue work or clean up resources. | Built-in presence tracking signals when users enter and leave channels, providing clear lifecycle events to agents without custom detection logic. | +| **Multi-user and multi-device** | Agents must manage multiple concurrent connections from the same user across devices, or from multiple users in collaborative sessions. This requires tracking connections, synchronizing state, and ensuring all participants receive consistent updates. | Multiple users and devices can connect to the same channel. The channel handles message delivery to all participants, simplifying agent logic for multi-user and multi-device scenarios. | + +## Identity in channel-oriented sessions + +In connection-oriented architectures, the agent server handles authentication directly when establishing the connection. When the connection is opened, the server verifies credentials and associates the authenticated user identity with that specific connection. + +In channel-oriented sessions, agents don't manage connections or handle authentication directly. Instead, your server authenticates users and issues tokens that control their access to channels. 
Ably enforces these authorization rules and provides verified identity information to agents, giving you powerful capabilities for managing who can participate in sessions and what they can do: + +- **Verified identity**: Agents automatically receive the authenticated identity of message senders, with cryptographic guarantees that identities cannot be spoofed +- **Granular authorization**: Control precisely what operations each user can perform on specific channels through fine-grained capabilities +- **Rich user attributes**: Pass authenticated user data to agents for personalized behavior without building custom token systems +- **Role-based participation**: Distinguish between different types of participants, such as users and agents, to customize behavior based on their role diff --git a/src/pages/docs/ai-transport/sessions-identity/session-abandonment.mdx b/src/pages/docs/ai-transport/sessions-identity/session-abandonment.mdx new file mode 100644 index 0000000000..3b6890c136 --- /dev/null +++ b/src/pages/docs/ai-transport/sessions-identity/session-abandonment.mdx @@ -0,0 +1,434 @@ +--- +title: Session abandonment +meta_description: "Detect when users leave AI sessions and respond with appropriate agent strategies." +--- + +Session abandonment occurs when users disconnect from an AI session. This can happen in several ways: + +- **Intentional exit**: User closes the browser tab, navigates away, or explicitly ends the session. +- **Device switch**: User moves from desktop to mobile, leaving the original session behind. +- **Network interruption**: Connection drops due to poor connectivity, VPN changes, or ISP issues. +- **Idle timeout**: User walks away from their device without closing the session. +- **App in background**: On mobile, the app is suspended or terminated by the operating system. + +Each scenario presents different challenges. A user switching devices may return on the new device within seconds.
A network interruption might resolve in moments, or indicate the user is gone for hours. An idle user might still be watching the screen, even if not actively interacting. + +In AI Transport's Pub/Sub channel-oriented architecture, agents run independently from client connections. This decoupling means agents must actively detect when users leave and decide how to respond - whether to stop expensive token generation immediately, complete the current response gracefully, continue background processing, or switch to a cheaper model. The right strategy depends on the cost of the operation, the value of partial results, and the likelihood the user will return. + +[Presence](/docs/presence-occupancy/presence) provides the mechanism for detecting user disconnections. When users connect to a session, they enter the channel's presence set. When they disconnect - whether intentionally or due to network issues - they automatically leave the presence set after a brief timeout. Agents subscribed to presence events receive `enter` and `leave` events they can use to trigger appropriate response strategies. + +## Detecting abandonment with presence + +Users signal their participation in a session by entering the channel's [presence](/docs/presence-occupancy/presence) set. When they disconnect - whether intentionally or due to network issues - they automatically leave the presence set, and agents subscribed to presence events receive a `leave` event. 
+ +### Subscribe to presence events + +Agents subscribe to presence events on the session channel to detect when users enter or leave: + + +```javascript +const channel = realtime.channels.get('conversation:{{USER_ID}}'); + +await channel.presence.subscribe((member) => { + switch (member.action) { + case 'leave': + console.log(`User ${member.clientId} left the session`); + // Trigger abandonment handling logic + break; + case 'enter': + console.log(`User ${member.clientId} joined the session`); + break; + } +}); +``` + + +### Tracking multi-device presence + +Users may be connected from multiple devices simultaneously. A `leave` event from one device doesn't necessarily mean the user has abandoned the session - they may still be connected from another device. + +To determine when a user is completely offline, count the number of presence entries for that user's `clientId`: + + +```javascript +const channel = realtime.channels.get('conversation:{{USER_ID}}'); + +async function isUserCompletelyOffline(clientId) { + const members = await channel.presence.get(); + const userConnections = members.filter(m => m.clientId === clientId); + return userConnections.length === 0; +} + +await channel.presence.subscribe('leave', async (member) => { + const offline = await isUserCompletelyOffline(member.clientId); + if (offline) { + console.log(`User ${member.clientId} is completely offline`); + // Handle full abandonment + } else { + console.log(`User ${member.clientId} still connected on another device`); + } +}); +``` + + + + +## Agent response strategies + +How an agent responds to session abandonment depends on the nature of the work being performed. The following strategies cover common scenarios, from immediately stopping expensive operations to continuing background work. + +### Immediate termination + +For expensive streaming operations where there's no value in continuing without an active user, stop generation immediately to save costs. 
+ +This strategy is appropriate for: + +- Token streaming where costs accumulate per token +- Realtime transcription or translation +- Any operation where partial results have no value + + +```javascript +const channel = realtime.channels.get('conversation:{{USER_ID}}'); + +let abortController = null; + +// Start a streaming response +async function startStreaming(prompt) { + abortController = new AbortController(); + + try { + const stream = await aiModel.stream(prompt, { + signal: abortController.signal + }); + + for await (const token of stream) { + channel.publish('token', token); + } + } catch (err) { + if (err.name === 'AbortError') { + // Stream was intentionally stopped + channel.publish('status', { + type: 'terminated', + reason: 'user_left' + }); + } else { + throw err; + } + } +} + +// Stop streaming when user leaves +await channel.presence.subscribe('leave', async (member) => { + const offline = await isUserCompletelyOffline(member.clientId); + if (offline && abortController) { + abortController.abort(); + abortController = null; + } +}); +``` + + +### Graceful completion + +For conversational AI, it often makes sense to complete the current response before stopping. This ensures the conversation ends at a natural point rather than mid-sentence. 
+ +This strategy is appropriate for: + +- Chat applications where partial responses are confusing +- Assistants generating structured outputs that need to be complete +- Any scenario where you want clean conversation boundaries + + +```javascript +const channel = realtime.channels.get('conversation:{{USER_ID}}'); + +let userPresent = true; +let isGenerating = false; + +// Check user presence before starting new responses +async function generateResponse(prompt) { + if (!userPresent) { + console.log('User not present, skipping response'); + return; + } + + isGenerating = true; + + try { + const stream = await aiModel.stream(prompt); + + for await (const token of stream) { + channel.publish('token', token); + } + + // Response complete - check if we should continue + if (!userPresent) { + channel.publish('status', { + type: 'paused', + reason: 'user_left', + message: 'Session paused after completing response' + }); + await cleanup(); + } + } finally { + isGenerating = false; + } +} + +// Track user presence +await channel.presence.subscribe('leave', async (member) => { + const offline = await isUserCompletelyOffline(member.clientId); + if (offline) { + userPresent = false; + // If not currently generating, clean up immediately + if (!isGenerating) { + await cleanup(); + } + // Otherwise, let current generation complete + } +}); + +await channel.presence.subscribe('enter', (member) => { + userPresent = true; +}); + +async function cleanup() { + // Release resources, close connections, etc. + console.log('Session cleaned up'); +} +``` + + +### Background continuation + +For long-running tasks like code generation, document analysis, or data processing, the agent may continue working even after the user disconnects. The user can be notified when results are ready. 
+ +This strategy is appropriate for: + +- Code generation or refactoring tasks +- Document summarization or analysis +- Any task where results are valuable even if delivered later + + +```javascript +const channel = realtime.channels.get('conversation:{{USER_ID}}'); + +let backgroundMode = false; + +async function processTask(task) { + channel.publish('status', { type: 'processing', task: task.id }); + + const result = await performLongRunningWork(task); + + if (backgroundMode) { + // User left during processing - store result and notify + await storeResultForLater(task.userId, result); + channel.publish('status', { + type: 'completed_background', + task: task.id, + message: 'Results ready when you return' + }); + + // Optionally send push notification + await sendPushNotification(task.userId, 'Your task is complete'); + } else { + // User still present - deliver result directly + channel.publish('result', result); + } +} + +await channel.presence.subscribe('leave', async (member) => { + const offline = await isUserCompletelyOffline(member.clientId); + if (offline) { + backgroundMode = true; + channel.publish('status', { + type: 'continuing_background', + message: 'Work will continue in background' + }); + } +}); + +await channel.presence.subscribe('enter', (member) => { + backgroundMode = false; +}); +``` + + +### Cost optimization + +When users disconnect, switch to a more cost-effective model to complete ongoing work. This balances resource usage against the value of completion. + +This strategy is appropriate for: + +- Long responses where the user may return +- Background tasks where latency is less important +- Any scenario where you want to balance cost against completion + + +```javascript +const channel = realtime.channels.get('conversation:{{USER_ID}}'); + +let currentModel = 'fast-expensive-model'; +let userPresent = true; + +async function generateResponse(prompt, conversationHistory) { + const model = userPresent ? 
'fast-expensive-model' : 'slow-cheap-model'; + + if (model !== currentModel) { + currentModel = model; + channel.publish('status', { + type: 'model_switched', + model: currentModel, + reason: userPresent ? 'user_returned' : 'user_away' + }); + } + + const stream = await aiModel.stream(prompt, { + model: currentModel, + history: conversationHistory + }); + + for await (const token of stream) { + // Check if model should change mid-stream + if (userPresent && currentModel === 'slow-cheap-model') { + // User returned - could restart with faster model + // or continue with current for consistency + } + channel.publish('token', token); + } +} + +await channel.presence.subscribe('leave', async (member) => { + const offline = await isUserCompletelyOffline(member.clientId); + if (offline) { + userPresent = false; + } +}); + +await channel.presence.subscribe('enter', (member) => { + userPresent = true; +}); +``` + + +## Re-attachment handling + +When users return to a session after disconnecting, agents need to restore state and clients need to catch up on any missed content. 
+ +### Agent detects user return + +When a user re-enters the presence set, the agent can resume normal operation: + + +```javascript +const channel = realtime.channels.get('conversation:{{USER_ID}}'); + +let sessionState = { + mode: 'active', + model: 'fast-model', + pendingResults: [] +}; + +await channel.presence.subscribe('enter', async (member) => { + console.log(`User ${member.clientId} returned`); + + // Restore active mode + sessionState.mode = 'active'; + sessionState.model = 'fast-model'; + + // Notify user of any work completed while away + if (sessionState.pendingResults.length > 0) { + channel.publish('status', { + type: 'resumed', + completedWhileAway: sessionState.pendingResults.length + }); + + // Deliver pending results + for (const result of sessionState.pendingResults) { + channel.publish('result', result); + } + sessionState.pendingResults = []; + } else { + channel.publish('status', { type: 'resumed' }); + } +}); + +await channel.presence.subscribe('leave', async (member) => { + const offline = await isUserCompletelyOffline(member.clientId); + if (offline) { + sessionState.mode = 'background'; + sessionState.model = 'slow-cheap-model'; + } +}); +``` + + +### Client catches up on missed tokens + +When a client reconnects, it can use [rewind](/docs/channels/options/rewind) to retrieve messages published while it was disconnected: + + +```javascript +// Client reconnecting to a session +const channel = realtime.channels.get('conversation:{{USER_ID}}', { + params: { rewind: '2m' } // Retrieve last 2 minutes of messages +}); + +// Track which responses we've already seen +const seenResponses = new Set(loadFromLocalStorage('seenResponses')); + +await channel.subscribe((message) => { + if (message.name === 'token') { + const responseId = message.extras?.headers?.responseId; + + // Skip tokens from responses we already have + if (seenResponses.has(responseId)) { + return; + } + + appendToken(message.data, responseId); + } else if (message.name === 
'status') { + handleStatusUpdate(message.data); + } +}); + +// Enter presence to signal we're back +await channel.presence.enter({ device: 'web', reconnected: true }); +``` + + +For longer disconnection periods, use [history](/docs/storage-history/history) with persistence enabled to retrieve older messages: + + +```javascript +// Client recovering after longer disconnection +const channel = realtime.channels.get('persisted:conversation:{{USER_ID}}'); + +// Subscribe to live messages first +await channel.subscribe((message) => { + handleMessage(message); +}); + +// Then fetch history to catch up +let page = await channel.history({ untilAttach: true }); + +while (page) { + // Process historical messages (newest first) + for (const message of page.items) { + handleHistoricalMessage(message); + } + + page = page.hasNext() ? await page.next() : null; +} + +// Signal we're back +await channel.presence.enter(); +``` + + +