From badc66992f04154a0ad2a6cc2137f3b39ede38ca Mon Sep 17 00:00:00 2001
From: Templight41 <armaanpasha3@gmail.com>
Date: Wed, 17 Dec 2025 10:56:29 +0530
Subject: [PATCH 1/5] feature addition complete

---
 src/google/adk/cli/adk_web_server.py          |  34 ++
 .../adk/cli/browser/token-usage-display.js    | 464 ++++++++++++++++++
 .../adk/flows/llm_flows/base_llm_flow.py      |  51 +-
 src/google/adk/models/llm_response.py         |   7 +
 src/google/adk/utils/gemini_pricing.py        | 266 ++++++++++
 tests/unittests/utils/test_gemini_pricing.py  | 168 +++++++
 6 files changed, 985 insertions(+), 5 deletions(-)
 create mode 100644 src/google/adk/cli/browser/token-usage-display.js
 create mode 100644 src/google/adk/utils/gemini_pricing.py
 create mode 100644 tests/unittests/utils/test_gemini_pricing.py
diff --git a/src/google/adk/cli/adk_web_server.py b/src/google/adk/cli/adk_web_server.py
index 78fe426628..5582c2682f 100644
--- a/src/google/adk/cli/adk_web_server.py
+++ b/src/google/adk/cli/adk_web_server.py
@@ -1664,6 +1664,40 @@ async def redirect_root_to_dev_ui():
       async def redirect_dev_ui_add_slash():
         return RedirectResponse(redirect_dev_ui_url)
 
+      @app.get("/dev-ui/index.html")
+      @app.get("/dev-ui/")
+      async def serve_index_with_token_display():
+        """Return index.html with the token usage script inlined into it."""
+        from pathlib import Path
+
+        from fastapi.responses import HTMLResponse
+
+        assets_root = Path(web_assets_dir)
+        index_file = assets_root / "index.html"
+        script_file = assets_root / "token-usage-display.js"
+
+        if not index_file.exists():
+          return HTMLResponse("Index not found", status_code=404)
+
+        page = index_file.read_text(encoding="utf-8")
+
+        # A missing script still yields an (empty) script element.
+        script_body = (
+            script_file.read_text(encoding="utf-8")
+            if script_file.exists()
+            else ""
+        )
+        inline_script = f"<script>{script_body}</script>"
+
+        # Place the script right before every </body>; with no closing body
+        # tag present, put it at the very end of the document instead.
+        if "</body>" not in page:
+          page = page + inline_script
+        else:
+          page = page.replace("</body>", f"{inline_script}</body>")
+
+        return HTMLResponse(content=page)
+
       app.mount(
           "/dev-ui/",
           StaticFiles(directory=web_assets_dir, html=True, follow_symlink=True),
diff --git a/src/google/adk/cli/browser/token-usage-display.js b/src/google/adk/cli/browser/token-usage-display.js
new file mode 100644
index 0000000000..23393cd481
--- /dev/null
+++ b/src/google/adk/cli/browser/token-usage-display.js
@@ -0,0 +1,464 @@
+/**
+ * Token Usage and Cost Display
+ *
+ * This script monitors SSE events for token usage metadata and displays
+ * the accumulated token counts and estimated costs in USD.
+ * UI is integrated into the chat input area, matching the website's theme.
+ */
+
+(function() {
+  'use strict';
+
+  // Running totals for this page session; resetUsage() replaces the object
+  let sessionTokenUsage = {
+    totalPromptTokens: 0,
+    totalOutputTokens: 0,
+    totalCachedTokens: 0,
+    totalCost: 0,
+    totalTokens: 0,
+    eventCount: 0
+  };
+
+  let isPopoverOpen = false;
+  let buttonElement = null;
+  let popoverElement = null;
+
+  // Locate the chat input, trying the most specific selectors first
+  function findMessageInput() {
+    const candidates = [
+      'textarea[placeholder*="message" i]',
+      'textarea[placeholder*="Message" i]',
+      'textarea[aria-label*="message" i]',
+      'textarea',
+      'input[type="text"]',
+    ];
+
+    for (let i = 0; i < candidates.length; i++) {
+      const match = document.querySelector(candidates[i]);
+      if (match) {
+        console.log('[Token Usage] Found input:', candidates[i]);
+        return match;
+      }
+    }
+    return null;
+  }
+
+  // Insert an empty wrapper for the usage button beside the message input
+  function createButtonContainer() {
+    const messageInput = findMessageInput();
+    if (!messageInput) {
+      console.warn('[Token Usage] Could not find message input');
+      return null;
+    }
+
+    // Climb from the input until an ancestor with at least two children
+    // (presumably the input plus its buttons) or <body> is reached.
+    let host = messageInput.parentElement;
+    while (host && host !== document.body && host.children.length < 2) {
+      host = host.parentElement;
+    }
+
+    const usable = Boolean(host) && host !== document.body;
+    if (!usable) {
+      console.warn('[Token Usage] Could not find suitable container');
+      return null;
+    }
+
+    console.log('[Token Usage] Found container:', host);
+
+    // Build the wrapper element that will hold the button.
+    const wrapper = document.createElement('div');
+    wrapper.id = 'token-usage-wrapper';
+    wrapper.style.cssText = `
+      display: inline-flex;
+      align-items: center;
+      margin: 0 8px;
+    `;
+
+    // The wrapper becomes the last child of the chosen container.
+    host.appendChild(wrapper);
+
+    return wrapper;
+  }
+
+  // Build the usage button (cost | token count) and wire its hover/click handlers
+  function createUsageButton() {
+    const button = document.createElement('button');
+    button.id = 'token-usage-button';
+    button.type = 'button';
+    button.setAttribute('aria-label', 'Token usage and cost');
+
+    // Inline styles: transparent pill that inherits font and text color
+    button.style.cssText = `
+      background: transparent;
+      border: 1px solid rgba(128, 128, 128, 0.3);
+      border-radius: 20px;
+      padding: 6px 12px;
+      font-family: inherit;
+      font-size: 13px;
+      font-weight: 500;
+      cursor: pointer;
+      color: inherit;
+      display: inline-flex;
+      align-items: center;
+      gap: 6px;
+      transition: all 0.2s;
+      white-space: nowrap;
+    `;
+
+    button.innerHTML = `
+      <span id="cost-display" style="font-weight: 600;">$0.00</span>
+      <span style="opacity: 0.5;">|</span>
+      <span id="token-count-display" style="opacity: 0.8;">0 tokens</span>
+    `;
+
+    button.addEventListener('mouseenter', () => {
+      button.style.backgroundColor = 'rgba(128, 128, 128, 0.1)';
+      button.style.borderColor = 'rgba(128, 128, 128, 0.5)';
+    });
+
+    button.addEventListener('mouseleave', () => {
+      button.style.backgroundColor = 'transparent';
+      button.style.borderColor = 'rgba(128, 128, 128, 0.3)';
+    });
+
+    button.addEventListener('click', (e) => {
+      e.preventDefault();
+      e.stopPropagation();
+      togglePopover(button);
+    });
+
+    buttonElement = button;
+    return button;
+  }
+
+  // Build the hidden detail popover, append it to <body>, and wire its handlers
+  function createPopover() {
+    const popover = document.createElement('div');
+    popover.id = 'token-usage-popover';
+    popover.style.cssText = `
+      position: fixed;
+      background: var(--surface-container, #2d2d2d);
+      border: 1px solid rgba(255, 255, 255, 0.12);
+      border-radius: 12px;
+      padding: 16px;
+      font-family: inherit;
+      font-size: 13px;
+      box-shadow: 0 4px 16px rgba(0, 0, 0, 0.3);
+      z-index: 10001;
+      min-width: 200px;
+      display: none;
+      color: inherit;
+    `;
+
+    popover.innerHTML = `
+      <div style="margin-bottom: 12px; padding-bottom: 12px; border-bottom: 1px solid rgba(255, 255, 255, 0.12);">
+        <div style="display: flex; justify-content: space-between; align-items: center;">
+          <span style="font-weight: 500; font-size: 14px;">Token Usage</span>
+          <button id="reset-usage-btn" style="
+            background: transparent;
+            border: none;
+            color: var(--primary, #8ab4f8);
+            font-size: 12px;
+            cursor: pointer;
+            padding: 4px 8px;
+            border-radius: 4px;
+            font-weight: 500;
+          ">Reset</button>
+        </div>
+      </div>
+
+      <div style="display: flex; flex-direction: column; gap: 10px;">
+        <div style="display: flex; justify-content: space-between; align-items: center;">
+          <span style="opacity: 0.7;">Input</span>
+          <span id="popover-input-tokens" style="font-weight: 500;">–</span>
+        </div>
+
+        <div style="display: flex; justify-content: space-between; align-items: center;">
+          <span style="opacity: 0.7;">Output</span>
+          <span id="popover-output-tokens" style="font-weight: 500;">–</span>
+        </div>
+
+        <div style="display: flex; justify-content: space-between; align-items: center;">
+          <span style="opacity: 0.7;">Cost</span>
+          <span id="popover-cost" style="font-weight: 600;">–</span>
+        </div>
+      </div>
+
+      <div style="margin-top: 12px; padding-top: 12px; border-top: 1px solid rgba(255, 255, 255, 0.12);">
+        <div style="display: flex; justify-content: space-between; align-items: center; font-size: 12px; opacity: 0.6;">
+          <span>Total events</span>
+          <span id="popover-event-count">0</span>
+        </div>
+      </div>
+    `;
+
+    document.body.appendChild(popover);
+
+    // Reset button (looked up after the append): hover highlight, click resets
+    const resetBtn = document.getElementById('reset-usage-btn');
+    if (resetBtn) {
+      resetBtn.addEventListener('mouseenter', () => {
+        resetBtn.style.backgroundColor = 'rgba(255, 255, 255, 0.08)';
+      });
+      resetBtn.addEventListener('mouseleave', () => {
+        resetBtn.style.backgroundColor = 'transparent';
+      });
+      resetBtn.addEventListener('click', (e) => {
+        e.stopPropagation();
+        resetUsage();
+      });
+    }
+
+    // A click outside both the popover and the button closes the popover
+    document.addEventListener('click', (e) => {
+      const popoverEl = document.getElementById('token-usage-popover');
+      const buttonEl = document.getElementById('token-usage-button');
+      if (isPopoverOpen &&
+          popoverEl &&
+          !popoverEl.contains(e.target) &&
+          buttonEl &&
+          !buttonEl.contains(e.target)) {
+        closePopover();
+      }
+    });
+
+    popoverElement = popover;
+    return popover;
+  }
+
+  // Anchor the popover just above the button, right edges aligned
+  function positionPopover(button) {
+    const panel = document.getElementById('token-usage-popover');
+    if (!(panel && button)) {
+      return;
+    }
+
+    const { top, right } = button.getBoundingClientRect();
+    panel.style.bottom = `${window.innerHeight - top + 8}px`;
+    panel.style.right = `${window.innerWidth - right}px`;
+    panel.style.left = 'auto';
+    panel.style.top = 'auto';
+  }
+
+  // Flip the popover between shown and hidden
+  function togglePopover(button) {
+    const panel = document.getElementById('token-usage-popover');
+    if (!panel) return;
+
+    isPopoverOpen = !isPopoverOpen;
+
+    if (!isPopoverOpen) {
+      panel.style.display = 'none';
+      return;
+    }
+    positionPopover(button);
+    panel.style.display = 'block';
+  }
+
+  // Hide the popover and mark it closed
+  function closePopover() {
+    const panel = document.getElementById('token-usage-popover');
+    if (!panel) return;
+
+    panel.style.display = 'none';
+    isPopoverOpen = false;
+  }
+
+  // Refresh the button label: accumulated cost and prompt + output tokens
+  function updateButton() {
+    const costDisplay = document.getElementById('cost-display');
+    const tokenCountDisplay = document.getElementById('token-count-display');
+
+    if (costDisplay) {
+      // Zero uses two decimals so a reset matches the initial "$0.00" label.
+      const cost = sessionTokenUsage.totalCost;
+      const digits = (cost === 0 || cost >= 0.01) ? 2 : 4;
+      costDisplay.textContent = `$${cost.toFixed(digits)}`;
+    }
+
+    if (tokenCountDisplay) {
+      const totalTokens = sessionTokenUsage.totalPromptTokens + sessionTokenUsage.totalOutputTokens;
+      tokenCountDisplay.textContent = `${totalTokens.toLocaleString()} token${totalTokens !== 1 ? 's' : ''}`;
+    }
+  }
+
+  // Refresh the numbers shown inside the popover
+  function updatePopover() {
+    const usage = sessionTokenUsage;
+    const dash = '–';
+
+    // Write text into an element only when that element exists.
+    const setText = (id, text) => {
+      const el = document.getElementById(id);
+      if (el) {
+        el.textContent = text;
+      }
+    };
+
+    // Token counts: a dash until something has been counted.
+    const formatCount = (count) => {
+      return count > 0 ? count.toLocaleString() : dash;
+    };
+
+    // Cost: two decimals from one cent up, four below, a dash at zero.
+    let costText = dash;
+    if (usage.totalCost >= 0.01) {
+      costText = `$${usage.totalCost.toFixed(2)}`;
+    } else if (usage.totalCost > 0) {
+      costText = `$${usage.totalCost.toFixed(4)}`;
+    }
+
+    // Push the formatted values into the DOM.
+    setText('popover-input-tokens', formatCount(usage.totalPromptTokens));
+    setText('popover-output-tokens', formatCount(usage.totalOutputTokens));
+    setText('popover-cost', costText);
+    setText('popover-event-count', usage.eventCount.toString());
+  }
+
+  // Redraw both the button label and the popover contents
+  function updateDisplay() {
+    updateButton();
+    updatePopover();
+  }
+
+  // Replace the totals with a zeroed object and redraw both displays
+  function resetUsage() {
+    sessionTokenUsage = {
+      totalPromptTokens: 0,
+      totalOutputTokens: 0,
+      totalCachedTokens: 0,
+      totalCost: 0,
+      totalTokens: 0,
+      eventCount: 0
+    };
+    updateDisplay();
+  }
+
+  // Fold one SSE "data:" payload into the running totals
+  function processEvent(eventData) {
+    try {
+      const parsed = JSON.parse(eventData);
+      const usage = parsed.usageMetadata;
+
+      // Events without usage metadata leave the totals untouched.
+      if (!usage) return;
+
+      // [metadata field, accumulator it feeds]
+      const counters = [
+        ['promptTokenCount', 'totalPromptTokens'],
+        ['candidatesTokenCount', 'totalOutputTokens'],
+        ['cachedContentTokenCount', 'totalCachedTokens'],
+      ];
+      for (const [field, total] of counters) {
+        if (usage[field]) {
+          sessionTokenUsage[total] += usage[field];
+        }
+      }
+
+      // Cost is optional; an event is only counted when it carries one.
+      if (parsed.costUsd !== undefined && parsed.costUsd !== null) {
+        sessionTokenUsage.totalCost += parsed.costUsd;
+        sessionTokenUsage.eventCount++;
+      }
+
+      // Redraw with the new totals.
+      updateDisplay();
+    } catch (e) {
+      console.error('Error processing event for token usage:', e);
+    }
+  }
+
+  // Wrap window.fetch so /run_sse response streams can be observed for usage
+  const originalFetch = window.fetch;
+  window.fetch = function(...args) {
+    const request = args[0];
+    // fetch() accepts a string, a URL or a Request; normalize to a URL string.
+    const url = typeof request === 'string'
+      ? request
+      : String((request && request.url) || request || '');
+    const result = originalFetch.apply(this, args);
+    if (!url.includes('/run_sse')) return result;
+    return result.then(response => {
+      if (!response.body) return response;
+
+      // Read a clone so the page's own consumer of the stream is unaffected.
+      const reader = response.clone().body.getReader();
+      const decoder = new TextDecoder();
+      let pending = '';  // Partial trailing line carried into the next chunk.
+
+      function handleLine(line) {
+        if (!line.startsWith('data: ')) return;
+        const data = line.substring(6).trim();
+        if (data && data !== '[DONE]') processEvent(data);
+      }
+
+      function readStream() {
+        reader.read().then(({ done, value }) => {
+          if (done) {
+            handleLine(pending);  // Flush a final line that had no newline.
+            return;
+          }
+          // An event can straddle chunks, so only complete lines are parsed.
+          const text = pending + decoder.decode(value, { stream: true });
+          const lines = text.split('\n');
+          pending = lines.pop();
+          lines.forEach(handleLine);
+          readStream();
+        }).catch(e => console.warn('[Token Usage] SSE read failed:', e));
+      }
+
+      readStream();
+      return response;
+    });
+  };
+
+  // Inject the button, retrying once per second: 1 try + 15 retries = 16
+  function tryInject(retries = 15) {
+    console.log(`[Token Usage] Injection attempt ${16 - retries}/16`);
+
+    const wrapper = createButtonContainer();
+
+    if (wrapper) {
+      const button = createUsageButton();
+      wrapper.appendChild(button);
+      createPopover();
+      console.log('[Token Usage] ✓ Button injected successfully');
+      return true;
+    } else if (retries > 0) {
+      setTimeout(() => tryInject(retries - 1), 1000);
+      return false;
+    } else {
+      console.warn('[Token Usage] ✗ Could not find suitable location after 16 attempts');
+
+      // Fallback: pin a floating button to the bottom-right of the viewport
+      console.log('[Token Usage] Creating fallback floating button');
+      const button = createUsageButton();
+      button.style.position = 'fixed';
+      button.style.bottom = '20px';
+      button.style.right = '20px';
+      button.style.zIndex = '10000';
+      document.body.appendChild(button);
+      createPopover();
+      console.log('[Token Usage] ✓ Fallback button created');
+      return true;
+    }
+  }
+
+  // Start injection now, or as soon as the DOM has finished parsing
+  function initialize() {
+    console.log('[Token Usage] Initializing...');
+
+    if (document.readyState !== 'loading') {
+      console.log('[Token Usage] DOM already loaded, starting injection');
+      tryInject();
+      return;
+    }
+    document.addEventListener('DOMContentLoaded', () => {
+      console.log('[Token Usage] DOM loaded, starting injection');
+      tryInject();
+    });
+  }
+
+  initialize();
+})();
diff --git a/src/google/adk/flows/llm_flows/base_llm_flow.py b/src/google/adk/flows/llm_flows/base_llm_flow.py
index 824cd26be1..174c6a761a 100644
--- a/src/google/adk/flows/llm_flows/base_llm_flow.py
+++ b/src/google/adk/flows/llm_flows/base_llm_flow.py
@@ -544,8 +544,8 @@ async def _postprocess_async(
       return
 
     # Builds the event.
-    model_response_event = self._finalize_model_response_event(
-        llm_request, llm_response, model_response_event
+    model_response_event = await self._finalize_model_response_event(
+        invocation_context, llm_request, llm_response, model_response_event
     )
     yield model_response_event
 
@@ -637,8 +637,8 @@ async def _postprocess_live(
         return
 
     # Builds the event.
-    model_response_event = self._finalize_model_response_event(
-        llm_request, llm_response, model_response_event
+    model_response_event = await self._finalize_model_response_event(
+        invocation_context, llm_request, llm_response, model_response_event
     )
     yield model_response_event
 
@@ -914,8 +914,9 @@ async def _maybe_add_grounding_metadata(
         return await _maybe_add_grounding_metadata(callback_response)
     return await _maybe_add_grounding_metadata()
 
-  def _finalize_model_response_event(
+  async def _finalize_model_response_event(
       self,
+      invocation_context: InvocationContext,
       llm_request: LlmRequest,
       llm_response: LlmResponse,
       model_response_event: Event,
@@ -925,6 +926,46 @@ def _finalize_model_response_event(
         **llm_response.model_dump(exclude_none=True),
     })
 
+    # Calculate cost if usage metadata is available
+    if model_response_event.usage_metadata:
+      from ...utils.gemini_pricing import calculate_token_cost
+
+      try:
+        llm = self.__get_llm(invocation_context)
+        model_name = llm.model
+
+        prompt_tokens = (
+            model_response_event.usage_metadata.prompt_token_count or 0
+        )
+        output_tokens = (
+            model_response_event.usage_metadata.candidates_token_count or 0
+        )
+        cached_tokens = (
+            model_response_event.usage_metadata.cached_content_token_count or 0
+        )
+
+        # Subtract cached tokens from prompt tokens to avoid double counting
+        prompt_tokens = max(0, prompt_tokens - cached_tokens)
+
+        logger.debug(
+            'Calculating token cost: model=%s, prompt=%d, output=%d, cached=%d',
+            model_name,
+            prompt_tokens,
+            output_tokens,
+            cached_tokens,
+        )
+
+        cost = await calculate_token_cost(
+            model_name, prompt_tokens, output_tokens, cached_tokens
+        )
+        if cost is not None:
+          model_response_event.cost_usd = cost
+          logger.debug('Token cost calculated: $%.6f', cost)
+        else:
+          logger.warning('Token cost is None for model: %s', model_name)
+      except Exception as e:
+        logger.warning('Failed to calculate token cost: %s', e)
+
     if model_response_event.content:
       function_calls = model_response_event.get_function_calls()
       if function_calls:
diff --git a/src/google/adk/models/llm_response.py b/src/google/adk/models/llm_response.py
index 0e42c02d09..3f144a6ccd 100644
--- a/src/google/adk/models/llm_response.py
+++ b/src/google/adk/models/llm_response.py
@@ -135,6 +135,13 @@ class LlmResponse(BaseModel):
   This field is automatically populated when citation is enabled.
   """
 
+  cost_usd: Optional[float] = None
+  """The estimated cost in USD for this LLM response.
+
+  This field is calculated based on the usage_metadata and the model's pricing.
+  Only populated when pricing information is available.
+  """
+
   @staticmethod
   def create(
       generate_content_response: types.GenerateContentResponse,
diff --git a/src/google/adk/utils/gemini_pricing.py b/src/google/adk/utils/gemini_pricing.py
new file mode 100644
index 0000000000..caa1c7b893
--- /dev/null
+++ b/src/google/adk/utils/gemini_pricing.py
@@ -0,0 +1,266 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+import asyncio
+from dataclasses import dataclass
+from datetime import datetime
+from datetime import timedelta
+import logging
+from typing import Optional
+
+import aiohttp
+
+_logger = logging.getLogger(__name__)
+
+
+@dataclass
+class ModelPricing:
+  """Two-tier price table for a single Gemini model.
+
+  Every price is expressed in USD per 1 million tokens.
+  """
+
+  input_price_low: float
+  input_price_high: float
+  output_price_low: float
+  output_price_high: float
+  cached_input_price_low: float
+  cached_input_price_high: float
+  threshold_tokens: int = 200_000
+
+  def calculate_cost(
+      self,
+      prompt_tokens: int,
+      output_tokens: int,
+      cached_tokens: int = 0,
+  ) -> float:
+    """Return the USD cost of one request.
+
+    Args:
+      prompt_tokens: Prompt tokens that were not served from the cache.
+      output_tokens: Tokens generated by the model.
+      cached_tokens: Prompt tokens that were served from the cache.
+
+    Returns:
+      The summed input, cached-input and output cost in USD.
+    """
+    per_million = 1_000_000
+
+    # One tier applies to all three rates. It is picked from the whole
+    # prompt size, cached part included, compared against the threshold.
+    if prompt_tokens + cached_tokens > self.threshold_tokens:
+      input_rate = self.input_price_high
+      cached_rate = self.cached_input_price_high
+      output_rate = self.output_price_high
+    else:
+      input_rate = self.input_price_low
+      cached_rate = self.cached_input_price_low
+      output_rate = self.output_price_low
+
+    # Fresh (non-cached) prompt tokens.
+    fresh_cost = (prompt_tokens / per_million) * input_rate
+
+    # Prompt tokens read back from the cache.
+    reused_cost = (cached_tokens / per_million) * cached_rate
+
+    # Generated tokens.
+    generated_cost = (output_tokens / per_million) * output_rate
+
+    # Summed left to right: input, then cached, then output.
+    total = fresh_cost + reused_cost
+    total = total + generated_cost
+    return total
+
+
+# Built-in pricing for common Gemini models. _refresh_pricing does not fetch
+# anything yet, so this table is the only source. USD per 1 million tokens.
+_DEFAULT_MODEL_PRICING = {
+    'gemini-2.5-pro': ModelPricing(
+        input_price_low=1.25,
+        input_price_high=2.50,
+        output_price_low=10.00,
+        output_price_high=15.00,
+        cached_input_price_low=0.125,
+        cached_input_price_high=0.250,
+    ),
+    'gemini-2.5-flash': ModelPricing(
+        input_price_low=0.30,
+        input_price_high=0.30,
+        output_price_low=2.50,
+        output_price_high=2.50,
+        cached_input_price_low=0.030,
+        cached_input_price_high=0.030,
+    ),
+    'gemini-2.5-flash-lite': ModelPricing(
+        input_price_low=0.10,
+        input_price_high=0.10,
+        output_price_low=0.40,
+        output_price_high=0.40,
+        cached_input_price_low=0.010,
+        cached_input_price_high=0.010,
+    ),
+    'gemini-2.0-flash': ModelPricing(
+        input_price_low=0.15,
+        input_price_high=0.15,
+        output_price_low=0.60,
+        output_price_high=0.60,
+        cached_input_price_low=0.015,
+        cached_input_price_high=0.015,
+    ),
+    'gemini-2.0-flash-lite': ModelPricing(
+        input_price_low=0.075,
+        input_price_high=0.075,
+        output_price_low=0.30,
+        output_price_high=0.30,
+        cached_input_price_low=0.0075,
+        cached_input_price_high=0.0075,
+    ),
+    'gemini-1.5-pro': ModelPricing(
+        input_price_low=1.25,
+        input_price_high=2.50,
+        output_price_low=5.00,
+        output_price_high=10.00,
+        cached_input_price_low=0.3125,
+        cached_input_price_high=0.625,
+        threshold_tokens=128_000,
+    ),
+    'gemini-1.5-flash': ModelPricing(
+        input_price_low=0.075,
+        input_price_high=0.15,
+        output_price_low=0.30,
+        output_price_high=0.60,
+        cached_input_price_low=0.01875,
+        cached_input_price_high=0.0375,
+        threshold_tokens=128_000,
+    ),
+}
+
+
+class GeminiPricingService:
+  """Service for fetching and caching Gemini API pricing information."""
+
+  def __init__(
+      self,
+      pricing_url: str = (
+          'https://cloud.google.com/vertex-ai/generative-ai/pricing'
+      ),
+      cache_duration: timedelta = timedelta(hours=24),
+  ):
+    """Initialize the service with a private copy of the default pricing.
+
+    Args:
+      pricing_url: URL to fetch pricing information from.
+      cache_duration: How long to cache pricing data before refreshing.
+    """
+    self._pricing_url = pricing_url
+    self._cache_duration = cache_duration
+    self._cached_pricing: dict[str, ModelPricing] = {**_DEFAULT_MODEL_PRICING}
+    self._last_updated: Optional[datetime] = None
+    self._fetch_lock = asyncio.Lock()
+
+  async def get_pricing(self, model_name: str) -> Optional[ModelPricing]:
+    """Get pricing for a specific model.
+
+    Args:
+      model_name: Name of the Gemini model (e.g., "gemini-2.5-flash").
+
+    Returns:
+      ModelPricing for the exact name or its longest known prefix, else None.
+    """
+    # Normalize model name (remove prefixes like "models/")
+    normalized_name = model_name.split('/')[-1]
+
+    # Check if we need to refresh the cache
+    if self._should_refresh_cache():
+      await self._refresh_pricing()
+
+    # Exact match first.
+    if normalized_name in self._cached_pricing:
+      return self._cached_pricing[normalized_name]
+
+    # Versioned names ("gemini-2.5-flash-lite-001") use the LONGEST matching
+    # key, so "-flash-lite-*" is never priced as the shorter "-flash" entry.
+    hits = [k for k in self._cached_pricing if normalized_name.startswith(k)]
+    if hits:
+      return self._cached_pricing[max(hits, key=len)]
+
+    # Unknown model: report it and return None; no default price is applied.
+    _logger.warning('Pricing not found for model: %s', model_name)
+    return None
+
+  def _should_refresh_cache(self) -> bool:
+    """Check if the pricing cache should be refreshed."""
+    if self._last_updated is None:
+      return False  # Use defaults on first run
+    return datetime.now() - self._last_updated > self._cache_duration
+
+  async def _refresh_pricing(self) -> None:
+    """Refresh pricing data from the Vertex AI pricing page.
+
+    Note: This is a placeholder implementation. In production, you would
+    either parse the pricing page HTML or use an official API if available.
+    For now, we use the hardcoded defaults.
+    """
+    async with self._fetch_lock:
+      # Double-check to avoid race conditions
+      if not self._should_refresh_cache():
+        return
+
+      try:
+        # TODO: Implement actual pricing page parsing or API call
+        # For now, we just use the hardcoded defaults
+        _logger.info('Using default Gemini pricing (no dynamic fetch yet)')
+        self._last_updated = datetime.now()
+      except Exception as e:
+        _logger.error('Failed to refresh Gemini pricing: %s', e)
+
+
+# Global pricing service instance
+_pricing_service: Optional[GeminiPricingService] = None
+
+
+def get_pricing_service() -> GeminiPricingService:
+  """Return the module-wide pricing service, creating it on first call."""
+  global _pricing_service
+  if _pricing_service is None:
+    _pricing_service = GeminiPricingService()
+  return _pricing_service
+
+
+async def calculate_token_cost(
+    model_name: str,
+    prompt_tokens: int,
+    output_tokens: int,
+    cached_tokens: int = 0,
+) -> Optional[float]:
+  """Price one model invocation using the shared pricing service.
+
+  Args:
+    model_name: Name of the Gemini model.
+    prompt_tokens: Number of prompt tokens, passed on as given.
+    output_tokens: Number of output tokens, passed on as given.
+    cached_tokens: Number of cached tokens, passed on as given.
+
+  Returns:
+    Total cost in USD, or None when no pricing is known for the model.
+  """
+  model_pricing = await get_pricing_service().get_pricing(model_name)
+  if model_pricing is not None:
+    return model_pricing.calculate_cost(
+        prompt_tokens, output_tokens, cached_tokens
+    )
+
+  return None
diff --git a/tests/unittests/utils/test_gemini_pricing.py b/tests/unittests/utils/test_gemini_pricing.py
new file mode 100644
index 0000000000..85512a3c90
--- /dev/null
+++ b/tests/unittests/utils/test_gemini_pricing.py
@@ -0,0 +1,168 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from google.adk.utils.gemini_pricing import calculate_token_cost
+from google.adk.utils.gemini_pricing import GeminiPricingService
+from google.adk.utils.gemini_pricing import ModelPricing
+import pytest
+
+
+class TestModelPricing:
+  """Test the ModelPricing class."""
+
+  def test_calculate_cost_low_tier(self):
+    """Test cost calculation for low-tier usage."""
+    pricing = ModelPricing(
+        input_price_low=1.25,
+        input_price_high=2.50,
+        output_price_low=10.00,
+        output_price_high=15.00,
+        cached_input_price_low=0.125,
+        cached_input_price_high=0.250,
+        threshold_tokens=200_000,
+    )
+
+    # Test with 1000 prompt tokens, 500 output tokens, no cache
+    cost = pricing.calculate_cost(1000, 500, 0)
+    expected = (1000 / 1_000_000) * 1.25 + (500 / 1_000_000) * 10.00
+    assert abs(cost - expected) < 0.000001
+
+  def test_calculate_cost_high_tier(self):
+    """Test cost calculation for high-tier usage (>200K tokens)."""
+    pricing = ModelPricing(
+        input_price_low=1.25,
+        input_price_high=2.50,
+        output_price_low=10.00,
+        output_price_high=15.00,
+        cached_input_price_low=0.125,
+        cached_input_price_high=0.250,
+        threshold_tokens=200_000,
+    )
+
+    # Test with 250K prompt tokens, 50K output tokens
+    cost = pricing.calculate_cost(250_000, 50_000, 0)
+    expected = (250_000 / 1_000_000) * 2.50 + (50_000 / 1_000_000) * 15.00
+    assert abs(cost - expected) < 0.000001
+
+  def test_calculate_cost_with_cache(self):
+    """Test cost calculation with cached tokens."""
+    pricing = ModelPricing(
+        input_price_low=1.25,
+        input_price_high=2.50,
+        output_price_low=10.00,
+        output_price_high=15.00,
+        cached_input_price_low=0.125,
+        cached_input_price_high=0.250,
+        threshold_tokens=200_000,
+    )
+
+    # Test with 1000 prompt tokens, 500 output tokens, 5000 cached tokens
+    cost = pricing.calculate_cost(1000, 500, 5000)
+    expected = (
+        (1000 / 1_000_000) * 1.25
+        + (5000 / 1_000_000) * 0.125
+        + (500 / 1_000_000) * 10.00
+    )
+    assert abs(cost - expected) < 0.000001
+
+  def test_calculate_cost_flash_model(self):
+    """Test cost calculation for Flash model."""
+    pricing = ModelPricing(
+        input_price_low=0.30,
+        input_price_high=0.30,
+        output_price_low=2.50,
+        output_price_high=2.50,
+        cached_input_price_low=0.030,
+        cached_input_price_high=0.030,
+    )
+
+    # Test with 10000 prompt tokens, 5000 output tokens
+    cost = pricing.calculate_cost(10_000, 5_000, 0)
+    expected = (10_000 / 1_000_000) * 0.30 + (5_000 / 1_000_000) * 2.50
+    assert abs(cost - expected) < 0.000001
+
+
+class TestGeminiPricingService:
+  """Test the GeminiPricingService class."""
+
+  @pytest.mark.asyncio
+  async def test_get_pricing_exact_match(self):
+    """Test getting pricing for an exact model name match."""
+    service = GeminiPricingService()
+    pricing = await service.get_pricing("gemini-2.5-pro")
+    assert pricing is not None
+    assert pricing.input_price_low == 1.25
+
+  @pytest.mark.asyncio
+  async def test_get_pricing_fuzzy_match(self):
+    """Test getting pricing for a model with version suffix."""
+    service = GeminiPricingService()
+    pricing = await service.get_pricing("gemini-2.5-flash-001")
+    assert pricing is not None
+    assert pricing.input_price_low == 0.30
+
+  @pytest.mark.asyncio
+  async def test_get_pricing_with_prefix(self):
+    """Test getting pricing for a model with 'models/' prefix."""
+    service = GeminiPricingService()
+    pricing = await service.get_pricing("models/gemini-2.0-flash")
+    assert pricing is not None
+    assert pricing.input_price_low == 0.15
+
+  @pytest.mark.asyncio
+  async def test_get_pricing_unknown_model(self):
+    """Test getting pricing for an unknown model."""
+    service = GeminiPricingService()
+    pricing = await service.get_pricing("unknown-model-xyz")
+    assert pricing is None
+
+
+class TestCalculateTokenCost:
+  """Test the calculate_token_cost helper function."""
+
+  @pytest.mark.asyncio
+  async def test_calculate_token_cost_gemini_25_pro(self):
+    """Test cost calculation for Gemini 2.5 Pro."""
+    cost = await calculate_token_cost("gemini-2.5-pro", 1000, 500, 0)
+    assert cost is not None
+    expected = (1000 / 1_000_000) * 1.25 + (500 / 1_000_000) * 10.00
+    assert abs(cost - expected) < 0.000001
+
+  @pytest.mark.asyncio
+  async def test_calculate_token_cost_gemini_25_flash(self):
+    """Test cost calculation for Gemini 2.5 Flash."""
+    cost = await calculate_token_cost("gemini-2.5-flash", 10_000, 5_000, 0)
+    assert cost is not None
+    expected = (10_000 / 1_000_000) * 0.30 + (5_000 / 1_000_000) * 2.50
+    assert abs(cost - expected) < 0.000001
+
+  @pytest.mark.asyncio
+  async def test_calculate_token_cost_with_cache(self):
+    """Test cost calculation with cached tokens."""
+    cost = await calculate_token_cost("gemini-2.5-pro", 1000, 500, 5000)
+    assert cost is not None
+    expected = (
+        (1000 / 1_000_000) * 1.25
+        + (5000 / 1_000_000) * 0.125
+        + (500 / 1_000_000) * 10.00
+    )
+    assert abs(cost - expected) < 0.000001
+
+  @pytest.mark.asyncio
+  async def test_calculate_token_cost_unknown_model(self):
+    """Test cost calculation for unknown model."""
+    cost = await calculate_token_cost("unknown-model", 1000, 500, 0)
+    assert cost is None

From eefda164980f7b5a0f4b6966eabec3bf4f63146b Mon Sep 17 00:00:00 2001
From: Templight41 <armaanpasha3@gmail.com>
Date: Wed, 17 Dec 2025 11:24:56 +0530
Subject: [PATCH 2/5] Fetch live Gemini pricing from the pricing page

---
 src/google/adk/utils/gemini_pricing.py       | 207 ++++++++++++++++---
 tests/unittests/utils/test_gemini_pricing.py |  19 +-
 2 files changed, 194 insertions(+), 32 deletions(-)

diff --git a/src/google/adk/utils/gemini_pricing.py b/src/google/adk/utils/gemini_pricing.py
index caa1c7b893..5771605a30 100644
--- a/src/google/adk/utils/gemini_pricing.py
+++ b/src/google/adk/utils/gemini_pricing.py
@@ -12,6 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+"""Gemini API pricing calculator with live pricing fetching.
+
+This module provides utilities to calculate token costs for Gemini models.
+On first use, it attempts to fetch the latest pricing from Google Cloud's
+pricing page and caches it permanently for the session. If fetching fails,
+it falls back to hardcoded defaults (accurate as of December 2025).
+
+Features:
+  - Automatic pricing fetch from cloud.google.com/vertex-ai/generative-ai/pricing
+  - One-time fetch on first request, then cached permanently
+  - Fallback to hardcoded defaults only if fetching fails
+  - Support for tiered pricing (low/high token thresholds)
+  - Cached token pricing calculation
+"""
+
 from __future__ import annotations
 
 import asyncio
@@ -19,6 +34,7 @@
 from datetime import datetime
 from datetime import timedelta
 import logging
+import re
 from typing import Optional
 
 import aiohttp
@@ -87,6 +103,7 @@ def calculate_cost(
 
 # Default pricing for common Gemini models (fallback if fetching fails)
 # Prices are per 1 million tokens in USD
+# Last updated: December 2025 from https://cloud.google.com/vertex-ai/generative-ai/pricing
 _DEFAULT_MODEL_PRICING = {
     'gemini-2.5-pro': ModelPricing(
         input_price_low=1.25,
@@ -157,75 +174,209 @@ def __init__(
       pricing_url: str = (
           'https://cloud.google.com/vertex-ai/generative-ai/pricing'
       ),
-      cache_duration: timedelta = timedelta(hours=24),
+      enable_fetch: bool = True,
   ):
     """Initialize the pricing service.
 
     Args:
       pricing_url: URL to fetch pricing information from.
-      cache_duration: How long to cache pricing data before refreshing.
+      enable_fetch: If False, skip fetching and use hardcoded defaults only.
+                    Useful for testing.
     """
     self._pricing_url = pricing_url
-    self._cache_duration = cache_duration
-    self._cached_pricing: dict[str, ModelPricing] = _DEFAULT_MODEL_PRICING
-    self._last_updated: Optional[datetime] = None
+    self._enable_fetch = enable_fetch
+    self._cached_pricing: Optional[dict[str, ModelPricing]] = None
+    self._fetch_attempted: bool = False
     self._fetch_lock = asyncio.Lock()
 
   async def get_pricing(self, model_name: str) -> Optional[ModelPricing]:
     """Get pricing for a specific model.
 
+    Fetches pricing from Google Cloud on first call, then caches permanently.
+    Falls back to hardcoded defaults only if fetching fails or is disabled.
+
     Args:
       model_name: Name of the Gemini model (e.g., "gemini-2.5-flash").
 
     Returns:
       ModelPricing object if found, None otherwise.
     """
+    # Fetch pricing on first call (if enabled)
+    if self._enable_fetch and not self._fetch_attempted:
+      await self._refresh_pricing()
+
     # Normalize model name (remove prefixes like "models/")
     normalized_name = model_name.split('/')[-1]
 
-    # Check if we need to refresh the cache
-    if self._should_refresh_cache():
-      await self._refresh_pricing()
+    # Use cached pricing (either fetched or fallback)
+    pricing_data = self._cached_pricing or _DEFAULT_MODEL_PRICING
 
     # Try to find exact match
-    if normalized_name in self._cached_pricing:
-      return self._cached_pricing[normalized_name]
+    if normalized_name in pricing_data:
+      return pricing_data[normalized_name]
 
     # Try to find fuzzy match (e.g., "gemini-2.5-flash-001" -> "gemini-2.5-flash")
-    for key in self._cached_pricing:
+    for key in pricing_data:
       if normalized_name.startswith(key):
-        return self._cached_pricing[key]
+        return pricing_data[key]
 
     _logger.warning(
         'Pricing not found for model: %s, using default', model_name
     )
     return None
 
-  def _should_refresh_cache(self) -> bool:
-    """Check if the pricing cache should be refreshed."""
-    if self._last_updated is None:
-      return False  # Use defaults on first run
-    return datetime.now() - self._last_updated > self._cache_duration
-
   async def _refresh_pricing(self) -> None:
-    """Refresh pricing data from the Vertex AI pricing page.
+    """Fetch pricing data from the Vertex AI pricing page on first call.
 
-    Note: This is a placeholder implementation. In production, you would
-    either parse the pricing page HTML or use an official API if available.
-    For now, we use the hardcoded defaults.
+    Attempts to fetch and parse the latest pricing from Google Cloud's
+    pricing page. Falls back to hardcoded defaults only if fetching fails.
+    This is called only once - on the first pricing request.
     """
     async with self._fetch_lock:
       # Double-check to avoid race conditions
-      if not self._should_refresh_cache():
+      if self._fetch_attempted:
         return
 
+      self._fetch_attempted = True
+
       try:
-        # TODO: Implement actual pricing page parsing or API call
-        # For now, we just use the hardcoded defaults
-        _logger.info('Using default Gemini pricing (no dynamic fetch yet)')
-        self._last_updated = datetime.now()
+        _logger.info(
+            'Fetching latest Gemini pricing from %s', self._pricing_url
+        )
+
+        async with aiohttp.ClientSession() as session:
+          async with session.get(
+              self._pricing_url, timeout=aiohttp.ClientTimeout(total=10)
+          ) as response:
+            if response.status != 200:
+              _logger.warning(
+                  'Failed to fetch pricing page (status %d), using hardcoded'
+                  ' defaults',
+                  response.status,
+              )
+              self._cached_pricing = _DEFAULT_MODEL_PRICING
+              return
+
+            html_content = await response.text()
+            parsed_pricing = self._parse_pricing_page(html_content)
+
+            if parsed_pricing:
+              # Merge parsed pricing with defaults (in case some models are missing)
+              self._cached_pricing = {
+                  **_DEFAULT_MODEL_PRICING,
+                  **parsed_pricing,
+              }
+              _logger.info(
+                  'Successfully fetched pricing for %d models from API',
+                  len(parsed_pricing),
+              )
+            else:
+              _logger.warning(
+                  'No pricing found in API response, using hardcoded defaults'
+              )
+              self._cached_pricing = _DEFAULT_MODEL_PRICING
+
       except Exception as e:
-        _logger.error('Failed to refresh Gemini pricing: %s', e)
+        _logger.warning(
+            'Failed to fetch Gemini pricing: %s, using hardcoded defaults', e
+        )
+        self._cached_pricing = _DEFAULT_MODEL_PRICING
+
+  def _parse_pricing_page(self, html_content: str) -> dict[str, ModelPricing]:
+    """Parse pricing information from the HTML page.
+
+    Args:
+      html_content: HTML content of the pricing page.
+
+    Returns:
+      Dictionary mapping model names to ModelPricing objects.
+      Returns empty dict if parsing fails or produces invalid results.
+    """
+    pricing_data = {}
+
+    try:
+      # Look for pricing tables in the HTML
+      # The pricing page typically has tables with model names and prices
+      # Pattern: Match prices in format like "$0.30" or "$1.25"
+      price_pattern = r'\$(\d+\.?\d*)'
+
+      # Try to find Gemini model sections and their associated prices
+      # This is a best-effort parsing and may need updates if the page structure changes
+
+      # Look for common model names in the content
+      model_patterns = {
+          'gemini-2.5-flash': r'Gemini 2\.5 Flash',
+          'gemini-2.5-pro': r'Gemini 2\.5 Pro',
+          'gemini-2.0-flash': r'Gemini 2\.0 Flash',
+          'gemini-1.5-pro': r'Gemini 1\.5 Pro',
+          'gemini-1.5-flash': r'Gemini 1\.5 Flash',
+      }
+
+      for model_key, model_pattern in model_patterns.items():
+        match = re.search(model_pattern, html_content, re.IGNORECASE)
+        if match:
+          # Find the section containing this model
+          section_start = match.start()
+          section_end = min(section_start + 5000, len(html_content))
+          section = html_content[section_start:section_end]
+
+          # Extract all prices in this section
+          prices = re.findall(price_pattern, section)
+
+          if len(prices) >= 4:
+            # Typically: input_low, input_high, output_low, output_high
+            # or just: input, output (if no tiering)
+            try:
+              input_low = float(prices[0])
+              output_low = float(prices[1]) if len(prices) > 1 else input_low
+
+              # Check if there's tiered pricing
+              input_high = float(prices[2]) if len(prices) > 2 else input_low
+              output_high = float(prices[3]) if len(prices) > 3 else output_low
+
+              # Validate pricing - sanity check to avoid garbage data
+              # Gemini prices should be < $100 per 1M tokens
+              if (
+                  input_low > 100
+                  or input_high > 100
+                  or output_low > 100
+                  or output_high > 100
+              ):
+                _logger.warning(
+                    'Parsed pricing for %s looks invalid (>$100/1M tokens),'
+                    ' skipping',
+                    model_key,
+                )
+                continue
+
+              # Cached pricing is typically 10% of regular pricing
+              cached_low = input_low * 0.1
+              cached_high = input_high * 0.1
+
+              pricing_data[model_key] = ModelPricing(
+                  input_price_low=input_low,
+                  input_price_high=input_high,
+                  output_price_low=output_low,
+                  output_price_high=output_high,
+                  cached_input_price_low=cached_low,
+                  cached_input_price_high=cached_high,
+              )
+              _logger.debug(
+                  'Parsed pricing for %s: in=$%.2f-$%.2f, out=$%.2f-$%.2f',
+                  model_key,
+                  input_low,
+                  input_high,
+                  output_low,
+                  output_high,
+              )
+            except (ValueError, IndexError) as e:
+              _logger.debug('Failed to parse prices for %s: %s', model_key, e)
+              continue
+
+    except Exception as e:
+      _logger.warning('Error parsing pricing page: %s', e)
+
+    return pricing_data
 
 
 # Global pricing service instance
diff --git a/tests/unittests/utils/test_gemini_pricing.py b/tests/unittests/utils/test_gemini_pricing.py
index 85512a3c90..1e7101031f 100644
--- a/tests/unittests/utils/test_gemini_pricing.py
+++ b/tests/unittests/utils/test_gemini_pricing.py
@@ -14,12 +14,23 @@
 
 from __future__ import annotations
 
+from google.adk.utils import gemini_pricing
 from google.adk.utils.gemini_pricing import calculate_token_cost
 from google.adk.utils.gemini_pricing import GeminiPricingService
 from google.adk.utils.gemini_pricing import ModelPricing
 import pytest
 
 
+@pytest.fixture(autouse=True)
+def disable_pricing_fetch():
+  """Disable live pricing fetch for all tests."""
+  # Reset the global pricing service before each test
+  gemini_pricing._pricing_service = GeminiPricingService(enable_fetch=False)
+  yield
+  # Clean up after test
+  gemini_pricing._pricing_service = None
+
+
 class TestModelPricing:
   """Test the ModelPricing class."""
 
@@ -101,7 +112,7 @@ class TestGeminiPricingService:
   @pytest.mark.asyncio
   async def test_get_pricing_exact_match(self):
     """Test getting pricing for an exact model name match."""
-    service = GeminiPricingService()
+    service = GeminiPricingService(enable_fetch=False)
     pricing = await service.get_pricing("gemini-2.5-pro")
     assert pricing is not None
     assert pricing.input_price_low == 1.25
@@ -109,7 +120,7 @@ async def test_get_pricing_exact_match(self):
   @pytest.mark.asyncio
   async def test_get_pricing_fuzzy_match(self):
     """Test getting pricing for a model with version suffix."""
-    service = GeminiPricingService()
+    service = GeminiPricingService(enable_fetch=False)
     pricing = await service.get_pricing("gemini-2.5-flash-001")
     assert pricing is not None
     assert pricing.input_price_low == 0.30
@@ -117,7 +128,7 @@ async def test_get_pricing_fuzzy_match(self):
   @pytest.mark.asyncio
   async def test_get_pricing_with_prefix(self):
     """Test getting pricing for a model with 'models/' prefix."""
-    service = GeminiPricingService()
+    service = GeminiPricingService(enable_fetch=False)
     pricing = await service.get_pricing("models/gemini-2.0-flash")
     assert pricing is not None
     assert pricing.input_price_low == 0.15
@@ -125,7 +136,7 @@ async def test_get_pricing_with_prefix(self):
   @pytest.mark.asyncio
   async def test_get_pricing_unknown_model(self):
     """Test getting pricing for an unknown model."""
-    service = GeminiPricingService()
+    service = GeminiPricingService(enable_fetch=False)
     pricing = await service.get_pricing("unknown-model-xyz")
     assert pricing is None
 

From 1deb9976164e430a3b9f76a642fe09df95446b70 Mon Sep 17 00:00:00 2001
From: Templight41 <armaanpasha3@gmail.com>
Date: Wed, 17 Dec 2025 12:18:42 +0530
Subject: [PATCH 3/5] chore: Remove frontend code - to be implemented in
 adk-web repo

Removed frontend-specific code as per maintainer feedback:
- Deleted src/google/adk/cli/browser/token-usage-display.js
- Removed JavaScript injection endpoint from adk_web_server.py

Backend API remains intact:
- Token cost calculation in base_llm_flow.py
- cost_usd field in LlmResponse model
- Gemini pricing service with live pricing-page fetching (HTML scrape)
- All unit tests passing (12/12)

Frontend implementation will be done in the separate adk-web repository.
---
 src/google/adk/cli/adk_web_server.py          |  34 --
 .../adk/cli/browser/token-usage-display.js    | 464 ------------------
 2 files changed, 498 deletions(-)
 delete mode 100644 src/google/adk/cli/browser/token-usage-display.js

diff --git a/src/google/adk/cli/adk_web_server.py b/src/google/adk/cli/adk_web_server.py
index b11912f7db..5d71591466 100644
--- a/src/google/adk/cli/adk_web_server.py
+++ b/src/google/adk/cli/adk_web_server.py
@@ -1696,40 +1696,6 @@ async def redirect_root_to_dev_ui():
       async def redirect_dev_ui_add_slash():
         return RedirectResponse(redirect_dev_ui_url)
 
-      @app.get("/dev-ui/index.html")
-      @app.get("/dev-ui/")
-      async def serve_index_with_token_display():
-        """Serve index.html with token usage display script injected."""
-        from pathlib import Path
-
-        from fastapi.responses import HTMLResponse
-
-        index_path = Path(web_assets_dir) / "index.html"
-        script_path = Path(web_assets_dir) / "token-usage-display.js"
-
-        if not index_path.exists():
-          return HTMLResponse("Index not found", status_code=404)
-
-        # Read the index.html content
-        with open(index_path, "r", encoding="utf-8") as f:
-          html_content = f.read()
-
-        # Read the token usage script
-        script_content = ""
-        if script_path.exists():
-          with open(script_path, "r", encoding="utf-8") as f:
-            script_content = f.read()
-
-        # Inject the token usage display script inline before </body>
-        script_tag = f"<script>{script_content}</script>"
-        if "</body>" in html_content:
-          html_content = html_content.replace("</body>", f"{script_tag}</body>")
-        else:
-          # Fallback: append at the end
-          html_content += script_tag
-
-        return HTMLResponse(content=html_content)
-
       app.mount(
           "/dev-ui/",
           StaticFiles(directory=web_assets_dir, html=True, follow_symlink=True),
diff --git a/src/google/adk/cli/browser/token-usage-display.js b/src/google/adk/cli/browser/token-usage-display.js
deleted file mode 100644
index 23393cd481..0000000000
--- a/src/google/adk/cli/browser/token-usage-display.js
+++ /dev/null
@@ -1,464 +0,0 @@
-/**
- * Token Usage and Cost Display
- *
- * This script monitors SSE events for token usage metadata and displays
- * the accumulated token counts and estimated costs in USD.
- * UI is integrated into the chat input area, matching the website's theme.
- */
-
-(function() {
-  'use strict';
-
-  // State management
-  let sessionTokenUsage = {
-    totalPromptTokens: 0,
-    totalOutputTokens: 0,
-    totalCachedTokens: 0,
-    totalCost: 0,
-    totalTokens: 0,
-    eventCount: 0
-  };
-
-  let isPopoverOpen = false;
-  let buttonElement = null;
-  let popoverElement = null;
-
-  // Find the message input textarea
-  function findMessageInput() {
-    const selectors = [
-      'textarea[placeholder*="message" i]',
-      'textarea[placeholder*="Message" i]',
-      'textarea[aria-label*="message" i]',
-      'textarea',
-      'input[type="text"]',
-    ];
-
-    for (const selector of selectors) {
-      const el = document.querySelector(selector);
-      if (el) {
-        console.log('[Token Usage] Found input:', selector);
-        return el;
-      }
-    }
-    return null;
-  }
-
-  // Create button container next to input
-  function createButtonContainer() {
-    const input = findMessageInput();
-    if (!input) {
-      console.warn('[Token Usage] Could not find message input');
-      return null;
-    }
-
-    // Find the parent container that holds the input and buttons
-    let container = input.parentElement;
-
-    // Look for a container that has multiple children (input + buttons)
-    while (container && container.children.length < 2 && container !== document.body) {
-      container = container.parentElement;
-    }
-
-    if (!container || container === document.body) {
-      console.warn('[Token Usage] Could not find suitable container');
-      return null;
-    }
-
-    console.log('[Token Usage] Found container:', container);
-
-    // Create a wrapper div for our button
-    const buttonWrapper = document.createElement('div');
-    buttonWrapper.id = 'token-usage-wrapper';
-    buttonWrapper.style.cssText = `
-      display: inline-flex;
-      align-items: center;
-      margin: 0 8px;
-    `;
-
-    // Try to append to container
-    container.appendChild(buttonWrapper);
-
-    return buttonWrapper;
-  }
-
-  // Create the main button that shows cost and token count
-  function createUsageButton() {
-    const button = document.createElement('button');
-    button.id = 'token-usage-button';
-    button.type = 'button';
-    button.setAttribute('aria-label', 'Token usage and cost');
-
-    // Match the website's button styling
-    button.style.cssText = `
-      background: transparent;
-      border: 1px solid rgba(128, 128, 128, 0.3);
-      border-radius: 20px;
-      padding: 6px 12px;
-      font-family: inherit;
-      font-size: 13px;
-      font-weight: 500;
-      cursor: pointer;
-      color: inherit;
-      display: inline-flex;
-      align-items: center;
-      gap: 6px;
-      transition: all 0.2s;
-      white-space: nowrap;
-    `;
-
-    button.innerHTML = `
-      <span id="cost-display" style="font-weight: 600;">$0.00</span>
-      <span style="opacity: 0.5;">|</span>
-      <span id="token-count-display" style="opacity: 0.8;">0 tokens</span>
-    `;
-
-    button.addEventListener('mouseenter', () => {
-      button.style.backgroundColor = 'rgba(128, 128, 128, 0.1)';
-      button.style.borderColor = 'rgba(128, 128, 128, 0.5)';
-    });
-
-    button.addEventListener('mouseleave', () => {
-      button.style.backgroundColor = 'transparent';
-      button.style.borderColor = 'rgba(128, 128, 128, 0.3)';
-    });
-
-    button.addEventListener('click', (e) => {
-      e.preventDefault();
-      e.stopPropagation();
-      togglePopover(button);
-    });
-
-    buttonElement = button;
-    return button;
-  }
-
-  // Create the popover that shows detailed breakdown
-  function createPopover() {
-    const popover = document.createElement('div');
-    popover.id = 'token-usage-popover';
-    popover.style.cssText = `
-      position: fixed;
-      background: var(--surface-container, #2d2d2d);
-      border: 1px solid rgba(255, 255, 255, 0.12);
-      border-radius: 12px;
-      padding: 16px;
-      font-family: inherit;
-      font-size: 13px;
-      box-shadow: 0 4px 16px rgba(0, 0, 0, 0.3);
-      z-index: 10001;
-      min-width: 200px;
-      display: none;
-      color: inherit;
-    `;
-
-    popover.innerHTML = `
-      <div style="margin-bottom: 12px; padding-bottom: 12px; border-bottom: 1px solid rgba(255, 255, 255, 0.12);">
-        <div style="display: flex; justify-content: space-between; align-items: center;">
-          <span style="font-weight: 500; font-size: 14px;">Token Usage</span>
-          <button id="reset-usage-btn" style="
-            background: transparent;
-            border: none;
-            color: var(--primary, #8ab4f8);
-            font-size: 12px;
-            cursor: pointer;
-            padding: 4px 8px;
-            border-radius: 4px;
-            font-weight: 500;
-          ">Reset</button>
-        </div>
-      </div>
-
-      <div style="display: flex; flex-direction: column; gap: 10px;">
-        <div style="display: flex; justify-content: space-between; align-items: center;">
-          <span style="opacity: 0.7;">Input</span>
-          <span id="popover-input-tokens" style="font-weight: 500;">–</span>
-        </div>
-
-        <div style="display: flex; justify-content: space-between; align-items: center;">
-          <span style="opacity: 0.7;">Output</span>
-          <span id="popover-output-tokens" style="font-weight: 500;">–</span>
-        </div>
-
-        <div style="display: flex; justify-content: space-between; align-items: center;">
-          <span style="opacity: 0.7;">Cost</span>
-          <span id="popover-cost" style="font-weight: 600;">–</span>
-        </div>
-      </div>
-
-      <div style="margin-top: 12px; padding-top: 12px; border-top: 1px solid rgba(255, 255, 255, 0.12);">
-        <div style="display: flex; justify-content: space-between; align-items: center; font-size: 12px; opacity: 0.6;">
-          <span>Total events</span>
-          <span id="popover-event-count">0</span>
-        </div>
-      </div>
-    `;
-
-    document.body.appendChild(popover);
-
-    // Add reset button handler
-    const resetBtn = document.getElementById('reset-usage-btn');
-    if (resetBtn) {
-      resetBtn.addEventListener('mouseenter', () => {
-        resetBtn.style.backgroundColor = 'rgba(255, 255, 255, 0.08)';
-      });
-      resetBtn.addEventListener('mouseleave', () => {
-        resetBtn.style.backgroundColor = 'transparent';
-      });
-      resetBtn.addEventListener('click', (e) => {
-        e.stopPropagation();
-        resetUsage();
-      });
-    }
-
-    // Close popover when clicking outside
-    document.addEventListener('click', (e) => {
-      const popoverEl = document.getElementById('token-usage-popover');
-      const buttonEl = document.getElementById('token-usage-button');
-      if (isPopoverOpen &&
-          popoverEl &&
-          !popoverEl.contains(e.target) &&
-          buttonEl &&
-          !buttonEl.contains(e.target)) {
-        closePopover();
-      }
-    });
-
-    popoverElement = popover;
-    return popover;
-  }
-
-  // Position popover relative to button
-  function positionPopover(button) {
-    const popover = document.getElementById('token-usage-popover');
-    if (!popover || !button) return;
-
-    const buttonRect = button.getBoundingClientRect();
-
-    // Position above the button
-    popover.style.bottom = `${window.innerHeight - buttonRect.top + 8}px`;
-    popover.style.right = `${window.innerWidth - buttonRect.right}px`;
-    popover.style.left = 'auto';
-    popover.style.top = 'auto';
-  }
-
-  // Toggle popover visibility
-  function togglePopover(button) {
-    const popover = document.getElementById('token-usage-popover');
-    if (!popover) return;
-
-    isPopoverOpen = !isPopoverOpen;
-
-    if (isPopoverOpen) {
-      positionPopover(button);
-      popover.style.display = 'block';
-    } else {
-      popover.style.display = 'none';
-    }
-  }
-
-  // Close popover
-  function closePopover() {
-    const popover = document.getElementById('token-usage-popover');
-    if (popover) {
-      popover.style.display = 'none';
-      isPopoverOpen = false;
-    }
-  }
-
-  // Update the button display
-  function updateButton() {
-    const costDisplay = document.getElementById('cost-display');
-    const tokenCountDisplay = document.getElementById('token-count-display');
-
-    if (costDisplay) {
-      const costFormatted = sessionTokenUsage.totalCost >= 0.01
-        ? `$${sessionTokenUsage.totalCost.toFixed(2)}`
-        : `$${sessionTokenUsage.totalCost.toFixed(4)}`;
-      costDisplay.textContent = costFormatted;
-    }
-
-    if (tokenCountDisplay) {
-      const totalTokens = sessionTokenUsage.totalPromptTokens + sessionTokenUsage.totalOutputTokens;
-      tokenCountDisplay.textContent = `${totalTokens.toLocaleString()} token${totalTokens !== 1 ? 's' : ''}`;
-    }
-  }
-
-  // Update the popover display
-  function updatePopover() {
-    const inputTokensEl = document.getElementById('popover-input-tokens');
-    const outputTokensEl = document.getElementById('popover-output-tokens');
-    const costEl = document.getElementById('popover-cost');
-    const eventCountEl = document.getElementById('popover-event-count');
-
-    if (inputTokensEl) {
-      inputTokensEl.textContent = sessionTokenUsage.totalPromptTokens > 0
-        ? sessionTokenUsage.totalPromptTokens.toLocaleString()
-        : '–';
-    }
-
-    if (outputTokensEl) {
-      outputTokensEl.textContent = sessionTokenUsage.totalOutputTokens > 0
-        ? sessionTokenUsage.totalOutputTokens.toLocaleString()
-        : '–';
-    }
-
-    if (costEl) {
-      const costFormatted = sessionTokenUsage.totalCost >= 0.01
-        ? `$${sessionTokenUsage.totalCost.toFixed(2)}`
-        : sessionTokenUsage.totalCost > 0
-        ? `$${sessionTokenUsage.totalCost.toFixed(4)}`
-        : '–';
-      costEl.textContent = costFormatted;
-    }
-
-    if (eventCountEl) {
-      eventCountEl.textContent = sessionTokenUsage.eventCount.toString();
-    }
-  }
-
-  // Update all displays
-  function updateDisplay() {
-    updateButton();
-    updatePopover();
-  }
-
-  // Reset usage statistics
-  function resetUsage() {
-    sessionTokenUsage = {
-      totalPromptTokens: 0,
-      totalOutputTokens: 0,
-      totalCachedTokens: 0,
-      totalCost: 0,
-      totalTokens: 0,
-      eventCount: 0
-    };
-    updateDisplay();
-  }
-
-  // Process an event from the SSE stream
-  function processEvent(eventData) {
-    try {
-      const event = JSON.parse(eventData);
-
-      // Check if the event has usage metadata
-      if (event.usageMetadata) {
-        const metadata = event.usageMetadata;
-
-        // Update token counts
-        if (metadata.promptTokenCount) {
-          sessionTokenUsage.totalPromptTokens += metadata.promptTokenCount;
-        }
-        if (metadata.candidatesTokenCount) {
-          sessionTokenUsage.totalOutputTokens += metadata.candidatesTokenCount;
-        }
-        if (metadata.cachedContentTokenCount) {
-          sessionTokenUsage.totalCachedTokens += metadata.cachedContentTokenCount;
-        }
-
-        // Update cost if available
-        if (event.costUsd !== undefined && event.costUsd !== null) {
-          sessionTokenUsage.totalCost += event.costUsd;
-          sessionTokenUsage.eventCount++;
-        }
-
-        // Update the display
-        updateDisplay();
-      }
-    } catch (e) {
-      console.error('Error processing event for token usage:', e);
-    }
-  }
-
-  // Intercept fetch requests to monitor SSE events
-  const originalFetch = window.fetch;
-  window.fetch = function(...args) {
-    const request = args[0];
-
-    // Check if this is a run_sse request
-    if (typeof request === 'string' && request.includes('/run_sse')) {
-      return originalFetch.apply(this, args).then(response => {
-        // Clone the response so we can read it
-        const clonedResponse = response.clone();
-
-        // Process the SSE stream
-        const reader = clonedResponse.body.getReader();
-        const decoder = new TextDecoder();
-
-        function readStream() {
-          reader.read().then(({ done, value }) => {
-            if (done) return;
-
-            const chunk = decoder.decode(value, { stream: true });
-            const lines = chunk.split('\n');
-
-            for (const line of lines) {
-              if (line.startsWith('data: ')) {
-                const data = line.substring(6);
-                if (data && data !== '[DONE]') {
-                  processEvent(data);
-                }
-              }
-            }
-
-            readStream();
-          });
-        }
-
-        readStream();
-
-        return response;
-      });
-    }
-
-    return originalFetch.apply(this, args);
-  };
-
-  // Try to inject the button
-  function tryInject(retries = 15) {
-    console.log(`[Token Usage] Injection attempt ${16 - retries}/15`);
-
-    const wrapper = createButtonContainer();
-
-    if (wrapper) {
-      const button = createUsageButton();
-      wrapper.appendChild(button);
-      createPopover();
-      console.log('[Token Usage] ✓ Button injected successfully');
-      return true;
-    } else if (retries > 0) {
-      setTimeout(() => tryInject(retries - 1), 1000);
-      return false;
-    } else {
-      console.warn('[Token Usage] ✗ Could not find suitable location after 15 attempts');
-
-      // Fallback: Create floating button
-      console.log('[Token Usage] Creating fallback floating button');
-      const button = createUsageButton();
-      button.style.position = 'fixed';
-      button.style.bottom = '20px';
-      button.style.right = '20px';
-      button.style.zIndex = '10000';
-      document.body.appendChild(button);
-      createPopover();
-      console.log('[Token Usage] ✓ Fallback button created');
-      return true;
-    }
-  }
-
-  // Initialize when the DOM is ready
-  function initialize() {
-    console.log('[Token Usage] Initializing...');
-
-    if (document.readyState === 'loading') {
-      document.addEventListener('DOMContentLoaded', () => {
-        console.log('[Token Usage] DOM loaded, starting injection');
-        tryInject();
-      });
-    } else {
-      console.log('[Token Usage] DOM already loaded, starting injection');
-      tryInject();
-    }
-  }
-
-  initialize();
-})();

From 4431fb9015b9d29aeb78705d1de1c7728ec6561a Mon Sep 17 00:00:00 2001
From: Ari <88184960+Templight41@users.noreply.github.com>
Date: Wed, 17 Dec 2025 12:28:25 +0530
Subject: [PATCH 4/5] Update src/google/adk/utils/gemini_pricing.py

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 src/google/adk/utils/gemini_pricing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/google/adk/utils/gemini_pricing.py b/src/google/adk/utils/gemini_pricing.py
index 5771605a30..ee24e89a3f 100644
--- a/src/google/adk/utils/gemini_pricing.py
+++ b/src/google/adk/utils/gemini_pricing.py
@@ -323,7 +323,7 @@ def _parse_pricing_page(self, html_content: str) -> dict[str, ModelPricing]:
           # Extract all prices in this section
           prices = re.findall(price_pattern, section)
 
-          if len(prices) >= 4:
+          if len(prices) >= 2:
             # Typically: input_low, input_high, output_low, output_high
             # or just: input, output (if no tiering)
             try:

From 9e5b272c7666b37d3a10435bd701545f76ff506b Mon Sep 17 00:00:00 2001
From: Templight41 <armaanpasha3@gmail.com>
Date: Wed, 17 Dec 2025 12:31:24 +0530
Subject: [PATCH 5/5] remove unused variables

---
 src/google/adk/utils/gemini_pricing.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/google/adk/utils/gemini_pricing.py b/src/google/adk/utils/gemini_pricing.py
index ee24e89a3f..0b08408074 100644
--- a/src/google/adk/utils/gemini_pricing.py
+++ b/src/google/adk/utils/gemini_pricing.py
@@ -31,8 +31,6 @@
 
 import asyncio
 from dataclasses import dataclass
-from datetime import datetime
-from datetime import timedelta
 import logging
 import re
 from typing import Optional