Add rate limiting for API endpoints

Copilot · Mte90 · Copilot · commit 7fddd68f1bb3 · 2025-11-06T18:06:04.000Z
Co-authored-by: Mte90 <403283+Mte90@users.noreply.github.com>
diff --git a/main.py b/main.py
@@ -21,6 +21,7 @@
     QueryRequest
 )
 from logger import get_logger
+from rate_limiter import query_limiter, indexing_limiter, general_limiter
 
 logger = get_logger(__name__)
 
@@ -29,6 +30,14 @@
 # Controls how many characters of each snippet and total context we send to coding model
 TOTAL_CONTEXT_LIMIT = 4000
 
+
+def _get_client_ip(request: Request) -> str:
+    """Return the rate-limit key: first X-Forwarded-For hop, else the peer address."""
+    # NOTE(review): the header is client-controlled unless a trusted proxy overwrites it.
+    first_hop = request.headers.get("X-Forwarded-For", "").split(",")[0].strip()
+    # A blank first hop (e.g. ", 10.0.0.1") falls through instead of becoming the key "".
+    return first_hop or (request.client.host if request.client else "unknown")
+
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     # Project registry is auto-initialized when needed via create_project
@@ -113,8 +122,18 @@ def api_delete_project(project_id: str):
 
 
 @app.post("/api/projects/index")
-def api_index_project(request: IndexProjectRequest, background_tasks: BackgroundTasks):
+def api_index_project(http_request: Request, request: IndexProjectRequest, background_tasks: BackgroundTasks):
     """Index/re-index a project in the background."""
+    # Rate limiting for indexing operations (more strict)
+    client_ip = _get_client_ip(http_request)
+    allowed, retry_after = indexing_limiter.is_allowed(client_ip)
+    if not allowed:
+        return JSONResponse(
+            {"error": "Rate limit exceeded for indexing", "retry_after": retry_after},
+            status_code=429,
+            headers={"Retry-After": str(retry_after)}
+        )
+    
     try:
         project = get_project_by_id(request.project_id)
         if not project:
@@ -149,8 +168,18 @@ def index_callback():
 
 
 @app.post("/api/query")
-def api_query(request: QueryRequest):
+def api_query(http_request: Request, request: QueryRequest):
     """Query a project using semantic search (PyCharm-compatible)."""
+    # Rate limiting
+    client_ip = _get_client_ip(http_request)
+    allowed, retry_after = query_limiter.is_allowed(client_ip)
+    if not allowed:
+        return JSONResponse(
+            {"error": "Rate limit exceeded", "retry_after": retry_after},
+            status_code=429,
+            headers={"Retry-After": str(retry_after)}
+        )
+    
     try:
         project = get_project_by_id(request.project_id)
         if not project:
diff --git a/rate_limiter.py b/rate_limiter.py
@@ -0,0 +1,66 @@
+"""
+Simple in-memory rate limiter called explicitly by the FastAPI endpoint handlers.
+"""
+import time
+import threading
+from typing import Dict, Tuple
+from collections import defaultdict
+
+
+class RateLimiter:
+    """
+    Sliding-window-log rate limiter: at most `calls` requests per key within
+    any trailing `window` seconds. Shared state is guarded by one lock.
+    """
+
+    def __init__(self, calls: int = 100, window: int = 60):
+        """
+        Initialize rate limiter.
+
+        Args:
+            calls: Maximum number of calls allowed per key within `window`
+            window: Length of the trailing time window in seconds
+        """
+        self.calls = calls
+        self.window = window
+        self._storage: Dict[str, list] = defaultdict(list)
+        self._lock = threading.Lock()
+        self._last_sweep = time.monotonic()  # last time idle keys were evicted
+
+    def is_allowed(self, key: str) -> Tuple[bool, int]:
+        """
+        Check if request is allowed under rate limit, recording it when it is.
+
+        Args:
+            key: Identifier for rate limit (e.g., IP address)
+
+        Returns:
+            Tuple of (allowed: bool, retry_after: int seconds, 0 if allowed)
+        """
+        with self._lock:
+            now = time.monotonic()  # unaffected by wall-clock adjustments
+            cutoff = now - self.window
+            if now - self._last_sweep >= self.window:
+                # Evict idle keys so one-off clients cannot grow the table forever.
+                for k in [k for k, v in self._storage.items() if not v or v[-1] <= cutoff]:
+                    del self._storage[k]
+                self._last_sweep = now
+            timestamps = self._storage[key]
+            timestamps[:] = [ts for ts in timestamps if ts > cutoff]
+            if len(timestamps) >= self.calls:
+                oldest = timestamps[0] if timestamps else now  # empty only if calls <= 0
+                return False, int(oldest + self.window - now) + 1
+            timestamps.append(now)
+            return True, 0
+
+    def reset(self, key: str):
+        """Reset rate limit for a key."""
+        with self._lock:
+            self._storage.pop(key, None)
+
+
+# Module-level limiter instances, one per endpoint category.
+# Queries get the larger budget; indexing operations get the smallest.
+query_limiter = RateLimiter(window=60, calls=100)  # 100 queries per minute
+indexing_limiter = RateLimiter(window=60, calls=10)  # 10 indexing operations per minute
+general_limiter = RateLimiter(window=60, calls=200)  # 200 general requests per minute