Skip to content

Commit 1fa8498

Browse files
Copilot authored and Mte90 committed
Implement LRU cache for project queries and remove unused files
Co-authored-by: Mte90 <403283+Mte90@users.noreply.github.com>
1 parent 9c2b274 commit 1fa8498

File tree

4 files changed

+37
-729
lines changed

4 files changed

+37
-729
lines changed

OPTIMIZATION_NOTES.md

Lines changed: 5 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -2,29 +2,25 @@
22

33
## Actionable Optimizations
44

5-
### 1. **main.py** - Cache Management
6-
- Replace `_ANALYSES_CACHE` global variable with `functools.lru_cache` decorator
7-
- This provides automatic cache size limits and thread-safety
8-
9-
### 2. **db.py** - Database Performance
5+
### 1. **db.py** - Database Performance
106
- Add connection pooling for high-load scenarios using SQLite connection pool
117
- Implement prepared statements for frequently used queries to reduce parsing overhead
128

13-
### 3. **analyzer.py** - Batch Processing
9+
### 2. **analyzer.py** - Batch Processing
1410
- Improve embedding batch processing by implementing parallel batch requests
1511
- Add configurable batch size tuning based on API rate limits
1612

17-
### 4. **external_api.py** - API Reliability
13+
### 3. **external_api.py** - API Reliability
1814
- Add rate limiting to prevent API quota exhaustion (consider using `ratelimit` library)
1915
- Implement retry logic with exponential backoff for failed API calls
2016
- Add circuit breaker pattern for cascading failure prevention
2117

22-
### 5. **config.py** - Configuration Validation
18+
### 4. **config.py** - Configuration Validation
2319
- Add Pydantic-based validation for critical config values
2420
- Implement type checking for environment variables at startup
2521
- Add sensible defaults for all optional configuration
2622

27-
### 6. **logger.py** - Production Logging
23+
### 5. **logger.py** - Production Logging
2824
- Add log rotation using `logging.handlers.RotatingFileHandler`
2925
- Configure separate log levels for development vs production
3026
- Add structured logging (JSON format) for better log aggregation

REQUIREMENTS_VERIFICATION.md

Lines changed: 0 additions & 126 deletions
This file was deleted.

db.py

Lines changed: 32 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
import os
22
import sqlite3
33
from typing import Any, Dict, List, Optional
4+
from functools import lru_cache
45

56
from config import CFG # config (keeps chunk_size etc if needed)
67
import atexit
@@ -533,7 +534,10 @@ def _create():
533534
conn.close()
534535

535536
try:
536-
return _retry_on_db_locked(_create)
537+
result = _retry_on_db_locked(_create)
538+
# Invalidate cache after creating a new project
539+
_get_project_by_id_cached.cache_clear()
540+
return result
537541
except Exception as e:
538542
_LOG.error(f"Failed to create project: {e}")
539543
raise
@@ -559,25 +563,36 @@ def _get():
559563
return _retry_on_db_locked(_get)
560564

561565

562-
def get_project_by_id(project_id: str) -> Optional[Dict[str, Any]]:
563-
"""Get project metadata by ID."""
564-
_init_registry_db()
565-
566-
registry_path = _get_projects_registry_path()
567-
566+
@lru_cache(maxsize=128)
567+
def _get_project_by_id_cached(project_id: str, registry_path: str) -> Optional[tuple]:
568+
"""Internal cached function that returns immutable tuple."""
568569
def _get():
569570
conn = _get_connection(registry_path)
570571
try:
571572
cur = conn.cursor()
572573
cur.execute("SELECT * FROM projects WHERE id = ?", (project_id,))
573574
row = cur.fetchone()
574-
return dict(row) if row else None
575+
if row:
576+
# Convert row to tuple of key-value pairs for immutability
577+
return tuple(dict(row).items())
578+
return None
575579
finally:
576580
conn.close()
577581

578582
return _retry_on_db_locked(_get)
579583

580584

585+
def get_project_by_id(project_id: str) -> Optional[Dict[str, Any]]:
586+
"""Get project metadata by ID with caching."""
587+
_init_registry_db()
588+
589+
registry_path = _get_projects_registry_path()
590+
cached_result = _get_project_by_id_cached(project_id, registry_path)
591+
592+
# Convert tuple back to dict
593+
return dict(cached_result) if cached_result else None
594+
595+
581596
def list_projects() -> List[Dict[str, Any]]:
582597
"""List all registered projects."""
583598
_init_registry_db()
@@ -598,7 +613,7 @@ def _list():
598613

599614

600615
def update_project_status(project_id: str, status: str, last_indexed_at: Optional[str] = None):
601-
"""Update project indexing status."""
616+
"""Update project indexing status and invalidate cache."""
602617
_init_registry_db()
603618

604619
registry_path = _get_projects_registry_path()
@@ -622,10 +637,12 @@ def _update():
622637
conn.close()
623638

624639
_retry_on_db_locked(_update)
640+
# Invalidate cache after update
641+
_get_project_by_id_cached.cache_clear()
625642

626643

627644
def update_project_settings(project_id: str, settings: Dict[str, Any]):
628-
"""Update project settings (stored as JSON)."""
645+
"""Update project settings (stored as JSON) and invalidate cache."""
629646
import json
630647
_init_registry_db()
631648

@@ -644,10 +661,12 @@ def _update():
644661
conn.close()
645662

646663
_retry_on_db_locked(_update)
664+
# Invalidate cache after update
665+
_get_project_by_id_cached.cache_clear()
647666

648667

649668
def delete_project(project_id: str):
650-
"""Delete a project and its database."""
669+
"""Delete a project and its database, invalidating cache."""
651670
_init_registry_db()
652671

653672
project = get_project_by_id(project_id)
@@ -673,6 +692,8 @@ def _delete():
673692
conn.close()
674693

675694
_retry_on_db_locked(_delete)
695+
# Invalidate cache after deletion
696+
_get_project_by_id_cached.cache_clear()
676697

677698

678699
def get_or_create_project(project_path: str, name: Optional[str] = None) -> Dict[str, Any]:

0 commit comments

Comments
 (0)