
Commit 7d8df89

Copilot and Mte90 committed
Add LRU caching layer for frequently accessed data
Co-authored-by: Mte90 <403283+Mte90@users.noreply.github.com>
1 parent 7fddd68 commit 7d8df89

2 files changed: +185 −27 lines


cache.py

Lines changed: 129 additions & 0 deletions
@@ -0,0 +1,129 @@
"""
Simple LRU cache implementation for frequently accessed data.
"""
import time
import threading
from typing import Any, Optional, Dict
from collections import OrderedDict


class LRUCache:
    """
    Thread-safe Least Recently Used (LRU) cache with TTL support.
    """

    def __init__(self, max_size: int = 100, ttl: Optional[int] = None):
        """
        Initialize LRU cache.

        Args:
            max_size: Maximum number of items to cache
            ttl: Time-to-live in seconds (None for no expiration)
        """
        self.max_size = max_size
        self.ttl = ttl
        self._cache: OrderedDict = OrderedDict()
        self._timestamps: Dict[str, float] = {}
        self._lock = threading.Lock()
        self._hits = 0
        self._misses = 0

    def get(self, key: str) -> Optional[Any]:
        """
        Get value from cache.

        Args:
            key: Cache key

        Returns:
            Cached value or None if not found/expired
        """
        with self._lock:
            if key not in self._cache:
                self._misses += 1
                return None

            # Check TTL
            if self.ttl is not None:
                timestamp = self._timestamps.get(key, 0)
                if time.time() - timestamp > self.ttl:
                    # Expired
                    del self._cache[key]
                    del self._timestamps[key]
                    self._misses += 1
                    return None

            # Move to end (most recently used)
            self._cache.move_to_end(key)
            self._hits += 1
            return self._cache[key]

    def set(self, key: str, value: Any):
        """
        Set value in cache.

        Args:
            key: Cache key
            value: Value to cache
        """
        with self._lock:
            if key in self._cache:
                # Update existing
                self._cache.move_to_end(key)
            else:
                # Add new
                self._cache[key] = value

                # Evict oldest if over max_size
                if len(self._cache) > self.max_size:
                    oldest_key = next(iter(self._cache))
                    del self._cache[oldest_key]
                    if oldest_key in self._timestamps:
                        del self._timestamps[oldest_key]

            self._cache[key] = value
            self._timestamps[key] = time.time()

    def invalidate(self, key: str):
        """Remove key from cache."""
        with self._lock:
            if key in self._cache:
                del self._cache[key]
            if key in self._timestamps:
                del self._timestamps[key]

    def clear(self):
        """Clear all cached items."""
        with self._lock:
            self._cache.clear()
            self._timestamps.clear()
            self._hits = 0
            self._misses = 0

    def stats(self) -> Dict[str, Any]:
        """Get cache statistics."""
        with self._lock:
            total = self._hits + self._misses
            hit_rate = self._hits / total if total > 0 else 0
            return {
                "size": len(self._cache),
                "max_size": self.max_size,
                "hits": self._hits,
                "misses": self._misses,
                "hit_rate": hit_rate,
                "ttl": self.ttl
            }


# Global caches for different data types
# Project metadata cache (small, frequently accessed)
project_cache = LRUCache(max_size=50, ttl=300)  # 5 minutes TTL

# Project stats cache (small, changes during indexing)
stats_cache = LRUCache(max_size=100, ttl=60)  # 1 minute TTL

# Search results cache (larger, query results)
search_cache = LRUCache(max_size=500, ttl=600)  # 10 minutes TTL

# File content cache (medium size, for recently accessed files)
file_cache = LRUCache(max_size=200, ttl=300)  # 5 minutes TTL
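
For orientation, here is a minimal usage sketch of the class and the module-level caches defined above. The keys and values are made-up examples; only the LRUCache API shown in cache.py is assumed.

from cache import LRUCache, project_cache

# Small standalone cache: at most 2 entries, each expiring 60 seconds after it is set
cache = LRUCache(max_size=2, ttl=60)
cache.set("a", 1)
cache.set("b", 2)
cache.set("c", 3)                # exceeds max_size, so "a" (least recently used) is evicted
assert cache.get("a") is None    # miss: "a" was evicted
assert cache.get("b") == 2       # hit: "b" becomes most recently used
print(cache.stats())             # {'size': 2, 'max_size': 2, 'hits': 1, 'misses': 1, 'hit_rate': 0.5, 'ttl': 60}

# The module-level caches are plain instances shared by whoever imports them
project_cache.set("project:id:example", {"id": "example", "path": "/tmp/example"})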

db.py

Lines changed: 56 additions & 27 deletions
@@ -8,6 +8,7 @@
 import threading
 import queue
 from logger import get_logger
+from cache import project_cache, stats_cache, file_cache

 _LOG = get_logger(__name__)

@@ -282,7 +283,14 @@ def get_project_stats(database_path: str) -> Dict[str, Any]:
     """
     Get statistics for a project database.
     Returns file_count and embedding_count.
+    Uses caching with 60s TTL.
     """
+    # Check cache first
+    cache_key = f"stats:{database_path}"
+    cached = stats_cache.get(cache_key)
+    if cached is not None:
+        return cached
+
     conn = _get_connection(database_path)
     try:
         cur = conn.cursor()
@@ -295,10 +303,14 @@ def get_project_stats(database_path: str) -> Dict[str, Any]:
         cur.execute("SELECT COUNT(*) FROM chunks WHERE embedding IS NOT NULL")
         embedding_count = cur.fetchone()[0]

-        return {
+        stats = {
             "file_count": int(file_count),
             "embedding_count": int(embedding_count)
         }
+
+        # Cache the result
+        stats_cache.set(cache_key, stats)
+        return stats
     finally:
         conn.close()

@@ -330,6 +342,7 @@ def clear_project_data(database_path: str) -> None:
     """
     Clear all files and chunks from a project database.
     Used when re-indexing a project.
+    Invalidates caches.
     """
     conn = _get_connection(database_path)
     try:
@@ -339,6 +352,10 @@ def clear_project_data(database_path: str) -> None:
         # Delete files
         cur.execute("DELETE FROM files")
         conn.commit()
+
+        # Invalidate caches
+        stats_cache.invalidate(f"stats:{database_path}")
+        file_cache.clear()  # Clear all file cache since we deleted everything
     finally:
         conn.close()

@@ -628,25 +645,34 @@ def _create():

             cur.execute("SELECT * FROM projects WHERE id = ?", (project_id,))
             row = cur.fetchone()
-            return dict(row) if row else None
+            result = dict(row) if row else None
+            # Cache the newly created project
+            if result:
+                project_cache.set(f"project:id:{project_id}", result)
+                project_cache.set(f"project:path:{project_path}", result)
+            return result
         finally:
             conn.close()

     try:
         result = _retry_on_db_locked(_create)
-        # Invalidate cache after creating a new project
-        _get_project_by_id_cached.cache_clear()
         return result
     except Exception as e:
         _LOG.error(f"Failed to create project: {e}")
         raise


 def get_project(project_path: str) -> Optional[Dict[str, Any]]:
-    """Get project metadata by path."""
+    """Get project metadata by path with caching."""
     _init_registry_db()
     project_path = os.path.abspath(project_path)

+    # Check cache first
+    cache_key = f"project:path:{project_path}"
+    cached = project_cache.get(cache_key)
+    if cached is not None:
+        return cached
+
     registry_path = _get_projects_registry_path()

     def _get():
@@ -655,43 +681,44 @@ def _get():
             cur = conn.cursor()
             cur.execute("SELECT * FROM projects WHERE path = ?", (project_path,))
             row = cur.fetchone()
-            return dict(row) if row else None
+            result = dict(row) if row else None
+            if result:
+                project_cache.set(cache_key, result)
+            return result
         finally:
             conn.close()

     return _retry_on_db_locked(_get)


-@lru_cache(maxsize=128)
-def _get_project_by_id_cached(project_id: str, registry_path: str) -> Optional[tuple]:
-    """Internal cached function that returns immutable tuple."""
+def get_project_by_id(project_id: str) -> Optional[Dict[str, Any]]:
+    """Get project metadata by ID with caching."""
+    _init_registry_db()
+
+    # Check cache first
+    cache_key = f"project:id:{project_id}"
+    cached = project_cache.get(cache_key)
+    if cached is not None:
+        return cached
+
+    registry_path = _get_projects_registry_path()
+
     def _get():
         conn = _get_connection(registry_path)
         try:
             cur = conn.cursor()
             cur.execute("SELECT * FROM projects WHERE id = ?", (project_id,))
             row = cur.fetchone()
-            if row:
-                # Convert row to tuple of key-value pairs for immutability
-                return tuple(dict(row).items())
-            return None
+            result = dict(row) if row else None
+            if result:
+                project_cache.set(cache_key, result)
+            return result
         finally:
             conn.close()

     return _retry_on_db_locked(_get)


-def get_project_by_id(project_id: str) -> Optional[Dict[str, Any]]:
-    """Get project metadata by ID with caching."""
-    _init_registry_db()
-
-    registry_path = _get_projects_registry_path()
-    cached_result = _get_project_by_id_cached(project_id, registry_path)
-
-    # Convert tuple back to dict
-    return dict(cached_result) if cached_result else None
-
-
 def list_projects() -> List[Dict[str, Any]]:
     """List all registered projects."""
     _init_registry_db()
@@ -737,7 +764,7 @@ def _update():

     _retry_on_db_locked(_update)
     # Invalidate cache after update
-    _get_project_by_id_cached.cache_clear()
+    project_cache.invalidate(f"project:id:{project_id}")


 def update_project_settings(project_id: str, settings: Dict[str, Any]):
@@ -761,7 +788,7 @@ def _update():

     _retry_on_db_locked(_update)
     # Invalidate cache after update
-    _get_project_by_id_cached.cache_clear()
+    project_cache.invalidate(f"project:id:{project_id}")


 def delete_project(project_id: str):
@@ -792,7 +819,9 @@ def _delete():

     _retry_on_db_locked(_delete)
     # Invalidate cache after deletion
-    _get_project_by_id_cached.cache_clear()
+    project_cache.invalidate(f"project:id:{project_id}")
+    if project.get("path"):
+        project_cache.invalidate(f"project:path:{project['path']}")


 def get_or_create_project(project_path: str, name: Optional[str] = None) -> Dict[str, Any]:
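
The db.py changes above all follow the same cache-aside pattern: build a string cache key, return the cached value on a hit, fall back to SQLite on a miss and store the result, and invalidate the key whenever the underlying row changes. A condensed sketch of that pattern, using a hypothetical _load_stats_from_db stub in place of the real db.py query:

from cache import stats_cache

def _load_stats_from_db(database_path: str) -> dict:
    # Hypothetical stand-in for the real SQLite query in db.py
    return {"file_count": 0, "embedding_count": 0}

def get_stats_cached(database_path: str) -> dict:
    cache_key = f"stats:{database_path}"        # same key format db.py uses
    cached = stats_cache.get(cache_key)
    if cached is not None:
        return cached                           # hit: served within the 60s TTL
    stats = _load_stats_from_db(database_path)  # miss: take the slow path
    stats_cache.set(cache_key, stats)           # and fill the cache
    return stats

def clear_stats(database_path: str) -> None:
    # After any write that changes the counts, drop the stale entry right away
    stats_cache.invalidate(f"stats:{database_path}")

Compared with the old @lru_cache(maxsize=128) helper that could only be wiped wholesale via cache_clear(), the new per-key invalidation is more targeted, and the TTL bounds how long a missed invalidation can serve stale project metadata (at most five minutes for project_cache).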

0 commit comments