Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 15 additions & 8 deletions benchmarks/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@
SimilarityMetricType,
)
from vcache.vcache_core.cache.eviction_policy.eviction_policy import EvictionPolicy
from vcache.vcache_core.cache.eviction_policy.strategies.scu import SCUEvictionPolicy
from vcache.vcache_core.cache.eviction_policy.strategies.mru import MRUEvictionPolicy
from vcache.vcache_core.similarity_evaluator import SimilarityEvaluator
from vcache.vcache_core.similarity_evaluator.strategies.benchmark_comparison import (
BenchmarkComparisonSimilarityEvaluator,
Expand Down Expand Up @@ -186,7 +186,7 @@ class Baseline(Enum):

Each baseline represents a different caching strategy:
- GPTCache: Static threshold-based caching
- VCacheLocal: vCache with local threshold adaptation
- VCacheLocal: vCache with local threshold adaptation (original vCache version)
- VCacheGlobal: vCache with global threshold adaptation
- BerkeleyEmbedding: Fine-tuned embeddings with static threshold
- VCacheBerkeleyEmbedding: vCache with fine-tuned embeddings
Expand All @@ -213,8 +213,11 @@ class Dataset(Enum):
(with relative paths from benchmarks/your_datasets/).
"""

# HuggingFace: https://huggingface.co/datasets/vCache/SemBenchmarkClassification
SEM_BENCHMARK_CLASSIFICATION = "vCache/SemBenchmarkClassification"
# HuggingFace: https://huggingface.co/datasets/vCache/SemBenchmarkLmArena
SEM_BENCHMARK_ARENA = "vCache/SemBenchmarkLmArena"
# HuggingFace: https://huggingface.co/datasets/vCache/SemBenchmarkSearchQueries
SEM_BENCHMARK_SEARCH_QUERIES = "vCache/SemBenchmarkSearchQueries"
# Example for custom dataset. The path is relative to 'benchmarks/your_datasets/'
CUSTOM_EXAMPLE = "your_datasets/your_custom_dataset.parquet"
Expand All @@ -235,7 +238,7 @@ class GeneratePlotsOnly(Enum):
### Benchmark Config ###################################################################################################
########################################################################################################################

CONFIDENCE_INTERVALS_ITERATIONS: int = 1
CONFIDENCE_INTERVALS_ITERATIONS: int = 3
DISABLE_PROGRESS_BAR: bool = False
KEEP_SPLIT: int = 100
MAX_VECTOR_DB_CAPACITY: int = 150000
Expand All @@ -252,33 +255,37 @@ class GeneratePlotsOnly(Enum):
int,
]
] = [
# vCache Paper: Figure 4 and 5 (top row)
(
EmbeddingModel.E5_LARGE_V2,
LargeLanguageModel.GPT_4O_MINI,
Dataset.SEM_BENCHMARK_ARENA,
GeneratePlotsOnly.NO,
BenchmarkComparisonSimilarityEvaluator(),
SCUEvictionPolicy(max_size=100000, watermark=0.99, eviction_percentage=0.1),
MRUEvictionPolicy(max_size=100000, watermark=0.99, eviction_percentage=0.1),
60000,
),
# vCache Paper: Figure 4 and 5 (bottom row)
(
EmbeddingModel.GTE,
LargeLanguageModel.LLAMA_3_8B,
Dataset.SEM_BENCHMARK_ARENA,
Dataset.SEM_BENCHMARK_CLASSIFICATION,
GeneratePlotsOnly.NO,
StringComparisonSimilarityEvaluator(),
SCUEvictionPolicy(max_size=100000, watermark=0.99, eviction_percentage=0.1),
MRUEvictionPolicy(max_size=100000, watermark=0.99, eviction_percentage=0.1),
45000,
),
# vCache Paper: Figure 6 and 7
(
EmbeddingModel.GTE,
LargeLanguageModel.LLAMA_3_8B,
Dataset.SEM_BENCHMARK_SEARCH_QUERIES,
GeneratePlotsOnly.NO,
BenchmarkComparisonSimilarityEvaluator(),
SCUEvictionPolicy(max_size=160000, watermark=0.99, eviction_percentage=0.1),
MRUEvictionPolicy(max_size=160000, watermark=0.99, eviction_percentage=0.1),
150000,
),
# Custom Dataset
(
EmbeddingModel.OPENAI_TEXT_EMBEDDING_SMALL,
LargeLanguageModel.GPT_4_1,
Expand All @@ -289,7 +296,7 @@ class GeneratePlotsOnly(Enum):
model_name="gpt-4.1-nano-2025-04-14", temperature=0.0
)
),
SCUEvictionPolicy(max_size=2000, watermark=0.99, eviction_percentage=0.1),
MRUEvictionPolicy(max_size=2000, watermark=0.99, eviction_percentage=0.1),
50,
),
]
Expand Down
30 changes: 26 additions & 4 deletions vcache/vcache_core/cache/eviction_policy/strategies/fifo.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,33 @@


class FIFOEvictionPolicy(EvictionPolicy):
"""
Implements a First-In, First-Out (FIFO) eviction policy.
def __init__(
self, max_size: int, watermark: float = 0.95, eviction_percentage: float = 0.1
):
"""
Implements a First-In, First-Out (FIFO) eviction policy.

This policy evicts items in the order they were added to the cache.
The eviction process is triggered when the number of items in the cache
exceeds a "high-watermark" threshold, which is a percentage of the
absolute `max_size`. Once triggered, the policy will evict a number
of items equivalent to `eviction_percentage` of the `max_size`.

Example:
With `max_size=1000`, `watermark=0.9`, and `eviction_percentage=0.2`,
eviction starts when the cache size grows beyond 900 items. The
policy will then remove 200 items (0.2 * 1000).

This policy evicts items in the order they were added to the cache.
"""
Args:
max_size: The absolute maximum number of items the cache can hold.
watermark: The fraction of `max_size` (between 0 and 1, e.g. 0.95) above which eviction is triggered.
eviction_percentage: The fraction of `max_size` (between 0 and 1, e.g. 0.1) to evict once triggered.
"""
super().__init__(
max_size=max_size,
watermark=watermark,
eviction_percentage=eviction_percentage,
)

def update_eviction_metadata(self, metadata: EmbeddingMetadataObj) -> None:
"""This method is not used in the FIFO policy."""
Expand Down
32 changes: 27 additions & 5 deletions vcache/vcache_core/cache/eviction_policy/strategies/lru.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,35 @@


class LRUEvictionPolicy(EvictionPolicy):
"""
Implements a Least Recently Used (LRU) eviction policy.
_MIN_DATETIME: datetime = datetime.min.replace(tzinfo=timezone.utc)

def __init__(
self, max_size: int, watermark: float = 0.95, eviction_percentage: float = 0.1
):
"""
Implements a Least Recently Used (LRU) eviction policy.

This policy evicts items that have not been accessed for the longest time.
"""
This policy evicts items that have not been accessed for the longest time.
The eviction process is triggered when the number of items in the cache
exceeds a "high-watermark" threshold, which is a percentage of the
absolute `max_size`. Once triggered, the policy will evict a number
of items equivalent to `eviction_percentage` of the `max_size`.

_MIN_DATETIME: datetime = datetime.min.replace(tzinfo=timezone.utc)
Example:
With `max_size=1000`, `watermark=0.9`, and `eviction_percentage=0.2`,
eviction starts when the cache size grows beyond 900 items. The
policy will then remove 200 items (0.2 * 1000).

Args:
max_size: The absolute maximum number of items the cache can hold.
watermark: The fraction of `max_size` (between 0 and 1, e.g. 0.95) above which eviction is triggered.
eviction_percentage: The fraction of `max_size` (between 0 and 1, e.g. 0.1) to evict once triggered.
"""
super().__init__(
max_size=max_size,
watermark=watermark,
eviction_percentage=eviction_percentage,
)

def update_eviction_metadata(self, metadata: EmbeddingMetadataObj) -> None:
"""Updates the metadata object's last-accessed timestamp.
Expand Down
34 changes: 28 additions & 6 deletions vcache/vcache_core/cache/eviction_policy/strategies/mru.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,36 @@


class MRUEvictionPolicy(EvictionPolicy):
"""
Implements a Most Recently Used (MRU) eviction policy.
_MIN_DATETIME: datetime = datetime.min.replace(tzinfo=timezone.utc)

def __init__(
self, max_size: int, watermark: float = 0.95, eviction_percentage: float = 0.1
):
"""
Implements a Most Recently Used (MRU) eviction policy.

This policy evicts items that have been accessed most recently. This can be
useful in scenarios where older items are more likely to be re-accessed.
"""
This policy evicts items that have been accessed most recently. This can be
useful in scenarios where older items are more likely to be re-accessed.
The eviction process is triggered when the number of items in the cache
exceeds a "high-watermark" threshold, which is a percentage of the
absolute `max_size`. Once triggered, the policy will evict a number
of items equivalent to `eviction_percentage` of the `max_size`.

_MIN_DATETIME: datetime = datetime.min.replace(tzinfo=timezone.utc)
Example:
With `max_size=1000`, `watermark=0.9`, and `eviction_percentage=0.2`,
eviction starts when the cache size grows beyond 900 items. The
policy will then remove 200 items (0.2 * 1000).

Args:
max_size: The absolute maximum number of items the cache can hold.
watermark: The fraction of `max_size` (between 0 and 1, e.g. 0.95) above which eviction is triggered.
eviction_percentage: The fraction of `max_size` (between 0 and 1, e.g. 0.1) to evict once triggered.
"""
super().__init__(
max_size=max_size,
watermark=watermark,
eviction_percentage=eviction_percentage,
)

def update_eviction_metadata(self, metadata: EmbeddingMetadataObj) -> None:
"""Updates the metadata object's last-accessed timestamp.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,14 @@


class NoEvictionPolicy(EvictionPolicy):
"""
A policy that represents the absence of an eviction strategy.

This policy never flags the cache as ready for eviction and never selects
any items to be removed. It is suitable for caches that are not size-limited
or for testing purposes.
"""

def __init__(self):
"""Initializes the NoEvictionPolicy."""
"""
A policy that represents the absence of an eviction strategy.

This policy never flags the cache as ready for eviction and never selects
any items to be removed. It is suitable for caches that are not size-limited
or for testing purposes.
"""
# Intentionally override the parent __init__ to ignore sizing parameters.
pass

Expand Down
36 changes: 29 additions & 7 deletions vcache/vcache_core/cache/eviction_policy/strategies/scu.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,37 @@


class SCUEvictionPolicy(EvictionPolicy):
"""
Implements the Sky Confident Utility (SCU) eviction policy.
def __init__(
self, max_size: int, watermark: float = 0.95, eviction_percentage: float = 0.1
):
"""
Implements the Sky Confident Utility (SCU) eviction policy.

IMPORTANT: This policy can only be used with the VCacheLocal policy.

This policy uses a Pareto-optimal, distance-from-ideal framework to select
victims for eviction, balancing an item's generality and the statistical
confidence in its performance.
The eviction process is triggered when the number of items in the cache
exceeds a "high-watermark" threshold, which is a percentage of the
absolute `max_size`. Once triggered, the policy will evict a number
of items equivalent to `eviction_percentage` of the `max_size`.

IMPORTANT: This policy can only be used with the VerifiedDecisionPolicy.
Example:
With `max_size=1000`, `watermark=0.9`, and `eviction_percentage=0.2`,
eviction starts when the cache size grows beyond 900 items. The
policy will then remove 200 items (0.2 * 1000).

This policy uses a Pareto-optimal, distance-from-ideal framework to select
victims for eviction, balancing an item's generality and the statistical
confidence in its performance.
"""
Args:
max_size: The absolute maximum number of items the cache can hold.
watermark: The fraction of `max_size` (between 0 and 1, e.g. 0.95) above which eviction is triggered.
eviction_percentage: The fraction of `max_size` (between 0 and 1, e.g. 0.1) to evict once triggered.
"""
super().__init__(
max_size=max_size,
watermark=watermark,
eviction_percentage=eviction_percentage,
)

def update_eviction_metadata(self, metadata: EmbeddingMetadataObj) -> None:
"""This method is not used in the SCU policy."""
Expand Down