167 changes: 167 additions & 0 deletions src/uipath/_cli/_evals/_configurable_factory.py
@@ -0,0 +1,167 @@
"""Configurable runtime factory that supports model settings overrides."""

import json
import logging
import os
import tempfile
from pathlib import Path

from uipath.runtime import UiPathRuntimeFactoryProtocol, UiPathRuntimeProtocol

from ._models._evaluation_set import EvaluationSetModelSettings

logger = logging.getLogger(__name__)


class ConfigurableRuntimeFactory:
"""Wrapper factory that supports model settings overrides for evaluation runs.

This factory wraps an existing UiPathRuntimeFactoryProtocol implementation
and allows applying model settings overrides when creating runtimes.
"""

def __init__(self, base_factory: UiPathRuntimeFactoryProtocol):
"""Initialize with a base factory to wrap."""
self.base_factory = base_factory
self.model_settings_override: EvaluationSetModelSettings | None = None
self._temp_files: list[str] = []

def set_model_settings_override(
self, settings: EvaluationSetModelSettings | None
) -> None:
"""Set model settings to override when creating runtimes.

Args:
settings: The model settings to apply, or None to clear overrides
"""
self.model_settings_override = settings

async def new_runtime(
self, entrypoint: str, runtime_id: str
) -> UiPathRuntimeProtocol:
"""Create a new runtime with optional model settings overrides.

If model settings override is configured, creates a temporary modified
entrypoint file with the overridden settings.

Args:
entrypoint: Path to the agent entrypoint file
runtime_id: Unique identifier for the runtime instance

Returns:
A new runtime instance with overrides applied if configured
"""
# If no overrides, delegate directly to base factory
if not self.model_settings_override:
return await self.base_factory.new_runtime(entrypoint, runtime_id)

# Apply overrides by creating modified entrypoint
modified_entrypoint = self._apply_overrides(
entrypoint, self.model_settings_override
)
if modified_entrypoint:
# Track temp file for cleanup
self._temp_files.append(modified_entrypoint)
return await self.base_factory.new_runtime(modified_entrypoint, runtime_id)

        # Override not needed or failed; fall back to the original entrypoint
return await self.base_factory.new_runtime(entrypoint, runtime_id)

def _apply_overrides(
self, entrypoint: str, settings: EvaluationSetModelSettings
) -> str | None:
"""Apply model settings overrides to an agent entrypoint.

Creates a temporary modified version of the entrypoint file with
the specified model settings overrides applied.

Args:
entrypoint: Path to the original entrypoint file
settings: Model settings to override

Returns:
            Path to the temporary modified entrypoint, or None if the override was not needed or failed
"""
if (
settings.model == "same-as-agent"
and settings.temperature == "same-as-agent"
):
logger.debug(
"Both model and temperature are 'same-as-agent', no override needed"
)
return None

entrypoint_path = Path(entrypoint)
if not entrypoint_path.exists():
logger.warning(f"Entrypoint file '{entrypoint_path}' not found")
return None

try:
with open(entrypoint_path, "r") as f:
agent_data = json.load(f)
except (json.JSONDecodeError, IOError) as e:
logger.error(f"Failed to load entrypoint file: {e}")
return None

original_settings = agent_data.get("settings", {})
modified_settings = original_settings.copy()

# Override model if not "same-as-agent"
if settings.model != "same-as-agent":
modified_settings["model"] = settings.model
logger.debug(
f"Overriding model: {original_settings.get('model')} -> {settings.model}"
)

        # Override temperature if not "same-as-agent"
        if settings.temperature not in ("same-as-agent", None):
            temperature: float | None = None
            if isinstance(settings.temperature, (int, float)):
                temperature = float(settings.temperature)
            elif isinstance(settings.temperature, str):
                try:
                    temperature = float(settings.temperature)
                except ValueError:
                    logger.warning(
                        f"Invalid temperature value: '{settings.temperature}'"
                    )

            # Only log an override that was actually applied
            if temperature is not None:
                modified_settings["temperature"] = temperature
                logger.debug(
                    f"Overriding temperature: {original_settings.get('temperature')} -> "
                    f"{temperature}"
                )

if modified_settings == original_settings:
return None

agent_data["settings"] = modified_settings

# Create a temporary file with the modified agent definition
try:
temp_fd, temp_path = tempfile.mkstemp(
suffix=".json", prefix="agent_override_"
)
with os.fdopen(temp_fd, "w") as temp_file:
json.dump(agent_data, temp_file, indent=2)

logger.info(f"Created temporary entrypoint with overrides: {temp_path}")
return temp_path
except Exception as e:
logger.error(f"Failed to create temporary entrypoint file: {e}")
return None

async def dispose(self) -> None:
"""Dispose resources and clean up temporary files."""
# Clean up any temporary files created
for temp_file in self._temp_files:
try:
os.unlink(temp_file)
logger.debug(f"Cleaned up temporary file: {temp_file}")
except Exception as e:
logger.warning(f"Failed to clean up temporary file {temp_file}: {e}")

self._temp_files.clear()

# Delegate disposal to base factory
if hasattr(self.base_factory, "dispose"):
await self.base_factory.dispose()
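
A minimal end-to-end sketch of how the wrapper behaves, for review context. `StubFactory`, `agent.json`, and the settings values are all invented for illustration; only `ConfigurableRuntimeFactory` and `EvaluationSetModelSettings` come from this PR:

```python
import asyncio
import json
from pathlib import Path

from uipath._cli._evals._configurable_factory import ConfigurableRuntimeFactory
from uipath._cli._evals._models._evaluation_set import EvaluationSetModelSettings


class StubFactory:
    """Hypothetical stand-in for a UiPathRuntimeFactoryProtocol implementation."""

    async def new_runtime(self, entrypoint: str, runtime_id: str):
        # The wrapped factory receives the (possibly rewritten) entrypoint path.
        print(runtime_id, "->", json.loads(Path(entrypoint).read_text())["settings"])
        return object()  # stand-in for a UiPathRuntimeProtocol instance


async def main() -> None:
    # Assumed on-disk agent definition; only the "settings" key matters here.
    Path("agent.json").write_text(
        json.dumps({"settings": {"model": "gpt-4o", "temperature": 0.7}})
    )

    factory = ConfigurableRuntimeFactory(StubFactory())
    # Keep the agent's model, override only the temperature.
    factory.set_model_settings_override(
        EvaluationSetModelSettings(
            id="low-temp", model="same-as-agent", temperature=0.1
        )
    )
    # A temporary copy of agent.json with temperature=0.1 is written and
    # handed to the wrapped factory in place of the original file.
    await factory.new_runtime("agent.json", "run-1")
    await factory.dispose()  # removes the temporary file


asyncio.run(main())
```

Note the fallback path: if the override cannot be applied (missing or unparsable entrypoint), `new_runtime` logs the problem and delegates with the original entrypoint rather than failing the run.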
13 changes: 11 additions & 2 deletions src/uipath/_cli/_evals/_models/_evaluation_set.py
@@ -70,14 +70,20 @@ class ModelSettings(BaseModel):
"""Model Generation Parameters."""

model: str = Field(..., alias="model")
temperature: float | None = Field(default=None, alias="temperature")
temperature: float | str | None = Field(default=None, alias="temperature")
top_p: float | None = Field(default=None, alias="topP")
top_k: int | None = Field(default=None, alias="topK")
frequency_penalty: float | None = Field(default=None, alias="frequencyPenalty")
presence_penalty: float | None = Field(default=None, alias="presencePenalty")
max_tokens: int | None = Field(default=None, alias="maxTokens")


class EvaluationSetModelSettings(ModelSettings):
"""Model setting overrides within evaluation sets with ID."""

id: str = Field(..., alias="id")


class LLMMockingStrategy(BaseMockingStrategy):
type: Literal[MockingStrategyType.LLM] = MockingStrategyType.LLM
prompt: str = Field(..., alias="prompt")
@@ -211,6 +217,9 @@ class EvaluationSet(BaseModel):
default_factory=list, alias="evaluatorConfigs"
)
evaluations: list[EvaluationItem] = Field(default_factory=list)
model_settings: list[EvaluationSetModelSettings] = Field(
default_factory=list, alias="modelSettings"
)

def extract_selected_evals(self, eval_ids) -> None:
selected_evals: list[EvaluationItem] = []
@@ -239,7 +248,7 @@ class LegacyEvaluationSet(BaseModel):
name: str
batch_size: int = Field(10, alias="batchSize")
timeout_minutes: int = Field(default=20, alias="timeoutMinutes")
model_settings: list[dict[str, Any]] = Field(
model_settings: list[EvaluationSetModelSettings] = Field(
default_factory=list, alias="modelSettings"
)
created_at: str = Field(alias="createdAt")
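
A quick sketch of what the widened `temperature` type and the new `EvaluationSetModelSettings` model accept; the payload values are illustrative:

```python
from uipath._cli._evals._models._evaluation_set import EvaluationSetModelSettings

raw = {
    "id": "gpt-low-temp",
    "model": "gpt-4o",               # or "same-as-agent" to keep the agent's model
    "temperature": "same-as-agent",  # temperature may now be a float or this sentinel
    "topP": 0.9,                     # camelCase keys map via the Field aliases
}
settings = EvaluationSetModelSettings.model_validate(raw)
assert settings.temperature == "same-as-agent"
assert settings.top_p == 0.9
```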
58 changes: 57 additions & 1 deletion src/uipath/_cli/_evals/_runtime.py
@@ -56,6 +56,7 @@
from ...eval.models.models import AgentExecution, EvalItemResult
from .._utils._eval_set import EvalHelpers
from .._utils._parallelization import execute_parallel
from ._configurable_factory import ConfigurableRuntimeFactory
from ._evaluator_factory import EvaluatorFactory
from ._models._evaluation_set import (
EvaluationItem,
@@ -184,6 +185,7 @@ class UiPathEvalContext:
verbose: bool = False
enable_mocker_cache: bool = False
report_coverage: bool = False
model_settings_id: str = "default"


class UiPathEvalRuntime:
@@ -197,7 +199,8 @@ def __init__(
event_bus: EventBus,
):
self.context: UiPathEvalContext = context
self.factory: UiPathRuntimeFactoryProtocol = factory
# Wrap the factory to support model settings overrides
self.factory = ConfigurableRuntimeFactory(factory)
self.event_bus: EventBus = event_bus
self.trace_manager: UiPathTraceManager = trace_manager
self.span_exporter: ExecutionSpanExporter = ExecutionSpanExporter()
@@ -222,6 +225,10 @@ async def __aexit__(self, *args: Any) -> None:
self.coverage.stop()
self.coverage.report(include=["./*"], show_missing=True)

# Clean up any temporary files created by the factory
if hasattr(self.factory, "dispose"):
await self.factory.dispose()

async def get_schema(self, runtime: UiPathRuntimeProtocol) -> UiPathRuntimeSchema:
schema = await runtime.get_schema()
if schema is None:
@@ -550,12 +557,61 @@ def _get_and_clear_execution_data(

return spans, logs

async def _configure_model_settings_override(self) -> None:
"""Configure the factory with model settings override if specified."""
# Skip if no model settings ID specified
if (
not self.context.model_settings_id
or self.context.model_settings_id == "default"
):
return

# Load evaluation set to get model settings
evaluation_set, _ = EvalHelpers.load_eval_set(self.context.eval_set or "")
if (
not hasattr(evaluation_set, "model_settings")
or not evaluation_set.model_settings
):
logger.warning("No model settings available in evaluation set")
return

# Find the specified model settings
target_model_settings = next(
(
ms
for ms in evaluation_set.model_settings
if ms.id == self.context.model_settings_id
),
None,
)

if not target_model_settings:
logger.warning(
f"Model settings ID '{self.context.model_settings_id}' not found in evaluation set"
)
return

logger.info(
f"Configuring model settings override: id='{target_model_settings.id}', "
f"model='{target_model_settings.model}', temperature='{target_model_settings.temperature}'"
)

# Configure the factory with the override settings
self.factory.set_model_settings_override(target_model_settings)

async def execute_runtime(
self,
eval_item: EvaluationItem,
execution_id: str,
runtime: UiPathRuntimeProtocol,
) -> UiPathEvalRunExecutionOutput:
# Apply model settings override if specified
await self._configure_model_settings_override()

runtime = await self.factory.new_runtime(
entrypoint=self.context.entrypoint or "",
runtime_id=execution_id,
)
log_handler = self._setup_execution_logging(execution_id)
attributes = {
"evalId": eval_item.id,
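
`_configure_model_settings_override` boils down to a by-id lookup with `"default"` acting as a no-op sentinel. A condensed, self-contained sketch of that contract (the sample entry is invented):

```python
from uipath._cli._evals._models._evaluation_set import EvaluationSetModelSettings


def select_override(
    entries: list[EvaluationSetModelSettings], settings_id: str
) -> EvaluationSetModelSettings | None:
    # "default" (or empty) means: run with the agent's own settings.
    if not settings_id or settings_id == "default":
        return None
    return next((ms for ms in entries if ms.id == settings_id), None)


entries = [
    EvaluationSetModelSettings(id="low-temp", model="same-as-agent", temperature=0.1)
]
assert select_override(entries, "default") is None
assert select_override(entries, "low-temp") is entries[0]
assert select_override(entries, "missing") is None  # the runtime logs a warning here
```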
1 change: 1 addition & 0 deletions src/uipath/_cli/_utils/_eval_set.py
@@ -149,6 +149,7 @@ def migrate_evaluation_item(
migrate_evaluation_item(evaluation, eval_set.evaluator_refs)
for evaluation in eval_set.evaluations
],
model_settings=eval_set.model_settings,
)
except ValidationError as e:
raise ValueError(
9 changes: 9 additions & 0 deletions src/uipath/_cli/cli_eval.py
@@ -92,6 +92,12 @@ def setup_reporting_prereq(no_report: bool) -> bool:
default=False,
help="Report evaluation coverage",
)
@click.option(
"--model-settings-id",
type=str,
default="default",
help="Model settings ID from evaluation set to override agent settings (default: 'default')",
)
def eval(
entrypoint: str | None,
eval_set: str | None,
@@ -102,6 +108,7 @@ def eval(
output_file: str | None,
enable_mocker_cache: bool,
report_coverage: bool,
model_settings_id: str,
) -> None:
"""Run an evaluation set against the agent.

@@ -114,6 +121,7 @@
no_report: Do not report the evaluation results
enable_mocker_cache: Enable caching for LLM mocker responses
report_coverage: Report evaluation coverage
model_settings_id: Model settings ID to override agent settings
"""
should_register_progress_reporter = setup_reporting_prereq(no_report)

@@ -148,6 +156,7 @@
eval_context.eval_set = resolved_eval_set_path
eval_context.eval_ids = eval_ids
eval_context.report_coverage = report_coverage
eval_context.model_settings_id = model_settings_id

try:

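
Assuming the command is exposed as `uipath eval`, a run with overrides would look like `uipath eval --eval-set <path> --model-settings-id low-temp`, where `low-temp` must match an `id` in the evaluation set's `modelSettings` array; the default value `default` leaves the agent's own settings untouched.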