From 68a7b3722aff87361cdb5b5a6ce5a0b9df38b1e7 Mon Sep 17 00:00:00 2001 From: mathurk Date: Fri, 26 Dec 2025 16:17:47 -0800 Subject: [PATCH 1/6] feat: add model settings id parameter --- src/uipath/_cli/_evals/_runtime.py | 100 +++++++++++++++++++++++++++- src/uipath/_cli/_utils/_eval_set.py | 1 + src/uipath/_cli/cli_eval.py | 9 +++ 3 files changed, 109 insertions(+), 1 deletion(-) diff --git a/src/uipath/_cli/_evals/_runtime.py b/src/uipath/_cli/_evals/_runtime.py index 23751d266..403bec993 100644 --- a/src/uipath/_cli/_evals/_runtime.py +++ b/src/uipath/_cli/_evals/_runtime.py @@ -1,5 +1,7 @@ import json import logging +import os +import tempfile import uuid from collections import defaultdict from contextlib import contextmanager @@ -44,12 +46,14 @@ from ...eval.evaluators import BaseEvaluator from ...eval.models import EvaluationResult from ...eval.models.models import AgentExecution, EvalItemResult +from .._utils._console import ConsoleLogger from .._utils._eval_set import EvalHelpers from .._utils._parallelization import execute_parallel from ._evaluator_factory import EvaluatorFactory from ._models._evaluation_set import ( EvaluationItem, EvaluationSet, + LegacyEvaluationSet, ) from ._models._exceptions import EvaluationRuntimeException from ._models._output import ( @@ -67,6 +71,7 @@ set_execution_context, ) +logger = logging.getLogger(__name__) class ExecutionSpanExporter(SpanExporter): """Custom exporter that stores spans grouped by execution ids.""" @@ -153,6 +158,7 @@ class UiPathEvalContext: verbose: bool = False enable_mocker_cache: bool = False report_coverage: bool = False + model_settings_id: str = "default" class UiPathEvalRuntime: @@ -513,11 +519,97 @@ def _get_and_clear_execution_data( return spans, logs + async def _apply_model_settings_override(self) -> str | None: + """Apply model settings override if specified. + + Returns: + Modified entrypoint path if settings were overridden, otherwise None + """ + console = ConsoleLogger() + console.info(f"Checking model settings override with ID: '{self.context.model_settings_id}'") + + # Skip if no model settings ID specified + if not self.context.model_settings_id or self.context.model_settings_id == "default": + return None + + # Load evaluation set to get model settings + evaluation_set, _ = EvalHelpers.load_eval_set(self.context.eval_set or "") + if not hasattr(evaluation_set, 'model_settings') or not evaluation_set.model_settings: + console.warning("No model settings available in evaluation set") + return None + + # Find the specified model settings + target_model_settings = next( + (ms for ms in evaluation_set.model_settings if ms.id == self.context.model_settings_id), + None + ) + + if not target_model_settings: + logger.warning(f"Model settings ID '{self.context.model_settings_id}' not found in evaluation set") + return None + + console.info(f"Found model settings: model='{target_model_settings.model_name}', temperature='{target_model_settings.temperature}'") + + # Early exit: if both values are "same-as-agent", no override needed + if (target_model_settings.model_name == "same-as-agent" and + target_model_settings.temperature == "same-as-agent"): + console.info("Both model and temperature are 'same-as-agent', no override needed") + return None + + # Load the original entrypoint file + entrypoint_path = Path(self.context.entrypoint or "agent.json") + if not entrypoint_path.exists(): + console.warning(f"Entrypoint file '{entrypoint_path}' not found, model settings override not applicable") + return None + + with open(entrypoint_path, 'r') as f: + agent_data = json.load(f) + + # Apply model settings overrides + settings = agent_data.get("settings", {}) + original_model = settings.get("model", "") + original_temperature = settings.get("temperature", 0.0) + + console.info(f"Original agent settings: model='{original_model}', temperature={original_temperature}") + + # Override model if not "same-as-agent" + if target_model_settings.model_name != "same-as-agent": + settings["model"] = target_model_settings.model_name + + # Override temperature if not "same-as-agent" + if target_model_settings.temperature != "same-as-agent": + try: + settings["temperature"] = float(target_model_settings.temperature) + except ValueError: + logger.warning(f"Invalid temperature value: '{target_model_settings.temperature}', keeping original") + + agent_data["settings"] = settings + + # Create a temporary file with the modified agent definition + temp_fd, temp_path = tempfile.mkstemp(suffix=".json", prefix="agent_override_") + try: + with os.fdopen(temp_fd, 'w') as temp_file: + json.dump(agent_data, temp_file, indent=2) + + console.info(f"Applied model settings override: model='{settings.get('model', '')}', temperature={settings.get('temperature', 0.0)}") + return temp_path + except Exception as e: + logger.error(f"Failed to create temporary agent file: {e}") + try: + os.unlink(temp_path) + except: + pass + return None + async def execute_runtime( self, eval_item: EvaluationItem, execution_id: str ) -> UiPathEvalRunExecutionOutput: + # Apply model settings override if needed + overridden_entrypoint = await self._apply_model_settings_override() + entrypoint_to_use = overridden_entrypoint or self.context.entrypoint + runtime = await self.factory.new_runtime( - entrypoint=self.context.entrypoint or "", + entrypoint=entrypoint_to_use or "", runtime_id=execution_id, ) log_handler = self._setup_execution_logging(execution_id) @@ -551,6 +643,12 @@ async def execute_runtime( finally: await runtime.dispose() + # Clean up temporary file if it was created + if overridden_entrypoint and overridden_entrypoint != (self.context.entrypoint or ""): + try: + os.unlink(overridden_entrypoint) + except Exception as e: + logger.warning(f"Failed to clean up temporary agent file: {e}") end_time = time() spans, logs = self._get_and_clear_execution_data(execution_id) diff --git a/src/uipath/_cli/_utils/_eval_set.py b/src/uipath/_cli/_utils/_eval_set.py index ffb352be0..0be35bff0 100644 --- a/src/uipath/_cli/_utils/_eval_set.py +++ b/src/uipath/_cli/_utils/_eval_set.py @@ -149,6 +149,7 @@ def migrate_evaluation_item( migrate_evaluation_item(evaluation, eval_set.evaluator_refs) for evaluation in eval_set.evaluations ], + model_settings=eval_set.model_settings, # Preserve model settings ) except ValidationError as e: raise ValueError( diff --git a/src/uipath/_cli/cli_eval.py b/src/uipath/_cli/cli_eval.py index a4a02108b..d8fa76daf 100644 --- a/src/uipath/_cli/cli_eval.py +++ b/src/uipath/_cli/cli_eval.py @@ -92,6 +92,12 @@ def setup_reporting_prereq(no_report: bool) -> bool: default=False, help="Report evaluation coverage", ) +@click.option( + "--model-settings-id", + type=str, + default="default", + help="Model settings ID from evaluation set to override agent settings (default: 'default')", +) def eval( entrypoint: str | None, eval_set: str | None, @@ -102,6 +108,7 @@ def eval( output_file: str | None, enable_mocker_cache: bool, report_coverage: bool, + model_settings_id: str, ) -> None: """Run an evaluation set against the agent. @@ -114,6 +121,7 @@ def eval( no_report: Do not report the evaluation results enable_mocker_cache: Enable caching for LLM mocker responses report_coverage: Report evaluation coverage + model_settings_id: Model settings ID to override agent settings """ should_register_progress_reporter = setup_reporting_prereq(no_report) @@ -148,6 +156,7 @@ def eval( eval_context.eval_set = resolved_eval_set_path eval_context.eval_ids = eval_ids eval_context.report_coverage = report_coverage + eval_context.model_settings_id = model_settings_id try: From 00d73e8d65605ac38e4bc3dfa1c23f285664fb5d Mon Sep 17 00:00:00 2001 From: mathurk Date: Fri, 26 Dec 2025 21:28:39 -0800 Subject: [PATCH 2/6] fix: lint error --- src/uipath/_cli/_evals/_models/_evaluation_set.py | 13 ++++++++++++- src/uipath/_cli/_utils/_eval_set.py | 1 - 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/uipath/_cli/_evals/_models/_evaluation_set.py b/src/uipath/_cli/_evals/_models/_evaluation_set.py index 78a2fb6f5..a5ab5b703 100644 --- a/src/uipath/_cli/_evals/_models/_evaluation_set.py +++ b/src/uipath/_cli/_evals/_models/_evaluation_set.py @@ -78,6 +78,14 @@ class ModelSettings(BaseModel): max_tokens: int | None = Field(default=None, alias="maxTokens") +class EvaluationSetModelSettings(BaseModel): + """Model settings configuration for evaluation sets.""" + + id: str = Field(..., alias="id") + model_name: str = Field(..., alias="modelName") + temperature: str = Field(..., alias="temperature") # Can be "same-as-agent" or numeric string + + class LLMMockingStrategy(BaseMockingStrategy): type: Literal[MockingStrategyType.LLM] = MockingStrategyType.LLM prompt: str = Field(..., alias="prompt") @@ -211,6 +219,9 @@ class EvaluationSet(BaseModel): default_factory=list, alias="evaluatorConfigs" ) evaluations: list[EvaluationItem] = Field(default_factory=list) + model_settings: list[EvaluationSetModelSettings] = Field( + default_factory=list, alias="modelSettings" + ) def extract_selected_evals(self, eval_ids) -> None: selected_evals: list[EvaluationItem] = [] @@ -239,7 +250,7 @@ class LegacyEvaluationSet(BaseModel): name: str batch_size: int = Field(10, alias="batchSize") timeout_minutes: int = Field(default=20, alias="timeoutMinutes") - model_settings: list[dict[str, Any]] = Field( + model_settings: list[EvaluationSetModelSettings] = Field( default_factory=list, alias="modelSettings" ) created_at: str = Field(alias="createdAt") diff --git a/src/uipath/_cli/_utils/_eval_set.py b/src/uipath/_cli/_utils/_eval_set.py index 0be35bff0..ffb352be0 100644 --- a/src/uipath/_cli/_utils/_eval_set.py +++ b/src/uipath/_cli/_utils/_eval_set.py @@ -149,7 +149,6 @@ def migrate_evaluation_item( migrate_evaluation_item(evaluation, eval_set.evaluator_refs) for evaluation in eval_set.evaluations ], - model_settings=eval_set.model_settings, # Preserve model settings ) except ValidationError as e: raise ValueError( From 5bd31a50f6aeb531a76fc7230d388b68e3f2e0a8 Mon Sep 17 00:00:00 2001 From: mathurk Date: Mon, 29 Dec 2025 23:10:35 -0800 Subject: [PATCH 3/6] fix: wrapper factory to update model settings --- .../_cli/_evals/_configurable_factory.py | 168 ++++++++++++++++++ .../_cli/_evals/_models/_evaluation_set.py | 14 +- src/uipath/_cli/_evals/_runtime.py | 101 +++-------- 3 files changed, 201 insertions(+), 82 deletions(-) create mode 100644 src/uipath/_cli/_evals/_configurable_factory.py diff --git a/src/uipath/_cli/_evals/_configurable_factory.py b/src/uipath/_cli/_evals/_configurable_factory.py new file mode 100644 index 000000000..680a37210 --- /dev/null +++ b/src/uipath/_cli/_evals/_configurable_factory.py @@ -0,0 +1,168 @@ +"""Configurable runtime factory that supports model settings overrides.""" + +import json +import os +import tempfile +import logging +from pathlib import Path +from typing import Any + + UiPathRuntimeFactoryProtocol, + UiPathRuntimeProtocol, +) + +from ._models._evaluation_set import EvaluationSetModelSettings + +logger = logging.getLogger(__name__) + + +class ConfigurableRuntimeFactory: + """ + Wrapper factory that supports model settings overrides for evaluation runs. + + This factory wraps an existing UiPathRuntimeFactoryProtocol implementation + and allows applying model settings overrides when creating runtimes. + """ + + def __init__(self, base_factory: UiPathRuntimeFactoryProtocol): + """Initialize with a base factory to wrap.""" + self.base_factory = base_factory + self.model_settings_override: EvaluationSetModelSettings | None = None + self._temp_files: list[str] = [] + + def set_model_settings_override(self, settings: EvaluationSetModelSettings | None) -> None: + """ + Set model settings to override when creating runtimes. + + Args: + settings: The model settings to apply, or None to clear overrides + """ + self.model_settings_override = settings + + async def new_runtime(self, entrypoint: str, runtime_id: str) -> UiPathRuntimeProtocol: + """ + Create a new runtime with optional model settings overrides. + + If model settings override is configured, creates a temporary modified + entrypoint file with the overridden settings. + + Args: + entrypoint: Path to the agent entrypoint file + runtime_id: Unique identifier for the runtime instance + + Returns: + A new runtime instance with overrides applied if configured + """ + # If no overrides, delegate directly to base factory + if not self.model_settings_override: + return await self.base_factory.new_runtime(entrypoint, runtime_id) + + # Apply overrides by creating modified entrypoint + modified_entrypoint = self._apply_overrides(entrypoint, self.model_settings_override) + if modified_entrypoint: + # Track temp file for cleanup + self._temp_files.append(modified_entrypoint) + return await self.base_factory.new_runtime(modified_entrypoint, runtime_id) + + # If override failed, fall back to original + return await self.base_factory.new_runtime(entrypoint, runtime_id) + + def _apply_overrides(self, entrypoint: str, settings: EvaluationSetModelSettings) -> str | None: + """ + Apply model settings overrides to an agent entrypoint. + + Creates a temporary modified version of the entrypoint file with + the specified model settings overrides applied. + + Args: + entrypoint: Path to the original entrypoint file + settings: Model settings to override + + Returns: + Path to temporary modified entrypoint, or None if override not needed/failed + """ + if settings.model == "same-as-agent" and settings.temperature == "same-as-agent": + logger.debug("Both model and temperature are 'same-as-agent', no override needed") + return None + + entrypoint_path = Path(entrypoint) + if not entrypoint_path.exists(): + logger.warning(f"Entrypoint file '{entrypoint_path}' not found") + return None + + try: + with open(entrypoint_path, 'r') as f: + agent_data = json.load(f) + except (json.JSONDecodeError, IOError) as e: + logger.error(f"Failed to load entrypoint file: {e}") + return None + + original_settings = agent_data.get("settings", {}) + modified_settings = original_settings.copy() + + # Override model if not "same-as-agent" + if settings.model != "same-as-agent": + modified_settings["model"] = settings.model + logger.debug(f"Overriding model: {original_settings.get('model')} -> {settings.model}") + + # Override temperature if not "same-as-agent" + if settings.temperature not in ["same-as-agent", None]: + if isinstance(settings.temperature, (int, float)): + modified_settings["temperature"] = float(settings.temperature) + elif isinstance(settings.temperature, str): + try: + modified_settings["temperature"] = float(settings.temperature) + except ValueError: + logger.warning(f"Invalid temperature value: '{settings.temperature}'") + + if "temperature" in modified_settings: + logger.debug( + f"Overriding temperature: {original_settings.get('temperature')} -> " + f"{modified_settings['temperature']}" + ) + + if modified_settings == original_settings: + return None + + agent_data["settings"] = modified_settings + + # Create a temporary file with the modified agent definition + try: + temp_fd, temp_path = tempfile.mkstemp(suffix=".json", prefix="agent_override_") + with os.fdopen(temp_fd, 'w') as temp_file: + json.dump(agent_data, temp_file, indent=2) + + logger.info(f"Created temporary entrypoint with overrides: {temp_path}") + return temp_path + except Exception as e: + logger.error(f"Failed to create temporary entrypoint file: {e}") + return None + + async def dispose(self) -> None: + """Dispose resources and clean up temporary files.""" + # Clean up any temporary files created + for temp_file in self._temp_files: + try: + os.unlink(temp_file) + logger.debug(f"Cleaned up temporary file: {temp_file}") + except Exception as e: + logger.warning(f"Failed to clean up temporary file {temp_file}: {e}") + + self._temp_files.clear() + + # Delegate disposal to base factory + if hasattr(self.base_factory, 'dispose'): + await self.base_factory.dispose() + + # Delegate other factory protocol methods to base factory + async def discover_entrypoints(self) -> list[str]: + """Discover available entrypoints from the base factory.""" + if hasattr(self.base_factory, 'discover_entrypoints'): + return await self.base_factory.discover_entrypoints() + return [] + + async def discover_runtimes(self) -> list[UiPathRuntimeProtocol]: + """Discover available runtimes from the base factory.""" + if hasattr(self.base_factory, 'discover_runtimes'): + return await self.base_factory.discover_runtimes() + return [] \ No newline at end of file diff --git a/src/uipath/_cli/_evals/_models/_evaluation_set.py b/src/uipath/_cli/_evals/_models/_evaluation_set.py index a5ab5b703..780421ae8 100644 --- a/src/uipath/_cli/_evals/_models/_evaluation_set.py +++ b/src/uipath/_cli/_evals/_models/_evaluation_set.py @@ -78,12 +78,18 @@ class ModelSettings(BaseModel): max_tokens: int | None = Field(default=None, alias="maxTokens") -class EvaluationSetModelSettings(BaseModel): - """Model settings configuration for evaluation sets.""" +class EvaluationSetModelSettings(ModelSettings): + """Model settings configuration for evaluation sets with ID and special values support. + + Extends ModelSettings to add an ID field and support for "same-as-agent" special value + in model and temperature fields for evaluation-specific overrides. + """ id: str = Field(..., alias="id") - model_name: str = Field(..., alias="modelName") - temperature: str = Field(..., alias="temperature") # Can be "same-as-agent" or numeric string + # Override model to keep it required and support "same-as-agent" + model: str = Field(..., alias="model") # Can be "same-as-agent" or actual model name + # Override temperature to support "same-as-agent" as string + temperature: float | str | None = Field(default=None, alias="temperature") # Can be "same-as-agent", float, or None class LLMMockingStrategy(BaseMockingStrategy): diff --git a/src/uipath/_cli/_evals/_runtime.py b/src/uipath/_cli/_evals/_runtime.py index d15419cf3..24b3d7159 100644 --- a/src/uipath/_cli/_evals/_runtime.py +++ b/src/uipath/_cli/_evals/_runtime.py @@ -1,7 +1,5 @@ import json import logging -import os -import tempfile import uuid from collections import defaultdict from contextlib import contextmanager @@ -55,15 +53,16 @@ from ...eval.evaluators import BaseEvaluator from ...eval.models import EvaluationResult from ...eval.models.models import AgentExecution, EvalItemResult -from .._utils._console import ConsoleLogger from .._utils._eval_set import EvalHelpers from .._utils._parallelization import execute_parallel from ._evaluator_factory import EvaluatorFactory from ._models._evaluation_set import ( EvaluationItem, EvaluationSet, + EvaluationSetModelSettings, LegacyEvaluationSet, ) +from ._configurable_factory import ConfigurableRuntimeFactory from ._models._exceptions import EvaluationRuntimeException from ._models._output import ( EvaluationResultDto, @@ -200,7 +199,8 @@ def __init__( event_bus: EventBus, ): self.context: UiPathEvalContext = context - self.factory: UiPathRuntimeFactoryProtocol = factory + # Wrap the factory to support model settings overrides + self.factory = ConfigurableRuntimeFactory(factory) self.event_bus: EventBus = event_bus self.trace_manager: UiPathTraceManager = trace_manager self.span_exporter: ExecutionSpanExporter = ExecutionSpanExporter() @@ -228,6 +228,10 @@ async def __aexit__(self, *args: Any) -> None: self.coverage.stop() self.coverage.report(include=["./*"], show_missing=True) + # Clean up any temporary files created by the factory + if hasattr(self.factory, 'dispose'): + await self.factory.dispose() + async def _ensure_metadata_loaded(self) -> None: """Load metadata (schema, agent model) from a single temporary runtime. @@ -568,24 +572,17 @@ def _get_and_clear_execution_data( return spans, logs - async def _apply_model_settings_override(self) -> str | None: - """Apply model settings override if specified. - - Returns: - Modified entrypoint path if settings were overridden, otherwise None - """ - console = ConsoleLogger() - console.info(f"Checking model settings override with ID: '{self.context.model_settings_id}'") - + async def _configure_model_settings_override(self) -> None: + """Configure the factory with model settings override if specified.""" # Skip if no model settings ID specified if not self.context.model_settings_id or self.context.model_settings_id == "default": - return None + return # Load evaluation set to get model settings evaluation_set, _ = EvalHelpers.load_eval_set(self.context.eval_set or "") if not hasattr(evaluation_set, 'model_settings') or not evaluation_set.model_settings: - console.warning("No model settings available in evaluation set") - return None + logger.warning("No model settings available in evaluation set") + return # Find the specified model settings target_model_settings = next( @@ -595,70 +592,24 @@ async def _apply_model_settings_override(self) -> str | None: if not target_model_settings: logger.warning(f"Model settings ID '{self.context.model_settings_id}' not found in evaluation set") - return None - - console.info(f"Found model settings: model='{target_model_settings.model_name}', temperature='{target_model_settings.temperature}'") - - # Early exit: if both values are "same-as-agent", no override needed - if (target_model_settings.model_name == "same-as-agent" and - target_model_settings.temperature == "same-as-agent"): - console.info("Both model and temperature are 'same-as-agent', no override needed") - return None - - # Load the original entrypoint file - entrypoint_path = Path(self.context.entrypoint or "agent.json") - if not entrypoint_path.exists(): - console.warning(f"Entrypoint file '{entrypoint_path}' not found, model settings override not applicable") - return None - - with open(entrypoint_path, 'r') as f: - agent_data = json.load(f) - - # Apply model settings overrides - settings = agent_data.get("settings", {}) - original_model = settings.get("model", "") - original_temperature = settings.get("temperature", 0.0) - - console.info(f"Original agent settings: model='{original_model}', temperature={original_temperature}") - - # Override model if not "same-as-agent" - if target_model_settings.model_name != "same-as-agent": - settings["model"] = target_model_settings.model_name - - # Override temperature if not "same-as-agent" - if target_model_settings.temperature != "same-as-agent": - try: - settings["temperature"] = float(target_model_settings.temperature) - except ValueError: - logger.warning(f"Invalid temperature value: '{target_model_settings.temperature}', keeping original") - - agent_data["settings"] = settings + return - # Create a temporary file with the modified agent definition - temp_fd, temp_path = tempfile.mkstemp(suffix=".json", prefix="agent_override_") - try: - with os.fdopen(temp_fd, 'w') as temp_file: - json.dump(agent_data, temp_file, indent=2) + logger.info( + f"Configuring model settings override: id='{target_model_settings.id}', " + f"model='{target_model_settings.model}', temperature='{target_model_settings.temperature}'" + ) - console.info(f"Applied model settings override: model='{settings.get('model', '')}', temperature={settings.get('temperature', 0.0)}") - return temp_path - except Exception as e: - logger.error(f"Failed to create temporary agent file: {e}") - try: - os.unlink(temp_path) - except: - pass - return None + # Configure the factory with the override settings + self.factory.set_model_settings_override(target_model_settings) async def execute_runtime( self, eval_item: EvaluationItem, execution_id: str ) -> UiPathEvalRunExecutionOutput: - # Apply model settings override if needed - overridden_entrypoint = await self._apply_model_settings_override() - entrypoint_to_use = overridden_entrypoint or self.context.entrypoint + # Apply model settings override if specified + await self._configure_model_settings_override() runtime = await self.factory.new_runtime( - entrypoint=entrypoint_to_use or "", + entrypoint=self.context.entrypoint or "", runtime_id=execution_id, ) log_handler = self._setup_execution_logging(execution_id) @@ -692,12 +643,6 @@ async def execute_runtime( finally: await runtime.dispose() - # Clean up temporary file if it was created - if overridden_entrypoint and overridden_entrypoint != (self.context.entrypoint or ""): - try: - os.unlink(overridden_entrypoint) - except Exception as e: - logger.warning(f"Failed to clean up temporary agent file: {e}") end_time = time() spans, logs = self._get_and_clear_execution_data(execution_id) From 6c2ac85a415909dc456016c3e4015315074aedc2 Mon Sep 17 00:00:00 2001 From: mathurk Date: Mon, 29 Dec 2025 23:21:07 -0800 Subject: [PATCH 4/6] fix: add test --- .../_cli/_evals/_configurable_factory.py | 4 +- tests/cli/eval/test_configurable_factory.py | 210 ++++++++++++++++++ 2 files changed, 211 insertions(+), 3 deletions(-) create mode 100644 tests/cli/eval/test_configurable_factory.py diff --git a/src/uipath/_cli/_evals/_configurable_factory.py b/src/uipath/_cli/_evals/_configurable_factory.py index 680a37210..3f3ed31f5 100644 --- a/src/uipath/_cli/_evals/_configurable_factory.py +++ b/src/uipath/_cli/_evals/_configurable_factory.py @@ -7,9 +7,7 @@ from pathlib import Path from typing import Any - UiPathRuntimeFactoryProtocol, - UiPathRuntimeProtocol, -) +from uipath.runtime import UiPathRuntimeFactoryProtocol, UiPathRuntimeProtocol from ._models._evaluation_set import EvaluationSetModelSettings diff --git a/tests/cli/eval/test_configurable_factory.py b/tests/cli/eval/test_configurable_factory.py new file mode 100644 index 000000000..247b37bcd --- /dev/null +++ b/tests/cli/eval/test_configurable_factory.py @@ -0,0 +1,210 @@ +"""Tests for ConfigurableRuntimeFactory.""" + +import json +import tempfile +import pytest +from pathlib import Path +from unittest.mock import AsyncMock, Mock + +from uipath._cli._evals._configurable_factory import ConfigurableRuntimeFactory +from uipath._cli._evals._models._evaluation_set import EvaluationSetModelSettings + + +@pytest.mark.asyncio +async def test_configurable_factory_no_override(): + """Test factory without any overrides.""" + mock_base_factory = AsyncMock() + mock_runtime = Mock() + mock_base_factory.new_runtime.return_value = mock_runtime + + factory = ConfigurableRuntimeFactory(mock_base_factory) + + result = await factory.new_runtime("test.json", "test-id") + + assert result == mock_runtime + mock_base_factory.new_runtime.assert_called_once_with("test.json", "test-id") + + +@pytest.mark.asyncio +async def test_configurable_factory_with_model_override(): + """Test factory with model override.""" + # Create a temporary agent.json file + test_agent = { + "settings": { + "model": "gpt-4", + "temperature": 0.7 + } + } + + with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f: + json.dump(test_agent, f) + temp_path = f.name + + try: + mock_base_factory = AsyncMock() + mock_runtime = Mock() + mock_base_factory.new_runtime.return_value = mock_runtime + + factory = ConfigurableRuntimeFactory(mock_base_factory) + + # Set model override + settings = EvaluationSetModelSettings( + id="test-settings", + model="gpt-3.5-turbo", + temperature="same-as-agent" + ) + factory.set_model_settings_override(settings) + + result = await factory.new_runtime(temp_path, "test-id") + + assert result == mock_runtime + # Should have been called with a modified temp file + call_args = mock_base_factory.new_runtime.call_args + assert call_args[0][0] != temp_path # Different path (temp file) + assert call_args[0][1] == "test-id" + + # Verify the temp file has correct content + with open(call_args[0][0]) as f: + modified_data = json.load(f) + assert modified_data["settings"]["model"] == "gpt-3.5-turbo" + assert modified_data["settings"]["temperature"] == 0.7 # Unchanged + + finally: + Path(temp_path).unlink(missing_ok=True) + # Clean up temp files created by factory + await factory.dispose() + + +@pytest.mark.asyncio +async def test_configurable_factory_same_as_agent(): + """Test factory when both settings are 'same-as-agent'.""" + # Create a temporary agent.json file + test_agent = { + "settings": { + "model": "gpt-4", + "temperature": 0.7 + } + } + + with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f: + json.dump(test_agent, f) + temp_path = f.name + + try: + mock_base_factory = AsyncMock() + mock_runtime = Mock() + mock_base_factory.new_runtime.return_value = mock_runtime + + factory = ConfigurableRuntimeFactory(mock_base_factory) + + # Set "same-as-agent" for both + settings = EvaluationSetModelSettings( + id="test-settings", + model="same-as-agent", + temperature="same-as-agent" + ) + factory.set_model_settings_override(settings) + + result = await factory.new_runtime(temp_path, "test-id") + + assert result == mock_runtime + # Should use original path (no override) + mock_base_factory.new_runtime.assert_called_once_with(temp_path, "test-id") + + finally: + Path(temp_path).unlink(missing_ok=True) + + +@pytest.mark.asyncio +async def test_configurable_factory_temperature_override(): + """Test factory with temperature override.""" + # Create a temporary agent.json file + test_agent = { + "settings": { + "model": "gpt-4", + "temperature": 0.7 + } + } + + with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f: + json.dump(test_agent, f) + temp_path = f.name + + try: + mock_base_factory = AsyncMock() + mock_runtime = Mock() + mock_base_factory.new_runtime.return_value = mock_runtime + + factory = ConfigurableRuntimeFactory(mock_base_factory) + + # Set temperature override + settings = EvaluationSetModelSettings( + id="test-settings", + model="same-as-agent", + temperature=0.2 + ) + factory.set_model_settings_override(settings) + + result = await factory.new_runtime(temp_path, "test-id") + + assert result == mock_runtime + # Should have been called with a modified temp file + call_args = mock_base_factory.new_runtime.call_args + assert call_args[0][0] != temp_path # Different path (temp file) + + # Verify the temp file has correct content + with open(call_args[0][0]) as f: + modified_data = json.load(f) + assert modified_data["settings"]["model"] == "gpt-4" # Unchanged + assert modified_data["settings"]["temperature"] == 0.2 # Changed + + finally: + Path(temp_path).unlink(missing_ok=True) + await factory.dispose() + + +@pytest.mark.asyncio +async def test_configurable_factory_cleanup(): + """Test that temporary files are cleaned up.""" + test_agent = { + "settings": { + "model": "gpt-4", + "temperature": 0.7 + } + } + + with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f: + json.dump(test_agent, f) + temp_path = f.name + + try: + mock_base_factory = AsyncMock() + mock_runtime = Mock() + mock_base_factory.new_runtime.return_value = mock_runtime + + factory = ConfigurableRuntimeFactory(mock_base_factory) + + settings = EvaluationSetModelSettings( + id="test-settings", + model="gpt-3.5-turbo", + temperature=0.5 + ) + factory.set_model_settings_override(settings) + + await factory.new_runtime(temp_path, "test-id") + + # Get the temp file created + call_args = mock_base_factory.new_runtime.call_args + temp_file_created = call_args[0][0] + + # Temp file should exist + assert Path(temp_file_created).exists() + + # Clean up + await factory.dispose() + + # Temp file should be deleted + assert not Path(temp_file_created).exists() + + finally: + Path(temp_path).unlink(missing_ok=True) \ No newline at end of file From 5a027428271419167740176d50f58d182bad9a97 Mon Sep 17 00:00:00 2001 From: mathurk Date: Tue, 30 Dec 2025 16:27:37 -0800 Subject: [PATCH 5/6] fix: linting --- .../_cli/_evals/_configurable_factory.py | 31 +++++-------------- .../_cli/_evals/_models/_evaluation_set.py | 12 ++----- src/uipath/_cli/_utils/_eval_set.py | 1 + 3 files changed, 10 insertions(+), 34 deletions(-) diff --git a/src/uipath/_cli/_evals/_configurable_factory.py b/src/uipath/_cli/_evals/_configurable_factory.py index 3f3ed31f5..22f580173 100644 --- a/src/uipath/_cli/_evals/_configurable_factory.py +++ b/src/uipath/_cli/_evals/_configurable_factory.py @@ -1,11 +1,11 @@ """Configurable runtime factory that supports model settings overrides.""" +import inspect import json +import logging import os import tempfile -import logging from pathlib import Path -from typing import Any from uipath.runtime import UiPathRuntimeFactoryProtocol, UiPathRuntimeProtocol @@ -15,8 +15,7 @@ class ConfigurableRuntimeFactory: - """ - Wrapper factory that supports model settings overrides for evaluation runs. + """Wrapper factory that supports model settings overrides for evaluation runs. This factory wraps an existing UiPathRuntimeFactoryProtocol implementation and allows applying model settings overrides when creating runtimes. @@ -29,8 +28,7 @@ def __init__(self, base_factory: UiPathRuntimeFactoryProtocol): self._temp_files: list[str] = [] def set_model_settings_override(self, settings: EvaluationSetModelSettings | None) -> None: - """ - Set model settings to override when creating runtimes. + """Set model settings to override when creating runtimes. Args: settings: The model settings to apply, or None to clear overrides @@ -38,8 +36,7 @@ def set_model_settings_override(self, settings: EvaluationSetModelSettings | Non self.model_settings_override = settings async def new_runtime(self, entrypoint: str, runtime_id: str) -> UiPathRuntimeProtocol: - """ - Create a new runtime with optional model settings overrides. + """Create a new runtime with optional model settings overrides. If model settings override is configured, creates a temporary modified entrypoint file with the overridden settings. @@ -66,8 +63,7 @@ async def new_runtime(self, entrypoint: str, runtime_id: str) -> UiPathRuntimePr return await self.base_factory.new_runtime(entrypoint, runtime_id) def _apply_overrides(self, entrypoint: str, settings: EvaluationSetModelSettings) -> str | None: - """ - Apply model settings overrides to an agent entrypoint. + """Apply model settings overrides to an agent entrypoint. Creates a temporary modified version of the entrypoint file with the specified model settings overrides applied. @@ -150,17 +146,4 @@ async def dispose(self) -> None: # Delegate disposal to base factory if hasattr(self.base_factory, 'dispose'): - await self.base_factory.dispose() - - # Delegate other factory protocol methods to base factory - async def discover_entrypoints(self) -> list[str]: - """Discover available entrypoints from the base factory.""" - if hasattr(self.base_factory, 'discover_entrypoints'): - return await self.base_factory.discover_entrypoints() - return [] - - async def discover_runtimes(self) -> list[UiPathRuntimeProtocol]: - """Discover available runtimes from the base factory.""" - if hasattr(self.base_factory, 'discover_runtimes'): - return await self.base_factory.discover_runtimes() - return [] \ No newline at end of file + await self.base_factory.dispose() \ No newline at end of file diff --git a/src/uipath/_cli/_evals/_models/_evaluation_set.py b/src/uipath/_cli/_evals/_models/_evaluation_set.py index 780421ae8..0f85875a9 100644 --- a/src/uipath/_cli/_evals/_models/_evaluation_set.py +++ b/src/uipath/_cli/_evals/_models/_evaluation_set.py @@ -70,7 +70,7 @@ class ModelSettings(BaseModel): """Model Generation Parameters.""" model: str = Field(..., alias="model") - temperature: float | None = Field(default=None, alias="temperature") + temperature: float | str | None = Field(default=None, alias="temperature") top_p: float | None = Field(default=None, alias="topP") top_k: int | None = Field(default=None, alias="topK") frequency_penalty: float | None = Field(default=None, alias="frequencyPenalty") @@ -79,17 +79,9 @@ class ModelSettings(BaseModel): class EvaluationSetModelSettings(ModelSettings): - """Model settings configuration for evaluation sets with ID and special values support. - - Extends ModelSettings to add an ID field and support for "same-as-agent" special value - in model and temperature fields for evaluation-specific overrides. - """ + """Model setting overrides within evaluation sets with ID""" id: str = Field(..., alias="id") - # Override model to keep it required and support "same-as-agent" - model: str = Field(..., alias="model") # Can be "same-as-agent" or actual model name - # Override temperature to support "same-as-agent" as string - temperature: float | str | None = Field(default=None, alias="temperature") # Can be "same-as-agent", float, or None class LLMMockingStrategy(BaseMockingStrategy): diff --git a/src/uipath/_cli/_utils/_eval_set.py b/src/uipath/_cli/_utils/_eval_set.py index ffb352be0..3bd7dffd4 100644 --- a/src/uipath/_cli/_utils/_eval_set.py +++ b/src/uipath/_cli/_utils/_eval_set.py @@ -149,6 +149,7 @@ def migrate_evaluation_item( migrate_evaluation_item(evaluation, eval_set.evaluator_refs) for evaluation in eval_set.evaluations ], + model_settings=eval_set.model_settings, ) except ValidationError as e: raise ValueError( From 375c1f99fd2f441ec61a6d77a6f46c62a131d3ac Mon Sep 17 00:00:00 2001 From: mathurk Date: Tue, 30 Dec 2025 16:47:42 -0800 Subject: [PATCH 6/6] fix: lint --- .../_cli/_evals/_configurable_factory.py | 46 +++++++++---- .../_cli/_evals/_models/_evaluation_set.py | 2 +- src/uipath/_cli/_evals/_runtime.py | 29 ++++++--- tests/cli/eval/test_configurable_factory.py | 65 ++++++------------- 4 files changed, 72 insertions(+), 70 deletions(-) diff --git a/src/uipath/_cli/_evals/_configurable_factory.py b/src/uipath/_cli/_evals/_configurable_factory.py index 22f580173..6ae473dc1 100644 --- a/src/uipath/_cli/_evals/_configurable_factory.py +++ b/src/uipath/_cli/_evals/_configurable_factory.py @@ -1,6 +1,5 @@ """Configurable runtime factory that supports model settings overrides.""" -import inspect import json import logging import os @@ -27,7 +26,9 @@ def __init__(self, base_factory: UiPathRuntimeFactoryProtocol): self.model_settings_override: EvaluationSetModelSettings | None = None self._temp_files: list[str] = [] - def set_model_settings_override(self, settings: EvaluationSetModelSettings | None) -> None: + def set_model_settings_override( + self, settings: EvaluationSetModelSettings | None + ) -> None: """Set model settings to override when creating runtimes. Args: @@ -35,7 +36,9 @@ def set_model_settings_override(self, settings: EvaluationSetModelSettings | Non """ self.model_settings_override = settings - async def new_runtime(self, entrypoint: str, runtime_id: str) -> UiPathRuntimeProtocol: + async def new_runtime( + self, entrypoint: str, runtime_id: str + ) -> UiPathRuntimeProtocol: """Create a new runtime with optional model settings overrides. If model settings override is configured, creates a temporary modified @@ -53,7 +56,9 @@ async def new_runtime(self, entrypoint: str, runtime_id: str) -> UiPathRuntimePr return await self.base_factory.new_runtime(entrypoint, runtime_id) # Apply overrides by creating modified entrypoint - modified_entrypoint = self._apply_overrides(entrypoint, self.model_settings_override) + modified_entrypoint = self._apply_overrides( + entrypoint, self.model_settings_override + ) if modified_entrypoint: # Track temp file for cleanup self._temp_files.append(modified_entrypoint) @@ -62,7 +67,9 @@ async def new_runtime(self, entrypoint: str, runtime_id: str) -> UiPathRuntimePr # If override failed, fall back to original return await self.base_factory.new_runtime(entrypoint, runtime_id) - def _apply_overrides(self, entrypoint: str, settings: EvaluationSetModelSettings) -> str | None: + def _apply_overrides( + self, entrypoint: str, settings: EvaluationSetModelSettings + ) -> str | None: """Apply model settings overrides to an agent entrypoint. Creates a temporary modified version of the entrypoint file with @@ -75,8 +82,13 @@ def _apply_overrides(self, entrypoint: str, settings: EvaluationSetModelSettings Returns: Path to temporary modified entrypoint, or None if override not needed/failed """ - if settings.model == "same-as-agent" and settings.temperature == "same-as-agent": - logger.debug("Both model and temperature are 'same-as-agent', no override needed") + if ( + settings.model == "same-as-agent" + and settings.temperature == "same-as-agent" + ): + logger.debug( + "Both model and temperature are 'same-as-agent', no override needed" + ) return None entrypoint_path = Path(entrypoint) @@ -85,7 +97,7 @@ def _apply_overrides(self, entrypoint: str, settings: EvaluationSetModelSettings return None try: - with open(entrypoint_path, 'r') as f: + with open(entrypoint_path, "r") as f: agent_data = json.load(f) except (json.JSONDecodeError, IOError) as e: logger.error(f"Failed to load entrypoint file: {e}") @@ -97,7 +109,9 @@ def _apply_overrides(self, entrypoint: str, settings: EvaluationSetModelSettings # Override model if not "same-as-agent" if settings.model != "same-as-agent": modified_settings["model"] = settings.model - logger.debug(f"Overriding model: {original_settings.get('model')} -> {settings.model}") + logger.debug( + f"Overriding model: {original_settings.get('model')} -> {settings.model}" + ) # Override temperature if not "same-as-agent" if settings.temperature not in ["same-as-agent", None]: @@ -107,7 +121,9 @@ def _apply_overrides(self, entrypoint: str, settings: EvaluationSetModelSettings try: modified_settings["temperature"] = float(settings.temperature) except ValueError: - logger.warning(f"Invalid temperature value: '{settings.temperature}'") + logger.warning( + f"Invalid temperature value: '{settings.temperature}'" + ) if "temperature" in modified_settings: logger.debug( @@ -122,8 +138,10 @@ def _apply_overrides(self, entrypoint: str, settings: EvaluationSetModelSettings # Create a temporary file with the modified agent definition try: - temp_fd, temp_path = tempfile.mkstemp(suffix=".json", prefix="agent_override_") - with os.fdopen(temp_fd, 'w') as temp_file: + temp_fd, temp_path = tempfile.mkstemp( + suffix=".json", prefix="agent_override_" + ) + with os.fdopen(temp_fd, "w") as temp_file: json.dump(agent_data, temp_file, indent=2) logger.info(f"Created temporary entrypoint with overrides: {temp_path}") @@ -145,5 +163,5 @@ async def dispose(self) -> None: self._temp_files.clear() # Delegate disposal to base factory - if hasattr(self.base_factory, 'dispose'): - await self.base_factory.dispose() \ No newline at end of file + if hasattr(self.base_factory, "dispose"): + await self.base_factory.dispose() diff --git a/src/uipath/_cli/_evals/_models/_evaluation_set.py b/src/uipath/_cli/_evals/_models/_evaluation_set.py index 0f85875a9..5f6a2952a 100644 --- a/src/uipath/_cli/_evals/_models/_evaluation_set.py +++ b/src/uipath/_cli/_evals/_models/_evaluation_set.py @@ -79,7 +79,7 @@ class ModelSettings(BaseModel): class EvaluationSetModelSettings(ModelSettings): - """Model setting overrides within evaluation sets with ID""" + """Model setting overrides within evaluation sets with ID.""" id: str = Field(..., alias="id") diff --git a/src/uipath/_cli/_evals/_runtime.py b/src/uipath/_cli/_evals/_runtime.py index 64979091b..1dccee744 100644 --- a/src/uipath/_cli/_evals/_runtime.py +++ b/src/uipath/_cli/_evals/_runtime.py @@ -56,14 +56,12 @@ from ...eval.models.models import AgentExecution, EvalItemResult from .._utils._eval_set import EvalHelpers from .._utils._parallelization import execute_parallel +from ._configurable_factory import ConfigurableRuntimeFactory from ._evaluator_factory import EvaluatorFactory from ._models._evaluation_set import ( EvaluationItem, EvaluationSet, - EvaluationSetModelSettings, - LegacyEvaluationSet, ) -from ._configurable_factory import ConfigurableRuntimeFactory from ._models._exceptions import EvaluationRuntimeException from ._models._output import ( EvaluationResultDto, @@ -101,6 +99,7 @@ def get_agent_model(self) -> str | None: """ ... + class ExecutionSpanExporter(SpanExporter): """Custom exporter that stores spans grouped by execution ids.""" @@ -227,7 +226,7 @@ async def __aexit__(self, *args: Any) -> None: self.coverage.report(include=["./*"], show_missing=True) # Clean up any temporary files created by the factory - if hasattr(self.factory, 'dispose'): + if hasattr(self.factory, "dispose"): await self.factory.dispose() async def get_schema(self, runtime: UiPathRuntimeProtocol) -> UiPathRuntimeSchema: @@ -561,23 +560,35 @@ def _get_and_clear_execution_data( async def _configure_model_settings_override(self) -> None: """Configure the factory with model settings override if specified.""" # Skip if no model settings ID specified - if not self.context.model_settings_id or self.context.model_settings_id == "default": + if ( + not self.context.model_settings_id + or self.context.model_settings_id == "default" + ): return # Load evaluation set to get model settings evaluation_set, _ = EvalHelpers.load_eval_set(self.context.eval_set or "") - if not hasattr(evaluation_set, 'model_settings') or not evaluation_set.model_settings: + if ( + not hasattr(evaluation_set, "model_settings") + or not evaluation_set.model_settings + ): logger.warning("No model settings available in evaluation set") return # Find the specified model settings target_model_settings = next( - (ms for ms in evaluation_set.model_settings if ms.id == self.context.model_settings_id), - None + ( + ms + for ms in evaluation_set.model_settings + if ms.id == self.context.model_settings_id + ), + None, ) if not target_model_settings: - logger.warning(f"Model settings ID '{self.context.model_settings_id}' not found in evaluation set") + logger.warning( + f"Model settings ID '{self.context.model_settings_id}' not found in evaluation set" + ) return logger.info( diff --git a/tests/cli/eval/test_configurable_factory.py b/tests/cli/eval/test_configurable_factory.py index 247b37bcd..690ef90aa 100644 --- a/tests/cli/eval/test_configurable_factory.py +++ b/tests/cli/eval/test_configurable_factory.py @@ -2,10 +2,11 @@ import json import tempfile -import pytest from pathlib import Path from unittest.mock import AsyncMock, Mock +import pytest + from uipath._cli._evals._configurable_factory import ConfigurableRuntimeFactory from uipath._cli._evals._models._evaluation_set import EvaluationSetModelSettings @@ -29,14 +30,9 @@ async def test_configurable_factory_no_override(): async def test_configurable_factory_with_model_override(): """Test factory with model override.""" # Create a temporary agent.json file - test_agent = { - "settings": { - "model": "gpt-4", - "temperature": 0.7 - } - } - - with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f: + test_agent = {"settings": {"model": "gpt-4", "temperature": 0.7}} + + with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f: json.dump(test_agent, f) temp_path = f.name @@ -49,9 +45,7 @@ async def test_configurable_factory_with_model_override(): # Set model override settings = EvaluationSetModelSettings( - id="test-settings", - model="gpt-3.5-turbo", - temperature="same-as-agent" + id="test-settings", model="gpt-3.5-turbo", temperature="same-as-agent" ) factory.set_model_settings_override(settings) @@ -79,14 +73,9 @@ async def test_configurable_factory_with_model_override(): async def test_configurable_factory_same_as_agent(): """Test factory when both settings are 'same-as-agent'.""" # Create a temporary agent.json file - test_agent = { - "settings": { - "model": "gpt-4", - "temperature": 0.7 - } - } - - with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f: + test_agent = {"settings": {"model": "gpt-4", "temperature": 0.7}} + + with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f: json.dump(test_agent, f) temp_path = f.name @@ -99,9 +88,7 @@ async def test_configurable_factory_same_as_agent(): # Set "same-as-agent" for both settings = EvaluationSetModelSettings( - id="test-settings", - model="same-as-agent", - temperature="same-as-agent" + id="test-settings", model="same-as-agent", temperature="same-as-agent" ) factory.set_model_settings_override(settings) @@ -119,14 +106,9 @@ async def test_configurable_factory_same_as_agent(): async def test_configurable_factory_temperature_override(): """Test factory with temperature override.""" # Create a temporary agent.json file - test_agent = { - "settings": { - "model": "gpt-4", - "temperature": 0.7 - } - } - - with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f: + test_agent = {"settings": {"model": "gpt-4", "temperature": 0.7}} + + with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f: json.dump(test_agent, f) temp_path = f.name @@ -139,9 +121,7 @@ async def test_configurable_factory_temperature_override(): # Set temperature override settings = EvaluationSetModelSettings( - id="test-settings", - model="same-as-agent", - temperature=0.2 + id="test-settings", model="same-as-agent", temperature=0.2 ) factory.set_model_settings_override(settings) @@ -166,14 +146,9 @@ async def test_configurable_factory_temperature_override(): @pytest.mark.asyncio async def test_configurable_factory_cleanup(): """Test that temporary files are cleaned up.""" - test_agent = { - "settings": { - "model": "gpt-4", - "temperature": 0.7 - } - } - - with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f: + test_agent = {"settings": {"model": "gpt-4", "temperature": 0.7}} + + with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f: json.dump(test_agent, f) temp_path = f.name @@ -185,9 +160,7 @@ async def test_configurable_factory_cleanup(): factory = ConfigurableRuntimeFactory(mock_base_factory) settings = EvaluationSetModelSettings( - id="test-settings", - model="gpt-3.5-turbo", - temperature=0.5 + id="test-settings", model="gpt-3.5-turbo", temperature=0.5 ) factory.set_model_settings_override(settings) @@ -207,4 +180,4 @@ async def test_configurable_factory_cleanup(): assert not Path(temp_file_created).exists() finally: - Path(temp_path).unlink(missing_ok=True) \ No newline at end of file + Path(temp_path).unlink(missing_ok=True)