From 5522ea7db8ea6d939daab3a5e7d5b4d0316ee75b Mon Sep 17 00:00:00 2001
From: sreeharsha1902 <53158304+sreeharsha1902@users.noreply.github.com>
Date: Fri, 19 Dec 2025 09:10:00 -0500
Subject: [PATCH 1/3] fix(cli): Validate agent names in 'adk create' command

- Add validation to reject invalid Python identifiers (e.g., hyphens)
- Import and use validate_app_name from apps.app module
- Display clear error message when invalid names are provided
- Add comprehensive tests for valid and invalid agent names

Fixes #3977
---
 src/google/adk/cli/cli_create.py             |  8 +++
 tests/unittests/cli/utils/test_cli_create.py | 73 ++++++++++++++++++++
 2 files changed, 81 insertions(+)

diff --git a/src/google/adk/cli/cli_create.py b/src/google/adk/cli/cli_create.py
index 9085586e18..6fee8d855c 100644
--- a/src/google/adk/cli/cli_create.py
+++ b/src/google/adk/cli/cli_create.py
@@ -21,6 +21,8 @@
 
 import click
 
+from ..apps.app import validate_app_name
+
 _INIT_PY_TEMPLATE = """\
 from . import agent
 """
@@ -294,6 +296,12 @@ def run_cmd(
       VertexAI as backend.
     type: Optional[str], Whether to define agent with config file or code.
   """
+  # Validate agent name is a valid Python identifier
+  try:
+    validate_app_name(agent_name)
+  except ValueError as e:
+    raise click.UsageError(str(e)) from e
+
   agent_folder = os.path.join(os.getcwd(), agent_name)
   # check folder doesn't exist or it's empty. Otherwise, throw
   if os.path.exists(agent_folder) and os.listdir(agent_folder):
diff --git a/tests/unittests/cli/utils/test_cli_create.py b/tests/unittests/cli/utils/test_cli_create.py
index 33b3f877a8..211bece525 100644
--- a/tests/unittests/cli/utils/test_cli_create.py
+++ b/tests/unittests/cli/utils/test_cli_create.py
@@ -301,3 +301,76 @@ def test_get_gcp_region_from_gcloud_fail(
       ),
   )
   assert cli_create._get_gcp_region_from_gcloud() == ""
+
+
+# run_cmd validation
+def test_run_cmd_rejects_invalid_agent_names(
+    monkeypatch: pytest.MonkeyPatch, tmp_path: Path
+) -> None:
+  """run_cmd should reject agent names with hyphens or other invalid chars."""
+  monkeypatch.setattr(os, "getcwd", lambda: str(tmp_path))
+
+  # Test hyphen rejection
+  with pytest.raises(click.UsageError) as exc_info:
+    cli_create.run_cmd(
+        "my-agent",
+        model="gemini-2.5-flash",
+        google_api_key="test-key",
+        google_cloud_project=None,
+        google_cloud_region=None,
+        type="code",
+    )
+  assert "must be a valid identifier" in str(exc_info.value)
+
+  # Test other invalid characters
+  with pytest.raises(click.UsageError) as exc_info:
+    cli_create.run_cmd(
+        "my agent",  # space
+        model="gemini-2.5-flash",
+        google_api_key="test-key",
+        google_cloud_project=None,
+        google_cloud_region=None,
+        type="code",
+    )
+  assert "must be a valid identifier" in str(exc_info.value)
+
+  # Test reserved name
+  with pytest.raises(click.UsageError) as exc_info:
+    cli_create.run_cmd(
+        "user",
+        model="gemini-2.5-flash",
+        google_api_key="test-key",
+        google_cloud_project=None,
+        google_cloud_region=None,
+        type="code",
+    )
+  assert "reserved for end-user input" in str(exc_info.value)
+
+
+def test_run_cmd_accepts_valid_agent_names(
+    monkeypatch: pytest.MonkeyPatch, tmp_path: Path
+) -> None:
+  """run_cmd should accept valid Python identifiers."""
+  monkeypatch.setattr(os, "getcwd", lambda: str(tmp_path))
+  monkeypatch.setattr(click, "prompt", lambda *a, **k: "1")  # Choose type
+
+  # Valid names should work
+  cli_create.run_cmd(
+      "my_agent",
+      model="gemini-2.5-flash",
+      google_api_key="test-key",
+      google_cloud_project=None,
+      google_cloud_region=None,
+      type="code",
+  )
+  assert (tmp_path / "my_agent").exists()
+
+  cli_create.run_cmd(
+      "agent123",
+      model="gemini-2.5-flash",
+      google_api_key="test-key",
+      google_cloud_project=None,
+      google_cloud_region=None,
+      type="code",
+  )
+  assert (tmp_path / "agent123").exists()

From 122200e17afc6a35f4ef68322bb6d678b79e4411 Mon Sep 17 00:00:00 2001
From: sreeharsha1902 <53158304+sreeharsha1902@users.noreply.github.com>
Date: Fri, 19 Dec 2025 09:10:10 -0500
Subject: [PATCH 2/3] fix(eval): Accept string enum values for
 ToolTrajectoryCriterion.match_type

- Add field_validator to convert string enum names (EXACT, IN_ORDER, ANY_ORDER) to enum values
- Maintain backward compatibility with integer and enum values
- Provide clear error message for invalid string values
- Add comprehensive tests covering all input types

Fixes #3711
---
 src/google/adk/evaluation/eval_metrics.py     | 15 ++++++
 .../evaluation/test_trajectory_evaluator.py   | 53 +++++++++++++++++++
 2 files changed, 68 insertions(+)

diff --git a/src/google/adk/evaluation/eval_metrics.py b/src/google/adk/evaluation/eval_metrics.py
index b7c544ccad..c0e46e19c1 100644
--- a/src/google/adk/evaluation/eval_metrics.py
+++ b/src/google/adk/evaluation/eval_metrics.py
@@ -23,6 +23,7 @@
 from pydantic import BaseModel
 from pydantic import ConfigDict
 from pydantic import Field
+from pydantic import field_validator
 from pydantic.json_schema import SkipJsonSchema
 from typing_extensions import TypeAlias
 
@@ -224,6 +225,20 @@ class MatchType(Enum):
       ),
   )
 
+  @field_validator("match_type", mode="before")
+  @classmethod
+  def _validate_match_type(cls, v):
+    """Convert string enum names to enum values for backward compatibility."""
+    if isinstance(v, str):
+      try:
+        return cls.MatchType[v]
+      except KeyError:
+        raise ValueError(
+            f"Invalid match_type: '{v}'. Must be one of: EXACT, IN_ORDER,"
+            " ANY_ORDER"
+        )
+    return v
+
 
 class LlmBackedUserSimulatorCriterion(LlmAsAJudgeCriterion):
   """Criterion for LLM-backed User Simulator Evaluators."""
diff --git a/tests/unittests/evaluation/test_trajectory_evaluator.py b/tests/unittests/evaluation/test_trajectory_evaluator.py
index 0795739768..f0e94d3249 100644
--- a/tests/unittests/evaluation/test_trajectory_evaluator.py
+++ b/tests/unittests/evaluation/test_trajectory_evaluator.py
@@ -407,3 +407,56 @@ def test_evaluate_invocations_no_invocations(evaluator: TrajectoryEvaluator):
   assert result.overall_score is None
   assert result.overall_eval_status == EvalStatus.NOT_EVALUATED
   assert not result.per_invocation_results
+
+
+def test_tool_trajectory_criterion_accepts_string_match_type():
+  """Tests that ToolTrajectoryCriterion accepts string enum values."""
+  # Test string values (as documented)
+  criterion_exact = ToolTrajectoryCriterion(threshold=0.5, match_type="EXACT")
+  assert criterion_exact.match_type == ToolTrajectoryCriterion.MatchType.EXACT
+
+  criterion_in_order = ToolTrajectoryCriterion(
+      threshold=0.5, match_type="IN_ORDER"
+  )
+  assert (
+      criterion_in_order.match_type
+      == ToolTrajectoryCriterion.MatchType.IN_ORDER
+  )
+
+  criterion_any_order = ToolTrajectoryCriterion(
+      threshold=0.5, match_type="ANY_ORDER"
+  )
+  assert (
+      criterion_any_order.match_type
+      == ToolTrajectoryCriterion.MatchType.ANY_ORDER
+  )
+
+
+def test_tool_trajectory_criterion_accepts_enum_match_type():
+  """Tests that ToolTrajectoryCriterion still accepts enum values."""
+  # Test enum values (for backward compatibility)
+  criterion = ToolTrajectoryCriterion(
+      threshold=0.5, match_type=ToolTrajectoryCriterion.MatchType.IN_ORDER
+  )
+  assert criterion.match_type == ToolTrajectoryCriterion.MatchType.IN_ORDER
+
+
+def test_tool_trajectory_criterion_accepts_int_match_type():
+  """Tests that ToolTrajectoryCriterion still accepts integer values."""
+  # Test integer values (0, 1, 2)
+  criterion_0 = ToolTrajectoryCriterion(threshold=0.5, match_type=0)
+  assert criterion_0.match_type == ToolTrajectoryCriterion.MatchType.EXACT
+
+  criterion_1 = ToolTrajectoryCriterion(threshold=0.5, match_type=1)
+  assert criterion_1.match_type == ToolTrajectoryCriterion.MatchType.IN_ORDER
+
+  criterion_2 = ToolTrajectoryCriterion(threshold=0.5, match_type=2)
+  assert criterion_2.match_type == ToolTrajectoryCriterion.MatchType.ANY_ORDER
+
+
+def test_tool_trajectory_criterion_rejects_invalid_string():
+  """Tests that ToolTrajectoryCriterion rejects invalid string values."""
+  with pytest.raises(ValueError) as exc_info:
+    ToolTrajectoryCriterion(threshold=0.5, match_type="INVALID")
+  assert "Invalid match_type" in str(exc_info.value)
+  assert "EXACT, IN_ORDER, ANY_ORDER" in str(exc_info.value)

From b6cdaaafc12b9e41e66043b56e7e186c37d76d52 Mon Sep 17 00:00:00 2001
From: sreeharsha1902 <53158304+sreeharsha1902@users.noreply.github.com>
Date: Fri, 19 Dec 2025 09:17:01 -0500
Subject: [PATCH 3/3] refactor: Apply code review suggestions

- Use dynamic enum member list in error messages for better maintainability
- Consolidate test functions using pytest.mark.parametrize
- Reduce code duplication and improve test readability

Co-authored-by: gemini-code-assist[bot] <gemini-code-assist[bot]@users.noreply.github.com>
---
 contributing/samples/gepa/experiment.py       |  1 -
 contributing/samples/gepa/run_experiment.py   |  1 -
 src/google/adk/evaluation/eval_metrics.py     |  4 +-
 tests/unittests/cli/utils/test_cli_create.py  | 63 ++++++------------
 .../evaluation/test_trajectory_evaluator.py   | 65 +++++++------------
 5 files changed, 45 insertions(+), 89 deletions(-)

diff --git a/contributing/samples/gepa/experiment.py b/contributing/samples/gepa/experiment.py
index 2f5d03a772..f68b349d9c 100644
--- a/contributing/samples/gepa/experiment.py
+++ b/contributing/samples/gepa/experiment.py
@@ -43,7 +43,6 @@
 from tau_bench.types import EnvRunResult
 from tau_bench.types import RunConfig
 import tau_bench_agent as tau_bench_agent_lib
-
 import utils
 
 
diff --git a/contributing/samples/gepa/run_experiment.py b/contributing/samples/gepa/run_experiment.py
index cfd850b3a3..1bc4ee58c8 100644
--- a/contributing/samples/gepa/run_experiment.py
+++ b/contributing/samples/gepa/run_experiment.py
@@ -25,7 +25,6 @@
 from absl import flags
 import experiment
 from google.genai import types
-
 import utils
 
 _OUTPUT_DIR = flags.DEFINE_string(
diff --git a/src/google/adk/evaluation/eval_metrics.py b/src/google/adk/evaluation/eval_metrics.py
index c0e46e19c1..95ac8ba768 100644
--- a/src/google/adk/evaluation/eval_metrics.py
+++ b/src/google/adk/evaluation/eval_metrics.py
@@ -234,8 +234,8 @@ def _validate_match_type(cls, v):
         return cls.MatchType[v]
       except KeyError:
         raise ValueError(
-            f"Invalid match_type: '{v}'. Must be one of: EXACT, IN_ORDER,"
-            " ANY_ORDER"
+            f"Invalid match_type: '{v}'. Must be one of: "
+            f"{', '.join(cls.MatchType.__members__)}"
         )
     return v
 
diff --git a/tests/unittests/cli/utils/test_cli_create.py b/tests/unittests/cli/utils/test_cli_create.py
index 211bece525..d41ac66279 100644
--- a/tests/unittests/cli/utils/test_cli_create.py
+++ b/tests/unittests/cli/utils/test_cli_create.py
@@ -304,73 +304,48 @@ def test_get_gcp_region_from_gcloud_fail(
 
 
 # run_cmd validation
+@pytest.mark.parametrize(
+    "invalid_name, error_substring",
+    [
+        ("my-agent", "must be a valid identifier"),
+        ("my agent", "must be a valid identifier"),
+        ("user", "reserved for end-user input"),
+    ],
+)
 def test_run_cmd_rejects_invalid_agent_names(
-    monkeypatch: pytest.MonkeyPatch, tmp_path: Path
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+    invalid_name: str,
+    error_substring: str,
 ) -> None:
-  """run_cmd should reject agent names with hyphens or other invalid chars."""
+  """run_cmd should reject invalid agent names."""
   monkeypatch.setattr(os, "getcwd", lambda: str(tmp_path))
-
-  # Test hyphen rejection
-  with pytest.raises(click.UsageError) as exc_info:
-    cli_create.run_cmd(
-        "my-agent",
-        model="gemini-2.5-flash",
-        google_api_key="test-key",
-        google_cloud_project=None,
-        google_cloud_region=None,
-        type="code",
-    )
-  assert "must be a valid identifier" in str(exc_info.value)
-
-  # Test other invalid characters
-  with pytest.raises(click.UsageError) as exc_info:
-    cli_create.run_cmd(
-        "my agent",  # space
-        model="gemini-2.5-flash",
-        google_api_key="test-key",
-        google_cloud_project=None,
-        google_cloud_region=None,
-        type="code",
-    )
-  assert "must be a valid identifier" in str(exc_info.value)
-
-  # Test reserved name
   with pytest.raises(click.UsageError) as exc_info:
     cli_create.run_cmd(
-        "user",
+        invalid_name,
         model="gemini-2.5-flash",
         google_api_key="test-key",
         google_cloud_project=None,
         google_cloud_region=None,
         type="code",
     )
-  assert "reserved for end-user input" in str(exc_info.value)
+  assert error_substring in str(exc_info.value)
 
 
+@pytest.mark.parametrize("valid_name", ["my_agent", "agent123"])
 def test_run_cmd_accepts_valid_agent_names(
-    monkeypatch: pytest.MonkeyPatch, tmp_path: Path
+    monkeypatch: pytest.MonkeyPatch, tmp_path: Path, valid_name: str
 ) -> None:
   """run_cmd should accept valid Python identifiers."""
   monkeypatch.setattr(os, "getcwd", lambda: str(tmp_path))
   monkeypatch.setattr(click, "prompt", lambda *a, **k: "1")  # Choose type
 
-  # Valid names should work
-  cli_create.run_cmd(
-      "my_agent",
-      model="gemini-2.5-flash",
-      google_api_key="test-key",
-      google_cloud_project=None,
-      google_cloud_region=None,
-      type="code",
-  )
-  assert (tmp_path / "my_agent").exists()
-
   cli_create.run_cmd(
-      "agent123",
+      valid_name,
       model="gemini-2.5-flash",
       google_api_key="test-key",
       google_cloud_project=None,
       google_cloud_region=None,
       type="code",
   )
-  assert (tmp_path / "agent123").exists()
+  assert (tmp_path / valid_name).exists()
diff --git a/tests/unittests/evaluation/test_trajectory_evaluator.py b/tests/unittests/evaluation/test_trajectory_evaluator.py
index f0e94d3249..b8467f3d83 100644
--- a/tests/unittests/evaluation/test_trajectory_evaluator.py
+++ b/tests/unittests/evaluation/test_trajectory_evaluator.py
@@ -409,49 +409,32 @@ def test_evaluate_invocations_no_invocations(evaluator: TrajectoryEvaluator):
   assert not result.per_invocation_results
 
 
-def test_tool_trajectory_criterion_accepts_string_match_type():
-  """Tests that ToolTrajectoryCriterion accepts string enum values."""
-  # Test string values (as documented)
-  criterion_exact = ToolTrajectoryCriterion(threshold=0.5, match_type="EXACT")
-  assert criterion_exact.match_type == ToolTrajectoryCriterion.MatchType.EXACT
-
-  criterion_in_order = ToolTrajectoryCriterion(
-      threshold=0.5, match_type="IN_ORDER"
-  )
-  assert (
-      criterion_in_order.match_type
-      == ToolTrajectoryCriterion.MatchType.IN_ORDER
-  )
-
-  criterion_any_order = ToolTrajectoryCriterion(
-      threshold=0.5, match_type="ANY_ORDER"
-  )
-  assert (
-      criterion_any_order.match_type
-      == ToolTrajectoryCriterion.MatchType.ANY_ORDER
-  )
-
-
-def test_tool_trajectory_criterion_accepts_enum_match_type():
-  """Tests that ToolTrajectoryCriterion still accepts enum values."""
-  # Test enum values (for backward compatibility)
+@pytest.mark.parametrize(
+    "match_type_input, expected_enum",
+    [
+        # String values
+        ("EXACT", ToolTrajectoryCriterion.MatchType.EXACT),
+        ("IN_ORDER", ToolTrajectoryCriterion.MatchType.IN_ORDER),
+        ("ANY_ORDER", ToolTrajectoryCriterion.MatchType.ANY_ORDER),
+        # Enum values
+        (
+            ToolTrajectoryCriterion.MatchType.IN_ORDER,
+            ToolTrajectoryCriterion.MatchType.IN_ORDER,
+        ),
+        # Integer values
+        (0, ToolTrajectoryCriterion.MatchType.EXACT),
+        (1, ToolTrajectoryCriterion.MatchType.IN_ORDER),
+        (2, ToolTrajectoryCriterion.MatchType.ANY_ORDER),
+    ],
+)
+def test_tool_trajectory_criterion_accepts_valid_match_types(
+    match_type_input, expected_enum
+):
+  """Tests that ToolTrajectoryCriterion accepts string, enum, and int values."""
   criterion = ToolTrajectoryCriterion(
-      threshold=0.5, match_type=ToolTrajectoryCriterion.MatchType.IN_ORDER
+      threshold=0.5, match_type=match_type_input
   )
-  assert criterion.match_type == ToolTrajectoryCriterion.MatchType.IN_ORDER
-
-
-def test_tool_trajectory_criterion_accepts_int_match_type():
-  """Tests that ToolTrajectoryCriterion still accepts integer values."""
-  # Test integer values (0, 1, 2)
-  criterion_0 = ToolTrajectoryCriterion(threshold=0.5, match_type=0)
-  assert criterion_0.match_type == ToolTrajectoryCriterion.MatchType.EXACT
-
-  criterion_1 = ToolTrajectoryCriterion(threshold=0.5, match_type=1)
-  assert criterion_1.match_type == ToolTrajectoryCriterion.MatchType.IN_ORDER
-
-  criterion_2 = ToolTrajectoryCriterion(threshold=0.5, match_type=2)
-  assert criterion_2.match_type == ToolTrajectoryCriterion.MatchType.ANY_ORDER
+  assert criterion.match_type == expected_enum
 
 
 def test_tool_trajectory_criterion_rejects_invalid_string():