From 8a16b455951bd3ae2e1165151a323e68f71ade90 Mon Sep 17 00:00:00 2001 From: MagellaX Date: Wed, 19 Nov 2025 17:47:26 +0530 Subject: [PATCH 1/3] Improve text-2048 board setup tool --- .../src/hud_controller/setup/board.py | 83 ++++++++++++++++-- hud/tests/test_text_2048_setup_board.py | 86 +++++++++++++++++++ 2 files changed, 160 insertions(+), 9 deletions(-) create mode 100644 hud/tests/test_text_2048_setup_board.py diff --git a/environments/text_2048/src/hud_controller/setup/board.py b/environments/text_2048/src/hud_controller/setup/board.py index 52a521fb..1e99ce77 100644 --- a/environments/text_2048/src/hud_controller/setup/board.py +++ b/environments/text_2048/src/hud_controller/setup/board.py @@ -1,21 +1,86 @@ """Board-size setup function for 2048.""" -from mcp.types import TextContent, ContentBlock +from __future__ import annotations + +import json +from contextlib import contextmanager +from typing import Any + +from mcp.types import ContentBlock, TextContent + +try: + from hud.telemetry import trace +except ModuleNotFoundError: # pragma: no cover - optional dependency safeguard + + @contextmanager + def trace(*_args: Any, **_kwargs: Any) -> Any: # type: ignore[misc] + yield + from . import setup +DEFAULT_BOARD_SIZE = 4 +MIN_BOARD_SIZE = 3 +MAX_BOARD_SIZE = 8 + + +def _normalize_board_size(value: Any) -> tuple[int, str | None]: + """Convert ``value`` into a clamped integer plus a note if coercion was needed.""" + note: str | None = None + try: + size = int(value) + except (TypeError, ValueError): + size = DEFAULT_BOARD_SIZE + note = ( + f"Invalid board size {value!r}; defaulting to {DEFAULT_BOARD_SIZE}." + ) + return size, note + + if size < MIN_BOARD_SIZE: + note = ( + f"Requested board size {size} is below {MIN_BOARD_SIZE}; " + f"using {MIN_BOARD_SIZE}." + ) + return MIN_BOARD_SIZE, note + + if size > MAX_BOARD_SIZE: + note = ( + f"Requested board size {size} exceeds {MAX_BOARD_SIZE}; " + f"using {MAX_BOARD_SIZE}." + ) + return MAX_BOARD_SIZE, note + + return size, None + @setup.tool("board") -async def setup_board(board_size: int = 4) -> list[ContentBlock]: +async def setup_board(board_size: int = DEFAULT_BOARD_SIZE) -> list[ContentBlock]: """Initialize a new game with the specified board size.""" + normalized_size, validation_note = _normalize_board_size(board_size) game = setup.env - game.reset(size=board_size) - # Get the initial board state to show the agent - board_display = game.get_board_ascii() + with trace( + "text-2048 setup", + attrs={ + "requested_board_size": board_size, + "board_size": normalized_size, + "validation_note": validation_note or "", + }, + ): + game.reset(size=normalized_size) + board_display = game.get_board_ascii() + state_payload = { + "requested_board_size": board_size, + "board_size": normalized_size, + "state": game.get_state(), + } + + lines = [f"{normalized_size}x{normalized_size} game initialized"] + if validation_note: + lines.append(validation_note) + lines.append("") + lines.append(board_display) - # Return the initial board display return [ - TextContent( - text=f"{board_size}x{board_size} game initialized\n\n{board_display}", type="text" - ) + TextContent(text="\n".join(lines), type="text"), + TextContent(text=json.dumps(state_payload), type="text"), ] diff --git a/hud/tests/test_text_2048_setup_board.py b/hud/tests/test_text_2048_setup_board.py new file mode 100644 index 00000000..d2e18909 --- /dev/null +++ b/hud/tests/test_text_2048_setup_board.py @@ -0,0 +1,86 @@ +"""Tests for the text-2048 setup board tool.""" + +from __future__ import annotations + +import json +import sys +from contextlib import contextmanager +from pathlib import Path + +import pytest + +# Ensure the environment package is importable from the repository root +REPO_ROOT = Path(__file__).resolve().parents[2] +ENV_SRC = REPO_ROOT / "environments" / "text_2048" / "src" +if str(ENV_SRC) not in sys.path: + sys.path.append(str(ENV_SRC)) + +from hud_controller.setup import board as board_module # noqa: E402 + + +class DummyGame: + """Minimal game stub for exercising setup_board.""" + + def __init__(self) -> None: + self.size = 0 + self.reset_calls: list[int] = [] + + def reset(self, size: int = 4) -> None: + self.size = size + self.reset_calls.append(size) + + def get_board_ascii(self) -> str: + return f"{self.size}x{self.size} board" + + def get_state(self) -> dict: + return { + "board": [[self.size]], + "score": 0, + "moves": 0, + "game_over": False, + "won": False, + "highest_tile": self.size, + } + + +@pytest.fixture(autouse=True) +def stub_trace(monkeypatch: pytest.MonkeyPatch) -> None: + """Replace telemetry trace context manager with a no-op.""" + + @contextmanager + def _noop_trace(*args, **kwargs): # noqa: ANN001, ANN003 + yield + + monkeypatch.setattr(board_module, "trace", _noop_trace) + + +@pytest.mark.asyncio +async def test_setup_board_returns_ascii_and_json(monkeypatch: pytest.MonkeyPatch) -> None: + game = DummyGame() + monkeypatch.setattr(board_module.setup, "env", game, raising=False) + + result = await board_module.setup_board.fn(board_size=5) + assert len(result) == 2 + + ascii_block, json_block = result + assert "5x5 game initialized" in ascii_block.text + + payload = json.loads(json_block.text) + assert payload["board_size"] == 5 + assert payload["state"]["board"] == [[5]] + + +@pytest.mark.asyncio +async def test_setup_board_clamps_out_of_range_values(monkeypatch: pytest.MonkeyPatch) -> None: + game = DummyGame() + monkeypatch.setattr(board_module.setup, "env", game, raising=False) + + result = await board_module.setup_board.fn(board_size=99) + ascii_block, json_block = result + + assert str(board_module.MAX_BOARD_SIZE) in ascii_block.text + assert "using" in ascii_block.text.lower() + + payload = json.loads(json_block.text) + assert payload["board_size"] == board_module.MAX_BOARD_SIZE + assert payload["requested_board_size"] == 99 From 9625dafde0db73793b85773882fd5b04a4fc73ab Mon Sep 17 00:00:00 2001 From: MagellaX Date: Sun, 30 Nov 2025 12:45:36 +0530 Subject: [PATCH 2/3] Add lightweight MemoryTool with in-memory and optional Qdrant support --- hud/tools/__init__.py | 2 + hud/tools/memory.py | 174 ++++++++++++++++++++++++++++ hud/tools/tests/test_memory_tool.py | 34 ++++++ 3 files changed, 210 insertions(+) create mode 100644 hud/tools/memory.py create mode 100644 hud/tools/tests/test_memory_tool.py diff --git a/hud/tools/__init__.py b/hud/tools/__init__.py index 57f70e99..a9f77627 100644 --- a/hud/tools/__init__.py +++ b/hud/tools/__init__.py @@ -7,6 +7,7 @@ from .base import BaseHub, BaseTool from .bash import BashTool from .edit import EditTool +from .memory import MemoryTool from .playwright import PlaywrightTool from .response import ResponseTool from .submit import SubmitTool @@ -30,6 +31,7 @@ "OpenAIComputerTool", "PlaywrightTool", "ResponseTool", + "MemoryTool", "SubmitTool", ] diff --git a/hud/tools/memory.py b/hud/tools/memory.py new file mode 100644 index 00000000..437f68c0 --- /dev/null +++ b/hud/tools/memory.py @@ -0,0 +1,174 @@ +"""Lightweight memory tool with optional Qdrant backend.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + +from mcp.types import ContentBlock, TextContent + +from hud.tools.base import BaseTool + + +def _tokenize(text: str) -> set[str]: + return {t.lower() for t in text.split() if t} + + +def _jaccard(a: set[str], b: set[str]) -> float: + if not a or not b: + return 0.0 + inter = len(a & b) + union = len(a | b) + return inter / union if union else 0.0 + + +@dataclass +class MemoryEntry: + text: str + metadata: dict[str, Any] + tokens: set[str] + + +class InMemoryStore: + """Simple token-overlap store.""" + + def __init__(self) -> None: + self._entries: list[MemoryEntry] = [] + + def add(self, text: str, metadata: dict[str, Any] | None = None) -> None: + self._entries.append( + MemoryEntry(text=text, metadata=metadata or {}, tokens=_tokenize(text)) + ) + + def query(self, query: str, top_k: int = 5) -> list[MemoryEntry]: + q_tokens = _tokenize(query) + scored = [(entry, _jaccard(q_tokens, entry.tokens)) for entry in self._entries] + scored.sort(key=lambda x: x[1], reverse=True) + return [entry for entry, score in scored[:top_k] if score > 0.0] + + +class MemoryTool(BaseTool): + """Add and search short-term memory for a session. + + If Qdrant is available and configured (QDRANT_URL), a remote collection is used. + Otherwise, an in-memory fallback is used. + """ + + def __init__( + self, + collection: str = "hud_memory", + qdrant_url: str | None = None, + qdrant_api_key: str | None = None, + **kwargs: Any, + ) -> None: + super().__init__(**kwargs) + self._backend = self._build_backend(collection, qdrant_url, qdrant_api_key) + + def _build_backend( + self, collection: str, qdrant_url: str | None, qdrant_api_key: str | None + ) -> Any: + if qdrant_url: + try: + from qdrant_client import QdrantClient + from qdrant_client.http.models import Distance, VectorParams + except Exception: + pass + else: + client = QdrantClient(url=qdrant_url, api_key=qdrant_api_key) + try: + client.get_collection(collection) + except Exception: + client.create_collection( + collection_name=collection, + vectors_config=VectorParams(size=384, distance=Distance.COSINE), + ) + return QdrantBackend(client, collection) + return InMemoryStore() + + @property + def parameters(self) -> dict[str, Any]: # type: ignore[override] + return { + "type": "object", + "properties": { + "action": { + "type": "string", + "enum": ["add", "search"], + "description": "add = store text, search = retrieve similar items", + }, + "text": {"type": "string", "description": "content to store or query"}, + "metadata": { + "type": "object", + "description": "optional metadata to store with the entry", + }, + "top_k": { + "type": "integer", + "minimum": 1, + "maximum": 50, + "default": 5, + "description": "results to return when searching", + }, + }, + "required": ["action", "text"], + } + + async def __call__( + self, action: str, text: str, metadata: dict[str, Any] | None = None, top_k: int = 5 + ) -> list[ContentBlock]: + if action == "add": + self._backend.add(text=text, metadata=metadata) + return [TextContent(text="stored", type="text")] + if action == "search": + entries = self._backend.query(query=text, top_k=top_k) + if not entries: + return [TextContent(text="no matches", type="text")] + lines = [] + for idx, entry in enumerate(entries, 1): + meta = entry.metadata or {} + meta_str = f" | metadata={meta}" if meta else "" + lines.append(f"{idx}. {entry.text}{meta_str}") + return [TextContent(text="\n".join(lines), type="text")] + return [TextContent(text="unknown action", type="text")] + + +class QdrantBackend: + """Minimal Qdrant wrapper with on-the-fly sentence-transformer embeddings.""" + + def __init__(self, client: Any, collection: str) -> None: + self.client = client + self.collection = collection + self._embedder = self._load_embedder() + + def _load_embedder(self) -> Any: + try: + from sentence_transformers import SentenceTransformer + except Exception as e: + raise RuntimeError("sentence-transformers is required for Qdrant backend") from e + return SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2") + + def add(self, text: str, metadata: dict[str, Any] | None = None) -> None: + vec = self._embedder.encode(text).tolist() + payload = {"text": text, "metadata": metadata or {}} + self.client.upsert( + collection_name=self.collection, + points=[{"vector": vec, "payload": payload}], + ) + + def query(self, query: str, top_k: int = 5) -> list[MemoryEntry]: + vec = self._embedder.encode(query).tolist() + res = self.client.search( + collection_name=self.collection, + query_vector=vec, + limit=top_k, + with_payload=True, + ) + entries: list[MemoryEntry] = [] + for point in res: + payload = point.payload or {} + entries.append( + MemoryEntry( + text=payload.get("text", ""), + metadata=payload.get("metadata", {}), + tokens=set(), + ) + ) + return entries diff --git a/hud/tools/tests/test_memory_tool.py b/hud/tools/tests/test_memory_tool.py new file mode 100644 index 00000000..4c4adc88 --- /dev/null +++ b/hud/tools/tests/test_memory_tool.py @@ -0,0 +1,34 @@ +from __future__ import annotations + +import pytest +from mcp.types import TextContent + +from hud.tools.memory import InMemoryStore, MemoryTool + + +def test_inmemory_store_add_and_query() -> None: + store = InMemoryStore() + store.add("apple orange", {"kind": "fruit"}) + store.add("carrot celery", {"kind": "veg"}) + + results = store.query("apple", top_k=5) + assert len(results) == 1 + assert results[0].metadata["kind"] == "fruit" + + +@pytest.mark.asyncio +async def test_memory_tool_add_and_search() -> None: + tool = MemoryTool() + + out_add = await tool(action="add", text="alpha beta", metadata={"id": 1}) + assert isinstance(out_add[0], TextContent) + + out_search = await tool(action="search", text="alpha") + assert out_search[0].text.startswith("1.") + + +@pytest.mark.asyncio +async def test_memory_tool_unknown_action() -> None: + tool = MemoryTool() + res = await tool(action="noop", text="x") + assert res[0].text == "unknown action" From 01988977bacd8cf19059a46e7cd618a620c2e6af Mon Sep 17 00:00:00 2001 From: MagellaX Date: Sun, 30 Nov 2025 12:47:31 +0530 Subject: [PATCH 3/3] Revert "Add lightweight MemoryTool with in-memory and optional Qdrant support" This reverts commit 9625dafde0db73793b85773882fd5b04a4fc73ab. --- hud/tools/__init__.py | 2 - hud/tools/memory.py | 174 ---------------------------- hud/tools/tests/test_memory_tool.py | 34 ------ 3 files changed, 210 deletions(-) delete mode 100644 hud/tools/memory.py delete mode 100644 hud/tools/tests/test_memory_tool.py diff --git a/hud/tools/__init__.py b/hud/tools/__init__.py index a9f77627..57f70e99 100644 --- a/hud/tools/__init__.py +++ b/hud/tools/__init__.py @@ -7,7 +7,6 @@ from .base import BaseHub, BaseTool from .bash import BashTool from .edit import EditTool -from .memory import MemoryTool from .playwright import PlaywrightTool from .response import ResponseTool from .submit import SubmitTool @@ -31,7 +30,6 @@ "OpenAIComputerTool", "PlaywrightTool", "ResponseTool", - "MemoryTool", "SubmitTool", ] diff --git a/hud/tools/memory.py b/hud/tools/memory.py deleted file mode 100644 index 437f68c0..00000000 --- a/hud/tools/memory.py +++ /dev/null @@ -1,174 +0,0 @@ -"""Lightweight memory tool with optional Qdrant backend.""" - -from __future__ import annotations - -from dataclasses import dataclass -from typing import Any - -from mcp.types import ContentBlock, TextContent - -from hud.tools.base import BaseTool - - -def _tokenize(text: str) -> set[str]: - return {t.lower() for t in text.split() if t} - - -def _jaccard(a: set[str], b: set[str]) -> float: - if not a or not b: - return 0.0 - inter = len(a & b) - union = len(a | b) - return inter / union if union else 0.0 - - -@dataclass -class MemoryEntry: - text: str - metadata: dict[str, Any] - tokens: set[str] - - -class InMemoryStore: - """Simple token-overlap store.""" - - def __init__(self) -> None: - self._entries: list[MemoryEntry] = [] - - def add(self, text: str, metadata: dict[str, Any] | None = None) -> None: - self._entries.append( - MemoryEntry(text=text, metadata=metadata or {}, tokens=_tokenize(text)) - ) - - def query(self, query: str, top_k: int = 5) -> list[MemoryEntry]: - q_tokens = _tokenize(query) - scored = [(entry, _jaccard(q_tokens, entry.tokens)) for entry in self._entries] - scored.sort(key=lambda x: x[1], reverse=True) - return [entry for entry, score in scored[:top_k] if score > 0.0] - - -class MemoryTool(BaseTool): - """Add and search short-term memory for a session. - - If Qdrant is available and configured (QDRANT_URL), a remote collection is used. - Otherwise, an in-memory fallback is used. - """ - - def __init__( - self, - collection: str = "hud_memory", - qdrant_url: str | None = None, - qdrant_api_key: str | None = None, - **kwargs: Any, - ) -> None: - super().__init__(**kwargs) - self._backend = self._build_backend(collection, qdrant_url, qdrant_api_key) - - def _build_backend( - self, collection: str, qdrant_url: str | None, qdrant_api_key: str | None - ) -> Any: - if qdrant_url: - try: - from qdrant_client import QdrantClient - from qdrant_client.http.models import Distance, VectorParams - except Exception: - pass - else: - client = QdrantClient(url=qdrant_url, api_key=qdrant_api_key) - try: - client.get_collection(collection) - except Exception: - client.create_collection( - collection_name=collection, - vectors_config=VectorParams(size=384, distance=Distance.COSINE), - ) - return QdrantBackend(client, collection) - return InMemoryStore() - - @property - def parameters(self) -> dict[str, Any]: # type: ignore[override] - return { - "type": "object", - "properties": { - "action": { - "type": "string", - "enum": ["add", "search"], - "description": "add = store text, search = retrieve similar items", - }, - "text": {"type": "string", "description": "content to store or query"}, - "metadata": { - "type": "object", - "description": "optional metadata to store with the entry", - }, - "top_k": { - "type": "integer", - "minimum": 1, - "maximum": 50, - "default": 5, - "description": "results to return when searching", - }, - }, - "required": ["action", "text"], - } - - async def __call__( - self, action: str, text: str, metadata: dict[str, Any] | None = None, top_k: int = 5 - ) -> list[ContentBlock]: - if action == "add": - self._backend.add(text=text, metadata=metadata) - return [TextContent(text="stored", type="text")] - if action == "search": - entries = self._backend.query(query=text, top_k=top_k) - if not entries: - return [TextContent(text="no matches", type="text")] - lines = [] - for idx, entry in enumerate(entries, 1): - meta = entry.metadata or {} - meta_str = f" | metadata={meta}" if meta else "" - lines.append(f"{idx}. {entry.text}{meta_str}") - return [TextContent(text="\n".join(lines), type="text")] - return [TextContent(text="unknown action", type="text")] - - -class QdrantBackend: - """Minimal Qdrant wrapper with on-the-fly sentence-transformer embeddings.""" - - def __init__(self, client: Any, collection: str) -> None: - self.client = client - self.collection = collection - self._embedder = self._load_embedder() - - def _load_embedder(self) -> Any: - try: - from sentence_transformers import SentenceTransformer - except Exception as e: - raise RuntimeError("sentence-transformers is required for Qdrant backend") from e - return SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2") - - def add(self, text: str, metadata: dict[str, Any] | None = None) -> None: - vec = self._embedder.encode(text).tolist() - payload = {"text": text, "metadata": metadata or {}} - self.client.upsert( - collection_name=self.collection, - points=[{"vector": vec, "payload": payload}], - ) - - def query(self, query: str, top_k: int = 5) -> list[MemoryEntry]: - vec = self._embedder.encode(query).tolist() - res = self.client.search( - collection_name=self.collection, - query_vector=vec, - limit=top_k, - with_payload=True, - ) - entries: list[MemoryEntry] = [] - for point in res: - payload = point.payload or {} - entries.append( - MemoryEntry( - text=payload.get("text", ""), - metadata=payload.get("metadata", {}), - tokens=set(), - ) - ) - return entries diff --git a/hud/tools/tests/test_memory_tool.py b/hud/tools/tests/test_memory_tool.py deleted file mode 100644 index 4c4adc88..00000000 --- a/hud/tools/tests/test_memory_tool.py +++ /dev/null @@ -1,34 +0,0 @@ -from __future__ import annotations - -import pytest -from mcp.types import TextContent - -from hud.tools.memory import InMemoryStore, MemoryTool - - -def test_inmemory_store_add_and_query() -> None: - store = InMemoryStore() - store.add("apple orange", {"kind": "fruit"}) - store.add("carrot celery", {"kind": "veg"}) - - results = store.query("apple", top_k=5) - assert len(results) == 1 - assert results[0].metadata["kind"] == "fruit" - - -@pytest.mark.asyncio -async def test_memory_tool_add_and_search() -> None: - tool = MemoryTool() - - out_add = await tool(action="add", text="alpha beta", metadata={"id": 1}) - assert isinstance(out_add[0], TextContent) - - out_search = await tool(action="search", text="alpha") - assert out_search[0].text.startswith("1.") - - -@pytest.mark.asyncio -async def test_memory_tool_unknown_action() -> None: - tool = MemoryTool() - res = await tool(action="noop", text="x") - assert res[0].text == "unknown action"