BrunoV21 · BrunoV21 · Sep 4, 2025 · Sep 1, 2025 · Sep 1, 2025 · Sep 1, 2025
diff --git a/.gitignore b/.gitignore
@@ -185,4 +185,5 @@ examples/hf_demo_space/.chainlit/*
 examples/hf_demo_space/chainlit.md
 
 examples/hf_demo_space/public/
+database.db-journal
 .chainlit/
diff --git a/codetide/agents/tide/agent.py b/codetide/agents/tide/agent.py
@@ -1,12 +1,13 @@
-from functools import partial
 from codetide import CodeTide
+from codetide.search.code_search import SmartCodeSearch
 from ...mcp.tools.patch_code import file_exists, open_file, process_patch, remove_file, write_file, parse_patch_blocks
 from ...core.defaults import DEFAULT_ENCODING, DEFAULT_STORAGE_PATH
+from ...parsers import SUPPORTED_LANGUAGES
 from ...autocomplete import AutoComplete
 from .models import Steps
 from .prompts import (
-    AGENT_TIDE_SYSTEM_PROMPT, GET_CODE_IDENTIFIERS_SYSTEM_PROMPT, REJECT_PATCH_FEEDBACK_TEMPLATE,
-    STAGED_DIFFS_TEMPLATE, STEPS_SYSTEM_PROMPT, WRITE_PATCH_SYSTEM_PROMPT
+    AGENT_TIDE_SYSTEM_PROMPT, CALMNESS_SYSTEM_PROMPT, GET_CODE_IDENTIFIERS_SYSTEM_PROMPT, README_CONTEXT_PROMPT, REJECT_PATCH_FEEDBACK_TEMPLATE,
+    REPO_TREE_CONTEXT_PROMPT, STAGED_DIFFS_TEMPLATE, STEPS_SYSTEM_PROMPT, WRITE_PATCH_SYSTEM_PROMPT
 )
 from .utils import delete_file, parse_blocks, parse_steps_markdown, trim_to_patch_section
 from .consts import AGENT_TIDE_ASCII_ART
@@ -20,11 +21,12 @@
         "Install it with: pip install codetide[agents]"
     ) from e
 
+from pydantic import BaseModel, Field, ConfigDict, model_validator
 from prompt_toolkit.key_binding import KeyBindings
 from prompt_toolkit import PromptSession
-from pydantic import BaseModel, Field, model_validator
-from typing_extensions import Self
 from typing import List, Optional, Set
+from typing_extensions import Self
+from functools import partial
 from datetime import date
 from pathlib import Path
 from ulid import ulid
@@ -58,11 +60,37 @@ class AgentTide(BaseModel):
     _last_code_context :Optional[str] = None
     _has_patch :bool=False
 
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
     @model_validator(mode="after")
     def pass_custom_logger_fn(self)->Self:
         self.llm.logger_fn = partial(custom_logger_fn, session_id=self.session_id, filepath=self.patch_path)
         return self
-
+
+    async def get_repo_tree_from_user_prompt(self, history :list)->str:
+
+        history_str = "\n\n".join([str(entry) for entry in history])
+        ### TODO evaluate sending only the last N messages and giving more importance to
+        ### search results from later messages
+
+        nodes_dict = self.tide.codebase.compile_tree_nodes_dict()
+        nodes_dict = {
+            filepath: contents for filepath, elements in nodes_dict.items()
+            if (contents := "\n".join([filepath] + elements).strip())
+        }
+
+        codeSearch = SmartCodeSearch(documents=nodes_dict)
+        await codeSearch.initialize_async()
+
+        results = await codeSearch.search_smart(history_str, top_k=5)
+
+        self.tide.codebase._build_tree_dict([doc_key for doc_key,_ in results] or None)
+
+        return self.tide.codebase.get_tree_view(
+            include_modules=True,
+            include_types=True
+        )
+
     def approve(self):
         self._has_patch = False
         if os.path.exists(self.patch_path):
@@ -102,59 +130,65 @@ async def agent_loop(self, codeIdentifiers :Optional[List[str]]=None):
         # update codetide with the latest changes made by the human and agent
         await self.tide.check_for_updates(serialize=True, include_cached_ids=True)
 
-        repo_tree = self.tide.codebase.get_tree_view(
-            include_modules=True,
-            include_types=True
-        )
-
-        if codeIdentifiers is None and not self._skip_context_retrieval:
-            context_response = await self.llm.acomplete(
-                self.history,
-                system_prompt=[GET_CODE_IDENTIFIERS_SYSTEM_PROMPT.format(DATE=TODAY)],
-                prefix_prompt=repo_tree,
-                stream=False
-                # json_output=True
-            )
-
-            contextIdentifiers = parse_blocks(context_response, block_word="Context Identifiers", multiple=False)
-            modifyIdentifiers = parse_blocks(context_response, block_word="Modify Identifiers", multiple=False)
-
-            reasoning = context_response.split("*** Begin")
-            if not reasoning:
-                reasoning = [context_response]
-            self.reasoning = reasoning[0].strip()
-
-            self.contextIdentifiers = contextIdentifiers.splitlines() if isinstance(contextIdentifiers, str) else None
-            self.modifyIdentifiers = modifyIdentifiers.splitlines() if isinstance(modifyIdentifiers, str) else None
-            codeIdentifiers = self.contextIdentifiers or []
-
-            if self.modifyIdentifiers:
-                codeIdentifiers.extend(self.tide._as_file_paths(self.modifyIdentifiers))
-
         codeContext = None
-        if codeIdentifiers:
-            autocomplete = AutoComplete(self.tide.cached_ids)
-            # Validate each code identifier
-            validatedCodeIdentifiers = []
-            for codeId in codeIdentifiers:
-                result = autocomplete.validate_code_identifier(codeId)
-                if result.get("is_valid"):
-                    validatedCodeIdentifiers.append(codeId)
+        if self._skip_context_retrieval:
+            ...
+        else:
+            if codeIdentifiers is None:
+                repo_tree = await self.get_repo_tree_from_user_prompt(self.history)
+                context_response = await self.llm.acomplete(
+                    self.history,
+                    system_prompt=[GET_CODE_IDENTIFIERS_SYSTEM_PROMPT.format(DATE=TODAY, SUPPORTED_LANGUAGES=SUPPORTED_LANGUAGES)], # TODO improve this prompt to handle generic scenarios like "what does my project do?" and so on
+                    prefix_prompt=repo_tree,
+                    stream=False
+                    # json_output=True
+                )
+
+                contextIdentifiers = parse_blocks(context_response, block_word="Context Identifiers", multiple=False)
+                modifyIdentifiers = parse_blocks(context_response, block_word="Modify Identifiers", multiple=False)
+
+                reasoning = context_response.split("*** Begin")
+                if not reasoning:
+                    reasoning = [context_response]
+                self.reasoning = reasoning[0].strip()
+
+                self.contextIdentifiers = contextIdentifiers.splitlines() if isinstance(contextIdentifiers, str) else None
+                self.modifyIdentifiers = modifyIdentifiers.splitlines() if isinstance(modifyIdentifiers, str) else None
+                codeIdentifiers = self.contextIdentifiers or []
 
-                elif result.get("matching_identifiers"):
-                    validatedCodeIdentifiers.append(result.get("matching_identifiers")[0])
+                if self.modifyIdentifiers:
+                    codeIdentifiers.extend(self.tide._as_file_paths(self.modifyIdentifiers))
+
+            if codeIdentifiers:
+                autocomplete = AutoComplete(self.tide.cached_ids)
+                # Validate each code identifier
+                validatedCodeIdentifiers = []
+                for codeId in codeIdentifiers:
+                    result = autocomplete.validate_code_identifier(codeId)
+                    if result.get("is_valid"):
+                        validatedCodeIdentifiers.append(codeId)
+
+                    elif result.get("matching_identifiers"):
+                        validatedCodeIdentifiers.append(result.get("matching_identifiers")[0])
 
-            self._last_code_identifers = set(validatedCodeIdentifiers)
-            codeContext = self.tide.get(validatedCodeIdentifiers, as_string=True)
-            self._last_code_context = codeContext
+                self._last_code_identifers = set(validatedCodeIdentifiers)
+                codeContext = self.tide.get(validatedCodeIdentifiers, as_string=True)
+
+            if not codeContext:
+                codeContext = REPO_TREE_CONTEXT_PROMPT.format(REPO_TREE=self.tide.codebase.get_tree_view())
+                readmeFile = self.tide.get("README.md", as_string_list=True)
+                if readmeFile:
+                    codeContext = "\n".join([codeContext, README_CONTEXT_PROMPT.format(README=readmeFile)])
 
+        self._last_code_context = codeContext
         await delete_file(self.patch_path)
         response = await self.llm.acomplete(
             self.history,
             system_prompt=[
                 AGENT_TIDE_SYSTEM_PROMPT.format(DATE=TODAY),
-                STEPS_SYSTEM_PROMPT.format(DATE=TODAY, REPO_TREE=repo_tree),
-                WRITE_PATCH_SYSTEM_PROMPT.format(DATE=TODAY)
+                STEPS_SYSTEM_PROMPT.format(DATE=TODAY),
+                WRITE_PATCH_SYSTEM_PROMPT.format(DATE=TODAY),
+                CALMNESS_SYSTEM_PROMPT
             ],
             prefix_prompt=codeContext
         )

diff --git a/codetide/agents/tide/prompts.py b/codetide/agents/tide/prompts.py
@@ -52,56 +52,87 @@
 """
 
 GET_CODE_IDENTIFIERS_SYSTEM_PROMPT = """
-You are Agent **Tide**, operating in **Identifier Resolution Mode** on **{DATE}**. You have received a user request and a visual representation of the code repository structure. Your task is to determine which code-level identifiers (such as functions, classes, methods, variables, or attributes) or, if necessary, file paths are relevant for fulfilling the request.
+You are Agent **Tide**, operating in **Identifier Resolution Mode** on **{DATE}**. You have received a user request and a repository tree structure that includes file contents information.
+Your task is to determine which code-level identifiers or file paths are relevant for fulfilling the request.
+You are operating under a strict **single-call constraint**: the repository tree structure can only be retrieved **once per task**. Do **not** request additional tree information.
 
-You are operating under a strict **single-call constraint**: the repository tree structure (via `getRepoTree()`) can only be retrieved **once per task**, and you must extract maximum value from it. Do **not** request the tree again under any circumstances.
+---
+
+**SUPPORTED_LANGUAGES** are: {SUPPORTED_LANGUAGES}
 
 ---
 
-**Instructions:**
+**Core Rules:**
+
+1. **Language-Based Decision Making:**
+   - For files in **SUPPORTED_LANGUAGES** (as indicated in the tree): Return **code identifiers** (functions, classes, methods, variables, attributes)
+   - For files **NOT** in SUPPORTED_LANGUAGES: Return **file paths** only
+   - Code identifiers should use dot notation (e.g., `module.submodule.Class.method`) without file extensions
 
-1. Carefully read and interpret the user's request, identifying any references to files, modules, submodules, or code elements—either explicit or implied.
-2. **Segregate identifiers into two categories:**
-   - **Context Identifiers:** Code elements (functions, classes, methods, variables, attributes, or file paths) that are required to understand, reference, or provide context for the requested change, but are not themselves expected to be modified.
-   - **Modify Identifiers:** Code elements (functions, classes, methods, variables, attributes, or file paths) that are likely to require direct modification to fulfill the user's request.
-3. **Prioritize returning fully qualified code identifiers** (using dot notation, e.g., `module.submodule.Class.method`), without file extensions. Only include file paths (relative to the repository root) if:
-   - The user explicitly requests file-level operations (such as adding, deleting, or renaming files), or
-   - No valid or relevant code identifiers can be determined for the request.
-4. If the user refers to a file by name or path and the request is about code elements within that file, extract and include the relevant code identifiers from that file instead of the file path, unless the user specifically asks for the file path.
-5. If fulfilling the request would likely depend on additional symbols or files—based on naming, structure, required context from other files/modules, or conventional design patterns—include those code identifiers as context identifiers.
-6. Only include identifiers or paths that are present in the provided tree structure. Never fabricate or guess paths or names that do not exist.
-7. If no relevant code identifiers or file paths can be confidently identified, leave the relevant section(s) empty - without any contents or lines, not even the word empty.
+2. **Identifier Categories:**
+   - **Context Identifiers:** Elements needed to understand or provide context for the request, but not directly modified
+   - **Modify Identifiers:** Elements that will likely require direct modification to fulfill the request
 
 ---
 
-**Output Format:**
+**Step-by-Step Process:**
+
+1. **Parse the user request** to identify:
+   - Explicit file/module/code element references
+   - Implicit requirements based on the task description
+   - Scope of changes needed (file-level vs code-level)
+
+2. **Analyze the repository tree** to:
+   - Locate relevant files and their language support status
+   - Identify code elements within supported language files
+   - Map user requirements to actual repository structure
+
+3. **Apply the language rule:**
+   - **If file is in SUPPORTED_LANGUAGES:** Extract relevant code identifiers from the parsed content
+   - **If file is NOT in SUPPORTED_LANGUAGES:** Use the file path instead
+   - **Exception:** If user explicitly requests file-level operations (create, delete, rename files), return file paths regardless of language
 
-Your response must include:
+4. **Include contextual dependencies:**
+   - Related modules, classes, or functions that provide necessary context
+   - Configuration files, README, or documentation when dealing with broad/architectural questions
+   - **When in doubt about scope, always include README for project context**
+
+---
+
+**Special Cases:**
+
+- **Broad/General Requests:** Include README and relevant config files (pyproject.toml, setup.py, etc.) as context
+- **File-Level Operations:** Return file paths even for supported languages when the operation targets the file itself
+- **Non-Existent Elements:** Only include identifiers/paths that actually exist in the provided tree structure
+- **Empty Results:** Leave sections completely empty (no placeholder text) if no relevant identifiers are found
+
+---
+
+**Output Format:**
 
-1. A brief explanation (1-3 sentences) describing your reasoning and search process for selecting the identifiers.
-2. The following delimited sections, each containing a newline-separated list of identifiers (or left empty if none):
+Provide:
+1. **Brief explanation** (1-3 sentences) of your selection reasoning
+2. **Delimited sections** with newline-separated lists:
 
 *** Begin Context Identifiers
-<one per line, or empty>
+<code identifiers or file paths, one per line, or no text at all>
 *** End Context Identifiers
 
 *** Begin Modify Identifiers
-<one per line, or empty>
+<code identifiers or file paths, one per line, or no text at all>
 *** End Modify Identifiers
 
-Do **not** include any additional commentary, formatting, or output outside these sections.
+**No additional output** beyond these sections.
 
 ---
 
-**Evaluation Criteria:**
-
-- You must identify all code identifiers directly referenced or implied in the user request, and correctly categorize them as context or modify identifiers.
-- You must include any internal code elements that are clearly involved or required for the task.
-- You must consider logical dependencies that may need to be modified together (e.g., helper modules, config files, related class methods).
-- You must consider files that can be relevant as context to complete the user request, but only include their paths if code identifiers are not available or explicitly requested.
-- You must return a clean and complete list of all relevant code identifiers and, if necessary, file paths, in the correct section.
-- Do not over-include; be minimal but thorough. Return only what is truly required.
-
+**Quality Checklist:**
+- ✓ Applied language-based rule correctly (identifiers for supported languages, paths for others)
+- ✓ Categorized identifiers appropriately (context vs modify)
+- ✓ Included necessary dependencies and context
+- ✓ Verified all items exist in the repository tree
+- ✓ Used proper dot notation for code identifiers
+- ✓ Kept output minimal but complete
 """
 
 ASSISTANT_SYSTEM_PROMPT = """
@@ -214,7 +245,10 @@
 * Inside each file patch:
 
   * Use one or more @@ context headers to uniquely identify the code location
-  * Include exactly 3 lines of context above the change
+  * Include exactly 3 lines of context above the change and exactly 3 lines of context below it
+  * The combination of context above + changed lines + context below must create a UNIQUE match in the file
+  * If the context pattern appears multiple times in the file, add more distinctive context lines until the location is unambiguous
+  * Context lines must form a contiguous block that exists nowhere else in the file with the same sequence
 
 * For insertions (where no lines are being removed), always provide the 3 lines of real, unaltered context above the insertion point, as they appear in the original file. This ensures the patch can be applied unambiguously and in the correct location.  
 
@@ -240,6 +274,10 @@
  * Start with +
  * Contribute to achieve the user request according to the plain reasoning step you have previoulsy produced
 
+* AMBIGUITY CHECK: Before finalizing any patch, verify that the context + change pattern appears exactly once in the target file
+ * If multiple matches are possible, expand the context window until the patch location is unique
+ * Context must be sufficient to unambiguously identify the exact insertion/modification point
+
 ---
 
 **IMPORTS AND CLASS STRUCTURE RULES:**
@@ -274,9 +312,10 @@
 1. Validate that every line you edit exists exactly as-is in the original context
 2. Ensure one patch block per file, using multiple @@ hunks as needed
 3. Include no formatting, layout, or interpretation changes
-4. Ensure every @@ header is a valid, real, byte-identical line from the original file
-5. Match the `MANDATORY PATCH FORMAT (V4A-Compatible)` structure expectations exactly
-6. Ensure each patch line starts with a `@`, `+`, `-` or ` `
+4. Verify patch location uniqueness: ensure the context pattern (lines above + changed content + lines below) appears exactly once in the file
+5. Ensure every @@ header is a valid, real, byte-identical line from the original file
+6. Match the `MANDATORY PATCH FORMAT (V4A-Compatible)` structure expectations exactly
+7. Ensure each patch line starts with a `@`, `+`, `-` or ` `
 
 This is a surgical, precision editing mode.
 You must mirror source files exactly — no assumptions, no reformatting, no transformations.
@@ -336,6 +375,28 @@
 10. **Succinctness of Format:** Strictly adhere to the step formatting with separators (`---`) and the beginning/end markers. Do not add extraneous numbering or narrative outside the prescribed structure.
 """
 
+CALMNESS_SYSTEM_PROMPT = """
+Remain calm and do not rush into execution if the user's request is ambiguous, lacks sufficient context, or is not explicit enough to proceed safely.
+
+If you do not have all the information you need, or if any part of the request is unclear, you must pause and explicitly request the necessary context or clarification from the user before taking any action.
+
+Never make assumptions or proceed with incomplete information. Your priority is to ensure that every action is based on clear, explicit, and sufficient instructions.
+"""
+
+REPO_TREE_CONTEXT_PROMPT = """
+Here is a **tree representation of the current state of the codebase** - you can refer to it if needed:
+
+{REPO_TREE}
+
+"""
+
+README_CONTEXT_PROMPT = """
+Here is the README of the project for further context:
+
+{README}
+
+"""
+
 CMD_TRIGGER_PLANNING_STEPS = """
 You must operate in a multi-step planning and execution mode: first outline the plan step by step in a sequential way, then ask for my revision.
 Do not start implementing the steps without my approval.