Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
376ec03
updated .gitignore
BrunoV21 Sep 1, 2025
6ce6b6b
added tf-idf for quick filtering of paths from user prompt
BrunoV21 Sep 1, 2025
7d1a9b3
updated models to support filtering from tfidf results
BrunoV21 Sep 1, 2025
325e66c
integrated tfidf matcher and filtered repo tree into agenttide
BrunoV21 Sep 1, 2025
d9552fd
removed prints
BrunoV21 Sep 1, 2025
9138b06
updated models to include file-level contents in tree
BrunoV21 Sep 1, 2025
a6e5f26
added fallback to ensure general context for broad-like questions th…
BrunoV21 Sep 1, 2025
d9a8f33
reinforced context with README file contents if available
BrunoV21 Sep 1, 2025
65caede
improved robustness for general case scenarios
BrunoV21 Sep 1, 2025
346ea2b
updated get_code_identifiers_prompt
BrunoV21 Sep 1, 2025
ba339e2
only used the top 5 matched paths from tf_idf to filter repo_tree
BrunoV21 Sep 2, 2025
c276cfc
feat(ui): make entire ReasoningMessage header clickable for expand/co…
BrunoV21 Sep 2, 2025
963d760
improved agent codeContext logic
BrunoV21 Sep 2, 2025
7d424d5
updated GET_CODE_IDENTIFIERS_SYSTEM_PROMPT and agent to receive suppor…
BrunoV21 Sep 3, 2025
dc6ee08
moved import to top of file
BrunoV21 Sep 3, 2025
9d53499
added initial version of search module for fast queries of the codebase
BrunoV21 Sep 3, 2025
f2d98ee
feat(tide/prompts): add calmness system prompt to enforce explicit co…
BrunoV21 Sep 4, 2025
1083a06
removed tf_idf_matcher
BrunoV21 Sep 4, 2025
aaf105b
updated get tree view and added nodes dict based on same tree dict inst…
BrunoV21 Sep 4, 2025
cffa8ac
replaced tf_idf_matcher with smart_code_search in agent prefilter
BrunoV21 Sep 4, 2025
a03f9e7
added smart_code_search_example
BrunoV21 Sep 4, 2025
9ff0463
updated WRITE_PATCH_SYSTEM_PROMPT
BrunoV21 Sep 4, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -185,4 +185,5 @@ examples/hf_demo_space/.chainlit/*
examples/hf_demo_space/chainlit.md

examples/hf_demo_space/public/
database.db-journal
.chainlit/
134 changes: 84 additions & 50 deletions codetide/agents/tide/agent.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
from functools import partial
from codetide import CodeTide
from codetide.search.code_search import SmartCodeSearch
from ...mcp.tools.patch_code import file_exists, open_file, process_patch, remove_file, write_file, parse_patch_blocks
from ...core.defaults import DEFAULT_ENCODING, DEFAULT_STORAGE_PATH
from ...parsers import SUPPORTED_LANGUAGES
from ...autocomplete import AutoComplete
from .models import Steps
from .prompts import (
AGENT_TIDE_SYSTEM_PROMPT, GET_CODE_IDENTIFIERS_SYSTEM_PROMPT, REJECT_PATCH_FEEDBACK_TEMPLATE,
STAGED_DIFFS_TEMPLATE, STEPS_SYSTEM_PROMPT, WRITE_PATCH_SYSTEM_PROMPT
AGENT_TIDE_SYSTEM_PROMPT, CALMNESS_SYSTEM_PROMPT, GET_CODE_IDENTIFIERS_SYSTEM_PROMPT, README_CONTEXT_PROMPT, REJECT_PATCH_FEEDBACK_TEMPLATE,
REPO_TREE_CONTEXT_PROMPT, STAGED_DIFFS_TEMPLATE, STEPS_SYSTEM_PROMPT, WRITE_PATCH_SYSTEM_PROMPT
)
from .utils import delete_file, parse_blocks, parse_steps_markdown, trim_to_patch_section
from .consts import AGENT_TIDE_ASCII_ART
Expand All @@ -20,11 +21,12 @@
"Install it with: pip install codetide[agents]"
) from e

from pydantic import BaseModel, Field, ConfigDict, model_validator
from prompt_toolkit.key_binding import KeyBindings
from prompt_toolkit import PromptSession
from pydantic import BaseModel, Field, model_validator
from typing_extensions import Self
from typing import List, Optional, Set
from typing_extensions import Self
from functools import partial
from datetime import date
from pathlib import Path
from ulid import ulid
Expand Down Expand Up @@ -58,11 +60,37 @@ class AgentTide(BaseModel):
_last_code_context :Optional[str] = None
_has_patch :bool=False

model_config = ConfigDict(arbitrary_types_allowed=True)

@model_validator(mode="after")
def pass_custom_logger_fn(self) -> Self:
    """Attach a session-aware logger callback to the LLM after validation.

    Binds this agent's ``session_id`` and ``patch_path`` (passed as
    ``filepath``) onto ``custom_logger_fn`` and stores the resulting
    callable on ``self.llm.logger_fn``.

    Returns:
        The same model instance, as required by an ``after`` validator.
    """
    bound_logger = partial(
        custom_logger_fn,
        session_id=self.session_id,
        filepath=self.patch_path,
    )
    self.llm.logger_fn = bound_logger
    return self


async def get_repo_tree_from_user_prompt(self, history :list)->str:
    """Build a repository tree view narrowed to files matching the conversation.

    The whole ``history`` is flattened into a single query string and
    searched against one document per file, where each document is the
    file path followed by the elements reported for it by
    ``compile_tree_nodes_dict``. The keys of the top 5 results are handed
    to ``_build_tree_dict`` before the tree view is rendered.

    Args:
        history: Conversation entries; each one is converted with ``str``.

    Returns:
        The tree view string, rendered with modules and types included.
    """
    query = "\n\n".join(map(str, history))
    # TODO: evaluate sending only the last N messages and giving more
    # weight to search results that come from later messages.

    # One searchable document per file; entries that end up empty after
    # stripping are left out.
    documents = {}
    for filepath, elements in self.tide.codebase.compile_tree_nodes_dict().items():
        contents = "\n".join([filepath] + elements).strip()
        if contents:
            documents[filepath] = contents

    searcher = SmartCodeSearch(documents=documents)
    await searcher.initialize_async()
    matches = await searcher.search_smart(query, top_k=5)

    # An empty match list is passed on as None rather than [].
    # NOTE(review): presumably None makes _build_tree_dict build the
    # unfiltered tree -- confirm against its implementation.
    matched_paths = [doc_key for doc_key, _ in matches]
    self.tide.codebase._build_tree_dict(matched_paths or None)

    return self.tide.codebase.get_tree_view(include_modules=True, include_types=True)

def approve(self):
self._has_patch = False
if os.path.exists(self.patch_path):
Expand Down Expand Up @@ -102,59 +130,65 @@ async def agent_loop(self, codeIdentifiers :Optional[List[str]]=None):
# update codetide with the latest changes made by the human and agent
await self.tide.check_for_updates(serialize=True, include_cached_ids=True)

repo_tree = self.tide.codebase.get_tree_view(
include_modules=True,
include_types=True
)

if codeIdentifiers is None and not self._skip_context_retrieval:
context_response = await self.llm.acomplete(
self.history,
system_prompt=[GET_CODE_IDENTIFIERS_SYSTEM_PROMPT.format(DATE=TODAY)],
prefix_prompt=repo_tree,
stream=False
# json_output=True
)

contextIdentifiers = parse_blocks(context_response, block_word="Context Identifiers", multiple=False)
modifyIdentifiers = parse_blocks(context_response, block_word="Modify Identifiers", multiple=False)

reasoning = context_response.split("*** Begin")
if not reasoning:
reasoning = [context_response]
self.reasoning = reasoning[0].strip()

self.contextIdentifiers = contextIdentifiers.splitlines() if isinstance(contextIdentifiers, str) else None
self.modifyIdentifiers = modifyIdentifiers.splitlines() if isinstance(modifyIdentifiers, str) else None
codeIdentifiers = self.contextIdentifiers or []

if self.modifyIdentifiers:
codeIdentifiers.extend(self.tide._as_file_paths(self.modifyIdentifiers))

codeContext = None
if codeIdentifiers:
autocomplete = AutoComplete(self.tide.cached_ids)
# Validate each code identifier
validatedCodeIdentifiers = []
for codeId in codeIdentifiers:
result = autocomplete.validate_code_identifier(codeId)
if result.get("is_valid"):
validatedCodeIdentifiers.append(codeId)
if self._skip_context_retrieval:
...
else:
if codeIdentifiers is None:
repo_tree = await self.get_repo_tree_from_user_prompt(self.history)
context_response = await self.llm.acomplete(
self.history,
system_prompt=[GET_CODE_IDENTIFIERS_SYSTEM_PROMPT.format(DATE=TODAY, SUPPORTED_LANGUAGES=SUPPORTED_LANGUAGES)], # TODO improve this prompt to handle generic scenarios liek what does my porject do and so on
prefix_prompt=repo_tree,
stream=False
# json_output=True
)

contextIdentifiers = parse_blocks(context_response, block_word="Context Identifiers", multiple=False)
modifyIdentifiers = parse_blocks(context_response, block_word="Modify Identifiers", multiple=False)

reasoning = context_response.split("*** Begin")
if not reasoning:
reasoning = [context_response]
self.reasoning = reasoning[0].strip()

self.contextIdentifiers = contextIdentifiers.splitlines() if isinstance(contextIdentifiers, str) else None
self.modifyIdentifiers = modifyIdentifiers.splitlines() if isinstance(modifyIdentifiers, str) else None
codeIdentifiers = self.contextIdentifiers or []

elif result.get("matching_identifiers"):
validatedCodeIdentifiers.append(result.get("matching_identifiers")[0])
if self.modifyIdentifiers:
codeIdentifiers.extend(self.tide._as_file_paths(self.modifyIdentifiers))

if codeIdentifiers:
autocomplete = AutoComplete(self.tide.cached_ids)
# Validate each code identifier
validatedCodeIdentifiers = []
for codeId in codeIdentifiers:
result = autocomplete.validate_code_identifier(codeId)
if result.get("is_valid"):
validatedCodeIdentifiers.append(codeId)

elif result.get("matching_identifiers"):
validatedCodeIdentifiers.append(result.get("matching_identifiers")[0])

self._last_code_identifers = set(validatedCodeIdentifiers)
codeContext = self.tide.get(validatedCodeIdentifiers, as_string=True)
self._last_code_context = codeContext
self._last_code_identifers = set(validatedCodeIdentifiers)
codeContext = self.tide.get(validatedCodeIdentifiers, as_string=True)

if not codeContext:
codeContext = REPO_TREE_CONTEXT_PROMPT.format(REPO_TREE=self.tide.codebase.get_tree_view())
readmeFile = self.tide.get("README.md", as_string_list=True)
if readmeFile:
codeContext = "\n".join([codeContext, README_CONTEXT_PROMPT.format(README=readmeFile)])

self._last_code_context = codeContext
await delete_file(self.patch_path)
response = await self.llm.acomplete(
self.history,
system_prompt=[
AGENT_TIDE_SYSTEM_PROMPT.format(DATE=TODAY),
STEPS_SYSTEM_PROMPT.format(DATE=TODAY, REPO_TREE=repo_tree),
WRITE_PATCH_SYSTEM_PROMPT.format(DATE=TODAY)
STEPS_SYSTEM_PROMPT.format(DATE=TODAY),
WRITE_PATCH_SYSTEM_PROMPT.format(DATE=TODAY),
CALMNESS_SYSTEM_PROMPT
],
prefix_prompt=codeContext
)
Expand Down
129 changes: 95 additions & 34 deletions codetide/agents/tide/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,56 +52,87 @@
"""

GET_CODE_IDENTIFIERS_SYSTEM_PROMPT = """
You are Agent **Tide**, operating in **Identifier Resolution Mode** on **{DATE}**. You have received a user request and a visual representation of the code repository structure. Your task is to determine which code-level identifiers (such as functions, classes, methods, variables, or attributes) or, if necessary, file paths are relevant for fulfilling the request.
You are Agent **Tide**, operating in **Identifier Resolution Mode** on **{DATE}**. You have received a user request and a repository tree structure that includes file contents information.
Your task is to determine which code-level identifiers or file paths are relevant for fulfilling the request.
You are operating under a strict **single-call constraint**: the repository tree structure can only be retrieved **once per task**. Do **not** request additional tree information.

You are operating under a strict **single-call constraint**: the repository tree structure (via `getRepoTree()`) can only be retrieved **once per task**, and you must extract maximum value from it. Do **not** request the tree again under any circumstances.
---

**SUPPORTED_LANGUAGES** are: {SUPPORTED_LANGUAGES}

---

**Instructions:**
**Core Rules:**

1. **Language-Based Decision Making:**
- For files in **SUPPORTED_LANGUAGES** (as indicated in the tree): Return **code identifiers** (functions, classes, methods, variables, attributes)
- For files **NOT** in SUPPORTED_LANGUAGES: Return **file paths** only
- Code identifiers should use dot notation (e.g., `module.submodule.Class.method`) without file extensions

1. Carefully read and interpret the user's request, identifying any references to files, modules, submodules, or code elements—either explicit or implied.
2. **Segregate identifiers into two categories:**
- **Context Identifiers:** Code elements (functions, classes, methods, variables, attributes, or file paths) that are required to understand, reference, or provide context for the requested change, but are not themselves expected to be modified.
- **Modify Identifiers:** Code elements (functions, classes, methods, variables, attributes, or file paths) that are likely to require direct modification to fulfill the user's request.
3. **Prioritize returning fully qualified code identifiers** (using dot notation, e.g., `module.submodule.Class.method`), without file extensions. Only include file paths (relative to the repository root) if:
- The user explicitly requests file-level operations (such as adding, deleting, or renaming files), or
- No valid or relevant code identifiers can be determined for the request.
4. If the user refers to a file by name or path and the request is about code elements within that file, extract and include the relevant code identifiers from that file instead of the file path, unless the user specifically asks for the file path.
5. If fulfilling the request would likely depend on additional symbols or files—based on naming, structure, required context from other files/modules, or conventional design patterns—include those code identifiers as context identifiers.
6. Only include identifiers or paths that are present in the provided tree structure. Never fabricate or guess paths or names that do not exist.
7. If no relevant code identifiers or file paths can be confidently identified, leave the relevant section(s) empty - without any contents or lines, not even the word empty.
2. **Identifier Categories:**
- **Context Identifiers:** Elements needed to understand or provide context for the request, but not directly modified
- **Modify Identifiers:** Elements that will likely require direct modification to fulfill the request

---

**Output Format:**
**Step-by-Step Process:**

1. **Parse the user request** to identify:
- Explicit file/module/code element references
- Implicit requirements based on the task description
- Scope of changes needed (file-level vs code-level)

2. **Analyze the repository tree** to:
- Locate relevant files and their language support status
- Identify code elements within supported language files
- Map user requirements to actual repository structure

3. **Apply the language rule:**
- **If file is in SUPPORTED_LANGUAGES:** Extract relevant code identifiers from the parsed content
- **If file is NOT in SUPPORTED_LANGUAGES:** Use the file path instead
- **Exception:** If user explicitly requests file-level operations (create, delete, rename files), return file paths regardless of language

Your response must include:
4. **Include contextual dependencies:**
- Related modules, classes, or functions that provide necessary context
- Configuration files, README, or documentation when dealing with broad/architectural questions
- **When in doubt about scope, always include README for project context**

---

**Special Cases:**

- **Broad/General Requests:** Include README and relevant config files (pyproject.toml, setup.py, etc.) as context
- **File-Level Operations:** Return file paths even for supported languages when the operation targets the file itself
- **Non-Existent Elements:** Only include identifiers/paths that actually exist in the provided tree structure
- **Empty Results:** Leave sections completely empty (no placeholder text) if no relevant identifiers are found

---

**Output Format:**

1. A brief explanation (1-3 sentences) describing your reasoning and search process for selecting the identifiers.
2. The following delimited sections, each containing a newline-separated list of identifiers (or left empty if none):
Provide:
1. **Brief explanation** (1-3 sentences) of your selection reasoning
2. **Delimited sections** with newline-separated lists:

*** Begin Context Identifiers
<one per line, or empty>
<code identifiers or file paths, one per line, or no text at all>
*** End Context Identifiers

*** Begin Modify Identifiers
<one per line, or empty>
<code identifiers or file paths, one per line, or no text at all>
*** End Modify Identifiers

Do **not** include any additional commentary, formatting, or output outside these sections.
**No additional output** beyond these sections.

---

**Evaluation Criteria:**

- You must identify all code identifiers directly referenced or implied in the user request, and correctly categorize them as context or modify identifiers.
- You must include any internal code elements that are clearly involved or required for the task.
- You must consider logical dependencies that may need to be modified together (e.g., helper modules, config files, related class methods).
- You must consider files that can be relevant as context to complete the user request, but only include their paths if code identifiers are not available or explicitly requested.
- You must return a clean and complete list of all relevant code identifiers and, if necessary, file paths, in the correct section.
- Do not over-include; be minimal but thorough. Return only what is truly required.

**Quality Checklist:**
- ✓ Applied language-based rule correctly (identifiers for supported languages, paths for others)
- ✓ Categorized identifiers appropriately (context vs modify)
- ✓ Included necessary dependencies and context
- ✓ Verified all items exist in the repository tree
- ✓ Used proper dot notation for code identifiers
- ✓ Kept output minimal but complete
"""

ASSISTANT_SYSTEM_PROMPT = """
Expand Down Expand Up @@ -214,7 +245,10 @@
* Inside each file patch:

* Use one or more @@ context headers to uniquely identify the code location
* Include exactly 3 lines of context above the change
* Include exactly 3 lines of context below the change as well
* The combination of context above + changed lines + context below must create a UNIQUE match in the file
* If the context pattern appears multiple times in the file, add more distinctive context lines until the location is unambiguous
* Context lines must form a contiguous block that exists nowhere else in the file with the same sequence

* For insertions (where no lines are being removed), always provide the 3 lines of real, unaltered context above the insertion point, as they appear in the original file. This ensures the patch can be applied unambiguously and in the correct location.

Expand All @@ -240,6 +274,10 @@
* Start with +
* Contribute to achieve the user request according to the plain reasoning step you have previoulsy produced

* AMBIGUITY CHECK: Before finalizing any patch, verify that the context + change pattern appears exactly once in the target file
* If multiple matches are possible, expand the context window until the patch location is unique
* Context must be sufficient to unambiguously identify the exact insertion/modification point

---

**IMPORTS AND CLASS STRUCTURE RULES:**
Expand Down Expand Up @@ -274,9 +312,10 @@
1. Validate that every line you edit exists exactly as-is in the original context
2. Ensure one patch block per file, using multiple @@ hunks as needed
3. Include no formatting, layout, or interpretation changes
4. Ensure every @@ header is a valid, real, byte-identical line from the original file
5. Match the `MANDATORY PATCH FORMAT (V4A-Compatible)` structure expectations exactly
6. Ensure each patch line starts with a `@`, `+`, `-` or ` `
4. Verify patch location uniqueness: ensure the context pattern (lines above + changed content + lines below) appears exactly once in the file
5. Ensure every @@ header is a valid, real, byte-identical line from the original file
6. Match the `MANDATORY PATCH FORMAT (V4A-Compatible)` structure expectations exactly
7. Ensure each patch line starts with a `@`, `+`, `-` or ` `

This is a surgical, precision editing mode.
You must mirror source files exactly — no assumptions, no reformatting, no transformations.
Expand Down Expand Up @@ -336,6 +375,28 @@
10. **Succinctness of Format:** Strictly adhere to the step formatting with separators (`---`) and the beginning/end markers. Do not add extraneous numbering or narrative outside the prescribed structure.
"""

# System prompt fragment that tells the model to stop and ask the user for
# clarification instead of acting on ambiguous or incomplete requests.
# It has no format placeholders and is used verbatim.
CALMNESS_SYSTEM_PROMPT = """
Remain calm and do not rush into execution if the user's request is ambiguous, lacks sufficient context, or is not explicit enough to proceed safely.

If you do not have all the information you need, or if any part of the request is unclear, you must pause and explicitly request the necessary context or clarification from the user before taking any action.

Never make assumptions or proceed with incomplete information. Your priority is to ensure that every action is based on clear, explicit, and sufficient instructions.
"""

# Context template wrapping a rendered repository tree.
# Placeholder: {REPO_TREE} -- the tree view string to embed.
REPO_TREE_CONTEXT_PROMPT = """
Here is a **tree representation of current state of the codebase** - you can refer to if needed:

{REPO_TREE}

"""

# Context template wrapping the project's README contents.
# Placeholder: {README} -- the README text to embed.
README_CONTEXT_PROMPT = """
Here is the README of the project for further context:

{README}

"""

CMD_TRIGGER_PLANNING_STEPS = """
You must operate in a multi-step planning and execution mode: first outline the plan step by step in a sequential way, then ask for my revision.
Do not start implementing the steps without my approval.
Expand Down
Loading