diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..0338f37
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,24 @@
+FROM python:3.13-slim
+
+# Set environment variables
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+
+# Set work directory
+WORKDIR /app
+
+# Install system dependencies (if any are needed, add here)
+# RUN apt-get update && apt-get install -y && rm -rf /var/lib/apt/lists/*
+
+# Copy requirements and install dependencies
+COPY requirements.txt .
+RUN pip install --upgrade pip && pip install -r requirements.txt
+
+# Copy the rest of the code
+COPY . .
+
+# Install the package (so entry points are available)
+RUN pip install -e .
+
+# Default command: launch the CLI
+ENTRYPOINT ["codetide-cli"]
diff --git a/README.md b/README.md
index 00150f9..609b793 100644
--- a/README.md
+++ b/README.md
@@ -44,6 +44,8 @@ uvx --from codetide codetide-cli --help

AgentTide is a demo showing how CodeTide can integrate with LLMs and augment code generation and codebase-related workflows. If you ask Tide to describe himself, he will say something like this: I'm the next-generation, precision-driven software engineering agent built on top of CodeTide. You can use it via the command-line interface (CLI) or a beautiful interactive UI.

+> **Demo available:** Try AgentTide live on Hugging Face Spaces: [https://mclovinittt-agenttidedemo.hf.space/](https://mclovinittt-agenttidedemo.hf.space/)
+
---
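A quick way to sanity-check the new image is to build it with `docker build -t codetide .` and invoke the entry point; arguments passed after the image name are appended to the `codetide-cli` ENTRYPOINT. The snippet below is a minimal smoke-test sketch under those assumptions, not part of this patch: the `codetide` tag and the helper name are illustrative.

```python
# Hypothetical smoke test for the Dockerfile above (not part of this patch).
# Assumes the image was built locally as `codetide`; arguments after the
# image name are forwarded to the ENTRYPOINT (`codetide-cli`).
import subprocess

def run_cli_help(image: str = "codetide") -> str:
    result = subprocess.run(
        ["docker", "run", "--rm", image, "--help"],
        capture_output=True,
        text=True,
        check=True,  # raise if the container exits non-zero
    )
    return result.stdout

if __name__ == "__main__":
    print(run_cli_help())
```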
@@ -541,6 +543,8 @@ Here’s what’s next for CodeTide: ## 🤖 Agents Module: AgentTide +> **Demo available:** Try AgentTide live on Hugging Face Spaces: [https://mclovinittt-agenttidedemo.hf.space/](https://mclovinittt-agenttidedemo.hf.space/) + CodeTide now includes an `agents` module, featuring **AgentTide**—a precision-driven software engineering agent that connects directly to your codebase and executes your requests with full code context. **AgentTide** leverages CodeTide’s symbolic code understanding to: diff --git a/codetide/agents/tide/agent.py b/codetide/agents/tide/agent.py index 33ef190..a3e618c 100644 --- a/codetide/agents/tide/agent.py +++ b/codetide/agents/tide/agent.py @@ -1,12 +1,11 @@ from codetide import CodeTide from ...mcp.tools.patch_code import file_exists, open_file, process_patch, remove_file, write_file, parse_patch_blocks from ...core.defaults import DEFAULT_ENCODING, DEFAULT_STORAGE_PATH -from ...search.code_search import SmartCodeSearch from ...parsers import SUPPORTED_LANGUAGES from ...autocomplete import AutoComplete from .models import Steps from .prompts import ( - AGENT_TIDE_SYSTEM_PROMPT, CALMNESS_SYSTEM_PROMPT, CMD_BRAINSTORM_PROMPT, CMD_CODE_REVIEW_PROMPT, CMD_TRIGGER_PLANNING_STEPS, CMD_WRITE_TESTS_PROMPT, GET_CODE_IDENTIFIERS_SYSTEM_PROMPT, README_CONTEXT_PROMPT, REJECT_PATCH_FEEDBACK_TEMPLATE, + AGENT_TIDE_SYSTEM_PROMPT, CALMNESS_SYSTEM_PROMPT, CMD_BRAINSTORM_PROMPT, CMD_CODE_REVIEW_PROMPT, CMD_TRIGGER_PLANNING_STEPS, CMD_WRITE_TESTS_PROMPT, GET_CODE_IDENTIFIERS_UNIFIED_PROMPT, README_CONTEXT_PROMPT, REJECT_PATCH_FEEDBACK_TEMPLATE, REPO_TREE_CONTEXT_PROMPT, STAGED_DIFFS_TEMPLATE, STEPS_SYSTEM_PROMPT, WRITE_PATCH_SYSTEM_PROMPT ) from .utils import delete_file, parse_blocks, parse_steps_markdown, trim_to_patch_section @@ -67,31 +66,19 @@ def pass_custom_logger_fn(self)->Self: self.llm.logger_fn = partial(custom_logger_fn, session_id=self.session_id, filepath=self.patch_path) return self - async def get_repo_tree_from_user_prompt(self, history :list)->str: + async def get_repo_tree_from_user_prompt(self, history :list, include_modules :bool=False, expand_paths :Optional[List[str]]=None)->str: - history_str = "\n\n".join([str(entry) for entry in history]) + history_str = "\n\n".join(history) for CMD_PROMPT in [CMD_TRIGGER_PLANNING_STEPS, CMD_WRITE_TESTS_PROMPT, CMD_BRAINSTORM_PROMPT, CMD_CODE_REVIEW_PROMPT]: history_str.replace(CMD_PROMPT, "") - ### TODO evalutate sending last N messages and giving more importance to - ### search results from latter messages - - nodes_dict = self.tide.codebase.compile_tree_nodes_dict() - nodes_dict = { - filepath: contents for filepath, elements in nodes_dict.items() - if (contents := "\n".join([filepath] + elements).strip()) - } - - codeSearch = SmartCodeSearch(documents=nodes_dict) - await codeSearch.initialize_async() - - results = await codeSearch.search_smart(history_str, top_k=15) - self.tide.codebase._build_tree_dict([doc_key for doc_key,_ in results] or None) + self.tide.codebase._build_tree_dict(expand_paths) - return self.tide.codebase.get_tree_view( - include_modules=True, + tree = self.tide.codebase.get_tree_view( + include_modules=include_modules, include_types=True ) + return tree def approve(self): self._has_patch = False @@ -126,56 +113,122 @@ def trim_messages(messages, tokenizer_fn, max_tokens :Optional[int]=None): while messages and sum(len(tokenizer_fn(str(msg))) for msg in messages) > max_tokens: messages.pop(0) # Remove from the beginning + @staticmethod + def get_valid_identifier(autocomplete 
:AutoComplete, identifier:str)->Optional[str]:
+        result = autocomplete.validate_code_identifier(identifier)
+        if result.get("is_valid"):
+            return identifier
+        elif result.get("matching_identifiers"):
+            return result.get("matching_identifiers")[0]
+        return None
+
+    def _clean_history(self):
+        for i in range(len(self.history)):
+            message = self.history[i]
+            if isinstance(message, dict):
+                self.history[i] = message.get("content", "")
+
    async def agent_loop(self, codeIdentifiers :Optional[List[str]]=None):
        TODAY = date.today()
-
-        # update codetide with the latest changes made by the human and agent
        await self.tide.check_for_updates(serialize=True, include_cached_ids=True)
+        self._clean_history()

        codeContext = None
        if self._skip_context_retrieval:
            ...

        else:
-            if codeIdentifiers is None:
-                repo_tree = await self.get_repo_tree_from_user_prompt(self.history)
-                context_response = await self.llm.acomplete(
+            autocomplete = AutoComplete(self.tide.cached_ids)
+            matches = autocomplete.extract_words_from_text("\n\n".join(self.history))
+
+            # --- Begin Unified Identifier Retrieval ---
+            identifiers_accum = set(matches["all_found_words"]) if codeIdentifiers is None else set(codeIdentifiers + matches["all_found_words"])
+            modify_accum = set()
+            reasoning_accum = []
+            repo_tree = None
+            smart_search_attempts = 0
+            max_smart_search_attempts = 3
+            done = False
+            previous_reason = None
+
+            while not done:
+                expand_paths = ["./"]
+                # 1. Rebuild the (filtered) repo tree view
+                if repo_tree is None or smart_search_attempts > 0:
+                    repo_history = list(self.history)
+                    if previous_reason:
+                        repo_history += [previous_reason]
+
+                    repo_tree = await self.get_repo_tree_from_user_prompt(repo_history, include_modules=bool(smart_search_attempts), expand_paths=expand_paths)
+
+                # 2. Single LLM call with unified prompt
+                # Pass accumulated identifiers for context if this isn't the first iteration
+                accumulated_context = "\n".join(
+                    sorted((identifiers_accum or set()) | (modify_accum or set()))
+                ) if (identifiers_accum or modify_accum) else ""
+
+                unified_response = await self.llm.acomplete(
                    self.history,
-                    system_prompt=[GET_CODE_IDENTIFIERS_SYSTEM_PROMPT.format(DATE=TODAY, SUPPORTED_LANGUAGES=SUPPORTED_LANGUAGES)], # TODO improve this prompt to handle generic scenarios liek what does my porject do and so on
+                    system_prompt=[GET_CODE_IDENTIFIERS_UNIFIED_PROMPT.format(
+                        DATE=TODAY,
+                        SUPPORTED_LANGUAGES=SUPPORTED_LANGUAGES,
+                        IDENTIFIERS=accumulated_context
+                    )],
                    prefix_prompt=repo_tree,
                    stream=False
-                    # json_output=True
                )

-                contextIdentifiers = parse_blocks(context_response, block_word="Context Identifiers", multiple=False)
-                modifyIdentifiers = parse_blocks(context_response, block_word="Modify Identifiers", multiple=False)
-
-                reasoning = context_response.split("*** Begin")
-                if not reasoning:
-                    reasoning = [context_response]
-                self.reasoning = reasoning[0].strip()
-
-                self.contextIdentifiers = contextIdentifiers.splitlines() if isinstance(contextIdentifiers, str) else None
-                self.modifyIdentifiers = modifyIdentifiers.splitlines() if isinstance(modifyIdentifiers, str) else None
-                codeIdentifiers = self.contextIdentifiers or []
+                # Parse the unified response
+                contextIdentifiers = parse_blocks(unified_response, block_word="Context Identifiers", multiple=False)
+                modifyIdentifiers = parse_blocks(unified_response, block_word="Modify Identifiers", multiple=False)
+                expandPaths = parse_blocks(unified_response, block_word="Expand Paths", multiple=False)

-                if self.modifyIdentifiers:
-                    
codeIdentifiers.extend(self.tide._as_file_paths(self.modifyIdentifiers))
-
+                # Extract reasoning (everything before the first "*** Begin")
+                reasoning_parts = unified_response.split("*** Begin")
+                if reasoning_parts:
+                    reasoning_accum.append(reasoning_parts[0].strip())
+                    previous_reason = reasoning_accum[-1]
+
+                # Accumulate identifiers
+                if contextIdentifiers:
+                    if smart_search_attempts == 0:
+                        ### discard wrongly matched identifiers from the initial fuzzy pass
+                        identifiers_accum = set()
+                    for ident in contextIdentifiers.splitlines():
+                        if ident := self.get_valid_identifier(autocomplete, ident.strip()):
+                            identifiers_accum.add(ident)
+
+                if modifyIdentifiers:
+                    for ident in modifyIdentifiers.splitlines():
+                        if ident := self.get_valid_identifier(autocomplete, ident.strip()):
+                            modify_accum.add(ident)
+
+                if expandPaths:
+                    expand_paths = [
+                        path for ident in expandPaths.splitlines() if (path := self.get_valid_identifier(autocomplete, ident.strip()))
+                    ]
+
+                # Check if we have enough identifiers (unified prompt includes this decision)
+                if "ENOUGH_IDENTIFIERS: TRUE" in unified_response.upper():
+                    done = True
+                else:
+                    smart_search_attempts += 1
+                    if smart_search_attempts >= max_smart_search_attempts:
+                        done = True
+
+            # Finalize identifiers
+            self.reasoning = "\n\n".join(reasoning_accum)
+            self.contextIdentifiers = list(identifiers_accum) if identifiers_accum else None
+            self.modifyIdentifiers = list(modify_accum) if modify_accum else None
+
+            codeIdentifiers = self.contextIdentifiers or []
+            if self.modifyIdentifiers:
+                codeIdentifiers.extend(self.tide._as_file_paths(self.modifyIdentifiers))
+
+            # --- End Unified Identifier Retrieval ---

            if codeIdentifiers:
-                autocomplete = AutoComplete(self.tide.cached_ids)
-                # Validate each code identifier
-                validatedCodeIdentifiers = []
-                for codeId in codeIdentifiers:
-                    result = autocomplete.validate_code_identifier(codeId)
-                    if result.get("is_valid"):
-                        validatedCodeIdentifiers.append(codeId)
-
-                    elif result.get("matching_identifiers"):
-                        validatedCodeIdentifiers.append(result.get("matching_identifiers")[0])
+                self._last_code_identifers = set(codeIdentifiers)
+                codeContext = self.tide.get(codeIdentifiers, as_string=True)

-                self._last_code_identifers = set(validatedCodeIdentifiers)
-                codeContext = self.tide.get(validatedCodeIdentifiers, as_string=True)
-
            if not codeContext:
                codeContext = REPO_TREE_CONTEXT_PROMPT.format(REPO_TREE=self.tide.codebase.get_tree_view())
                readmeFile = self.tide.get("README.md", as_string_list=True)
@@ -241,12 +294,19 @@
        return stdout.decode()

+    def _has_staged(self)->bool:
+        status = self.tide.repo.status()
+        result = any([file_status == pygit2.GIT_STATUS_INDEX_MODIFIED for file_status in status.values()])
+        _logger.logger.debug(f"_has_staged {result=}")
+        return result
+
    async def _stage(self)->str:
        index = self.tide.repo.index
-        for path in self.changed_paths:
-            index.add(path)
+        if not self._has_staged():
+            for path in self.changed_paths:
+                index.add(path)

-        index.write()
+            index.write()

        staged_diff = await self.get_git_diff_staged_simple(self.tide.rootpath)
        staged_diff = staged_diff.strip()
diff --git a/codetide/agents/tide/prompts.py b/codetide/agents/tide/prompts.py
index dec1929..221b087 100644
--- a/codetide/agents/tide/prompts.py
+++ b/codetide/agents/tide/prompts.py
@@ -51,90 +51,6 @@
 """

-GET_CODE_IDENTIFIERS_SYSTEM_PROMPT = """
-You are Agent **Tide**, operating in **Identifier Resolution Mode** on **{DATE}**.
You have received a user request and a repository tree structure that includes file contents information. -Your task is to determine which code-level identifiers or file paths are relevant for fulfilling the request. -You are operating under a strict **single-call constraint**: the repository tree structure can only be retrieved **once per task**. Do **not** request additional tree information. - ---- - -**SUPPORTED_LANGUAGES** are: {SUPPORTED_LANGUAGES} - ---- - -**Core Rules:** - -1. **Language-Based Decision Making:** - - For files in **SUPPORTED_LANGUAGES** (as indicated in the tree): Return **code identifiers** (functions, classes, methods, variables, attributes) - - For files **NOT** in SUPPORTED_LANGUAGES: Return **file paths** only - - Code identifiers should use dot notation (e.g., `module.submodule.Class.method`) without file extensions - -2. **Identifier Categories:** - - **Context Identifiers:** Elements needed to understand or provide context for the request, but not directly modified - - **Modify Identifiers:** Elements that will likely require direct modification to fulfill the request - ---- - -**Step-by-Step Process:** - -1. **Parse the user request** to identify: - - Explicit file/module/code element references - - Implicit requirements based on the task description - - Scope of changes needed (file-level vs code-level) - -2. **Analyze the repository tree** to: - - Locate relevant files and their language support status - - Identify code elements within supported language files - - Map user requirements to actual repository structure - -3. **Apply the language rule:** - - **If file is in SUPPORTED_LANGUAGES:** Extract relevant code identifiers from the parsed content - - **If file is NOT in SUPPORTED_LANGUAGES:** Use the file path instead - - **Exception:** If user explicitly requests file-level operations (create, delete, rename files), return file paths regardless of language - -4. **Include contextual dependencies:** - - Related modules, classes, or functions that provide necessary context - - Configuration files, README, or documentation when dealing with broad/architectural questions - - **When in doubt about scope, always include README for project context** - ---- - -**Special Cases:** - -- **Broad/General Requests:** Include README and relevant config files (pyproject.toml, setup.py, etc.) as context -- **File-Level Operations:** Return file paths even for supported languages when the operation targets the file itself -- **Non-Existent Elements:** Only include identifiers/paths that actually exist in the provided tree structure -- **Empty Results:** Leave sections completely empty (no placeholder text) if no relevant identifiers are found - ---- - -**Output Format:** - -Provide: -1. **Brief explanation** (1-3 sentences) of your selection reasoning -2. **Delimited sections** with newline-separated lists: - -*** Begin Context Identifiers - -*** End Context Identifiers - -*** Begin Modify Identifiers - -*** End Modify Identifiers - -**No additional output** beyond these sections. 
- ---- - -**Quality Checklist:** -- ✓ Applied language-based rule correctly (identifiers for supported languages, paths for others) -- ✓ Categorized identifiers appropriately (context vs modify) -- ✓ Included necessary dependencies and context -- ✓ Verified all items exist in the repository tree -- ✓ Used proper dot notation for code identifiers -- ✓ Kept output minimal but complete -""" - ASSISTANT_SYSTEM_PROMPT = """ You are Agent **Tide**, operating in **Lightweight Assistant Mode** on **{DATE}**. The user’s request does **not require repository context** or file-level editing. You are acting as a general-purpose software assistant. @@ -167,6 +83,7 @@ RESPONSE FORMAT (ALWAYS): + --- @@ -282,7 +199,8 @@ **IMPORTS AND CLASS STRUCTURE RULES:** -* All import statements must be placed at the very top of the file, before any other code. +* All import statements must be placed at the very top of the file, before any other code: + - If you realize after writing a patch that an additional import is required, create a new patch that adds the missing import at the very top of the file. * When referencing imports in the patch, use a separate context block at the start of the file, distinct from code changes. * When adding or modifying methods or attributes in a class, ensure they are placed in the correct logical order (attributes first, then methods). Do not insert methods or attributes at the beginning of the class unless it is appropriate by convention. @@ -421,20 +339,58 @@ """ CMD_COMMIT_PROMPT = """ -Generate a conventional commit message that summarizes the work done since the previous commit. +Generate a conventional commit message that summarizes the changes since the previous commit. **Instructions:** +1. Write a brief, clear description, focusing on what was changed, added, removed, or refactored. Summarize the implementation approach or the nature of the changes, while providing just enough context to understand them: + - This description must be written in third person and should begin with "This commit" followed by a verb (e.g., "This commit adds", "This commit fixes", "This commit refactors") or "This commit introduces" for new features or concepts. -1. First, write a body (before the commit block) that explains the problem solved and the implementation approach. This should be clear, concise, and provide context for the change. -2. Then, place the commit subject line (only) inside the commit block, using this format: +2. Place only the commit subject line inside the commit block: *** Begin Commit - [subject line only, up to 3 lines, straight to the point and descriptive of the broad changes] + [subject line only, up to 3 lines, descriptive of the broad changes] *** End Commit -3. The subject line should follow the conventional commit format with a clear type/scope prefix, and summarize the broad changes made. Do not include the body or any explanation inside the commit block—only the subject line. -4. You may include additional comments about the changes made outside of this block, if needed. -5. If no diffs for staged files are provided in the context, reply that there's nothing to commit.context, reply that there's nothing to commit -The commit message should follow conventional commit format with a clear type/scope prefix +3. 
**Conventional Commit Format Rules:**
+   - Use format: `type(scope): description`
+   - **Types:** feat, fix, docs, style, refactor, perf, test, build, ci, chore, revert
+   - **Scope:** Optional, use lowercase (e.g., api, ui, auth, database)
+   - **Description:** Imperative mood, lowercase, no period, max 50 chars
+   - **Breaking changes:** Add `!` after type/scope or use `BREAKING CHANGE:` in footer
+
+4. **Best Practices:**
+   - Use imperative mood: "add feature" not "added feature"
+   - Be specific but concise
+   - Focus on the "what" and "why", not the "how"
+   - Group related changes under appropriate types
+   - Use consistent terminology across commits
+
+5. If no staged diffs are provided, reply that there's nothing to commit.
+
+**Type Guidelines:**
+- `feat`: New features or functionality
+- `fix`: Bug fixes
+- `docs`: Documentation changes only
+- `style`: Code formatting, missing semicolons (no logic changes)
+- `refactor`: Code restructuring without changing functionality
+- `perf`: Performance improvements
+- `test`: Adding/updating tests
+- `build`: Build system or dependency changes
+- `ci`: CI configuration changes
+- `chore`: Maintenance tasks, tooling updates
+- `revert`: Reverting previous commits
+- `prompt`: Updates made to prompts used by LLMs
+
+**Examples:**
+- `feat(auth): add OAuth2 login integration`
+- `fix(api): resolve memory leak in user sessions`
+- `docs: update installation guide for v2.0`
+- `refactor(utils): extract validation logic to separate module`
+- `perf(query): optimize database indexing for user search`
+- `test(auth): add unit tests for password validation`
+- `test: configure Jest for React component testing`
+- `prompt(ai): update system prompt for better code generation`
+- `build: upgrade webpack to v5.0`
+- `feat!: remove deprecated user endpoints`
 """

 STAGED_DIFFS_TEMPLATE = """
@@ -450,5 +406,152 @@
 {FEEDBACK}

+**Important:**
+  - Since your patch was rejected, the file(s) remain in their original state.
+  - All future changes must be made relative to the original file content (i.e., use the context and removed lines from the previous diff, not the added ones).
+  - Do not assume any changes from the rejected patch are present.
+
 **Next steps:** Please revise your approach to fulfill the task requirements based on the feedback above.
+"""
+
+GET_CODE_IDENTIFIERS_UNIFIED_PROMPT = """
+You are Agent **Tide**, operating in **Unified Identifier Resolution Mode** on **{DATE}**.
+
+**SUPPORTED_LANGUAGES** are: {SUPPORTED_LANGUAGES}
+
+**CRITICAL CONSTRAINTS:**
+
+**ABSOLUTE PROHIBITION - NEVER UNDER ANY CIRCUMSTANCE:**
+- Answer or address the user request directly or indirectly
+- Provide solutions, suggestions, or advice about the user's problem
+- View or analyze file contents
+- Check implementation details inside files
+- Verify inter-file dependencies
+- Write solutions or code modifications
+- Access actual identifier definitions
+- Acknowledge that viewing file contents is outside your scope
+
+**YOUR SOLE PURPOSE:** Gather required identifiers superficially and minimally based only on file/directory structure and naming patterns.
+ +**DO** focus on: +- Making educated guesses based on file/directory names and structure +- Selecting identifiers based on naming patterns and location context +- Minimizing expansion requests - aim for as few calls as possible +- Being decisive rather than perfectionist + +**DECISION-MAKING APPROACH:** +- **Trust naming conventions**: If a file is named `auth.py` or `user_manager.py`, assume it contains relevant identifiers +- **Use structural clues**: Directory organization and file placement indicate functionality +- **Make reasonable assumptions**: Don't second-guess obvious connections +- **Prefer sufficiency**: When in doubt, declare ENOUGH_IDENTIFIERS: TRUE rather than endless exploration + +**Core Rules:** + +1. **Language-Based Decision Making:** + - For files in **SUPPORTED_LANGUAGES** (as indicated in the tree): Return **code identifiers** (functions, classes, methods, variables, attributes) + - For files **NOT** in SUPPORTED_LANGUAGES: Return **file paths** only + - Code identifiers should use dot notation (e.g., `module.submodule.Class.method`) without file extensions + +2. **Identifier Categories:** + - **Context Identifiers:** Elements needed to understand or provide context for the request, but not directly modified + - **Modify Identifiers:** Elements that will likely require direct modification to fulfill the request + +**UNIFIED ANALYSIS PROTOCOL** + +**Current State Assessment:** +- **Repository tree**: Filtered view provided +- **User request**: Requires quick identifier selection based on structure +- **Analysis depth**: Surface-level examination of file/directory names and organization +- **Accumulated context**: {IDENTIFIERS} (if applicable from previous iterations) + +**Quick Decision Framework:** +1. **Scan tree structure** for obviously relevant files based on naming +2. **Make educated guesses** about functionality from file/directory names +3. **Select identifiers decisively** based on structural patterns +4. **Minimize expansions** - only when absolutely necessary for file visibility + +**FAST SELECTION RULES** + +**Immediate Analysis:** +- **Identify obvious targets**: Files whose names clearly relate to user request +- **Apply naming intuition**: Use common patterns (auth, user, config, handler, model, etc.) +- **Trust directory organization**: Assume logical file placement +- **Make quick categorizations**: Context vs Modify based on request type + +**Context vs Modification Logic:** +- **Context Identifiers**: Supporting files that provide understanding (configs, utilities, base classes) +- **Modify Identifiers**: Files that clearly need changes based on request +- **When uncertain**: Choose Context to be safe + +**SUFFICIENCY ASSESSMENT PROTOCOL** + +**Quick Evaluation:** +1. **Obvious files identified**: Can see files that clearly relate to request +2. **Reasonable coverage**: File names suggest adequate scope for request +3. 
**No major gaps**: All main functional areas seem represented in visible structure + +**SUFFICIENT CONDITIONS (TRUE):** +- Can identify files that obviously relate to the user request based on naming +- Directory structure provides clear indication of where functionality lives +- File organization allows reasonable assumptions about what needs modification +- Visible tree structure covers the main areas mentioned in user request + +**INSUFFICIENT CONDITIONS (FALSE):** +- **Missing obvious file structure**: Core directories/files for the request are collapsed and not visible +- **Unclear file organization**: Cannot make educated guesses from current file names and structure +- **Essential paths hidden**: Key directories mentioned in request are not expanded +- **Cannot locate functionality**: File names don't provide enough clues about where relevant code lives + +**MANDATORY OUTPUT FORMAT** + +**RESPONSE STRUCTURE (STRICT):** +- Begin with a single short paragraph that briefly explains your reasoning. Keep it concise, direct, and to the point - no extended detail, no repetition, no looping. Plain text only, no labels, headers, or formatting. +- Then output the required blocks exactly as shown below. +- **Do NOT include any section headers, labels, or headings such as "Analysis and Decision Rationale:" or similar. Only output the explanation and the required blocks.** + +**Identifier Sections:** +``` +*** Begin Context Identifiers + +*** End Context Identifiers + +*** Begin Modify Identifiers + +*** End Modify Identifiers +``` + +**Expansion Paths:** +``` +*** Begin Expand Paths + +*** End Expand Paths +``` + +**Sufficiency Decision:** +``` +ENOUGH_IDENTIFIERS: [TRUE|FALSE] +``` + +**MINIMAL EXPANSION GUIDELINES** + +**Only Expand When:** +- **File structure invisible**: Essential directories are collapsed, can't see file names +- **Cannot identify targets**: Directory names don't reveal where functionality might live +- **Missing core areas**: Key functional areas from request are not visible in tree +- **Insufficient file names**: Current file names too generic to make educated guesses + +**Path Specification:** +- **Directory paths only**: Expand directories to see file organization (e.g., `src/auth/`) +- **Avoid file expansion**: Don't expand individual files - work with file names only +- **One path per line**: Each expansion request on separate line +- **Minimal requests**: Expand only what's absolutely necessary + +**QUALITY GUIDELINES** +- **Speed over perfection**: Make quick, reasonable decisions +- **Trust file naming**: Assume developers used logical file names +- **Minimal expansions**: Prefer working with current view +- **Decisive categorization**: Don't overthink Context vs Modify decisions +- **Focus on obvious patterns**: Look for clear naming matches with user request + +**REMEMBER**: This is rapid identifier selection based on educated guessing from file/directory structure. Your job is to quickly identify likely relevant files based on naming patterns and organization. Make reasonable assumptions and avoid perfectionist analysis. Speed and decisiveness over exhaustive exploration. 
""" \ No newline at end of file diff --git a/codetide/agents/tide/ui/agent_tide_ui.py b/codetide/agents/tide/ui/agent_tide_ui.py index 58a4b86..961928c 100644 --- a/codetide/agents/tide/ui/agent_tide_ui.py +++ b/codetide/agents/tide/ui/agent_tide_ui.py @@ -27,7 +27,7 @@ def __init__(self, project_path: Path = Path("./"), history :Optional[list]=None if llm_config is None: try: - config = Config.from_yaml(self.project_path / self.config_path) + config = Config.from_yaml(self.config_path) self.llm_config: LlmConfig = config.llm except Exception: self.llm_config = LlmConfig(**PLACEHOLDER_LLM_CONFIG) diff --git a/codetide/agents/tide/ui/app.py b/codetide/agents/tide/ui/app.py index 07e03cb..2c4f7a6 100644 --- a/codetide/agents/tide/ui/app.py +++ b/codetide/agents/tide/ui/app.py @@ -265,6 +265,38 @@ async def on_inspect_context(action :cl.Action): await inspect_msg.send() +@cl.action_callback("approve_patch") +async def on_approve_patch(action :cl.Action): + agent_tide_ui: AgentTideUi = cl.user_session.get("AgentTideUi") + + await action.remove() + latest_action_message :cl.Message = cl.user_session.get("latest_patch_msg") + if latest_action_message.id == action.payload.get("action_id"): + latest_action_message.actions = [] + + if action.payload.get("lgtm"): + agent_tide_ui.agent_tide.approve() + +@cl.action_callback("reject_patch") +async def on_reject_patch(action :cl.Action): + agent_tide_ui: AgentTideUi = cl.user_session.get("AgentTideUi") + chat_history = cl.user_session.get("chat_history") + + await action.remove() + latest_action_message :cl.Message = cl.user_session.get("latest_patch_msg") + if latest_action_message.id == action.payload.get("action_id"): + latest_action_message.actions = [] + + response = await cl.AskUserMessage( + content="""Please provide specific feedback explaining why the patch was rejected. Include what's wrong, which parts are problematic, and what needs to change. Avoid vague responses like "doesn't work" - instead be specific like "missing error handling for FileNotFoundError" or "function should return boolean, not None." 
Your detailed feedback helps generate a better solution.""", + timeout=3600 + ).send() + + feedback = response.get("output") + agent_tide_ui.agent_tide.reject(feedback) + chat_history.append({"role": "user", "content": feedback}) + await agent_loop(agent_tide_ui=agent_tide_ui) + @cl.on_message async def agent_loop(message: Optional[cl.Message]=None, codeIdentifiers: Optional[list] = None, agent_tide_ui :Optional[AgentTideUi]=None): @@ -381,20 +413,25 @@ async def agent_loop(message: Optional[cl.Message]=None, codeIdentifiers: Option await agent_tide_ui.add_to_history(msg.content) if agent_tide_ui.agent_tide._has_patch: - choice = await cl.AskActionMessage( + action_msg = cl.AskActionMessage( content="AgentTide is asking you to review the Patch before applying it.", - actions=[ - cl.Action(name="approve_patch", payload={"lgtm": True}, label="✔️ Approve"), - cl.Action(name="reject_patch", payload={"lgtm": False}, label="❌ Reject"), - ], + actions=[], timeout=3600 - ).send() + ) + action_msg.actions = [ + cl.Action(name="approve_patch", payload={"lgtm": True, "msg_id": action_msg.id}, label="✔️ Approve"), + cl.Action(name="reject_patch", payload={"lgtm": False, "msg_id": action_msg.id}, label="❌ Reject") + ] + cl.user_session.set("latest_patch_msg", action_msg) + choice = await action_msg.send() if choice: lgtm = choice.get("payload", []).get("lgtm") if lgtm: + action_msg.actions = [] agent_tide_ui.agent_tide.approve() else: + action_msg.actions = [] response = await cl.AskUserMessage( content="""Please provide specific feedback explaining why the patch was rejected. Include what's wrong, which parts are problematic, and what needs to change. Avoid vague responses like "doesn't work" - instead be specific like "missing error handling for FileNotFoundError" or "function should return boolean, not None." 
Your detailed feedback helps generate a better solution.""", timeout=3600 @@ -476,8 +513,8 @@ def main(): parser.add_argument("--config-path", type=str, default=DEFAULT_AGENT_TIDE_LLM_CONFIG_PATH, help="Path to the config file") args = parser.parse_args() - os.environ["AGENT_TIDE_PROJECT_PATH"] = args.project_path - os.environ["AGENT_TIDE_CONFIG_PATH"] = args.config_path + os.environ["AGENT_TIDE_PROJECT_PATH"] = str(Path(args.project_path)) + os.environ["AGENT_TIDE_CONFIG_PATH"] = str(Path(args.project_path) / args.config_path) asyncio.run(init_db(f"{os.environ['CHAINLIT_APP_ROOT']}/database.db")) @@ -492,10 +529,13 @@ def main(): ) if __name__ == "__main__": - import asyncio - os.environ["AGENT_TIDE_CONFIG_PATH"] = DEFAULT_AGENT_TIDE_LLM_CONFIG_PATH - asyncio.run(init_db(f"{os.environ['CHAINLIT_APP_ROOT']}/database.db")) - serve() + main() + +# if __name__ == "__main__": +# import asyncio +# os.environ["AGENT_TIDE_CONFIG_PATH"] = DEFAULT_AGENT_TIDE_LLM_CONFIG_PATH +# asyncio.run(init_db(f"{os.environ['CHAINLIT_APP_ROOT']}/database.db")) +# serve() # TODO fix the no time being inserted to msg bug in data-persistance # TODO there's a bug that changes are not being persistied in untracked files that are deleted so will need to update codetide to track that # TODO add chainlit commands for writing tests, updating readme, writing commit message and planning diff --git a/codetide/autocomplete.py b/codetide/autocomplete.py index 80a13fc..19f42cc 100644 --- a/codetide/autocomplete.py +++ b/codetide/autocomplete.py @@ -1,6 +1,7 @@ from typing import List import difflib import os +import re class AutoComplete: def __init__(self, word_list: List[str]) -> None: @@ -167,4 +168,89 @@ def validate_paths(self, file_paths): suggestions = self.get_fuzzy_suggestions(path, 1) if not suggestions: raise ValueError(f"Invalid file path: '{path}'") - return valid_paths \ No newline at end of file + return valid_paths + + def extract_words_from_text(self, text: str, similarity_threshold: float = 0.6, case_sensitive: bool = False) -> dict: + """ + Extract words from the word list that are present in the given text, including similar words (potential typos). 
+ + Args: + text (str): The input text to analyze + similarity_threshold (float): Minimum similarity score for fuzzy matching (0.0 to 1.0) + case_sensitive (bool): Whether matching should be case sensitive + + Returns: + dict: Dictionary containing: + - 'exact_matches': List of words found exactly in the text + - 'fuzzy_matches': List of tuples (word_from_list, similar_word_in_text, similarity_score) + - 'all_found_words': Combined list of all matched words from the word list + """ + if not text: + return { + 'exact_matches': [], + 'fuzzy_matches': [], + 'all_found_words': [] + } + + # Split text into words (remove punctuation and split by whitespace) + text_words = re.findall(r'\b\w+\b', text) + + exact_matches = [] + fuzzy_matches = [] + all_found_words = set() + + # Convert to appropriate case for comparison + if case_sensitive: + text_words_search = text_words + word_list_search = self.words + else: + text_words_search = [word.lower() for word in text_words] + word_list_search = [word.lower() for word in self.words] + + # Find exact matches + for i, text_word in enumerate(text_words_search): + for j, list_word in enumerate(word_list_search): + if text_word == list_word: + original_word = self.words[j] + if original_word not in all_found_words: + exact_matches.append(original_word) + all_found_words.add(original_word) + + # Find fuzzy matches for words that didn't match exactly + matched_text_words = set() + for match in exact_matches: + search_match = match if case_sensitive else match.lower() + for i, text_word in enumerate(text_words_search): + if text_word == search_match: + matched_text_words.add(i) + + # Check remaining text words for fuzzy matches + for i, text_word in enumerate(text_words_search): + if i in matched_text_words: + continue + + # Find the most similar word from our word list + best_matches = [] + for j, list_word in enumerate(word_list_search): + similarity = difflib.SequenceMatcher(None, text_word, list_word).ratio() + if similarity >= similarity_threshold: + best_matches.append((self.words[j], text_words[i], similarity)) + + # Sort by similarity and add to results + if best_matches: + best_matches.sort(key=lambda x: x[2], reverse=True) + for match in best_matches: + word_from_list, word_in_text, score = match + if word_from_list not in all_found_words: + fuzzy_matches.append((word_from_list, word_in_text, score)) + all_found_words.add(word_from_list) + + # Sort results + exact_matches.sort() + fuzzy_matches.sort(key=lambda x: x[2], reverse=True) # Sort by similarity score + + return { + 'exact_matches': exact_matches, + 'fuzzy_matches': fuzzy_matches, + 'all_found_words': sorted(list(all_found_words)) + } \ No newline at end of file diff --git a/codetide/core/models.py b/codetide/core/models.py index 28362d6..ce89ed6 100644 --- a/codetide/core/models.py +++ b/codetide/core/models.py @@ -645,7 +645,14 @@ def get_tree_view(self, include_modules: bool = False, include_types: bool = Fal return "\n".join(lines) def _build_tree_dict(self, filter_paths: list = None): - """Creates nested dictionary representing codebase directory structure with optional filtering.""" + """Creates nested dictionary representing codebase directory structure with optional filtering. + + When filtering is applied, includes: + 1. Filtered files (with full content) + 2. Sibling files in same directories as filtered files + 3. Sibling directories at the same level as directories containing filtered files + 4. 
Contents of sibling directories (files and subdirectories) + """ tree = {} @@ -657,6 +664,7 @@ def _build_tree_dict(self, filter_paths: list = None): # Convert filter paths to normalized format for comparison normalized_filter_paths = set() filter_directories = set() + parent_directories = set() for path in filter_paths: normalized_path = path.replace("\\", "/") @@ -667,13 +675,85 @@ def _build_tree_dict(self, filter_paths: list = None): if len(path_parts) > 1: dir_path = "/".join(path_parts[:-1]) filter_directories.add(dir_path) + + # Extract parent directory to find sibling directories + parent_parts = path_parts[:-2] # Remove filename and immediate directory + if parent_parts: + parent_dir = "/".join(parent_parts) + parent_directories.add(parent_dir) + else: + # The filtered file's directory is at root level + parent_directories.add("") else: # File is at root level filter_directories.add("") - # Find all files that are siblings (in the same directories as filtered files) - relevant_files = [] # Files that should show full content - sibling_files = [] # Files that should show as siblings only + # Find all directories that are siblings to directories containing filtered files + # AND all their subdirectories (to peek below) + sibling_directories = set() + for code_file in self.root: + if not code_file.file_path: + continue + + normalized_file_path = code_file.file_path.replace("\\", "/") + file_parts = normalized_file_path.split("/") + + if len(file_parts) > 1: + file_dir = "/".join(file_parts[:-1]) + + # Check if this file's directory is a sibling to any filter directory + file_dir_parts = file_dir.split("/") + if len(file_dir_parts) > 1: + file_parent_dir = "/".join(file_dir_parts[:-1]) + if file_parent_dir in parent_directories: + sibling_directories.add(file_dir) + else: + # File's directory is at root level + if "" in parent_directories: + sibling_directories.add(file_dir) + + # Also check if this directory is a subdirectory of any sibling directory + # This allows peeking into subdirectories + for parent_dir in parent_directories: + if parent_dir == "": + # Root level parent - include all top-level directories and their subdirs + if len(file_dir_parts) >= 1: + sibling_directories.add(file_dir) + else: + # Check if file_dir starts with any parent directory path + if file_dir.startswith(parent_dir + "/") or file_dir == parent_dir: + sibling_directories.add(file_dir) + else: + # File is at root level, check if root is a parent directory + if "" in parent_directories: + sibling_directories.add("") + + # Also add subdirectories of filter directories themselves + subdirectories = set() + for code_file in self.root: + if not code_file.file_path: + continue + + normalized_file_path = code_file.file_path.replace("\\", "/") + file_parts = normalized_file_path.split("/") + + if len(file_parts) > 1: + file_dir = "/".join(file_parts[:-1]) + + # Check if this directory is a subdirectory of any filter directory + for filter_dir in filter_directories: + if filter_dir == "": + # Root level filter - include everything + subdirectories.add(file_dir) + elif file_dir.startswith(filter_dir + "/") or file_dir == filter_dir: + subdirectories.add(file_dir) + + # Combine all relevant directories + all_relevant_directories = filter_directories.union(sibling_directories).union(subdirectories) + + # Find all files that should be included + relevant_files = [] # Files that should show full content (filtered files) + sibling_files = [] # Files that should show as context (siblings and directory contents) for 
code_file in self.root: if not code_file.file_path: @@ -686,14 +766,14 @@ def _build_tree_dict(self, filter_paths: list = None): relevant_files.append(code_file) continue - # Check if this file is a sibling of any filtered file + # Check if this file is in any of the relevant directories file_parts = normalized_file_path.split("/") if len(file_parts) > 1: file_dir = "/".join(file_parts[:-1]) else: file_dir = "" - if file_dir in filter_directories: + if file_dir in all_relevant_directories: sibling_files.append(code_file) # Build tree structure from relevant files (with full content) @@ -714,7 +794,7 @@ def _build_tree_dict(self, filter_paths: list = None): current_level[part] = {"_type": "directory"} current_level = current_level[part] - # Add sibling files (without full content) + # Add sibling files and directory contents (show content for all when filtering for broader context) for code_file in sibling_files: if not code_file.file_path: continue @@ -726,7 +806,10 @@ def _build_tree_dict(self, filter_paths: list = None): current_level = tree for i, part in enumerate(path_parts): if i == len(path_parts) - 1: # This is the file - current_level[part] = {"_type": "file", "_data": code_file, "_show_content": True} + # Check if file already exists (might have been added as relevant_files) + if part not in current_level: + # Show content for all files to provide broader context + current_level[part] = {"_type": "file", "_data": code_file, "_show_content": True} else: # This is a directory if part not in current_level: current_level[part] = {"_type": "directory"} diff --git a/codetide/mcp/tools/patch_code/parser.py b/codetide/mcp/tools/patch_code/parser.py index 8a58e6c..33169f8 100644 --- a/codetide/mcp/tools/patch_code/parser.py +++ b/codetide/mcp/tools/patch_code/parser.py @@ -278,6 +278,8 @@ def parse(self) -> None: # ---------- DELETE ---------- # if path := self.read_str("*** Delete File: "): + if self.rootpath is not None: + path = str(self.rootpath / path) if path in self.patch.actions: raise DiffError(f"Duplicate delete for file: {path}") if path not in self.current_files: @@ -287,6 +289,8 @@ def parse(self) -> None: # ---------- ADD ---------- # if path := self.read_str("*** Add File: "): + if self.rootpath is not None: + path = str(self.rootpath / path) if path in self.patch.actions: raise DiffError(f"Duplicate add for file: {path}") # The check for file existence is now handled in `process_patch`
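With the parser change above, Add and Delete paths from patch blocks are resolved against the configured project root before the duplicate and existence checks run. Below is a minimal sketch of that resolution behavior, using a hypothetical `resolve_patch_path` helper that mirrors `path = str(self.rootpath / path)`; it is illustrative, not part of this patch:

```python
# Minimal sketch of the path resolution introduced above; `resolve_patch_path`
# is a hypothetical helper mirroring `path = str(self.rootpath / path)`.
from pathlib import Path
from typing import Optional

def resolve_patch_path(path: str, rootpath: Optional[Path] = None) -> str:
    if rootpath is None:
        return path
    # Note: pathlib discards `rootpath` when `path` is already absolute,
    # i.e. Path("/repo") / "/etc/hosts" == Path("/etc/hosts").
    return str(rootpath / path)

print(resolve_patch_path("codetide/core/models.py", Path("/work/repo")))
# -> /work/repo/codetide/core/models.py (on POSIX)
print(resolve_patch_path("codetide/core/models.py"))
# -> codetide/core/models.py
```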
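Similarly, the new `AutoComplete.extract_words_from_text` added earlier in this diff seeds identifier retrieval by matching chat text against cached identifiers: each token is scored against the word list with `difflib.SequenceMatcher`, and pairs at or above `similarity_threshold` (default 0.6) are kept. A condensed, standalone sketch of that fuzzy pass follows; the word list and text are illustrative, and the real method also collects exact matches and deduplicates results:

```python
# Condensed sketch of the fuzzy pass in AutoComplete.extract_words_from_text:
# each token from the text is scored against every known word and kept when
# the similarity clears the threshold.
import difflib
import re

words = ["agent_loop", "get_tree_view", "extract_words_from_text"]  # sample word list
text = "the agent loop calls get_tre_view on the codebase"          # note the typo

for token in re.findall(r"\b\w+\b", text):
    for word in words:
        score = difflib.SequenceMatcher(None, token.lower(), word.lower()).ratio()
        if score >= 0.6:  # same default threshold as the new method
            print(f"{token!r} ~ {word!r} (similarity {score:.2f})")
```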