From 3ed6cffc64ba98afaa8594785e8caa65666eca4d Mon Sep 17 00:00:00 2001
From: BrunoV21
Date: Wed, 17 Sep 2025 23:04:43 +0100
Subject: [PATCH 1/6] feat(agent,ui): add direct_mode command to skip repo
 analysis

---
 codetide/agents/tide/agent.py            | 184 ++++++++++++-----------
 codetide/agents/tide/ui/agent_tide_ui.py |   9 +-
 2 files changed, 105 insertions(+), 88 deletions(-)

diff --git a/codetide/agents/tide/agent.py b/codetide/agents/tide/agent.py
index 295fb69..4e0974c 100644
--- a/codetide/agents/tide/agent.py
+++ b/codetide/agents/tide/agent.py
@@ -58,6 +58,7 @@ class AgentTide(BaseModel):
     _last_code_identifers :Optional[Set[str]]=set()
     _last_code_context :Optional[str] = None
     _has_patch :bool=False
+    _direct_mode :bool=False

     model_config = ConfigDict(arbitrary_types_allowed=True)

@@ -138,92 +139,103 @@ async def agent_loop(self, codeIdentifiers :Optional[List[str]]=None):
             ...
         else:
             autocomplete = AutoComplete(self.tide.cached_ids)
-            matches = autocomplete.extract_words_from_text("\n\n".join(self.history))
-
-            # --- Begin Unified Identifier Retrieval ---
-            identifiers_accum = set(matches["all_found_words"]) if codeIdentifiers is None else set(codeIdentifiers + matches["all_found_words"])
-            modify_accum = set()
-            reasoning_accum = []
-            repo_tree = None
-            smart_search_attempts = 0
-            max_smart_search_attempts = 3
-            done = False
-            previous_reason = None
-
-            while not done:
-                expand_paths = ["./"]
-                # 1. SmartCodeSearch to filter repo tree
-                if repo_tree is None or smart_search_attempts > 0:
-                    repo_history = self.history
-                    if previous_reason:
-                        repo_history += [previous_reason]
+            if self._direct_mode:
+                self.contextIdentifiers = None
+                exact_matches = autocomplete.extract_words_from_text(self.history[-1])["all_found_words"]
+                self.modifyIdentifiers = self.tide._as_file_paths(exact_matches)
+                codeIdentifiers = self.modifyIdentifiers
+                self._direct_mode = False
+
+            else:
+                matches = autocomplete.extract_words_from_text("\n\n".join(self.history))
+
+                # --- Begin Unified Identifier Retrieval ---
+                identifiers_accum = set(matches["all_found_words"]) if codeIdentifiers is None else set(codeIdentifiers + matches["all_found_words"])
+                modify_accum = set()
+                reasoning_accum = []
+                repo_tree = None
+                smart_search_attempts = 0
+                max_smart_search_attempts = 3
+                done = False
+                previous_reason = None
+
+                while not done:
+                    expand_paths = ["./"]
+                    # 1. SmartCodeSearch to filter repo tree
+                    if repo_tree is None or smart_search_attempts > 0:
+                        repo_history = self.history
+                        if previous_reason:
+                            repo_history += [previous_reason]
+
+                        repo_tree = await self.get_repo_tree_from_user_prompt(self.history, include_modules=bool(smart_search_attempts), expand_paths=expand_paths)
+
+                    # 2. Single LLM call with unified prompt
+                    # Pass accumulated identifiers for context if this isn't the first iteration
+                    accumulated_context = "\n".join(
+                        sorted((identifiers_accum or set()) | (modify_accum or set()))
+                    ) if (identifiers_accum or modify_accum) else ""
+
+                    unified_response = await self.llm.acomplete(
+                        self.history,
+                        system_prompt=[GET_CODE_IDENTIFIERS_UNIFIED_PROMPT.format(
+                            DATE=TODAY,
+                            SUPPORTED_LANGUAGES=SUPPORTED_LANGUAGES,
+                            IDENTIFIERS=accumulated_context
+                        )],
+                        prefix_prompt=repo_tree,
+                        stream=False
+                    )
+                    print(f"{unified_response=}")
+
+                    # Parse the unified response
+                    contextIdentifiers = parse_blocks(unified_response, block_word="Context Identifiers", multiple=False)
+                    modifyIdentifiers = parse_blocks(unified_response, block_word="Modify Identifiers", multiple=False)
+                    expandPaths = parse_blocks(unified_response, block_word="Expand Paths", multiple=False)
+
+                    # Extract reasoning (everything before the first "*** Begin")
+                    reasoning_parts = unified_response.split("*** Begin")
+                    if reasoning_parts:
+                        reasoning_accum.append(reasoning_parts[0].strip())
+                        previous_reason = reasoning_accum[-1]

-                repo_tree = await self.get_repo_tree_from_user_prompt(self.history, include_modules=bool(smart_search_attempts), expand_paths=expand_paths)
-
-                # 2. Single LLM call with unified prompt
-                # Pass accumulated identifiers for context if this isn't the first iteration
-                accumulated_context = "\n".join(
-                    sorted((identifiers_accum or set()) | (modify_accum or set()))
-                ) if (identifiers_accum or modify_accum) else ""
-
-                unified_response = await self.llm.acomplete(
-                    self.history,
-                    system_prompt=[GET_CODE_IDENTIFIERS_UNIFIED_PROMPT.format(
-                        DATE=TODAY,
-                        SUPPORTED_LANGUAGES=SUPPORTED_LANGUAGES,
-                        IDENTIFIERS=accumulated_context
-                    )],
-                    prefix_prompt=repo_tree,
-                    stream=False
-                )
-
-                # Parse the unified response
-                contextIdentifiers = parse_blocks(unified_response, block_word="Context Identifiers", multiple=False)
-                modifyIdentifiers = parse_blocks(unified_response, block_word="Modify Identifiers", multiple=False)
-                expandPaths = parse_blocks(unified_response, block_word="Expand Paths", multiple=False)
-
-                # Extract reasoning (everything before the first "*** Begin")
-                reasoning_parts = unified_response.split("*** Begin")
-                if reasoning_parts:
-                    reasoning_accum.append(reasoning_parts[0].strip())
-                    previous_reason = reasoning_accum[-1]
-
-                # Accumulate identifiers
-                if contextIdentifiers:
-                    if smart_search_attempts == 0:
-                        ### clean wrongly mismtatched idenitifers
-                        identifiers_accum = set()
-                    for ident in contextIdentifiers.splitlines():
-                        if ident := self.get_valid_identifier(autocomplete, ident.strip()):
-                            identifiers_accum.add(ident)
-
-                if modifyIdentifiers:
-                    for ident in modifyIdentifiers.splitlines():
-                        if ident := self.get_valid_identifier(autocomplete, ident.strip()):
-                            modify_accum.add(ident.strip())
-
-                if expandPaths:
-                    expand_paths = [
-                        path for ident in expandPaths if (path := self.get_valid_identifier(autocomplete, ident.strip()))
-                    ]
-
-                # Check if we have enough identifiers (unified prompt includes this decision)
-                if "ENOUGH_IDENTIFIERS: TRUE" in unified_response.upper():
-                    done = True
-                else:
-                    smart_search_attempts += 1
-                    if smart_search_attempts >= max_smart_search_attempts:
-                        done = True
-
-            # Finalize identifiers
-            self.reasoning = "\n\n".join(reasoning_accum)
-            self.contextIdentifiers = list(identifiers_accum) if identifiers_accum else None
-            self.modifyIdentifiers = list(modify_accum) if modify_accum else None
-
-            codeIdentifiers = self.contextIdentifiers or []
-            if self.modifyIdentifiers:
-                self.modifyIdentifiers = self.tide._as_file_paths(self.modifyIdentifiers)
-                codeIdentifiers.extend(self.modifyIdentifiers)
+                    # Accumulate identifiers
+                    if contextIdentifiers:
+                        if smart_search_attempts == 0:
+                            ### clean wrongly mismatched identifiers
+                            identifiers_accum = set()
+                        for ident in contextIdentifiers.splitlines():
+                            if ident := self.get_valid_identifier(autocomplete, ident.strip()):
+                                identifiers_accum.add(ident)
+
+                    if modifyIdentifiers:
+                        for ident in modifyIdentifiers.splitlines():
+                            if ident := self.get_valid_identifier(autocomplete, ident.strip()):
+                                modify_accum.add(ident.strip())
+
+                    if expandPaths:
+                        expand_paths = [
+                            path for ident in expandPaths.splitlines() if (path := self.get_valid_identifier(autocomplete, ident.strip()))
+                        ]
+
+                    # Check if we have enough identifiers (unified prompt includes this decision)
+                    if "ENOUGH_IDENTIFIERS: TRUE" in unified_response.upper():
+                        done = True
+                    else:
+                        smart_search_attempts += 1
+                        if smart_search_attempts >= max_smart_search_attempts:
+                            done = True
+
+                # Finalize identifiers
+                self.reasoning = "\n\n".join(reasoning_accum)
+                self.contextIdentifiers = list(identifiers_accum) if identifiers_accum else None
+                self.modifyIdentifiers = list(modify_accum) if modify_accum else None
+
+                codeIdentifiers = self.contextIdentifiers or []
+                if self.modifyIdentifiers:
+                    self.modifyIdentifiers = self.tide._as_file_paths(self.modifyIdentifiers)
+                    codeIdentifiers.extend(self.modifyIdentifiers)
+                # TODO preserve identifiers passed by the user
+                codeIdentifiers += matches["all_found_words"]
             # --- End Unified Identifier Retrieval ---

         if codeIdentifiers:
@@ -232,7 +244,7 @@ async def agent_loop(self, codeIdentifiers :Optional[List[str]]=None):
             if not codeContext:
                 codeContext = REPO_TREE_CONTEXT_PROMPT.format(REPO_TREE=self.tide.codebase.get_tree_view())

-            readmeFile = self.tide.get("README.md", as_string_list=True)
+            readmeFile = self.tide.get(["README.md"] + matches["all_found_words"], as_string_list=True)
             if readmeFile:
                 codeContext = "\n".join([codeContext, README_CONTEXT_PROMPT.format(README=readmeFile)])

@@ -431,5 +443,7 @@ async def _handle_commands(self, command :str) -> str:
         context = ""
         if command == "commit":
            context = await self.prepare_commit()
+        elif command == "direct_mode":
+            self._direct_mode = True

         return context
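A minimal sketch of the control flow the agent.py hunks above introduce. Names mirror the patch; the `agent` and `autocomplete` instances are assumed for illustration, not part of the commit:

    # One-shot direct mode, as wired above.
    autocomplete = AutoComplete(agent.tide.cached_ids)
    if agent._direct_mode:
        # Use only exact identifier mentions from the latest message and skip
        # the LLM-driven repo-tree search loop entirely.
        words = autocomplete.extract_words_from_text(agent.history[-1])["all_found_words"]
        code_identifiers = agent.tide._as_file_paths(words)
        agent._direct_mode = False  # reset so the next turn runs the full analysis
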
diff --git a/codetide/agents/tide/ui/agent_tide_ui.py b/codetide/agents/tide/ui/agent_tide_ui.py
index 961928c..2b9945d 100644
--- a/codetide/agents/tide/ui/agent_tide_ui.py
+++ b/codetide/agents/tide/ui/agent_tide_ui.py
@@ -43,7 +43,8 @@ def __init__(self, project_path: Path = Path("./"), history :Optional[list]=None
             "review": CMD_CODE_REVIEW_PROMPT,
             "test": CMD_WRITE_TESTS_PROMPT,
             "commit": CMD_COMMIT_PROMPT,
-            "brainstorm": CMD_BRAINSTORM_PROMPT
+            "brainstorm": CMD_BRAINSTORM_PROMPT,
+            "direct_mode": ""
         }

         self.session_id = session_id if session_id else ulid()
@@ -52,7 +53,8 @@ def __init__(self, project_path: Path = Path("./"), history :Optional[list]=None
             {"id": "test", "icon": "flask-conical", "description": "Test file(s) or object(s)"},
             {"id": "commit", "icon": "git-commit", "description": "Commit changed files"},
             {"id": "plan", "icon": "notepad-text-dashed", "description": "Create a step-by-step task plan"},
-            {"id": "brainstorm", "icon": "brain-circuit", "description": "Brainstorm and discuss solutions (no code generation)"}
+            {"id": "brainstorm", "icon": "brain-circuit", "description": "Brainstorm and discuss solutions (no code generation)"},
+            {"id": "direct_mode", "icon": "search-code", "description": "Skip repository analysis and jump straight into code generation with the specified context (identifiers or paths)"}
         ]

     async def load(self):
@@ -133,4 +135,5 @@ def settings(self):

     async def get_command_prompt(self, command :str)->Optional[str]:
         context = await self.agent_tide._handle_commands(command)
-        return f"{self.commands_prompts.get(command)} {context}"
+        return f"{self.commands_prompts.get(command)} {context}".strip()
+
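End to end, the command reaches the agent through get_command_prompt. A hedged usage sketch; construction arguments and the awaiting context are illustrative, only the attribute and method names come from the patch:

    # /direct_mode flips the one-shot flag; the next agent_loop run skips repo analysis.
    ui = AgentTideUI(project_path=Path("./"))
    await ui.get_command_prompt("direct_mode")   # calls _handle_commands, sets _direct_mode
    ui.agent_tide.history.append("Update codetide.autocomplete.AutoComplete.extract_words_from_text")
    await ui.agent_tide.agent_loop()             # exact matches only, mapped to file paths
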
"description": "Skip repository analysis and jump straight into code generation with the specified context (identifiers or paths)"} ] async def load(self): @@ -133,4 +135,5 @@ def settings(self): async def get_command_prompt(self, command :str)->Optional[str]: context = await self.agent_tide._handle_commands(command) - return f"{self.commands_prompts.get(command)} {context}" + return f"{self.commands_prompts.get(command)} {context}".strip() + From 4f8b781be91d3e2e639ef04589ad5764e6876279 Mon Sep 17 00:00:00 2001 From: BrunoV21 Date: Wed, 17 Sep 2025 23:08:48 +0100 Subject: [PATCH 2/6] prompt: restrict identifiers to code elements not package deps --- codetide/agents/tide/prompts.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/codetide/agents/tide/prompts.py b/codetide/agents/tide/prompts.py index a6c12d1..99b279f 100644 --- a/codetide/agents/tide/prompts.py +++ b/codetide/agents/tide/prompts.py @@ -457,8 +457,12 @@ - Code identifiers should use dot notation (e.g., `module.submodule.Class.method`) without file extensions 2. **Identifier Categories:** - - **Context Identifiers:** Elements needed to understand or provide context for the request, but not directly modified - - **Modify Identifiers:** Elements that will likely require direct modification to fulfill the request + - **Context Identifiers:** Only include identifiers that correspond to functions, classes, methods, variables, or attributes defined in the codebase. Do **not** include package names, import statements, or dependencies based solely on import/package presence—even if they are present in the accumulated context. + - **Modify Identifiers:** Only include identifiers that correspond to functions, classes, methods, variables, or attributes that will likely require direct modification. Do **not** include package names, import statements, or dependencies based solely on import/package presence—even if they are present in the accumulated context. + +3. **ABSOLUTE PROHIBITION ON DEPENDENCY INCLUSION:** + - Never include identifiers in the Context Identifiers or Modify Identifiers sections that represent only package imports, external dependencies, or modules that are not actual code elements (functions, classes, methods, variables, or attributes) defined in the codebase. + - Even if a package or import name is present in the accumulated context, do not include it unless it refers to a concrete function, class, method, variable, or attribute in the codebase. **UNIFIED ANALYSIS PROTOCOL** From 5b83c253a8e52ad8282dcf916004e8f4121d9343 Mon Sep 17 00:00:00 2001 From: BrunoV21 Date: Wed, 17 Sep 2025 23:34:00 +0100 Subject: [PATCH 3/6] feat(autocomplete): add max_matches_per_word to extract_words_from_text --- codetide/autocomplete.py | 50 ++++++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/codetide/autocomplete.py b/codetide/autocomplete.py index 19f42cc..f1f95f4 100644 --- a/codetide/autocomplete.py +++ b/codetide/autocomplete.py @@ -170,15 +170,24 @@ def validate_paths(self, file_paths): raise ValueError(f"Invalid file path: '{path}'") return valid_paths - def extract_words_from_text(self, text: str, similarity_threshold: float = 0.6, case_sensitive: bool = False) -> dict: + def extract_words_from_text( + self, + text: str, + similarity_threshold: float = 0.6, + case_sensitive: bool = False, + max_matches_per_word: int = None + ) -> dict: """ Extract words from the word list that are present in the given text, including similar words (potential typos). 
From 5b83c253a8e52ad8282dcf916004e8f4121d9343 Mon Sep 17 00:00:00 2001
From: BrunoV21
Date: Wed, 17 Sep 2025 23:34:00 +0100
Subject: [PATCH 3/6] feat(autocomplete): add max_matches_per_word to
 extract_words_from_text

---
 codetide/autocomplete.py | 50 ++++++++++++++++++++++++++--------------
 1 file changed, 33 insertions(+), 17 deletions(-)

diff --git a/codetide/autocomplete.py b/codetide/autocomplete.py
index 19f42cc..f1f95f4 100644
--- a/codetide/autocomplete.py
+++ b/codetide/autocomplete.py
@@ -170,15 +170,24 @@ def validate_paths(self, file_paths):
                 raise ValueError(f"Invalid file path: '{path}'")
         return valid_paths

-    def extract_words_from_text(self, text: str, similarity_threshold: float = 0.6, case_sensitive: bool = False) -> dict:
+    def extract_words_from_text(
+        self,
+        text: str,
+        similarity_threshold: float = 0.6,
+        case_sensitive: bool = False,
+        max_matches_per_word: int = None
+    ) -> dict:
         """
         Extract words from the word list that are present in the given text,
         including similar words (potential typos).
-
+        Optionally limit the number of matches returned per word found in the text.
+
         Args:
             text (str): The input text to analyze
             similarity_threshold (float): Minimum similarity score for fuzzy matching (0.0 to 1.0)
             case_sensitive (bool): Whether matching should be case sensitive
-
+            max_matches_per_word (int, optional): Maximum number of matches to return per word in the text.
+                If None, all matches are returned. If 1, only the top match per word is returned.
+
         Returns:
             dict: Dictionary containing:
                 - 'exact_matches': List of words found exactly in the text
                 - 'fuzzy_matches': List of (word, text_word, similarity) tuples for close matches
                 - 'all_found_words': Combined list of all matched words
         """
         if not self.words:
             return {
                 'exact_matches': [],
                 'fuzzy_matches': [],
                 'all_found_words': []
             }
-
+
         # Split text into words (remove punctuation and split by whitespace)
         text_words = re.findall(r'\b\w+\b', text)
-
+
         exact_matches = []
         fuzzy_matches = []
         all_found_words = set()
-
+
         # Convert to appropriate case for comparison
         if case_sensitive:
             text_words_search = text_words
             word_list_search = self.words
@@ -206,49 +215,56 @@
         else:
             text_words_search = [word.lower() for word in text_words]
             word_list_search = [word.lower() for word in self.words]
-
+
         # Find exact matches
         for i, text_word in enumerate(text_words_search):
+            per_word_matches = 0
             for j, list_word in enumerate(word_list_search):
                 if text_word == list_word:
                     original_word = self.words[j]
                     if original_word not in all_found_words:
                         exact_matches.append(original_word)
                         all_found_words.add(original_word)
-
+                    per_word_matches += 1
+                    if max_matches_per_word is not None and per_word_matches >= max_matches_per_word:
+                        break
+
         # Find fuzzy matches for words that didn't match exactly
         matched_text_words = set()
         for match in exact_matches:
             search_match = match if case_sensitive else match.lower()
             for i, text_word in enumerate(text_words_search):
                 if text_word == search_match:
-                    matched_text_words.add(i)
-
+                    matched_text_words.add(i)
+
         # Check remaining text words for fuzzy matches
         for i, text_word in enumerate(text_words_search):
             if i in matched_text_words:
                 continue
-
-            # Find the most similar word from our word list
+
+            # Find the most similar word(s) from our word list
             best_matches = []
             for j, list_word in enumerate(word_list_search):
                 similarity = difflib.SequenceMatcher(None, text_word, list_word).ratio()
                 if similarity >= similarity_threshold:
                     best_matches.append((self.words[j], text_words[i], similarity))
-
-            # Sort by similarity and add to results
+
+            # Sort by similarity and add up to max_matches_per_word to results
             if best_matches:
                 best_matches.sort(key=lambda x: x[2], reverse=True)
-                for match in best_matches:
+                matches_to_add = best_matches
+                if max_matches_per_word is not None:
+                    matches_to_add = best_matches[:max_matches_per_word]
+                for match in matches_to_add:
                     word_from_list, word_in_text, score = match
                     if word_from_list not in all_found_words:
                         fuzzy_matches.append((word_from_list, word_in_text, score))
                         all_found_words.add(word_from_list)
-
+
         # Sort results
         exact_matches.sort()
         fuzzy_matches.sort(key=lambda x: x[2], reverse=True)  # Sort by similarity score
-
+
         return {
             'exact_matches': exact_matches,
             'fuzzy_matches': fuzzy_matches,
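A short usage sketch of the new cap, assuming AutoComplete is constructed from a word list the way agent.py does above; the word list and text here are illustrative:

    from codetide.autocomplete import AutoComplete

    ac = AutoComplete(["AgentTide", "AgentTideUI", "agent_loop"])
    hits = ac.extract_words_from_text("wire AgentTyde into the loop", max_matches_per_word=1)
    # Both class ids clear the 0.6 similarity threshold for the typo "AgentTyde"
    # (roughly 0.89 and 0.80), but only the best-scoring candidate is kept:
    # hits["fuzzy_matches"] == [("AgentTide", "AgentTyde", 0.888...)]
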
a/codetide/__init__.py b/codetide/__init__.py
index fa98570..47bd818 100644
--- a/codetide/__init__.py
+++ b/codetide/__init__.py
@@ -100,7 +100,7 @@ def relative_filepaths(self)->List[str]:

     @property
     def cached_ids(self)->List[str]:
-        return self.codebase.unique_ids+self.relative_filepaths
+        return self.codebase.non_import_unique_ids+self.relative_filepaths

     @property
     def repo(self)->Optional[pygit2.Repository]:
diff --git a/codetide/core/models.py b/codetide/core/models.py
index ce89ed6..87602ef 100644
--- a/codetide/core/models.py
+++ b/codetide/core/models.py
@@ -1225,4 +1225,12 @@ def unique_ids(self)->List[str]:

         return list(self._cached_elements.keys())

+    @property
+    def non_import_unique_ids(self)->List[str]:
+
+        return [
+            non_import_id for non_import_id, value in self.cached_elements.items()
+            if not isinstance(value, ImportStatement)
+        ]
+
 # TODO add mcp support for agent -> leverage CodeFile pydantic model to apply changes via unique_ids and generate file from there
\ No newline at end of file

From 4a03d42dac8c1a42e281fa1075136a1d8edf1b40 Mon Sep 17 00:00:00 2001
From: BrunoV21
Date: Wed, 17 Sep 2025 23:36:18 +0100
Subject: [PATCH 5/6] feat(agent): use max_matches_per_word in autocomplete
 extraction

---
 codetide/agents/tide/agent.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/codetide/agents/tide/agent.py b/codetide/agents/tide/agent.py
index 4e0974c..0919ebe 100644
--- a/codetide/agents/tide/agent.py
+++ b/codetide/agents/tide/agent.py
@@ -141,13 +141,13 @@ async def agent_loop(self, codeIdentifiers :Optional[List[str]]=None):
             autocomplete = AutoComplete(self.tide.cached_ids)
             if self._direct_mode:
                 self.contextIdentifiers = None
-                exact_matches = autocomplete.extract_words_from_text(self.history[-1])["all_found_words"]
+                exact_matches = autocomplete.extract_words_from_text(self.history[-1], max_matches_per_word=1)["all_found_words"]
                 self.modifyIdentifiers = self.tide._as_file_paths(exact_matches)
                 codeIdentifiers = self.modifyIdentifiers
                 self._direct_mode = False

             else:
-                matches = autocomplete.extract_words_from_text("\n\n".join(self.history))
+                matches = autocomplete.extract_words_from_text("\n\n".join(self.history), max_matches_per_word=1)

From dc4c0aa01bb7c682b12380e3654d5777cb0f8f2c Mon Sep 17 00:00:00 2001
From: BrunoV21
Date: Wed, 17 Sep 2025 23:36:39 +0100
Subject: [PATCH 6/6] chore(gitignore): ignore database.db-shm and pgdata/

---
 .gitignore | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.gitignore b/.gitignore
index 0b1c523..1dde4d9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -186,4 +186,6 @@ examples/hf_demo_space/chainlit.md
 examples/hf_demo_space/public/

 database.db-journal
+database.db-shm
 .chainlit/
+pgdata/
\ No newline at end of file
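Patches 4 and 5 compose: autocomplete is now built from import-free ids and capped at one match per word. A closing sketch; the `tide` instance and message are illustrative, the property and method names come from the patches above:

    # cached_ids no longer surfaces ids that exist only as import statements,
    # so a package mention in the prompt cannot resolve to an ImportStatement.
    ids = tide.cached_ids  # non_import_unique_ids + relative_filepaths
    ac = AutoComplete(ids)
    user_message = "Refactor codetide.core.models.CodeBase.non_import_unique_ids"
    hits = ac.extract_words_from_text(user_message, max_matches_per_word=1)
    # In direct mode the surviving exact matches map straight to file paths:
    paths = tide._as_file_paths(hits["all_found_words"])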