diff --git a/.gitignore b/.gitignore index 0b1c523..1dde4d9 100644 --- a/.gitignore +++ b/.gitignore @@ -186,4 +186,6 @@ examples/hf_demo_space/chainlit.md examples/hf_demo_space/public/ database.db-journal +database.db-shm .chainlit/ +pgdata/ \ No newline at end of file diff --git a/codetide/__init__.py b/codetide/__init__.py index fa98570..47bd818 100644 --- a/codetide/__init__.py +++ b/codetide/__init__.py @@ -100,7 +100,7 @@ def relative_filepaths(self)->List[str]: @property def cached_ids(self)->List[str]: - return self.codebase.unique_ids+self.relative_filepaths + return self.codebase.non_import_unique_ids+self.relative_filepaths @property def repo(self)->Optional[pygit2.Repository]: diff --git a/codetide/agents/tide/agent.py b/codetide/agents/tide/agent.py index 295fb69..0919ebe 100644 --- a/codetide/agents/tide/agent.py +++ b/codetide/agents/tide/agent.py @@ -58,6 +58,7 @@ class AgentTide(BaseModel): _last_code_identifers :Optional[Set[str]]=set() _last_code_context :Optional[str] = None _has_patch :bool=False + _direct_mode :bool=False model_config = ConfigDict(arbitrary_types_allowed=True) @@ -138,92 +139,103 @@ async def agent_loop(self, codeIdentifiers :Optional[List[str]]=None): ... else: autocomplete = AutoComplete(self.tide.cached_ids) - matches = autocomplete.extract_words_from_text("\n\n".join(self.history)) - - # --- Begin Unified Identifier Retrieval --- - identifiers_accum = set(matches["all_found_words"]) if codeIdentifiers is None else set(codeIdentifiers + matches["all_found_words"]) - modify_accum = set() - reasoning_accum = [] - repo_tree = None - smart_search_attempts = 0 - max_smart_search_attempts = 3 - done = False - previous_reason = None - - while not done: - expand_paths = ["./"] - # 1. SmartCodeSearch to filter repo tree - if repo_tree is None or smart_search_attempts > 0: - repo_history = self.history - if previous_reason: - repo_history += [previous_reason] + if self._direct_mode: + self.contextIdentifiers = None + exact_matches = autocomplete.extract_words_from_text(self.history[-1], max_matches_per_word=1)["all_found_words"] + self.modifyIdentifiers = self.tide._as_file_paths(exact_matches) + codeIdentifiers = self.modifyIdentifiers + self._direct_mode = False + + else: + matches = autocomplete.extract_words_from_text("\n\n".join(self.history), max_matches_per_word=1) + + # --- Begin Unified Identifier Retrieval --- + identifiers_accum = set(matches["all_found_words"]) if codeIdentifiers is None else set(codeIdentifiers + matches["all_found_words"]) + modify_accum = set() + reasoning_accum = [] + repo_tree = None + smart_search_attempts = 0 + max_smart_search_attempts = 3 + done = False + previous_reason = None + + while not done: + expand_paths = ["./"] + # 1. SmartCodeSearch to filter repo tree + if repo_tree is None or smart_search_attempts > 0: + repo_history = self.history + if previous_reason: + repo_history += [previous_reason] + + repo_tree = await self.get_repo_tree_from_user_prompt(self.history, include_modules=bool(smart_search_attempts), expand_paths=expand_paths) + + # 2. Single LLM call with unified prompt + # Pass accumulated identifiers for context if this isn't the first iteration + accumulated_context = "\n".join( + sorted((identifiers_accum or set()) | (modify_accum or set())) + ) if (identifiers_accum or modify_accum) else "" + + unified_response = await self.llm.acomplete( + self.history, + system_prompt=[GET_CODE_IDENTIFIERS_UNIFIED_PROMPT.format( + DATE=TODAY, + SUPPORTED_LANGUAGES=SUPPORTED_LANGUAGES, + IDENTIFIERS=accumulated_context + )], + prefix_prompt=repo_tree, + stream=False + ) + print(f"{unified_response=}") + + # Parse the unified response + contextIdentifiers = parse_blocks(unified_response, block_word="Context Identifiers", multiple=False) + modifyIdentifiers = parse_blocks(unified_response, block_word="Modify Identifiers", multiple=False) + expandPaths = parse_blocks(unified_response, block_word="Expand Paths", multiple=False) + + # Extract reasoning (everything before the first "*** Begin") + reasoning_parts = unified_response.split("*** Begin") + if reasoning_parts: + reasoning_accum.append(reasoning_parts[0].strip()) + previous_reason = reasoning_accum[-1] - repo_tree = await self.get_repo_tree_from_user_prompt(self.history, include_modules=bool(smart_search_attempts), expand_paths=expand_paths) - - # 2. Single LLM call with unified prompt - # Pass accumulated identifiers for context if this isn't the first iteration - accumulated_context = "\n".join( - sorted((identifiers_accum or set()) | (modify_accum or set())) - ) if (identifiers_accum or modify_accum) else "" - - unified_response = await self.llm.acomplete( - self.history, - system_prompt=[GET_CODE_IDENTIFIERS_UNIFIED_PROMPT.format( - DATE=TODAY, - SUPPORTED_LANGUAGES=SUPPORTED_LANGUAGES, - IDENTIFIERS=accumulated_context - )], - prefix_prompt=repo_tree, - stream=False - ) - - # Parse the unified response - contextIdentifiers = parse_blocks(unified_response, block_word="Context Identifiers", multiple=False) - modifyIdentifiers = parse_blocks(unified_response, block_word="Modify Identifiers", multiple=False) - expandPaths = parse_blocks(unified_response, block_word="Expand Paths", multiple=False) - - # Extract reasoning (everything before the first "*** Begin") - reasoning_parts = unified_response.split("*** Begin") - if reasoning_parts: - reasoning_accum.append(reasoning_parts[0].strip()) - previous_reason = reasoning_accum[-1] - - # Accumulate identifiers - if contextIdentifiers: - if smart_search_attempts == 0: - ### clean wrongly mismtatched idenitifers - identifiers_accum = set() - for ident in contextIdentifiers.splitlines(): - if ident := self.get_valid_identifier(autocomplete, ident.strip()): - identifiers_accum.add(ident) - - if modifyIdentifiers: - for ident in modifyIdentifiers.splitlines(): - if ident := self.get_valid_identifier(autocomplete, ident.strip()): - modify_accum.add(ident.strip()) - - if expandPaths: - expand_paths = [ - path for ident in expandPaths if (path := self.get_valid_identifier(autocomplete, ident.strip())) - ] - - # Check if we have enough identifiers (unified prompt includes this decision) - if "ENOUGH_IDENTIFIERS: TRUE" in unified_response.upper(): - done = True - else: - smart_search_attempts += 1 - if smart_search_attempts >= max_smart_search_attempts: + # Accumulate identifiers + if contextIdentifiers: + if smart_search_attempts == 0: + ### clean wrongly mismtatched idenitifers + identifiers_accum = set() + for ident in contextIdentifiers.splitlines(): + if ident := self.get_valid_identifier(autocomplete, ident.strip()): + identifiers_accum.add(ident) + + if modifyIdentifiers: + for ident in modifyIdentifiers.splitlines(): + if ident := self.get_valid_identifier(autocomplete, ident.strip()): + modify_accum.add(ident.strip()) + + if expandPaths: + expand_paths = [ + path for ident in expandPaths if (path := self.get_valid_identifier(autocomplete, ident.strip())) + ] + + # Check if we have enough identifiers (unified prompt includes this decision) + if "ENOUGH_IDENTIFIERS: TRUE" in unified_response.upper(): done = True - - # Finalize identifiers - self.reasoning = "\n\n".join(reasoning_accum) - self.contextIdentifiers = list(identifiers_accum) if identifiers_accum else None - self.modifyIdentifiers = list(modify_accum) if modify_accum else None - - codeIdentifiers = self.contextIdentifiers or [] - if self.modifyIdentifiers: - self.modifyIdentifiers = self.tide._as_file_paths(self.modifyIdentifiers) - codeIdentifiers.extend(self.modifyIdentifiers) + else: + smart_search_attempts += 1 + if smart_search_attempts >= max_smart_search_attempts: + done = True + + # Finalize identifiers + self.reasoning = "\n\n".join(reasoning_accum) + self.contextIdentifiers = list(identifiers_accum) if identifiers_accum else None + self.modifyIdentifiers = list(modify_accum) if modify_accum else None + + codeIdentifiers = self.contextIdentifiers or [] + if self.modifyIdentifiers: + self.modifyIdentifiers = self.tide._as_file_paths(self.modifyIdentifiers) + codeIdentifiers.extend(self.modifyIdentifiers) + # TODO preserve passed identifiers by the user + codeIdentifiers += matches["all_found_words"] # --- End Unified Identifier Retrieval --- if codeIdentifiers: @@ -232,7 +244,7 @@ async def agent_loop(self, codeIdentifiers :Optional[List[str]]=None): if not codeContext: codeContext = REPO_TREE_CONTEXT_PROMPT.format(REPO_TREE=self.tide.codebase.get_tree_view()) - readmeFile = self.tide.get("README.md", as_string_list=True) + readmeFile = self.tide.get(["README.md"] + matches["all_found_words"] , as_string_list=True) if readmeFile: codeContext = "\n".join([codeContext, README_CONTEXT_PROMPT.format(README=readmeFile)]) @@ -431,5 +443,7 @@ async def _handle_commands(self, command :str) -> str: context = "" if command == "commit": context = await self.prepare_commit() + elif command == "direct_mode": + self._direct_mode = True return context diff --git a/codetide/agents/tide/prompts.py b/codetide/agents/tide/prompts.py index a6c12d1..99b279f 100644 --- a/codetide/agents/tide/prompts.py +++ b/codetide/agents/tide/prompts.py @@ -457,8 +457,12 @@ - Code identifiers should use dot notation (e.g., `module.submodule.Class.method`) without file extensions 2. **Identifier Categories:** - - **Context Identifiers:** Elements needed to understand or provide context for the request, but not directly modified - - **Modify Identifiers:** Elements that will likely require direct modification to fulfill the request + - **Context Identifiers:** Only include identifiers that correspond to functions, classes, methods, variables, or attributes defined in the codebase. Do **not** include package names, import statements, or dependencies based solely on import/package presence—even if they are present in the accumulated context. + - **Modify Identifiers:** Only include identifiers that correspond to functions, classes, methods, variables, or attributes that will likely require direct modification. Do **not** include package names, import statements, or dependencies based solely on import/package presence—even if they are present in the accumulated context. + +3. **ABSOLUTE PROHIBITION ON DEPENDENCY INCLUSION:** + - Never include identifiers in the Context Identifiers or Modify Identifiers sections that represent only package imports, external dependencies, or modules that are not actual code elements (functions, classes, methods, variables, or attributes) defined in the codebase. + - Even if a package or import name is present in the accumulated context, do not include it unless it refers to a concrete function, class, method, variable, or attribute in the codebase. **UNIFIED ANALYSIS PROTOCOL** diff --git a/codetide/agents/tide/ui/agent_tide_ui.py b/codetide/agents/tide/ui/agent_tide_ui.py index 961928c..2b9945d 100644 --- a/codetide/agents/tide/ui/agent_tide_ui.py +++ b/codetide/agents/tide/ui/agent_tide_ui.py @@ -43,7 +43,8 @@ def __init__(self, project_path: Path = Path("./"), history :Optional[list]=None "review": CMD_CODE_REVIEW_PROMPT, "test": CMD_WRITE_TESTS_PROMPT, "commit": CMD_COMMIT_PROMPT, - "brainstorm": CMD_BRAINSTORM_PROMPT + "brainstorm": CMD_BRAINSTORM_PROMPT, + "direct_mode": "" } self.session_id = session_id if session_id else ulid() @@ -52,7 +53,8 @@ def __init__(self, project_path: Path = Path("./"), history :Optional[list]=None {"id": "test", "icon": "flask-conical", "description": "Test file(s) or object(s)"}, {"id": "commit", "icon": "git-commit", "description": "Commit changed files"}, {"id": "plan", "icon": "notepad-text-dashed", "description": "Create a step-by-step task plan"}, - {"id": "brainstorm", "icon": "brain-circuit", "description": "Brainstorm and discuss solutions (no code generation)"} + {"id": "brainstorm", "icon": "brain-circuit", "description": "Brainstorm and discuss solutions (no code generation)"}, + {"id": "direct_mode", "icon": "search-code", "description": "Skip repository analysis and jump straight into code generation with the specified context (identifiers or paths)"} ] async def load(self): @@ -133,4 +135,5 @@ def settings(self): async def get_command_prompt(self, command :str)->Optional[str]: context = await self.agent_tide._handle_commands(command) - return f"{self.commands_prompts.get(command)} {context}" + return f"{self.commands_prompts.get(command)} {context}".strip() + diff --git a/codetide/autocomplete.py b/codetide/autocomplete.py index 19f42cc..f1f95f4 100644 --- a/codetide/autocomplete.py +++ b/codetide/autocomplete.py @@ -170,15 +170,24 @@ def validate_paths(self, file_paths): raise ValueError(f"Invalid file path: '{path}'") return valid_paths - def extract_words_from_text(self, text: str, similarity_threshold: float = 0.6, case_sensitive: bool = False) -> dict: + def extract_words_from_text( + self, + text: str, + similarity_threshold: float = 0.6, + case_sensitive: bool = False, + max_matches_per_word: int = None + ) -> dict: """ Extract words from the word list that are present in the given text, including similar words (potential typos). - + Optionally limit the number of matches returned per word found in the text. + Args: text (str): The input text to analyze similarity_threshold (float): Minimum similarity score for fuzzy matching (0.0 to 1.0) case_sensitive (bool): Whether matching should be case sensitive - + max_matches_per_word (int, optional): Maximum number of matches to return per word in the text. + If None, all matches are returned. If 1, only the top match per word is returned. + Returns: dict: Dictionary containing: - 'exact_matches': List of words found exactly in the text @@ -191,14 +200,14 @@ def extract_words_from_text(self, text: str, similarity_threshold: float = 0.6, 'fuzzy_matches': [], 'all_found_words': [] } - + # Split text into words (remove punctuation and split by whitespace) text_words = re.findall(r'\b\w+\b', text) - + exact_matches = [] fuzzy_matches = [] all_found_words = set() - + # Convert to appropriate case for comparison if case_sensitive: text_words_search = text_words @@ -206,49 +215,56 @@ def extract_words_from_text(self, text: str, similarity_threshold: float = 0.6, else: text_words_search = [word.lower() for word in text_words] word_list_search = [word.lower() for word in self.words] - + # Find exact matches for i, text_word in enumerate(text_words_search): + per_word_matches = 0 for j, list_word in enumerate(word_list_search): if text_word == list_word: original_word = self.words[j] if original_word not in all_found_words: exact_matches.append(original_word) all_found_words.add(original_word) - + per_word_matches += 1 + if max_matches_per_word is not None and per_word_matches >= max_matches_per_word: + break + # Find fuzzy matches for words that didn't match exactly matched_text_words = set() for match in exact_matches: search_match = match if case_sensitive else match.lower() for i, text_word in enumerate(text_words_search): if text_word == search_match: - matched_text_words.add(i) - + matched_text_words.add(i) + # Check remaining text words for fuzzy matches for i, text_word in enumerate(text_words_search): if i in matched_text_words: continue - - # Find the most similar word from our word list + + # Find the most similar word(s) from our word list best_matches = [] for j, list_word in enumerate(word_list_search): similarity = difflib.SequenceMatcher(None, text_word, list_word).ratio() if similarity >= similarity_threshold: best_matches.append((self.words[j], text_words[i], similarity)) - - # Sort by similarity and add to results + + # Sort by similarity and add up to max_matches_per_word to results if best_matches: best_matches.sort(key=lambda x: x[2], reverse=True) - for match in best_matches: + matches_to_add = best_matches + if max_matches_per_word is not None: + matches_to_add = best_matches[:max_matches_per_word] + for match in matches_to_add: word_from_list, word_in_text, score = match if word_from_list not in all_found_words: fuzzy_matches.append((word_from_list, word_in_text, score)) all_found_words.add(word_from_list) - + # Sort results exact_matches.sort() fuzzy_matches.sort(key=lambda x: x[2], reverse=True) # Sort by similarity score - + return { 'exact_matches': exact_matches, 'fuzzy_matches': fuzzy_matches, diff --git a/codetide/core/models.py b/codetide/core/models.py index ce89ed6..87602ef 100644 --- a/codetide/core/models.py +++ b/codetide/core/models.py @@ -1225,4 +1225,12 @@ def unique_ids(self)->List[str]: return list(self._cached_elements.keys()) + @property + def non_import_unique_ids(self)->List[str]: + + return [ + non_import_id for non_import_id, value in self.cached_elements.items() + if not isinstance(value, ImportStatement) + ] + # TODO add mcp support for agent -> leverage CodeFile pydantic model to apply changes via unique_ids and generate file from there \ No newline at end of file