From 3ed6cffc64ba98afaa8594785e8caa65666eca4d Mon Sep 17 00:00:00 2001
From: BrunoV21
Date: Wed, 17 Sep 2025 23:04:43 +0100
Subject: [PATCH 1/6] feat(agent,ui): add direct_mode command to skip repo
 analysis

---
 codetide/agents/tide/agent.py            | 184 ++++++++++++-----------
 codetide/agents/tide/ui/agent_tide_ui.py |   9 +-
 2 files changed, 105 insertions(+), 88 deletions(-)

diff --git a/codetide/agents/tide/agent.py b/codetide/agents/tide/agent.py
index 295fb69..4e0974c 100644
--- a/codetide/agents/tide/agent.py
+++ b/codetide/agents/tide/agent.py
@@ -58,6 +58,7 @@ class AgentTide(BaseModel):
     _last_code_identifers :Optional[Set[str]]=set()
     _last_code_context :Optional[str] = None
     _has_patch :bool=False
+    _direct_mode :bool=False

     model_config = ConfigDict(arbitrary_types_allowed=True)

@@ -138,92 +139,103 @@ async def agent_loop(self, codeIdentifiers :Optional[List[str]]=None):
             ...
         else:
             autocomplete = AutoComplete(self.tide.cached_ids)
-            matches = autocomplete.extract_words_from_text("\n\n".join(self.history))
-
-            # --- Begin Unified Identifier Retrieval ---
-            identifiers_accum = set(matches["all_found_words"]) if codeIdentifiers is None else set(codeIdentifiers + matches["all_found_words"])
-            modify_accum = set()
-            reasoning_accum = []
-            repo_tree = None
-            smart_search_attempts = 0
-            max_smart_search_attempts = 3
-            done = False
-            previous_reason = None
-
-            while not done:
-                expand_paths = ["./"]
-                # 1. SmartCodeSearch to filter repo tree
-                if repo_tree is None or smart_search_attempts > 0:
-                    repo_history = self.history
-                    if previous_reason:
-                        repo_history += [previous_reason]
+            if self._direct_mode:
+                self.contextIdentifiers = None
+                exact_matches = autocomplete.extract_words_from_text(self.history[-1])["all_found_words"]
+                self.modifyIdentifiers = self.tide._as_file_paths(exact_matches)
+                codeIdentifiers = self.modifyIdentifiers
+                self._direct_mode = False
+
+            else:
+                matches = autocomplete.extract_words_from_text("\n\n".join(self.history))
+
+                # --- Begin Unified Identifier Retrieval ---
+                identifiers_accum = set(matches["all_found_words"]) if codeIdentifiers is None else set(codeIdentifiers + matches["all_found_words"])
+                modify_accum = set()
+                reasoning_accum = []
+                repo_tree = None
+                smart_search_attempts = 0
+                max_smart_search_attempts = 3
+                done = False
+                previous_reason = None
+
+                while not done:
+                    expand_paths = ["./"]
+                    # 1. SmartCodeSearch to filter repo tree
+                    if repo_tree is None or smart_search_attempts > 0:
+                        repo_history = self.history
+                        if previous_reason:
+                            repo_history += [previous_reason]
+
+                        repo_tree = await self.get_repo_tree_from_user_prompt(self.history, include_modules=bool(smart_search_attempts), expand_paths=expand_paths)
+
+                    # 2. Single LLM call with unified prompt
+                    # Pass accumulated identifiers for context if this isn't the first iteration
+                    accumulated_context = "\n".join(
+                        sorted((identifiers_accum or set()) | (modify_accum or set()))
+                    ) if (identifiers_accum or modify_accum) else ""
+
+                    unified_response = await self.llm.acomplete(
+                        self.history,
+                        system_prompt=[GET_CODE_IDENTIFIERS_UNIFIED_PROMPT.format(
+                            DATE=TODAY,
+                            SUPPORTED_LANGUAGES=SUPPORTED_LANGUAGES,
+                            IDENTIFIERS=accumulated_context
+                        )],
+                        prefix_prompt=repo_tree,
+                        stream=False
+                    )
+                    print(f"{unified_response=}")
+
+                    # Parse the unified response
+                    contextIdentifiers = parse_blocks(unified_response, block_word="Context Identifiers", multiple=False)
+                    modifyIdentifiers = parse_blocks(unified_response, block_word="Modify Identifiers", multiple=False)
+                    expandPaths = parse_blocks(unified_response, block_word="Expand Paths", multiple=False)
+
+                    # Extract reasoning (everything before the first "*** Begin")
+                    reasoning_parts = unified_response.split("*** Begin")
+                    if reasoning_parts:
+                        reasoning_accum.append(reasoning_parts[0].strip())
+                        previous_reason = reasoning_accum[-1]

-                repo_tree = await self.get_repo_tree_from_user_prompt(self.history, include_modules=bool(smart_search_attempts), expand_paths=expand_paths)
-
-                # 2. Single LLM call with unified prompt
-                # Pass accumulated identifiers for context if this isn't the first iteration
-                accumulated_context = "\n".join(
-                    sorted((identifiers_accum or set()) | (modify_accum or set()))
-                ) if (identifiers_accum or modify_accum) else ""
-
-                unified_response = await self.llm.acomplete(
-                    self.history,
-                    system_prompt=[GET_CODE_IDENTIFIERS_UNIFIED_PROMPT.format(
-                        DATE=TODAY,
-                        SUPPORTED_LANGUAGES=SUPPORTED_LANGUAGES,
-                        IDENTIFIERS=accumulated_context
-                    )],
-                    prefix_prompt=repo_tree,
-                    stream=False
-                )
-
-                # Parse the unified response
-                contextIdentifiers = parse_blocks(unified_response, block_word="Context Identifiers", multiple=False)
-                modifyIdentifiers = parse_blocks(unified_response, block_word="Modify Identifiers", multiple=False)
-                expandPaths = parse_blocks(unified_response, block_word="Expand Paths", multiple=False)
-
-                # Extract reasoning (everything before the first "*** Begin")
-                reasoning_parts = unified_response.split("*** Begin")
-                if reasoning_parts:
-                    reasoning_accum.append(reasoning_parts[0].strip())
-                    previous_reason = reasoning_accum[-1]
-
-                # Accumulate identifiers
-                if contextIdentifiers:
-                    if smart_search_attempts == 0:
-                        ### clean wrongly mismtatched idenitifers
-                        identifiers_accum = set()
-                    for ident in contextIdentifiers.splitlines():
-                        if ident := self.get_valid_identifier(autocomplete, ident.strip()):
-                            identifiers_accum.add(ident)
-
-                if modifyIdentifiers:
-                    for ident in modifyIdentifiers.splitlines():
-                        if ident := self.get_valid_identifier(autocomplete, ident.strip()):
-                            modify_accum.add(ident.strip())
-
-                if expandPaths:
-                    expand_paths = [
-                        path for ident in expandPaths if (path := self.get_valid_identifier(autocomplete, ident.strip()))
-                    ]
-
-                # Check if we have enough identifiers (unified prompt includes this decision)
-                if "ENOUGH_IDENTIFIERS: TRUE" in unified_response.upper():
-                    done = True
-                else:
-                    smart_search_attempts += 1
-                    if smart_search_attempts >= max_smart_search_attempts:
-                        done = True
-
-            # Finalize identifiers
-            self.reasoning = "\n\n".join(reasoning_accum)
-            self.contextIdentifiers = list(identifiers_accum) if identifiers_accum else None
-            self.modifyIdentifiers = list(modify_accum) if modify_accum else None
-
-            codeIdentifiers = self.contextIdentifiers or []
-            if self.modifyIdentifiers:
-                self.modifyIdentifiers = self.tide._as_file_paths(self.modifyIdentifiers)
-                codeIdentifiers.extend(self.modifyIdentifiers)
+                    # Accumulate identifiers
+                    if contextIdentifiers:
+                        if smart_search_attempts == 0:
+                            ### clean wrongly mismatched identifiers
+                            identifiers_accum = set()
+                        for ident in contextIdentifiers.splitlines():
+                            if ident := self.get_valid_identifier(autocomplete, ident.strip()):
+                                identifiers_accum.add(ident)
+
+                    if modifyIdentifiers:
+                        for ident in modifyIdentifiers.splitlines():
+                            if ident := self.get_valid_identifier(autocomplete, ident.strip()):
+                                modify_accum.add(ident.strip())
+
+                    if expandPaths:
+                        expand_paths = [
+                            path for ident in expandPaths.splitlines() if (path := self.get_valid_identifier(autocomplete, ident.strip()))
+                        ]
+
+                    # Check if we have enough identifiers (unified prompt includes this decision)
+                    if "ENOUGH_IDENTIFIERS: TRUE" in unified_response.upper():
+                        done = True
+                    else:
+                        smart_search_attempts += 1
+                        if smart_search_attempts >= max_smart_search_attempts:
+                            done = True
+
+                # Finalize identifiers
+                self.reasoning = "\n\n".join(reasoning_accum)
+                self.contextIdentifiers = list(identifiers_accum) if identifiers_accum else None
+                self.modifyIdentifiers = list(modify_accum) if modify_accum else None
+
+                codeIdentifiers = self.contextIdentifiers or []
+                if self.modifyIdentifiers:
+                    self.modifyIdentifiers = self.tide._as_file_paths(self.modifyIdentifiers)
+                    codeIdentifiers.extend(self.modifyIdentifiers)
+                # TODO preserve identifiers passed by the user
+                codeIdentifiers += matches["all_found_words"]
             # --- End Unified Identifier Retrieval ---

         if codeIdentifiers:
@@ -232,7 +244,7 @@ async def agent_loop(self, codeIdentifiers :Optional[List[str]]=None):
             if not codeContext:
                 codeContext = REPO_TREE_CONTEXT_PROMPT.format(REPO_TREE=self.tide.codebase.get_tree_view())

-            readmeFile = self.tide.get("README.md", as_string_list=True)
+            readmeFile = self.tide.get(["README.md"] + matches["all_found_words"], as_string_list=True)
             if readmeFile:
                 codeContext = "\n".join([codeContext, README_CONTEXT_PROMPT.format(README=readmeFile)])

@@ -431,5 +443,7 @@ async def _handle_commands(self, command :str) -> str:
         context = ""
         if command == "commit":
            context = await self.prepare_commit()
+        elif command == "direct_mode":
+            self._direct_mode = True

         return context
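A minimal sketch of the control flow the agent.py hunks above introduce. Names mirror the patch; the `agent` and `autocomplete` instances are assumed for illustration, not part of the commit:

    # One-shot direct mode, as wired above.
    autocomplete = AutoComplete(agent.tide.cached_ids)
    if agent._direct_mode:
        # Use only exact identifier mentions from the latest message and skip
        # the LLM-driven repo-tree search loop entirely.
        words = autocomplete.extract_words_from_text(agent.history[-1])["all_found_words"]
        code_identifiers = agent.tide._as_file_paths(words)
        agent._direct_mode = False  # reset so the next turn runs the full analysis
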
diff --git a/codetide/agents/tide/ui/agent_tide_ui.py b/codetide/agents/tide/ui/agent_tide_ui.py
index 961928c..2b9945d 100644
--- a/codetide/agents/tide/ui/agent_tide_ui.py
+++ b/codetide/agents/tide/ui/agent_tide_ui.py
@@ -43,7 +43,8 @@ def __init__(self, project_path: Path = Path("./"), history :Optional[list]=None
             "review": CMD_CODE_REVIEW_PROMPT,
             "test": CMD_WRITE_TESTS_PROMPT,
             "commit": CMD_COMMIT_PROMPT,
-            "brainstorm": CMD_BRAINSTORM_PROMPT
+            "brainstorm": CMD_BRAINSTORM_PROMPT,
+            "direct_mode": ""
         }

         self.session_id = session_id if session_id else ulid()
@@ -52,7 +53,8 @@ def __init__(self, project_path: Path = Path("./"), history :Optional[list]=None
             {"id": "test", "icon": "flask-conical", "description": "Test file(s) or object(s)"},
             {"id": "commit", "icon": "git-commit", "description": "Commit changed files"},
             {"id": "plan", "icon": "notepad-text-dashed", "description": "Create a step-by-step task plan"},
-            {"id": "brainstorm", "icon": "brain-circuit", "description": "Brainstorm and discuss solutions (no code generation)"}
+            {"id": "brainstorm", "icon": "brain-circuit", "description": "Brainstorm and discuss solutions (no code generation)"},
+            {"id": "direct_mode", "icon": "search-code", "description": "Skip repository analysis and jump straight into code generation with the specified context (identifiers or paths)"}
         ]

     async def load(self):
@@ -133,4 +135,5 @@ def settings(self):

     async def get_command_prompt(self, command :str)->Optional[str]:
         context = await self.agent_tide._handle_commands(command)
-        return f"{self.commands_prompts.get(command)} {context}"
+        return f"{self.commands_prompts.get(command)} {context}".strip()
+
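End to end, the command reaches the agent through get_command_prompt. A hedged usage sketch; construction arguments and the awaiting context are illustrative, only the attribute and method names come from the patch:

    # /direct_mode flips the one-shot flag; the next agent_loop run skips repo analysis.
    ui = AgentTideUI(project_path=Path("./"))
    await ui.get_command_prompt("direct_mode")   # calls _handle_commands, sets _direct_mode
    ui.agent_tide.history.append("Update codetide.autocomplete.AutoComplete.extract_words_from_text")
    await ui.agent_tide.agent_loop()             # exact matches only, mapped to file paths
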
"description": "Skip repository analysis and jump straight into code generation with the specified context (identifiers or paths)"} ] async def load(self): @@ -133,4 +135,5 @@ def settings(self): async def get_command_prompt(self, command :str)->Optional[str]: context = await self.agent_tide._handle_commands(command) - return f"{self.commands_prompts.get(command)} {context}" + return f"{self.commands_prompts.get(command)} {context}".strip() + From 4f8b781be91d3e2e639ef04589ad5764e6876279 Mon Sep 17 00:00:00 2001 From: BrunoV21 Date: Wed, 17 Sep 2025 23:08:48 +0100 Subject: [PATCH 2/6] prompt: restrict identifiers to code elements not package deps --- codetide/agents/tide/prompts.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/codetide/agents/tide/prompts.py b/codetide/agents/tide/prompts.py index a6c12d1..99b279f 100644 --- a/codetide/agents/tide/prompts.py +++ b/codetide/agents/tide/prompts.py @@ -457,8 +457,12 @@ - Code identifiers should use dot notation (e.g., `module.submodule.Class.method`) without file extensions 2. **Identifier Categories:** - - **Context Identifiers:** Elements needed to understand or provide context for the request, but not directly modified - - **Modify Identifiers:** Elements that will likely require direct modification to fulfill the request + - **Context Identifiers:** Only include identifiers that correspond to functions, classes, methods, variables, or attributes defined in the codebase. Do **not** include package names, import statements, or dependencies based solely on import/package presence—even if they are present in the accumulated context. + - **Modify Identifiers:** Only include identifiers that correspond to functions, classes, methods, variables, or attributes that will likely require direct modification. Do **not** include package names, import statements, or dependencies based solely on import/package presence—even if they are present in the accumulated context. + +3. **ABSOLUTE PROHIBITION ON DEPENDENCY INCLUSION:** + - Never include identifiers in the Context Identifiers or Modify Identifiers sections that represent only package imports, external dependencies, or modules that are not actual code elements (functions, classes, methods, variables, or attributes) defined in the codebase. + - Even if a package or import name is present in the accumulated context, do not include it unless it refers to a concrete function, class, method, variable, or attribute in the codebase. **UNIFIED ANALYSIS PROTOCOL** From 5b83c253a8e52ad8282dcf916004e8f4121d9343 Mon Sep 17 00:00:00 2001 From: BrunoV21 Date: Wed, 17 Sep 2025 23:34:00 +0100 Subject: [PATCH 3/6] feat(autocomplete): add max_matches_per_word to extract_words_from_text --- codetide/autocomplete.py | 50 ++++++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/codetide/autocomplete.py b/codetide/autocomplete.py index 19f42cc..f1f95f4 100644 --- a/codetide/autocomplete.py +++ b/codetide/autocomplete.py @@ -170,15 +170,24 @@ def validate_paths(self, file_paths): raise ValueError(f"Invalid file path: '{path}'") return valid_paths - def extract_words_from_text(self, text: str, similarity_threshold: float = 0.6, case_sensitive: bool = False) -> dict: + def extract_words_from_text( + self, + text: str, + similarity_threshold: float = 0.6, + case_sensitive: bool = False, + max_matches_per_word: int = None + ) -> dict: """ Extract words from the word list that are present in the given text, including similar words (potential typos). 
From 5b83c253a8e52ad8282dcf916004e8f4121d9343 Mon Sep 17 00:00:00 2001
From: BrunoV21
Date: Wed, 17 Sep 2025 23:34:00 +0100
Subject: [PATCH 3/6] feat(autocomplete): add max_matches_per_word to
 extract_words_from_text

---
 codetide/autocomplete.py | 50 ++++++++++++++++++++++++++--------------
 1 file changed, 33 insertions(+), 17 deletions(-)

diff --git a/codetide/autocomplete.py b/codetide/autocomplete.py
index 19f42cc..f1f95f4 100644
--- a/codetide/autocomplete.py
+++ b/codetide/autocomplete.py
@@ -170,15 +170,24 @@ def validate_paths(self, file_paths):
                 raise ValueError(f"Invalid file path: '{path}'")
         return valid_paths

-    def extract_words_from_text(self, text: str, similarity_threshold: float = 0.6, case_sensitive: bool = False) -> dict:
+    def extract_words_from_text(
+        self,
+        text: str,
+        similarity_threshold: float = 0.6,
+        case_sensitive: bool = False,
+        max_matches_per_word: int = None
+    ) -> dict:
         """
         Extract words from the word list that are present in the given text,
         including similar words (potential typos).
-
+        Optionally limit the number of matches returned per word found in the text.
+
         Args:
             text (str): The input text to analyze
             similarity_threshold (float): Minimum similarity score for fuzzy matching (0.0 to 1.0)
             case_sensitive (bool): Whether matching should be case sensitive
-
+            max_matches_per_word (int, optional): Maximum number of matches to return per word in the text.
+                If None, all matches are returned. If 1, only the top match per word is returned.
+
         Returns:
             dict: Dictionary containing:
                 - 'exact_matches': List of words found exactly in the text
                 - 'fuzzy_matches': List of (word, text_word, similarity) tuples for close matches
                 - 'all_found_words': Combined list of all matched words
         """
         if not self.words:
             return {
                 'exact_matches': [],
                 'fuzzy_matches': [],
                 'all_found_words': []
             }
-
+
         # Split text into words (remove punctuation and split by whitespace)
         text_words = re.findall(r'\b\w+\b', text)
-
+
         exact_matches = []
         fuzzy_matches = []
         all_found_words = set()
-
+
         # Convert to appropriate case for comparison
         if case_sensitive:
             text_words_search = text_words
             word_list_search = self.words
@@ -206,49 +215,56 @@
         else:
             text_words_search = [word.lower() for word in text_words]
             word_list_search = [word.lower() for word in self.words]
-
+
         # Find exact matches
         for i, text_word in enumerate(text_words_search):
+            per_word_matches = 0
             for j, list_word in enumerate(word_list_search):
                 if text_word == list_word:
                     original_word = self.words[j]
                     if original_word not in all_found_words:
                         exact_matches.append(original_word)
                         all_found_words.add(original_word)
-
+                    per_word_matches += 1
+                    if max_matches_per_word is not None and per_word_matches >= max_matches_per_word:
+                        break
+
         # Find fuzzy matches for words that didn't match exactly
         matched_text_words = set()
         for match in exact_matches:
             search_match = match if case_sensitive else match.lower()
             for i, text_word in enumerate(text_words_search):
                 if text_word == search_match:
-                    matched_text_words.add(i)
-
+                    matched_text_words.add(i)
+
         # Check remaining text words for fuzzy matches
         for i, text_word in enumerate(text_words_search):
             if i in matched_text_words:
                 continue
-
-            # Find the most similar word from our word list
+
+            # Find the most similar word(s) from our word list
             best_matches = []
             for j, list_word in enumerate(word_list_search):
                 similarity = difflib.SequenceMatcher(None, text_word, list_word).ratio()
                 if similarity >= similarity_threshold:
                     best_matches.append((self.words[j], text_words[i], similarity))
-
-            # Sort by similarity and add to results
+
+            # Sort by similarity and add up to max_matches_per_word to results
             if best_matches:
                 best_matches.sort(key=lambda x: x[2], reverse=True)
-                for match in best_matches:
+                matches_to_add = best_matches
+                if max_matches_per_word is not None:
+                    matches_to_add = best_matches[:max_matches_per_word]
+                for match in matches_to_add:
                     word_from_list, word_in_text, score = match
                     if word_from_list not in all_found_words:
                         fuzzy_matches.append((word_from_list, word_in_text, score))
                         all_found_words.add(word_from_list)
-
+
         # Sort results
         exact_matches.sort()
         fuzzy_matches.sort(key=lambda x: x[2], reverse=True)  # Sort by similarity score
-
+
         return {
             'exact_matches': exact_matches,
             'fuzzy_matches': fuzzy_matches,
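A short usage sketch of the new cap, assuming AutoComplete is constructed from a word list the way agent.py does above; the word list and text here are illustrative:

    from codetide.autocomplete import AutoComplete

    ac = AutoComplete(["AgentTide", "AgentTideUI", "agent_loop"])
    hits = ac.extract_words_from_text("wire AgentTyde into the loop", max_matches_per_word=1)
    # Both class ids clear the 0.6 similarity threshold for the typo "AgentTyde"
    # (roughly 0.89 and 0.80), but only the best-scoring candidate is kept:
    # hits["fuzzy_matches"] == [("AgentTide", "AgentTyde", 0.888...)]
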
a/codetide/__init__.py b/codetide/__init__.py
index fa98570..47bd818 100644
--- a/codetide/__init__.py
+++ b/codetide/__init__.py
@@ -100,7 +100,7 @@ def relative_filepaths(self)->List[str]:

     @property
     def cached_ids(self)->List[str]:
-        return self.codebase.unique_ids+self.relative_filepaths
+        return self.codebase.non_import_unique_ids+self.relative_filepaths

     @property
     def repo(self)->Optional[pygit2.Repository]:
diff --git a/codetide/core/models.py b/codetide/core/models.py
index ce89ed6..87602ef 100644
--- a/codetide/core/models.py
+++ b/codetide/core/models.py
@@ -1225,4 +1225,12 @@ def unique_ids(self)->List[str]:

         return list(self._cached_elements.keys())

+    @property
+    def non_import_unique_ids(self)->List[str]:
+
+        return [
+            non_import_id for non_import_id, value in self.cached_elements.items()
+            if not isinstance(value, ImportStatement)
+        ]
+
 # TODO add mcp support for agent -> leverage CodeFile pydantic model to apply changes via unique_ids and generate file from there
\ No newline at end of file

From 4a03d42dac8c1a42e281fa1075136a1d8edf1b40 Mon Sep 17 00:00:00 2001
From: BrunoV21
Date: Wed, 17 Sep 2025 23:36:18 +0100
Subject: [PATCH 5/6] feat(agent): use max_matches_per_word in autocomplete
 extraction

---
 codetide/agents/tide/agent.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/codetide/agents/tide/agent.py b/codetide/agents/tide/agent.py
index 4e0974c..0919ebe 100644
--- a/codetide/agents/tide/agent.py
+++ b/codetide/agents/tide/agent.py
@@ -141,13 +141,13 @@ async def agent_loop(self, codeIdentifiers :Optional[List[str]]=None):
             autocomplete = AutoComplete(self.tide.cached_ids)
             if self._direct_mode:
                 self.contextIdentifiers = None
-                exact_matches = autocomplete.extract_words_from_text(self.history[-1])["all_found_words"]
+                exact_matches = autocomplete.extract_words_from_text(self.history[-1], max_matches_per_word=1)["all_found_words"]
                 self.modifyIdentifiers = self.tide._as_file_paths(exact_matches)
                 codeIdentifiers = self.modifyIdentifiers
                 self._direct_mode = False

             else:
-                matches = autocomplete.extract_words_from_text("\n\n".join(self.history))
+                matches = autocomplete.extract_words_from_text("\n\n".join(self.history), max_matches_per_word=1)

From dc4c0aa01bb7c682b12380e3654d5777cb0f8f2c Mon Sep 17 00:00:00 2001
From: BrunoV21
Date: Wed, 17 Sep 2025 23:36:39 +0100
Subject: [PATCH 6/6] chore(gitignore): ignore database.db-shm and pgdata/

---
 .gitignore | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.gitignore b/.gitignore
index 0b1c523..1dde4d9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -186,4 +186,6 @@ examples/hf_demo_space/chainlit.md
 examples/hf_demo_space/public/

 database.db-journal
+database.db-shm
 .chainlit/
+pgdata/
\ No newline at end of file
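Patches 4 and 5 compose: autocomplete is now built from import-free ids and capped at one match per word. A closing sketch; the `tide` instance and message are illustrative, the property and method names come from the patches above:

    # cached_ids no longer surfaces ids that exist only as import statements,
    # so a package mention in the prompt cannot resolve to an ImportStatement.
    ids = tide.cached_ids  # non_import_unique_ids + relative_filepaths
    ac = AutoComplete(ids)
    user_message = "Refactor codetide.core.models.CodeBase.non_import_unique_ids"
    hits = ac.extract_words_from_text(user_message, max_matches_per_word=1)
    # In direct mode the surviving exact matches map straight to file paths:
    paths = tide._as_file_paths(hits["all_found_words"])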