2 changes: 2 additions & 0 deletions .gitignore
@@ -186,4 +186,6 @@ examples/hf_demo_space/chainlit.md

examples/hf_demo_space/public/
database.db-journal
database.db-shm
.chainlit/
pgdata/
2 changes: 1 addition & 1 deletion codetide/__init__.py
@@ -100,7 +100,7 @@ def relative_filepaths(self)->List[str]:

@property
def cached_ids(self)->List[str]:
return self.codebase.unique_ids+self.relative_filepaths
return self.codebase.non_import_unique_ids+self.relative_filepaths

@property
def repo(self)->Optional[pygit2.Repository]:
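The `cached_ids` change swaps `unique_ids` for `non_import_unique_ids`, so autocomplete candidates no longer include names that exist only as imports. A minimal sketch of the intent, assuming the codebase model exposes both properties; the toy values below are hypothetical:

# Sketch only: property names mirror the diff, the data is made up.
class Codebase:
    unique_ids = ["codetide.parser.parse", "os", "typing.List"]  # includes import names
    non_import_unique_ids = ["codetide.parser.parse"]            # definitions only

codebase = Codebase()
relative_filepaths = ["codetide/parser.py"]

# After this PR, import-only names such as "os" no longer become
# autocomplete candidates; only real code elements and file paths do.
cached_ids = codebase.non_import_unique_ids + relative_filepaths
assert "os" not in cached_ids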
184 changes: 99 additions & 85 deletions codetide/agents/tide/agent.py
@@ -58,6 +58,7 @@ class AgentTide(BaseModel):
_last_code_identifers :Optional[Set[str]]=set()
_last_code_context :Optional[str] = None
_has_patch :bool=False
_direct_mode :bool=False

model_config = ConfigDict(arbitrary_types_allowed=True)

Expand Down Expand Up @@ -138,92 +139,103 @@ async def agent_loop(self, codeIdentifiers :Optional[List[str]]=None):
...
else:
autocomplete = AutoComplete(self.tide.cached_ids)
matches = autocomplete.extract_words_from_text("\n\n".join(self.history))

# --- Begin Unified Identifier Retrieval ---
identifiers_accum = set(matches["all_found_words"]) if codeIdentifiers is None else set(codeIdentifiers + matches["all_found_words"])
modify_accum = set()
reasoning_accum = []
repo_tree = None
smart_search_attempts = 0
max_smart_search_attempts = 3
done = False
previous_reason = None

while not done:
expand_paths = ["./"]
# 1. SmartCodeSearch to filter repo tree
if repo_tree is None or smart_search_attempts > 0:
repo_history = self.history
if previous_reason:
repo_history += [previous_reason]
if self._direct_mode:
self.contextIdentifiers = None
exact_matches = autocomplete.extract_words_from_text(self.history[-1], max_matches_per_word=1)["all_found_words"]
self.modifyIdentifiers = self.tide._as_file_paths(exact_matches)
codeIdentifiers = self.modifyIdentifiers
self._direct_mode = False

else:
matches = autocomplete.extract_words_from_text("\n\n".join(self.history), max_matches_per_word=1)

# --- Begin Unified Identifier Retrieval ---
identifiers_accum = set(matches["all_found_words"]) if codeIdentifiers is None else set(codeIdentifiers + matches["all_found_words"])
modify_accum = set()
reasoning_accum = []
repo_tree = None
smart_search_attempts = 0
max_smart_search_attempts = 3
done = False
previous_reason = None

while not done:
expand_paths = ["./"]
# 1. SmartCodeSearch to filter repo tree
if repo_tree is None or smart_search_attempts > 0:
repo_history = self.history
if previous_reason:
repo_history += [previous_reason]

repo_tree = await self.get_repo_tree_from_user_prompt(self.history, include_modules=bool(smart_search_attempts), expand_paths=expand_paths)

# 2. Single LLM call with unified prompt
# Pass accumulated identifiers for context if this isn't the first iteration
accumulated_context = "\n".join(
sorted((identifiers_accum or set()) | (modify_accum or set()))
) if (identifiers_accum or modify_accum) else ""

unified_response = await self.llm.acomplete(
self.history,
system_prompt=[GET_CODE_IDENTIFIERS_UNIFIED_PROMPT.format(
DATE=TODAY,
SUPPORTED_LANGUAGES=SUPPORTED_LANGUAGES,
IDENTIFIERS=accumulated_context
)],
prefix_prompt=repo_tree,
stream=False
)
print(f"{unified_response=}")

# Parse the unified response
contextIdentifiers = parse_blocks(unified_response, block_word="Context Identifiers", multiple=False)
modifyIdentifiers = parse_blocks(unified_response, block_word="Modify Identifiers", multiple=False)
expandPaths = parse_blocks(unified_response, block_word="Expand Paths", multiple=False)

# Extract reasoning (everything before the first "*** Begin")
reasoning_parts = unified_response.split("*** Begin")
if reasoning_parts:
reasoning_accum.append(reasoning_parts[0].strip())
previous_reason = reasoning_accum[-1]

repo_tree = await self.get_repo_tree_from_user_prompt(self.history, include_modules=bool(smart_search_attempts), expand_paths=expand_paths)

# 2. Single LLM call with unified prompt
# Pass accumulated identifiers for context if this isn't the first iteration
accumulated_context = "\n".join(
sorted((identifiers_accum or set()) | (modify_accum or set()))
) if (identifiers_accum or modify_accum) else ""

unified_response = await self.llm.acomplete(
self.history,
system_prompt=[GET_CODE_IDENTIFIERS_UNIFIED_PROMPT.format(
DATE=TODAY,
SUPPORTED_LANGUAGES=SUPPORTED_LANGUAGES,
IDENTIFIERS=accumulated_context
)],
prefix_prompt=repo_tree,
stream=False
)

# Parse the unified response
contextIdentifiers = parse_blocks(unified_response, block_word="Context Identifiers", multiple=False)
modifyIdentifiers = parse_blocks(unified_response, block_word="Modify Identifiers", multiple=False)
expandPaths = parse_blocks(unified_response, block_word="Expand Paths", multiple=False)

# Extract reasoning (everything before the first "*** Begin")
reasoning_parts = unified_response.split("*** Begin")
if reasoning_parts:
reasoning_accum.append(reasoning_parts[0].strip())
previous_reason = reasoning_accum[-1]

# Accumulate identifiers
if contextIdentifiers:
if smart_search_attempts == 0:
### clean wrongly mismatched identifiers
identifiers_accum = set()
for ident in contextIdentifiers.splitlines():
if ident := self.get_valid_identifier(autocomplete, ident.strip()):
identifiers_accum.add(ident)

if modifyIdentifiers:
for ident in modifyIdentifiers.splitlines():
if ident := self.get_valid_identifier(autocomplete, ident.strip()):
modify_accum.add(ident.strip())

if expandPaths:
expand_paths = [
path for ident in expandPaths if (path := self.get_valid_identifier(autocomplete, ident.strip()))
]

# Check if we have enough identifiers (unified prompt includes this decision)
if "ENOUGH_IDENTIFIERS: TRUE" in unified_response.upper():
done = True
else:
smart_search_attempts += 1
if smart_search_attempts >= max_smart_search_attempts:
# Accumulate identifiers
if contextIdentifiers:
if smart_search_attempts == 0:
### clean wrongly mismatched identifiers
identifiers_accum = set()
for ident in contextIdentifiers.splitlines():
if ident := self.get_valid_identifier(autocomplete, ident.strip()):
identifiers_accum.add(ident)

if modifyIdentifiers:
for ident in modifyIdentifiers.splitlines():
if ident := self.get_valid_identifier(autocomplete, ident.strip()):
modify_accum.add(ident.strip())

if expandPaths:
expand_paths = [
path for ident in expandPaths if (path := self.get_valid_identifier(autocomplete, ident.strip()))
]

# Check if we have enough identifiers (unified prompt includes this decision)
if "ENOUGH_IDENTIFIERS: TRUE" in unified_response.upper():
done = True

# Finalize identifiers
self.reasoning = "\n\n".join(reasoning_accum)
self.contextIdentifiers = list(identifiers_accum) if identifiers_accum else None
self.modifyIdentifiers = list(modify_accum) if modify_accum else None

codeIdentifiers = self.contextIdentifiers or []
if self.modifyIdentifiers:
self.modifyIdentifiers = self.tide._as_file_paths(self.modifyIdentifiers)
codeIdentifiers.extend(self.modifyIdentifiers)
else:
smart_search_attempts += 1
if smart_search_attempts >= max_smart_search_attempts:
done = True

# Finalize identifiers
self.reasoning = "\n\n".join(reasoning_accum)
self.contextIdentifiers = list(identifiers_accum) if identifiers_accum else None
self.modifyIdentifiers = list(modify_accum) if modify_accum else None

codeIdentifiers = self.contextIdentifiers or []
if self.modifyIdentifiers:
self.modifyIdentifiers = self.tide._as_file_paths(self.modifyIdentifiers)
codeIdentifiers.extend(self.modifyIdentifiers)
# TODO: preserve identifiers passed by the user
codeIdentifiers += matches["all_found_words"]

# --- End Unified Identifier Retrieval ---
if codeIdentifiers:
@@ -232,7 +244,7 @@ async def agent_loop(self, codeIdentifiers :Optional[List[str]]=None):

if not codeContext:
codeContext = REPO_TREE_CONTEXT_PROMPT.format(REPO_TREE=self.tide.codebase.get_tree_view())
readmeFile = self.tide.get("README.md", as_string_list=True)
readmeFile = self.tide.get(["README.md"] + matches["all_found_words"] , as_string_list=True)
if readmeFile:
codeContext = "\n".join([codeContext, README_CONTEXT_PROMPT.format(README=readmeFile)])

@@ -431,5 +443,7 @@ async def _handle_commands(self, command :str) -> str:
context = ""
if command == "commit":
context = await self.prepare_commit()
elif command == "direct_mode":
self._direct_mode = True

return context
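Taken together, the agent.py changes add a `direct_mode` escape hatch: once `_handle_commands` sets the flag, `agent_loop` skips the iterative identifier retrieval and resolves identifiers named in the latest message straight to file paths. A condensed sketch of the new control flow, using the helpers shown in the diff (`AutoComplete`, `_as_file_paths`); this is an illustration, not the verbatim method:

from typing import List, Optional

def resolve_direct_mode(agent, autocomplete) -> Optional[List[str]]:
    # Sketch of the branch added to agent_loop; names mirror the diff.
    if not agent._direct_mode:
        return None
    # Trust the user: take the single best identifier match per word in
    # the last message and map the matches to file paths, skipping the
    # repo-tree analysis and the unified LLM prompt entirely.
    exact = autocomplete.extract_words_from_text(
        agent.history[-1], max_matches_per_word=1
    )["all_found_words"]
    agent.modifyIdentifiers = agent.tide._as_file_paths(exact)
    agent._direct_mode = False  # one-shot flag: reset after use
    return agent.modifyIdentifiers

The non-direct path keeps the retry loop from before: up to three smart-search attempts, terminated early when the unified response contains the `ENOUGH_IDENTIFIERS: TRUE` sentinel.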
8 changes: 6 additions & 2 deletions codetide/agents/tide/prompts.py
@@ -457,8 +457,12 @@
- Code identifiers should use dot notation (e.g., `module.submodule.Class.method`) without file extensions

2. **Identifier Categories:**
- **Context Identifiers:** Elements needed to understand or provide context for the request, but not directly modified
- **Modify Identifiers:** Elements that will likely require direct modification to fulfill the request
- **Context Identifiers:** Only include identifiers that correspond to functions, classes, methods, variables, or attributes defined in the codebase. Do **not** include package names, import statements, or dependencies based solely on import/package presence—even if they are present in the accumulated context.
- **Modify Identifiers:** Only include identifiers that correspond to functions, classes, methods, variables, or attributes that will likely require direct modification. Do **not** include package names, import statements, or dependencies based solely on import/package presence—even if they are present in the accumulated context.

3. **ABSOLUTE PROHIBITION ON DEPENDENCY INCLUSION:**
- Never include identifiers in the Context Identifiers or Modify Identifiers sections that represent only package imports, external dependencies, or modules that are not actual code elements (functions, classes, methods, variables, or attributes) defined in the codebase.
- Even if a package or import name is present in the accumulated context, do not include it unless it refers to a concrete function, class, method, variable, or attribute in the codebase.

**UNIFIED ANALYSIS PROTOCOL**

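In code terms, the new prompt rule amounts to a filter over candidate identifiers: keep a candidate only if it names a definition in the codebase, never because it merely appears as an import. A hedged sketch of that predicate, reusing the `non_import_unique_ids` property introduced earlier in this PR (the helper and the toy data are hypothetical):

def filter_identifiers(candidates, codebase):
    """Keep only identifiers that name real code elements (functions,
    classes, methods, variables, attributes) defined in the codebase."""
    defined = set(codebase.non_import_unique_ids)  # property from this PR
    return [c for c in candidates if c in defined]

# "pygit2" is imported somewhere in the repo but is not a code element
# defined in it, so the rule drops it even if the LLM proposes it.
class FakeCodebase:
    non_import_unique_ids = ["codetide.agents.tide.agent.AgentTide.agent_loop"]

kept = filter_identifiers(
    ["pygit2", "codetide.agents.tide.agent.AgentTide.agent_loop"], FakeCodebase()
)
assert kept == ["codetide.agents.tide.agent.AgentTide.agent_loop"]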
9 changes: 6 additions & 3 deletions codetide/agents/tide/ui/agent_tide_ui.py
@@ -43,7 +43,8 @@ def __init__(self, project_path: Path = Path("./"), history :Optional[list]=None
"review": CMD_CODE_REVIEW_PROMPT,
"test": CMD_WRITE_TESTS_PROMPT,
"commit": CMD_COMMIT_PROMPT,
"brainstorm": CMD_BRAINSTORM_PROMPT
"brainstorm": CMD_BRAINSTORM_PROMPT,
"direct_mode": ""
}
self.session_id = session_id if session_id else ulid()

@@ -52,7 +53,8 @@ def __init__(self, project_path: Path = Path("./"), history :Optional[list]=None
{"id": "test", "icon": "flask-conical", "description": "Test file(s) or object(s)"},
{"id": "commit", "icon": "git-commit", "description": "Commit changed files"},
{"id": "plan", "icon": "notepad-text-dashed", "description": "Create a step-by-step task plan"},
{"id": "brainstorm", "icon": "brain-circuit", "description": "Brainstorm and discuss solutions (no code generation)"}
{"id": "brainstorm", "icon": "brain-circuit", "description": "Brainstorm and discuss solutions (no code generation)"},
{"id": "direct_mode", "icon": "search-code", "description": "Skip repository analysis and jump straight into code generation with the specified context (identifiers or paths)"}
]

async def load(self):
@@ -133,4 +135,5 @@ def settings(self):

async def get_command_prompt(self, command :str)->Optional[str]:
context = await self.agent_tide._handle_commands(command)
return f"{self.commands_prompts.get(command)} {context}"
return f"{self.commands_prompts.get(command)} {context}".strip()
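The added `.strip()` matters for the new `direct_mode` entry, whose command prompt is the empty string: without it the f-string produces a bare leading space. A minimal illustration with a toy dict, no agent involved:

commands_prompts = {"direct_mode": ""}
context = ""  # direct_mode only sets a flag and returns no extra context

prompt = f"{commands_prompts.get('direct_mode')} {context}"
assert prompt == " "         # before this change: a lone-space prompt
assert prompt.strip() == ""  # after: cleanly empty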

50 changes: 33 additions & 17 deletions codetide/autocomplete.py
@@ -170,15 +170,24 @@ def validate_paths(self, file_paths):
raise ValueError(f"Invalid file path: '{path}'")
return valid_paths

def extract_words_from_text(self, text: str, similarity_threshold: float = 0.6, case_sensitive: bool = False) -> dict:
def extract_words_from_text(
self,
text: str,
similarity_threshold: float = 0.6,
case_sensitive: bool = False,
max_matches_per_word: int = None
) -> dict:
"""
Extract words from the word list that are present in the given text, including similar words (potential typos).

Optionally limit the number of matches returned per word found in the text.

Args:
text (str): The input text to analyze
similarity_threshold (float): Minimum similarity score for fuzzy matching (0.0 to 1.0)
case_sensitive (bool): Whether matching should be case sensitive

max_matches_per_word (int, optional): Maximum number of matches to return per word in the text.
If None, all matches are returned. If 1, only the top match per word is returned.

Returns:
dict: Dictionary containing:
- 'exact_matches': List of words found exactly in the text
@@ -191,64 +200,71 @@ def extract_words_from_text(self, text: str, similarity_threshold: float = 0.6,
'fuzzy_matches': [],
'all_found_words': []
}

# Split text into words (remove punctuation and split by whitespace)
text_words = re.findall(r'\b\w+\b', text)

exact_matches = []
fuzzy_matches = []
all_found_words = set()

# Convert to appropriate case for comparison
if case_sensitive:
text_words_search = text_words
word_list_search = self.words
else:
text_words_search = [word.lower() for word in text_words]
word_list_search = [word.lower() for word in self.words]

# Find exact matches
for i, text_word in enumerate(text_words_search):
per_word_matches = 0
for j, list_word in enumerate(word_list_search):
if text_word == list_word:
original_word = self.words[j]
if original_word not in all_found_words:
exact_matches.append(original_word)
all_found_words.add(original_word)

per_word_matches += 1
if max_matches_per_word is not None and per_word_matches >= max_matches_per_word:
break

# Find fuzzy matches for words that didn't match exactly
matched_text_words = set()
for match in exact_matches:
search_match = match if case_sensitive else match.lower()
for i, text_word in enumerate(text_words_search):
if text_word == search_match:
matched_text_words.add(i)
matched_text_words.add(i)

# Check remaining text words for fuzzy matches
for i, text_word in enumerate(text_words_search):
if i in matched_text_words:
continue
# Find the most similar word from our word list

# Find the most similar word(s) from our word list
best_matches = []
for j, list_word in enumerate(word_list_search):
similarity = difflib.SequenceMatcher(None, text_word, list_word).ratio()
if similarity >= similarity_threshold:
best_matches.append((self.words[j], text_words[i], similarity))
# Sort by similarity and add to results

# Sort by similarity and add up to max_matches_per_word to results
if best_matches:
best_matches.sort(key=lambda x: x[2], reverse=True)
for match in best_matches:
matches_to_add = best_matches
if max_matches_per_word is not None:
matches_to_add = best_matches[:max_matches_per_word]
for match in matches_to_add:
word_from_list, word_in_text, score = match
if word_from_list not in all_found_words:
fuzzy_matches.append((word_from_list, word_in_text, score))
all_found_words.add(word_from_list)

# Sort results
exact_matches.sort()
fuzzy_matches.sort(key=lambda x: x[2], reverse=True) # Sort by similarity score

return {
'exact_matches': exact_matches,
'fuzzy_matches': fuzzy_matches,
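A hedged usage sketch for the new `max_matches_per_word` parameter, assuming `AutoComplete` is constructed from a word list the way agent.py does (`AutoComplete(self.tide.cached_ids)`); the word list and text below are made up:

from codetide.autocomplete import AutoComplete

ac = AutoComplete(["config.load", "config.loader", "config.reload"])

# Default behaviour: every candidate above the similarity threshold is
# returned, so one word in the text can fan out into several fuzzy hits.
everything = ac.extract_words_from_text("please fix config.lod")

# With max_matches_per_word=1 only the best-scoring candidate per word in
# the text survives -- this is what the new direct mode relies on to map
# each mention in the user's message to a single identifier.
best_only = ac.extract_words_from_text("please fix config.lod", max_matches_per_word=1)
assert len(best_only["all_found_words"]) <= len(everything["all_found_words"])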