feat: update file system work to support huge workspaces

kosz78 · kosz78 · commit f14a992451c6 · 2025-05-26T19:13:56.000+07:00
diff --git a/src/agent/mod.rs b/src/agent/mod.rs
@@ -25,6 +25,7 @@ use crate::tools::write_to_file::WriteToFileTool;
 use crate::Config;
 use anyhow::Result;
 use futures::StreamExt;
+use itertools::Itertools;
 use mcp_core::types::ProtocolVersion;
 use rig::agent::AgentBuilder;
 use rig::completion::CompletionError;
@@ -93,8 +94,8 @@ impl Display for AgentError {
     }
 }
 
-fn count_tokens(system_prompt: &str) -> u32 {
-    system_prompt.len() as u32 / 4
+fn count_tokens(text: &str) -> u32 {
+    text.len() as u32 / 4
 }
 
 impl Agent {
@@ -246,7 +247,8 @@ impl Agent {
     async fn configure_agent<M>(
         mut agent_builder: AgentBuilder<M>,
         context: BuildAgentContext<'_>,
-    ) -> Result<AgentBuilder<M>>
+        tools_tokens: &mut u32,
+    ) -> Result<rig::agent::Agent<M>>
     where
         M: CompletionModel,
     {
@@ -256,10 +258,24 @@ impl Agent {
         let mcp_config = context.config.mcp.as_ref();
         agent_builder = Self::add_static_tools(agent_builder, context);
         agent_builder = Self::add_mcp_tools(agent_builder, mcp_config).await?;
-        Ok(agent_builder)
+        let agent = agent_builder.build();
+        *tools_tokens = count_tokens(
+            &agent
+                .tools
+                .documents()
+                .await
+                .unwrap()
+                .iter()
+                .map(|d| &d.text)
+                .join("\n"),
+        );
+        Ok(agent)
     }
 
-    async fn build_agent(context: BuildAgentContext<'_>) -> Result<Box<dyn HulyAgent>> {
+    async fn build_agent(
+        context: BuildAgentContext<'_>,
+        tools_tokens: &mut u32,
+    ) -> Result<Box<dyn HulyAgent>> {
         match context.config.provider {
             ProviderKind::OpenAI => {
                 let agent_builder = rig::providers::openai::Client::new(
@@ -271,7 +287,7 @@ impl Agent {
                 )
                 .agent(&context.config.model);
                 Ok(Box::new(
-                    Self::configure_agent(agent_builder, context).await?.build(),
+                    Self::configure_agent(agent_builder, context, tools_tokens).await?,
                 ))
             }
             ProviderKind::Anthropic => {
@@ -286,7 +302,7 @@ impl Agent {
                 .agent(&context.config.model)
                 .max_tokens(20000);
                 Ok(Box::new(
-                    Self::configure_agent(agent_builder, context).await?.build(),
+                    Self::configure_agent(agent_builder, context, tools_tokens).await?,
                 ))
             }
             ProviderKind::OpenRouter => {
@@ -299,7 +315,7 @@ impl Agent {
                 )
                 .agent(&context.config.model);
                 Ok(Box::new(
-                    Self::configure_agent(agent_builder, context).await?.build(),
+                    Self::configure_agent(agent_builder, context, tools_tokens).await?,
                 ))
             }
             ProviderKind::LMStudio => {
@@ -313,7 +329,7 @@ impl Agent {
                 )
                 .agent(&context.config.model);
                 Ok(Box::new(
-                    Self::configure_agent(agent_builder, context).await?.build(),
+                    Self::configure_agent(agent_builder, context, tools_tokens).await?,
                 ))
             }
         }
@@ -332,6 +348,16 @@ impl Agent {
         self.sender
             .send(AgentOutputEvent::AddMessage(message.clone()))
             .unwrap();
+        if let Message::User { .. } = &message {
+            // drop env details from earlier user messages, keeping only their first content item
+            self.messages.iter_mut().for_each(|m| {
+                if let Message::User { content, .. } = m {
+                    if content.len() > 1 {
+                        *content = OneOrMany::one(content.first());
+                    }
+                }
+            });
+        }
         self.messages.push(message);
     }
 
@@ -477,7 +503,7 @@ impl Agent {
                             } else {
                                 add_env_message(
                                     &mut result_message,
-                                    None,
+                                    self.memory_index.as_ref(),
                                     &self.config.workspace,
                                     self.process_registry.clone(),
                                 )
@@ -565,17 +591,23 @@ impl Agent {
         let system_prompt =
             prepare_system_prompt(&self.config.workspace, &self.config.user_instructions).await;
         let system_prompt_token_count = count_tokens(&system_prompt);
+        let mut tools_tokens = 0;
         self.agent = Some(
-            Self::build_agent(BuildAgentContext {
-                config: &self.config,
-                system_prompt,
-                memory: self.memory.clone(),
-                process_registry: self.process_registry.clone(),
-                sender: self.sender.clone(),
-            })
+            Self::build_agent(
+                BuildAgentContext {
+                    config: &self.config,
+                    system_prompt,
+                    memory: self.memory.clone(),
+                    process_registry: self.process_registry.clone(),
+                    sender: self.sender.clone(),
+                },
+                &mut tools_tokens,
+            )
             .await
             .unwrap(),
         );
+        // This is a workaround to calculate tokens from the system prompt and tools for providers like LMStudio
+        let system_prompt_token_count = system_prompt_token_count + tools_tokens / 2;
         // restore state from messages
         self.set_state(if self.messages.is_empty() {
             AgentState::WaitingUserPrompt
diff --git a/src/agent/utils.rs b/src/agent/utils.rs
@@ -56,6 +56,7 @@ pub async fn add_env_message<'a>(
 
     for entry in ignore::WalkBuilder::new(&workspace)
         .filter_entry(|e| e.file_name() != "node_modules")
+        .max_depth(Some(2))
         .build()
         .filter_map(|e| e.ok())
         .take(MAX_FILES)
@@ -81,9 +82,16 @@ pub async fn add_env_message<'a>(
         let text = content.first();
         let mut memory_entries = String::new();
         if let Some(memory_index) = memory_index {
-            if let UserContent::Text(text) = text {
-                let res: Vec<(f64, String, Entity)> =
-                    memory_index.top_n(&text.text, 10).await.unwrap();
+            let txt = match text {
+                UserContent::Text(text) => &text.text.to_string(),
+                UserContent::ToolResult(tool_result) => match tool_result.content.first() {
+                    rig::message::ToolResultContent::Text(text) => &text.text.to_string(),
+                    rig::message::ToolResultContent::Image(_) => "",
+                },
+                _ => "",
+            };
+            if !txt.is_empty() {
+                let res: Vec<(f64, String, Entity)> = memory_index.top_n(txt, 10).await.unwrap();
                 let result: Vec<_> = res.into_iter().map(|(_, _, entity)| entity).collect();
                 memory_entries = serde_yaml::to_string(&result).unwrap();
             }
diff --git a/src/main.rs b/src/main.rs
@@ -57,7 +57,7 @@ fn init_logger() {
                         .with_target("ort", tracing::Level::WARN)
                         .with_target("tokenizers", tracing::Level::WARN)
                         .with_target("process_wrap", tracing::Level::INFO)
-                        .with_default(tracing::Level::TRACE),
+                        .with_default(tracing::Level::DEBUG),
                 ),
         )
         .init()
diff --git a/src/templates/env_details.txt b/src/templates/env_details.txt
@@ -10,6 +10,6 @@ ${MEMORY_ENTRIES}
 |------------|-------------------------|---------|
 ${COMMANDS}
 
-# Current Working Directory (${WORKING_DIR}) Files
+# Current Working Directory (${WORKING_DIR}) Files (max depth 2)
 ${FILES}
 </environment_details>
diff --git a/src/tools/list_files.rs b/src/tools/list_files.rs
@@ -14,7 +14,7 @@ use super::{normalize_path, AgentToolError};
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct ListFilesToolArgs {
     pub path: String,
-    pub recursive: Option<bool>,
+    pub max_depth: Option<usize>,
 }
 
 pub struct ListFilesTool {
@@ -38,10 +38,10 @@ impl Tool for ListFilesTool {
         ToolDefinition {
             name: self.name(),
             description: formatdoc! {"\
-                Request to list files and directories within the specified directory. If recursive is true, it will list \
-                all files and directories recursively. If recursive is false or not provided, it will only list the top-level contents. \
-                Do not use this tool to confirm the existence of files you may have created, as the user will let you know \
-                if the files were created successfully or not."}.to_string(),
+                Request to list files and directories within the specified directory. If max_depth equals 1 or not provided, \
+                it will only list the top-level contents. If max_depth is greater than 1, it will list the contents of the directory \
+                and its subdirectories up to the specified depth. Do not use this tool to confirm the existence of files you may have created, \
+                as the user will let you know if the files were created successfully or not."}.to_string(),
             parameters: json!({
                 "type": "object",
                 "properties": {
@@ -50,9 +50,9 @@ impl Tool for ListFilesTool {
                         "description": formatdoc!{"The path of the directory to list contents for (relative to the current \
                                                    working directory {})", workspace_to_string(&self.workspace)},
                     },
-                    "recursive": {
-                        "type": "boolean",
-                        "description": "Whether to list files recursively. Use true for recursive listing, false or omit for top-level only."
+                    "max_depth": {
+                        "type": "number",
+                        "description": "Max depth to list files (default: 1)",
                     }
                 },
                 "required": ["path"]
@@ -63,10 +63,10 @@ impl Tool for ListFilesTool {
 
     async fn call(&self, args: Self::Args) -> Result<Self::Output, Self::Error> {
         let path = normalize_path(&self.workspace, &args.path);
-        let recursive = args.recursive.unwrap_or(false);
+        let max_depth = args.max_depth.unwrap_or(1);
         let mut files: Vec<String> = Vec::default();
         for entry in ignore::WalkBuilder::new(path.clone())
-            .max_depth(if recursive { None } else { Some(1) })
+            .max_depth(Some(max_depth))
             .filter_entry(|e| e.file_name() != "node_modules")
             .build()
             .filter_map(|e| e.ok())
diff --git a/src/tools/memory/mod.rs b/src/tools/memory/mod.rs
@@ -244,6 +244,9 @@ impl MemoryManager {
                     self.knowledge_graph
                         .entities
                         .retain(|entity| entity.name != entity_name);
+                    self.knowledge_graph.relations.retain(|relation| { // keep only relations that do not touch the deleted entity
+                        relation.from != entity_name && relation.to != entity_name
+                    });
                 }
                 self.save();
                 Ok("Entities deleted successfully".to_string())

Original file line number	Diff line number	Diff line change
`@@ -57,7 +57,7 @@ fn init_logger() {`
`57`	`57`	`.with_target("ort", tracing::Level::WARN)`
`58`	`58`	`.with_target("tokenizers", tracing::Level::WARN)`
`59`	`59`	`.with_target("process_wrap", tracing::Level::INFO)`
`60`		`- .with_default(tracing::Level::TRACE),`
	`60`	`+ .with_default(tracing::Level::DEBUG),`
`61`	`61`	`),`
`62`	`62`	`)`
`63`	`63`	`.init()`