From 2eb81f47bd21fdd0e3e1d02e3863bafe8a256676 Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Sun, 21 Dec 2025 16:42:01 -0700 Subject: [PATCH 01/12] feat(read_file): implement CodexCLI-inspired slice/indentation reading modes Replace line_ranges with new offset/limit/mode API for improved file reading: - Slice mode: simple line-by-line reading with offset pagination - Indentation mode: smart code block extraction based on indentation - Add rich metadata for pagination awareness (hasMoreBefore/After, etc.) - Remove deprecated truncateDefinitions helper - Update tool definition with new parameters and examples - Default maxReadFileLine from 500 to 2000 lines --- packages/types/src/tool-params.ts | 31 +- .../assistant-message/NativeToolCallParser.ts | 91 ++- .../__tests__/NativeToolCallParser.spec.ts | 122 ++-- src/core/prompts/tools/native-tools/index.ts | 26 +- .../prompts/tools/native-tools/read_file.ts | 107 +++- src/core/task/build-tools.ts | 2 + src/core/tools/ReadFileTool.ts | 312 ++++------ src/core/tools/__tests__/readFileTool.spec.ts | 169 ++---- .../__tests__/truncateDefinitions.spec.ts | 160 ----- src/core/tools/helpers/truncateDefinitions.ts | 44 -- src/i18n/locales/en/tools.json | 1 + .../misc/__tests__/read-file-content.spec.ts | 282 +++++++++ src/integrations/misc/read-file-content.ts | 557 ++++++++++++++++++ .../settings/ContextManagementSettings.tsx | 2 +- 14 files changed, 1281 insertions(+), 625 deletions(-) delete mode 100644 src/core/tools/helpers/__tests__/truncateDefinitions.spec.ts delete mode 100644 src/core/tools/helpers/truncateDefinitions.ts create mode 100644 src/integrations/misc/__tests__/read-file-content.spec.ts create mode 100644 src/integrations/misc/read-file-content.ts diff --git a/packages/types/src/tool-params.ts b/packages/types/src/tool-params.ts index f8708b0c2b4..5d18657daef 100644 --- a/packages/types/src/tool-params.ts +++ b/packages/types/src/tool-params.ts @@ -2,14 +2,37 @@ * Tool parameter type definitions for native protocol */ -export interface LineRange { - start: number - end: number +/** + * Configuration for indentation-aware block extraction + */ +export interface IndentationConfig { + /** The line to anchor the block expansion from (defaults to offset) */ + anchorLine?: number + /** Maximum indentation depth to collect; 0 = unlimited */ + maxLevels?: number + /** Whether to include sibling blocks at same indentation level */ + includeSiblings?: boolean + /** Whether to include comment headers above the anchor block */ + includeHeader?: boolean + /** Hard cap on returned lines (defaults to limit) */ + maxLines?: number } +/** + * Read mode for file content extraction + */ +export type ReadMode = "slice" | "indentation" + export interface FileEntry { path: string - lineRanges?: LineRange[] + /** 1-indexed line number to start reading from (default: 1) */ + offset?: number + /** Maximum number of lines to return (default: 2000) */ + limit?: number + /** Reading mode: "slice" for simple reading, "indentation" for smart block extraction */ + mode?: ReadMode + /** Configuration for indentation mode */ + indentation?: IndentationConfig } export interface Coordinate { diff --git a/src/core/assistant-message/NativeToolCallParser.ts b/src/core/assistant-message/NativeToolCallParser.ts index f6eac36a9c1..2c59da69cb9 100644 --- a/src/core/assistant-message/NativeToolCallParser.ts +++ b/src/core/assistant-message/NativeToolCallParser.ts @@ -298,40 +298,75 @@ export class NativeToolCallParser { } /** - * Convert raw file entries from API (with line_ranges) to FileEntry objects - * (with lineRanges). Handles multiple formats for compatibility: + * Convert raw file entries from API to FileEntry objects. + * Supports the new slice/indentation API: * - * New tuple format: { path: string, line_ranges: [[1, 50], [100, 150]] } - * Object format: { path: string, line_ranges: [{ start: 1, end: 50 }] } - * Legacy string format: { path: string, line_ranges: ["1-50"] } - * - * Returns: { path: string, lineRanges: [{ start: 1, end: 50 }] } + * { path: string, offset?: number, limit?: number, mode?: "slice" | "indentation", indentation?: {...} } */ private static convertFileEntries(files: any[]): FileEntry[] { return files.map((file: any) => { const entry: FileEntry = { path: file.path } - if (file.line_ranges && Array.isArray(file.line_ranges)) { - entry.lineRanges = file.line_ranges - .map((range: any) => { - // Handle tuple format: [start, end] - if (Array.isArray(range) && range.length >= 2) { - return { start: Number(range[0]), end: Number(range[1]) } - } - // Handle object format: { start: number, end: number } - if (typeof range === "object" && range !== null && "start" in range && "end" in range) { - return { start: Number(range.start), end: Number(range.end) } - } - // Handle legacy string format: "1-50" - if (typeof range === "string") { - const match = range.match(/^(\d+)-(\d+)$/) - if (match) { - return { start: parseInt(match[1], 10), end: parseInt(match[2], 10) } - } - } - return null - }) - .filter(Boolean) + + // Map offset parameter + if (file.offset !== undefined) { + const offset = Number(file.offset) + if (!isNaN(offset) && offset > 0) { + entry.offset = offset + } + } + + // Map limit parameter + if (file.limit !== undefined) { + const limit = Number(file.limit) + if (!isNaN(limit) && limit > 0) { + entry.limit = limit + } } + + // Map mode parameter + if (file.mode === "slice" || file.mode === "indentation") { + entry.mode = file.mode + } + + // Map indentation configuration + if (file.indentation && typeof file.indentation === "object") { + const indent = file.indentation + const indentConfig: FileEntry["indentation"] = {} + + if (indent.anchor_line !== undefined) { + const anchorLine = Number(indent.anchor_line) + if (!isNaN(anchorLine) && anchorLine > 0) { + indentConfig.anchorLine = anchorLine + } + } + + if (indent.max_levels !== undefined) { + const maxLevels = Number(indent.max_levels) + if (!isNaN(maxLevels) && maxLevels >= 0) { + indentConfig.maxLevels = maxLevels + } + } + + if (indent.include_siblings !== undefined) { + indentConfig.includeSiblings = Boolean(indent.include_siblings) + } + + if (indent.include_header !== undefined) { + indentConfig.includeHeader = Boolean(indent.include_header) + } + + if (indent.max_lines !== undefined) { + const maxLines = Number(indent.max_lines) + if (!isNaN(maxLines) && maxLines > 0) { + indentConfig.maxLines = maxLines + } + } + + if (Object.keys(indentConfig).length > 0) { + entry.indentation = indentConfig + } + } + return entry }) } diff --git a/src/core/assistant-message/__tests__/NativeToolCallParser.spec.ts b/src/core/assistant-message/__tests__/NativeToolCallParser.spec.ts index 0e81671cc15..c7e99b604e6 100644 --- a/src/core/assistant-message/__tests__/NativeToolCallParser.spec.ts +++ b/src/core/assistant-message/__tests__/NativeToolCallParser.spec.ts @@ -8,7 +8,7 @@ describe("NativeToolCallParser", () => { describe("parseToolCall", () => { describe("read_file tool", () => { - it("should handle line_ranges as tuples (new format)", () => { + it("should handle offset and mode parameters", () => { const toolCall = { id: "toolu_123", name: "read_file" as const, @@ -16,10 +16,8 @@ describe("NativeToolCallParser", () => { files: [ { path: "src/core/task/Task.ts", - line_ranges: [ - [1920, 1990], - [2060, 2120], - ], + offset: 100, + mode: "slice", }, ], }), @@ -32,18 +30,16 @@ describe("NativeToolCallParser", () => { if (result?.type === "tool_use") { expect(result.nativeArgs).toBeDefined() const nativeArgs = result.nativeArgs as { - files: Array<{ path: string; lineRanges?: Array<{ start: number; end: number }> }> + files: Array<{ path: string; offset?: number; mode?: string }> } expect(nativeArgs.files).toHaveLength(1) expect(nativeArgs.files[0].path).toBe("src/core/task/Task.ts") - expect(nativeArgs.files[0].lineRanges).toEqual([ - { start: 1920, end: 1990 }, - { start: 2060, end: 2120 }, - ]) + expect(nativeArgs.files[0].offset).toBe(100) + expect(nativeArgs.files[0].mode).toBe("slice") } }) - it("should handle line_ranges as strings (legacy format)", () => { + it("should handle indentation mode with configuration", () => { const toolCall = { id: "toolu_123", name: "read_file" as const, @@ -51,7 +47,13 @@ describe("NativeToolCallParser", () => { files: [ { path: "src/core/task/Task.ts", - line_ranges: ["1920-1990", "2060-2120"], + offset: 50, + mode: "indentation", + indentation: { + anchor_line: 55, + max_levels: 2, + include_siblings: true, + }, }, ], }), @@ -64,18 +66,24 @@ describe("NativeToolCallParser", () => { if (result?.type === "tool_use") { expect(result.nativeArgs).toBeDefined() const nativeArgs = result.nativeArgs as { - files: Array<{ path: string; lineRanges?: Array<{ start: number; end: number }> }> + files: Array<{ + path: string + offset?: number + mode?: string + indentation?: { anchorLine?: number; maxLevels?: number; includeSiblings?: boolean } + }> } expect(nativeArgs.files).toHaveLength(1) expect(nativeArgs.files[0].path).toBe("src/core/task/Task.ts") - expect(nativeArgs.files[0].lineRanges).toEqual([ - { start: 1920, end: 1990 }, - { start: 2060, end: 2120 }, - ]) + expect(nativeArgs.files[0].offset).toBe(50) + expect(nativeArgs.files[0].mode).toBe("indentation") + expect(nativeArgs.files[0].indentation?.anchorLine).toBe(55) + expect(nativeArgs.files[0].indentation?.maxLevels).toBe(2) + expect(nativeArgs.files[0].indentation?.includeSiblings).toBe(true) } }) - it("should handle files without line_ranges", () => { + it("should handle files without offset or mode (defaults)", () => { const toolCall = { id: "toolu_123", name: "read_file" as const, @@ -94,15 +102,16 @@ describe("NativeToolCallParser", () => { expect(result?.type).toBe("tool_use") if (result?.type === "tool_use") { const nativeArgs = result.nativeArgs as { - files: Array<{ path: string; lineRanges?: Array<{ start: number; end: number }> }> + files: Array<{ path: string; offset?: number; mode?: string }> } expect(nativeArgs.files).toHaveLength(1) expect(nativeArgs.files[0].path).toBe("src/utils.ts") - expect(nativeArgs.files[0].lineRanges).toBeUndefined() + expect(nativeArgs.files[0].offset).toBeUndefined() + expect(nativeArgs.files[0].mode).toBeUndefined() } }) - it("should handle multiple files with different line_ranges", () => { + it("should handle multiple files with different offsets and modes", () => { const toolCall = { id: "toolu_123", name: "read_file" as const, @@ -110,11 +119,14 @@ describe("NativeToolCallParser", () => { files: [ { path: "file1.ts", - line_ranges: ["1-50"], + offset: 1, + mode: "slice", }, { path: "file2.ts", - line_ranges: ["100-150", "200-250"], + offset: 100, + mode: "indentation", + indentation: { max_levels: 1 }, }, { path: "file3.ts", @@ -129,19 +141,25 @@ describe("NativeToolCallParser", () => { expect(result?.type).toBe("tool_use") if (result?.type === "tool_use") { const nativeArgs = result.nativeArgs as { - files: Array<{ path: string; lineRanges?: Array<{ start: number; end: number }> }> + files: Array<{ + path: string + offset?: number + mode?: string + indentation?: { maxLevels?: number } + }> } expect(nativeArgs.files).toHaveLength(3) - expect(nativeArgs.files[0].lineRanges).toEqual([{ start: 1, end: 50 }]) - expect(nativeArgs.files[1].lineRanges).toEqual([ - { start: 100, end: 150 }, - { start: 200, end: 250 }, - ]) - expect(nativeArgs.files[2].lineRanges).toBeUndefined() + expect(nativeArgs.files[0].offset).toBe(1) + expect(nativeArgs.files[0].mode).toBe("slice") + expect(nativeArgs.files[1].offset).toBe(100) + expect(nativeArgs.files[1].mode).toBe("indentation") + expect(nativeArgs.files[1].indentation?.maxLevels).toBe(1) + expect(nativeArgs.files[2].offset).toBeUndefined() + expect(nativeArgs.files[2].mode).toBeUndefined() } }) - it("should filter out invalid line_range strings", () => { + it("should ignore invalid offset and mode values", () => { const toolCall = { id: "toolu_123", name: "read_file" as const, @@ -149,7 +167,8 @@ describe("NativeToolCallParser", () => { files: [ { path: "file.ts", - line_ranges: ["1-50", "invalid", "100-200", "abc-def"], + offset: -10, // Invalid - should be ignored + mode: "invalid_mode", // Invalid - should be ignored }, ], }), @@ -161,12 +180,11 @@ describe("NativeToolCallParser", () => { expect(result?.type).toBe("tool_use") if (result?.type === "tool_use") { const nativeArgs = result.nativeArgs as { - files: Array<{ path: string; lineRanges?: Array<{ start: number; end: number }> }> + files: Array<{ path: string; offset?: number; mode?: string }> } - expect(nativeArgs.files[0].lineRanges).toEqual([ - { start: 1, end: 50 }, - { start: 100, end: 200 }, - ]) + // Invalid values should not be passed through + expect(nativeArgs.files[0].offset).toBeUndefined() + expect(nativeArgs.files[0].mode).toBeUndefined() } }) }) @@ -174,7 +192,7 @@ describe("NativeToolCallParser", () => { describe("processStreamingChunk", () => { describe("read_file tool", () => { - it("should convert line_ranges strings to lineRanges objects during streaming", () => { + it("should parse offset and mode during streaming", () => { const id = "toolu_streaming_123" NativeToolCallParser.startStreamingToolCall(id, "read_file") @@ -183,7 +201,8 @@ describe("NativeToolCallParser", () => { files: [ { path: "src/test.ts", - line_ranges: ["10-20", "30-40"], + offset: 50, + mode: "slice", }, ], }) @@ -194,20 +213,18 @@ describe("NativeToolCallParser", () => { expect(result).not.toBeNull() expect(result?.nativeArgs).toBeDefined() const nativeArgs = result?.nativeArgs as { - files: Array<{ path: string; lineRanges?: Array<{ start: number; end: number }> }> + files: Array<{ path: string; offset?: number; mode?: string }> } expect(nativeArgs.files).toHaveLength(1) - expect(nativeArgs.files[0].lineRanges).toEqual([ - { start: 10, end: 20 }, - { start: 30, end: 40 }, - ]) + expect(nativeArgs.files[0].offset).toBe(50) + expect(nativeArgs.files[0].mode).toBe("slice") }) }) }) describe("finalizeStreamingToolCall", () => { describe("read_file tool", () => { - it("should convert line_ranges strings to lineRanges objects on finalize", () => { + it("should parse offset and mode on finalize", () => { const id = "toolu_finalize_123" NativeToolCallParser.startStreamingToolCall(id, "read_file") @@ -218,7 +235,9 @@ describe("NativeToolCallParser", () => { files: [ { path: "finalized.ts", - line_ranges: ["500-600"], + offset: 500, + mode: "indentation", + indentation: { anchor_line: 520 }, }, ], }), @@ -230,10 +249,17 @@ describe("NativeToolCallParser", () => { expect(result?.type).toBe("tool_use") if (result?.type === "tool_use") { const nativeArgs = result.nativeArgs as { - files: Array<{ path: string; lineRanges?: Array<{ start: number; end: number }> }> + files: Array<{ + path: string + offset?: number + mode?: string + indentation?: { anchorLine?: number } + }> } expect(nativeArgs.files[0].path).toBe("finalized.ts") - expect(nativeArgs.files[0].lineRanges).toEqual([{ start: 500, end: 600 }]) + expect(nativeArgs.files[0].offset).toBe(500) + expect(nativeArgs.files[0].mode).toBe("indentation") + expect(nativeArgs.files[0].indentation?.anchorLine).toBe(520) } }) }) diff --git a/src/core/prompts/tools/native-tools/index.ts b/src/core/prompts/tools/native-tools/index.ts index 4f78729cdc8..f8dcb246833 100644 --- a/src/core/prompts/tools/native-tools/index.ts +++ b/src/core/prompts/tools/native-tools/index.ts @@ -11,7 +11,7 @@ import fetchInstructions from "./fetch_instructions" import generateImage from "./generate_image" import listFiles from "./list_files" import newTask from "./new_task" -import { createReadFileTool, type ReadFileToolOptions } from "./read_file" +import { createReadFileTool, type CreateReadFileToolOptions } from "./read_file" import runSlashCommand from "./run_slash_command" import searchAndReplace from "./search_and_replace" import searchReplace from "./search_replace" @@ -23,14 +23,16 @@ import writeToFile from "./write_to_file" export { getMcpServerTools } from "./mcp_server" export { convertOpenAIToolToAnthropic, convertOpenAIToolsToAnthropic } from "./converters" -export type { ReadFileToolOptions } from "./read_file" +export type { CreateReadFileToolOptions } from "./read_file" /** - * Options for customizing the native tools array. + * Options for getting native tools */ -export interface NativeToolsOptions { - /** Whether to include line_ranges support in read_file tool (default: true) */ +export interface GetNativeToolsOptions { + /** Whether to include advanced reading parameters (offset, mode, indentation) in read_file tool */ partialReadsEnabled?: boolean + /** The configured max lines per read (shown in description for model awareness) */ + maxReadFileLine?: number /** Maximum number of files that can be read in a single read_file request (default: 5) */ maxConcurrentFileReads?: number /** Whether the model supports image processing (default: false) */ @@ -43,11 +45,17 @@ export interface NativeToolsOptions { * @param options - Configuration options for the tools * @returns Array of native tool definitions */ -export function getNativeTools(options: NativeToolsOptions = {}): OpenAI.Chat.ChatCompletionTool[] { - const { partialReadsEnabled = true, maxConcurrentFileReads = 5, supportsImages = false } = options +export function getNativeTools(options: GetNativeToolsOptions = {}): OpenAI.Chat.ChatCompletionTool[] { + const { + partialReadsEnabled = true, + maxReadFileLine, + maxConcurrentFileReads = 5, + supportsImages = false, + } = options - const readFileOptions: ReadFileToolOptions = { + const readFileOptions: CreateReadFileToolOptions = { partialReadsEnabled, + maxReadFileLine, maxConcurrentFileReads, supportsImages, } @@ -77,5 +85,5 @@ export function getNativeTools(options: NativeToolsOptions = {}): OpenAI.Chat.Ch ] satisfies OpenAI.Chat.ChatCompletionTool[] } -// Backward compatibility: export default tools with line ranges enabled +// Backward compatibility: export default tools with advanced reading enabled export const nativeTools = getNativeTools() diff --git a/src/core/prompts/tools/native-tools/read_file.ts b/src/core/prompts/tools/native-tools/read_file.ts index 7171be0f1d6..8edf087b705 100644 --- a/src/core/prompts/tools/native-tools/read_file.ts +++ b/src/core/prompts/tools/native-tools/read_file.ts @@ -14,11 +14,13 @@ function getReadFileSupportsNote(supportsImages: boolean): string { } /** - * Options for creating the read_file tool definition. + * Options for creating the read_file tool definition */ -export interface ReadFileToolOptions { - /** Whether to include line_ranges parameter (default: true) */ +export interface CreateReadFileToolOptions { + /** Whether to include advanced reading parameters (offset, mode, indentation) */ partialReadsEnabled?: boolean + /** The configured max lines per read (shown in description for model awareness) */ + maxReadFileLine?: number /** Maximum number of files that can be read in a single request (default: 5) */ maxConcurrentFileReads?: number /** Whether the model supports image processing (default: false) */ @@ -26,16 +28,23 @@ export interface ReadFileToolOptions { } /** - * Creates the read_file tool definition, optionally including line_ranges support - * based on whether partial reads are enabled. + * Creates the read_file tool definition with advanced reading modes. * * @param options - Configuration options for the tool * @returns Native tool definition for read_file */ -export function createReadFileTool(options: ReadFileToolOptions = {}): OpenAI.Chat.ChatCompletionTool { - const { partialReadsEnabled = true, maxConcurrentFileReads = 5, supportsImages = false } = options +export function createReadFileTool(options: CreateReadFileToolOptions = {}): OpenAI.Chat.ChatCompletionTool { + const { + partialReadsEnabled = true, + maxReadFileLine, + maxConcurrentFileReads = 5, + supportsImages = false, + } = options const isMultipleReadsEnabled = maxConcurrentFileReads > 1 + // Build limit info for descriptions + const limitInfo = maxReadFileLine && maxReadFileLine > 0 ? `Each read returns up to ${maxReadFileLine} lines. ` : "" + // Build description intro with concurrent reads limit message const descriptionIntro = isMultipleReadsEnabled ? `Read one or more files and return their contents with line numbers for diffing or discussion. IMPORTANT: You can read a maximum of ${maxConcurrentFileReads} files in a single request. If you need to read more files, use multiple sequential read_file requests. ` @@ -44,19 +53,23 @@ export function createReadFileTool(options: ReadFileToolOptions = {}): OpenAI.Ch const baseDescription = descriptionIntro + "Structure: { files: [{ path: 'relative/path.ts'" + - (partialReadsEnabled ? ", line_ranges: [[1, 50], [100, 150]]" : "") + - " }] }. " + - "The 'path' is required and relative to workspace. " + (partialReadsEnabled ? ", offset: 1, mode: 'slice' }" : "}") + + "] }. " + + "The 'path' is required and relative to workspace. " + + limitInfo - const optionalRangesDescription = partialReadsEnabled - ? "The 'line_ranges' is optional for reading specific sections. Each range is a [start, end] tuple (1-based inclusive). " + const modeDescription = partialReadsEnabled + ? "Two modes available: 'slice' (default) for simple line reading with offset, " + + "'indentation' for smart code block extraction that expands from an anchor line based on indentation levels. " + + "Use 'offset' to paginate through large files. " : "" const examples = partialReadsEnabled - ? "Example single file: { files: [{ path: 'src/app.ts' }] }. " + - "Example with line ranges: { files: [{ path: 'src/app.ts', line_ranges: [[1, 50], [100, 150]] }] }. " + + ? "Example simple read: { files: [{ path: 'src/app.ts', offset: 1 }] }. " + + "Example reading from line 500: { files: [{ path: 'src/app.ts', offset: 500 }] }. " + + "Example indentation mode: { files: [{ path: 'src/app.ts', offset: 50, mode: 'indentation', indentation: { maxLevels: 2 } }] }. " + (isMultipleReadsEnabled - ? `Example multiple files (within ${maxConcurrentFileReads}-file limit): { files: [{ path: 'file1.ts', line_ranges: [[1, 50]] }, { path: 'file2.ts' }] }` + ? `Example multiple files (within ${maxConcurrentFileReads}-file limit): { files: [{ path: 'file1.ts', offset: 1 }, { path: 'file2.ts' }] }` : "") : "Example single file: { files: [{ path: 'src/app.ts' }] }. " + (isMultipleReadsEnabled @@ -64,34 +77,72 @@ export function createReadFileTool(options: ReadFileToolOptions = {}): OpenAI.Ch : "") const description = - baseDescription + optionalRangesDescription + getReadFileSupportsNote(supportsImages) + " " + examples + baseDescription + modeDescription + getReadFileSupportsNote(supportsImages) + " " + examples - // Build the properties object conditionally - const fileProperties: Record = { + // Build the file properties object conditionally + const fileProperties: Record = { path: { type: "string", description: "Path to the file to read, relative to the workspace", }, } - // Only include line_ranges if partial reads are enabled + // Only include advanced reading parameters if partial reads are enabled if (partialReadsEnabled) { - fileProperties.line_ranges = { - type: ["array", "null"], + const offsetDesc = + maxReadFileLine && maxReadFileLine > 0 + ? `1-indexed line number to start reading from. Use this to paginate through large files (each read returns up to ${maxReadFileLine} lines). Defaults to 1.` + : "1-indexed line number to start reading from. Use this to paginate through large files. Defaults to 1." + + fileProperties.offset = { + type: ["integer", "null"], + description: offsetDesc, + default: 1, + minimum: 1, + } + + fileProperties.mode = { + type: ["string", "null"], + enum: ["slice", "indentation", null], description: - "Optional line ranges to read. Each range is a [start, end] tuple with 1-based inclusive line numbers. Use multiple ranges for non-contiguous sections.", - items: { - type: "array", - items: { type: "integer" }, - minItems: 2, - maxItems: 2, + "Reading mode: 'slice' for simple line reading (default), 'indentation' for smart code block extraction.", + default: "slice", + } + + fileProperties.indentation = { + type: ["object", "null"], + description: "Configuration for indentation mode. Only used when mode is 'indentation'.", + properties: { + anchorLine: { + type: ["integer", "null"], + description: "The line to anchor the block expansion from. Defaults to offset.", + minimum: 1, + }, + maxLevels: { + type: ["integer", "null"], + description: + "Maximum indentation depth to collect. 0 = unlimited (expand to file-level). Defaults to 0.", + default: 0, + minimum: 0, + }, + includeSiblings: { + type: ["boolean", "null"], + description: "Whether to include sibling blocks at the same indentation level. Defaults to false.", + default: false, + }, + includeHeader: { + type: ["boolean", "null"], + description: "Whether to include comment headers above the anchor block. Defaults to true.", + default: true, + }, }, + additionalProperties: false, } } // When using strict mode, ALL properties must be in the required array // Optional properties are handled by having type: ["...", "null"] - const fileRequiredProperties = partialReadsEnabled ? ["path", "line_ranges"] : ["path"] + const fileRequiredProperties = partialReadsEnabled ? ["path", "offset", "mode", "indentation"] : ["path"] return { type: "function", diff --git a/src/core/task/build-tools.ts b/src/core/task/build-tools.ts index 52a9f2eb82f..6d8179c43d3 100644 --- a/src/core/task/build-tools.ts +++ b/src/core/task/build-tools.ts @@ -68,8 +68,10 @@ export async function buildNativeToolsArray(options: BuildToolsOptions): Promise const supportsImages = modelInfo?.supportsImages ?? false // Build native tools with dynamic read_file tool based on settings. + // Pass maxReadFileLine so the tool description tells the model about the limit. const nativeTools = getNativeTools({ partialReadsEnabled, + maxReadFileLine: partialReadsEnabled ? maxReadFileLine : undefined, maxConcurrentFileReads, supportsImages, }) diff --git a/src/core/tools/ReadFileTool.ts b/src/core/tools/ReadFileTool.ts index 483d4f00252..9de269de2e6 100644 --- a/src/core/tools/ReadFileTool.ts +++ b/src/core/tools/ReadFileTool.ts @@ -1,7 +1,7 @@ import path from "path" import * as fs from "fs/promises" import { isBinaryFile } from "isbinaryfile" -import type { FileEntry, LineRange } from "@roo-code/types" +import type { FileEntry, ReadMode, IndentationConfig } from "@roo-code/types" import { isNativeProtocol, ANTHROPIC_DEFAULT_MAX_TOKENS } from "@roo-code/types" import { Task } from "../task/Task" @@ -13,9 +13,8 @@ import { RecordSource } from "../context-tracking/FileContextTrackerTypes" import { isPathOutsideWorkspace } from "../../utils/pathUtils" import { getReadablePath } from "../../utils/path" import { countFileLines } from "../../integrations/misc/line-counter" -import { readLines } from "../../integrations/misc/read-lines" +import { readFileContent } from "../../integrations/misc/read-file-content" import { extractTextFromFile, addLineNumbers, getSupportedBinaryFormats } from "../../integrations/misc/extract-text" -import { parseSourceCodeDefinitionsForFile } from "../../services/tree-sitter" import { parseXml } from "../../utils/xml" import { resolveToolProtocol } from "../../utils/resolveToolProtocol" import { @@ -27,7 +26,6 @@ import { ImageMemoryTracker, } from "./helpers/imageHelpers" import { FILE_READ_BUDGET_PERCENT, readFileWithTokenBudget } from "./helpers/fileTokenBudget" -import { truncateDefinitionsToLineLimit } from "./helpers/truncateDefinitions" import { BaseTool, ToolCallbacks } from "./BaseTool" import type { ToolUse } from "../../shared/tools" @@ -37,7 +35,11 @@ interface FileResult { content?: string error?: string notice?: string - lineRanges?: LineRange[] + // Slice/indentation mode parameters + offset?: number + limit?: number + mode?: ReadMode + indentation?: IndentationConfig xmlContent?: string nativeContent?: string imageDataUrl?: string @@ -50,65 +52,27 @@ export class ReadFileTool extends BaseTool<"read_file"> { parseLegacy(params: Partial>): { files: FileEntry[] } { const argsXmlTag = params.args - const legacyPath = params.path - const legacyStartLineStr = params.start_line - const legacyEndLineStr = params.end_line const fileEntries: FileEntry[] = [] - // XML args format + // XML args format - just parse paths, advanced features are native-only if (argsXmlTag) { const parsed = parseXml(argsXmlTag) as any const files = Array.isArray(parsed.file) ? parsed.file : [parsed.file].filter(Boolean) for (const file of files) { if (!file.path) continue - - const fileEntry: FileEntry = { - path: file.path, - lineRanges: [], - } - - if (file.line_range) { - const ranges = Array.isArray(file.line_range) ? file.line_range : [file.line_range] - for (const range of ranges) { - const match = String(range).match(/(\d+)-(\d+)/) - if (match) { - const [, start, end] = match.map(Number) - if (!isNaN(start) && !isNaN(end)) { - fileEntry.lineRanges?.push({ start, end }) - } - } - } - } - fileEntries.push(fileEntry) + fileEntries.push({ path: file.path }) } return { files: fileEntries } } - // Legacy single file path - if (legacyPath) { - const fileEntry: FileEntry = { - path: legacyPath, - lineRanges: [], - } - - if (legacyStartLineStr && legacyEndLineStr) { - const start = parseInt(legacyStartLineStr, 10) - const end = parseInt(legacyEndLineStr, 10) - if (!isNaN(start) && !isNaN(end) && start > 0 && end > 0) { - fileEntry.lineRanges?.push({ start, end }) - } - } - fileEntries.push(fileEntry) - } - return { files: fileEntries } } async execute(params: { files: FileEntry[] }, task: Task, callbacks: ToolCallbacks): Promise { - const { handleError, pushToolResult, toolProtocol } = callbacks + const { pushToolResult } = callbacks const fileEntries = params.files const modelInfo = task.api.getModel().info // Use the task's locked protocol for consistent output formatting throughout the task @@ -141,7 +105,10 @@ export class ReadFileTool extends BaseTool<"read_file"> { const fileResults: FileResult[] = fileEntries.map((entry) => ({ path: entry.path, status: "pending", - lineRanges: entry.lineRanges, + // Map slice/indentation mode parameters (limit is not mapped - always use maxReadFileLine setting) + offset: entry.offset, + mode: entry.mode, + indentation: entry.indentation, })) const updateFileResult = (filePath: string, updates: Partial) => { @@ -156,38 +123,6 @@ export class ReadFileTool extends BaseTool<"read_file"> { for (const fileResult of fileResults) { const relPath = fileResult.path - const fullPath = path.resolve(task.cwd, relPath) - - if (fileResult.lineRanges) { - let hasRangeError = false - for (const range of fileResult.lineRanges) { - if (range.start > range.end) { - const errorMsg = "Invalid line range: end line cannot be less than start line" - updateFileResult(relPath, { - status: "blocked", - error: errorMsg, - xmlContent: `${relPath}Error reading file: ${errorMsg}`, - nativeContent: `File: ${relPath}\nError: Error reading file: ${errorMsg}`, - }) - await task.say("error", `Error reading file ${relPath}: ${errorMsg}`) - hasRangeError = true - break - } - if (isNaN(range.start) || isNaN(range.end)) { - const errorMsg = "Invalid line range values" - updateFileResult(relPath, { - status: "blocked", - error: errorMsg, - xmlContent: `${relPath}Error reading file: ${errorMsg}`, - nativeContent: `File: ${relPath}\nError: Error reading file: ${errorMsg}`, - }) - await task.say("error", `Error reading file ${relPath}: ${errorMsg}`) - hasRangeError = true - break - } - } - if (hasRangeError) continue - } if (fileResult.status === "pending") { const accessAllowed = task.rooIgnoreController?.validateAccess(relPath) @@ -216,15 +151,14 @@ export class ReadFileTool extends BaseTool<"read_file"> { const isOutsideWorkspace = isPathOutsideWorkspace(fullPath) let lineSnippet = "" - if (fileResult.lineRanges && fileResult.lineRanges.length > 0) { - const ranges = fileResult.lineRanges.map((range) => - t("tools:readFile.linesRange", { start: range.start, end: range.end }), - ) - lineSnippet = ranges.join(", ") - } else if (maxReadFileLine === 0) { - lineSnippet = t("tools:readFile.definitionsOnly") - } else if (maxReadFileLine > 0) { - lineSnippet = t("tools:readFile.maxLines", { max: maxReadFileLine }) + const startLine = fileResult.offset ?? 1 + if (maxReadFileLine > 0) { + // Show the expected line range (start to start + maxReadFileLine - 1) + const endLine = startLine + maxReadFileLine - 1 + lineSnippet = t("tools:readFile.linesRange", { start: startLine, end: endLine }) + } else if (startLine > 1) { + // No line limit but reading from offset + lineSnippet = t("tools:readFile.startingFromLine", { start: startLine }) } const readablePath = getReadablePath(task.cwd, relPath) @@ -299,15 +233,14 @@ export class ReadFileTool extends BaseTool<"read_file"> { const { maxReadFileLine = -1 } = (await task.providerRef.deref()?.getState()) ?? {} let lineSnippet = "" - if (fileResult.lineRanges && fileResult.lineRanges.length > 0) { - const ranges = fileResult.lineRanges.map((range) => - t("tools:readFile.linesRange", { start: range.start, end: range.end }), - ) - lineSnippet = ranges.join(", ") - } else if (maxReadFileLine === 0) { - lineSnippet = t("tools:readFile.definitionsOnly") - } else if (maxReadFileLine > 0) { - lineSnippet = t("tools:readFile.maxLines", { max: maxReadFileLine }) + const startLine = fileResult.offset ?? 1 + if (maxReadFileLine > 0) { + // Show the expected line range (start to start + maxReadFileLine - 1) + const endLine = startLine + maxReadFileLine - 1 + lineSnippet = t("tools:readFile.linesRange", { start: startLine, end: endLine }) + } else if (startLine > 1) { + // No line limit but reading from offset + lineSnippet = t("tools:readFile.startingFromLine", { start: startLine }) } const completeMessage = JSON.stringify({ @@ -457,86 +390,106 @@ export class ReadFileTool extends BaseTool<"read_file"> { } } - if (fileResult.lineRanges && fileResult.lineRanges.length > 0) { - const rangeResults: string[] = [] - const nativeRangeResults: string[] = [] - - for (const range of fileResult.lineRanges) { - const content = addLineNumbers( - await readLines(fullPath, range.end - 1, range.start - 1), - range.start, - ) - const lineRangeAttr = ` lines="${range.start}-${range.end}"` - rangeResults.push(`\n${content}`) - nativeRangeResults.push(`Lines ${range.start}-${range.end}:\n${content}`) - } + // Handle slice/indentation mode when offset or mode is specified + if (fileResult.offset !== undefined || fileResult.mode !== undefined) { + try { + const result = await readFileContent({ + filePath: fullPath, + offset: fileResult.offset, + // limit is controlled by maxReadFileLine setting, not model input + mode: fileResult.mode, + indentation: fileResult.indentation, + defaultLimit: maxReadFileLine > 0 ? maxReadFileLine : undefined, + }) - updateFileResult(relPath, { - xmlContent: `${relPath}\n${rangeResults.join("\n")}\n`, - nativeContent: `File: ${relPath}\n${nativeRangeResults.join("\n\n")}`, - }) - continue - } + await task.fileContextTracker.trackFileContext(relPath, "read_tool" as RecordSource) - if (maxReadFileLine === 0) { - try { - const defResult = await parseSourceCodeDefinitionsForFile( - fullPath, - task.rooIgnoreController, - ) - if (defResult) { - const notice = `Showing only ${maxReadFileLine} of ${totalLines} total lines. Use line_range if you need to read more lines` - updateFileResult(relPath, { - xmlContent: `${relPath}\n${defResult}\n${notice}\n`, - nativeContent: `File: ${relPath}\nCode Definitions:\n${defResult}\n\nNote: ${notice}`, + const modeLabel = fileResult.mode === "indentation" ? "indentation" : "slice" + const { metadata } = result + let xmlInfo = "" + let nativeInfo = "" + + if (result.lineCount === 0) { + xmlInfo = `\nNo content returned (file may be empty or offset exceeds file length)\n` + nativeInfo = `Note: No content returned (file may be empty or offset exceeds file length)` + } else { + const lineRangeAttr = ` lines="${metadata.startLine}-${metadata.endLine}"` + xmlInfo = `\n${result.content}\n` + nativeInfo = `Lines ${metadata.startLine}-${metadata.endLine} (${modeLabel} mode):\n${result.content}` + + // Include structured metadata for LLM pagination awareness + const metadataJson = JSON.stringify({ + totalLinesInFile: metadata.totalLinesInFile, + linesReturned: metadata.linesReturned, + startLine: metadata.startLine, + endLine: metadata.endLine, + hasMoreBefore: metadata.hasMoreBefore, + hasMoreAfter: metadata.hasMoreAfter, + linesBeforeStart: metadata.linesBeforeStart, + linesAfterEnd: metadata.linesAfterEnd, + truncatedByLimit: metadata.truncatedByLimit, + lineLengthTruncations: metadata.lineLengthTruncations, }) + xmlInfo += `${metadataJson}\n` + nativeInfo += `\n\n${metadataJson}` } + + updateFileResult(relPath, { + xmlContent: `${relPath}\n${xmlInfo}`, + nativeContent: `File: ${relPath}\n${nativeInfo}`, + }) + continue } catch (error) { - if (error instanceof Error && error.message.startsWith("Unsupported language:")) { - console.warn(`[read_file] Warning: ${error.message}`) - } else { - console.error( - `[read_file] Unhandled error: ${error instanceof Error ? error.message : String(error)}`, - ) - } + const errorMsg = error instanceof Error ? error.message : String(error) + updateFileResult(relPath, { + status: "error", + error: `Error reading file with ${fileResult.mode || "slice"} mode: ${errorMsg}`, + xmlContent: `${relPath}Error reading file: ${errorMsg}`, + nativeContent: `File: ${relPath}\nError: Error reading file: ${errorMsg}`, + }) + await task.say("error", `Error reading file ${relPath}: ${errorMsg}`) + continue } - continue } + // Handle maxReadFileLine partial read if (maxReadFileLine > 0 && totalLines > maxReadFileLine) { - const content = addLineNumbers(await readLines(fullPath, maxReadFileLine - 1, 0)) - const lineRangeAttr = ` lines="1-${maxReadFileLine}"` + const sliceResult = await readFileContent({ + filePath: fullPath, + offset: 1, + limit: maxReadFileLine, + mode: "slice", + }) + // readFileContent already includes line numbers + const content = sliceResult.content + const { metadata } = sliceResult + + // Build metadata summary for pagination awareness + const metadataSummary = [] + if (metadata.hasMoreAfter) { + metadataSummary.push(`${metadata.linesAfterEnd} lines after`) + } + if (metadata.lineLengthTruncations.length > 0) { + metadataSummary.push(`${metadata.lineLengthTruncations.length} lines truncated for length`) + } + + const lineRangeAttr = ` lines="${metadata.startLine}-${metadata.endLine}"` let xmlInfo = `\n${content}\n` - let nativeInfo = `Lines 1-${maxReadFileLine}:\n${content}\n` + let nativeInfo = `Lines ${metadata.startLine}-${metadata.endLine}:\n${content}\n` - try { - const defResult = await parseSourceCodeDefinitionsForFile( - fullPath, - task.rooIgnoreController, - ) - if (defResult) { - const truncatedDefs = truncateDefinitionsToLineLimit(defResult, maxReadFileLine) - xmlInfo += `${truncatedDefs}\n` - nativeInfo += `\nCode Definitions:\n${truncatedDefs}\n` - } + const notice = + `Showing ${sliceResult.lineCount} of ${metadata.totalLinesInFile} total lines` + + (metadataSummary.length > 0 ? ` (${metadataSummary.join(", ")})` : "") + + `. Use offset to read more` + xmlInfo += `${notice}\n` + nativeInfo += `\nNote: ${notice}` - const notice = `Showing only ${maxReadFileLine} of ${totalLines} total lines. Use line_range if you need to read more lines` - xmlInfo += `${notice}\n` - nativeInfo += `\nNote: ${notice}` + await task.fileContextTracker.trackFileContext(relPath, "read_tool" as RecordSource) - updateFileResult(relPath, { - xmlContent: `${relPath}\n${xmlInfo}`, - nativeContent: `File: ${relPath}\n${nativeInfo}`, - }) - } catch (error) { - if (error instanceof Error && error.message.startsWith("Unsupported language:")) { - console.warn(`[read_file] Warning: ${error.message}`) - } else { - console.error( - `[read_file] Unhandled error: ${error instanceof Error ? error.message : String(error)}`, - ) - } - } + updateFileResult(relPath, { + xmlContent: `${relPath}\n${xmlInfo}`, + nativeContent: `File: ${relPath}\n${nativeInfo}`, + }) continue } @@ -575,7 +528,7 @@ export class ReadFileTool extends BaseTool<"read_file"> { if (!result.complete) { // File was truncated - const notice = `File truncated: showing ${result.lineCount} lines (${result.tokenCount} tokens) due to context budget. Use line_range to read specific sections.` + const notice = `File truncated: showing ${result.lineCount} lines (${result.tokenCount} tokens) due to context budget. Use offset to read more.` const lineRangeAttr = result.lineCount > 0 ? ` lines="1-${result.lineCount}"` : "" xmlInfo = result.lineCount > 0 @@ -743,7 +696,7 @@ export class ReadFileTool extends BaseTool<"read_file"> { } } - // Fallback to legacy/XML or synthesized params + // XML args format const blockParams = second as any if (blockParams?.args) { @@ -766,44 +719,19 @@ export class ReadFileTool extends BaseTool<"read_file"> { console.error("Failed to parse read_file args XML for description:", error) return `[${blockName} with unparsable args]` } - } else if (blockParams?.path) { - return `[${blockName} for '${blockParams.path}'. Reading multiple files at once is more efficient for the LLM. If other files are relevant to your current task, please read them simultaneously.]` - } else if (blockParams?.files) { - // Back-compat: some paths may still synthesize params.files; try to parse if present - try { - const files = JSON.parse(blockParams.files) - if (Array.isArray(files) && files.length > 0) { - const paths = files.map((f: any) => f?.path).filter(Boolean) as string[] - if (paths.length === 1) { - return `[${blockName} for '${paths[0]}'. Reading multiple files at once is more efficient for the LLM. If other files are relevant to your current task, please read them simultaneously.]` - } else if (paths.length <= 3) { - const pathList = paths.map((p) => `'${p}'`).join(", ") - return `[${blockName} for ${pathList}]` - } else { - return `[${blockName} for ${paths.length} files]` - } - } - } catch (error) { - console.error("Failed to parse native files JSON for description:", error) - return `[${blockName} with unparsable files]` - } } - return `[${blockName} with missing path/args/files]` + return `[${blockName} with missing args]` } override async handlePartial(task: Task, block: ToolUse<"read_file">): Promise { const argsXmlTag = block.params.args - const legacyPath = block.params.path let filePath = "" if (argsXmlTag) { const match = argsXmlTag.match(/.*?([^<]+)<\/path>/s) if (match) filePath = match[1] } - if (!filePath && legacyPath) { - filePath = legacyPath - } if (!filePath && block.nativeArgs && "files" in block.nativeArgs && Array.isArray(block.nativeArgs.files)) { const files = block.nativeArgs.files diff --git a/src/core/tools/__tests__/readFileTool.spec.ts b/src/core/tools/__tests__/readFileTool.spec.ts index f178e38026c..5e8530fdcd5 100644 --- a/src/core/tools/__tests__/readFileTool.spec.ts +++ b/src/core/tools/__tests__/readFileTool.spec.ts @@ -3,9 +3,7 @@ import * as path from "path" import { countFileLines } from "../../../integrations/misc/line-counter" -import { readLines } from "../../../integrations/misc/read-lines" import { extractTextFromFile } from "../../../integrations/misc/extract-text" -import { parseSourceCodeDefinitionsForFile } from "../../../services/tree-sitter" import { isBinaryFile } from "isbinaryfile" import { ReadFileToolUse, ToolParamName, ToolResponse } from "../../../shared/tools" import { readFileTool } from "../ReadFileTool" @@ -24,7 +22,13 @@ vi.mock("path", async () => { vi.mock("isbinaryfile") vi.mock("../../../integrations/misc/line-counter") -vi.mock("../../../integrations/misc/read-lines") + +// Create hoisted mock for readFileContent +const mockReadFileContent = vi.hoisted(() => vi.fn()) + +vi.mock("../../../integrations/misc/read-file-content", () => ({ + readFileContent: mockReadFileContent, +})) // Mock fs/promises readFile for image tests const fsPromises = vi.hoisted(() => ({ @@ -53,7 +57,6 @@ vi.mock("../../../integrations/misc/extract-text", () => ({ addLineNumbers: addLineNumbersMock, getSupportedBinaryFormats: vi.fn(() => [".pdf", ".docx", ".ipynb"]), })) -vi.mock("../../../services/tree-sitter") // Mock readFileWithTokenBudget - must be mocked to prevent actual file system access vi.mock("../../../integrations/misc/read-file-with-budget", () => ({ @@ -269,13 +272,10 @@ describe("read_file tool with maxReadFileLine setting", () => { const absoluteFilePath = "/test/file.txt" const fileContent = "Line 1\nLine 2\nLine 3\nLine 4\nLine 5" const numberedFileContent = "1 | Line 1\n2 | Line 2\n3 | Line 3\n4 | Line 4\n5 | Line 5\n" - const sourceCodeDef = "\n\n# file.txt\n1--5 | Content" // Mocked functions with correct types const mockedCountFileLines = vi.mocked(countFileLines) - const mockedReadLines = vi.mocked(readLines) const mockedExtractTextFromFile = vi.mocked(extractTextFromFile) - const mockedParseSourceCodeDefinitionsForFile = vi.mocked(parseSourceCodeDefinitionsForFile) const mockedIsBinaryFile = vi.mocked(isBinaryFile) const mockedPathResolve = vi.mocked(path.resolve) @@ -410,106 +410,73 @@ describe("read_file tool with maxReadFileLine setting", () => { }) }) - describe("when maxReadFileLine is 0", () => { - it("should return an empty content with source code definitions", async () => { - // Setup - for maxReadFileLine = 0, the implementation won't call readLines - mockedParseSourceCodeDefinitionsForFile.mockResolvedValue(sourceCodeDef) - - // Execute - skip addLineNumbers check as it's not called for maxReadFileLine=0 - const result = await executeReadFileTool( - {}, - { - maxReadFileLine: 0, - totalLines: 5, - skipAddLineNumbersCheck: true, - }, - ) - - // Verify - native format - expect(result).toContain(`File: ${testFilePath}`) - expect(result).toContain(`Code Definitions:`) - - // Verify native structure - expect(result).toContain("Note: Showing only 0 of 5 total lines") - expect(result).toContain(sourceCodeDef.trim()) - expect(result).not.toContain("Lines 1-") // No content when maxReadFileLine is 0 - }) - }) - describe("when maxReadFileLine is less than file length", () => { - it("should read only maxReadFileLine lines and add source code definitions", async () => { + it("should read only maxReadFileLine lines with notice", async () => { // Setup const content = "Line 1\nLine 2\nLine 3" const numberedContent = "1 | Line 1\n2 | Line 2\n3 | Line 3" - mockedReadLines.mockResolvedValue(content) - mockedParseSourceCodeDefinitionsForFile.mockResolvedValue(sourceCodeDef) - // Setup addLineNumbers to always return numbered content - addLineNumbersMock.mockReturnValue(numberedContent) + // Mock readFileContent for partial read with metadata + mockReadFileContent.mockResolvedValue({ + content: numberedContent, + lineCount: 3, + totalLines: 5, + metadata: { + filePath: absoluteFilePath, + totalLinesInFile: 5, + linesReturned: 3, + startLine: 1, + endLine: 3, + hasMoreBefore: false, + hasMoreAfter: true, + linesBeforeStart: 0, + linesAfterEnd: 2, + truncatedByLimit: true, + lineLengthTruncations: [], + }, + }) // Execute const result = await executeReadFileTool({}, { maxReadFileLine: 3 }) - // Verify - native format - expect(result).toContain(`File: ${testFilePath}`) - expect(result).toContain(`Lines 1-3:`) - expect(result).toContain(`Code Definitions:`) - expect(result).toContain("Note: Showing only 3 of 5 total lines") + // Verify - just check that the result contains the expected elements + expect(result).toContain(`${testFilePath}`) + expect(result).toContain(``) + expect(result).toContain("Showing 3 of 5 total lines") }) - it("should truncate code definitions when file exceeds maxReadFileLine", async () => { - // Setup - file with 100 lines but we'll only read first 30 - const content = "Line 1\nLine 2\nLine 3" - const numberedContent = "1 | Line 1\n2 | Line 2\n3 | Line 3" - const fullDefinitions = `# file.txt -10--20 | function foo() { -50--60 | function bar() { -80--90 | function baz() {` - const truncatedDefinitions = `# file.txt -10--20 | function foo() {` - - mockedReadLines.mockResolvedValue(content) - mockedParseSourceCodeDefinitionsForFile.mockResolvedValue(fullDefinitions) - addLineNumbersMock.mockReturnValue(numberedContent) - - // Execute with maxReadFileLine = 30 - const result = await executeReadFileTool({}, { maxReadFileLine: 30, totalLines: 100 }) - - // Verify - native format - expect(result).toContain(`File: ${testFilePath}`) - expect(result).toContain(`Lines 1-30:`) - expect(result).toContain(`Code Definitions:`) - - // Should include foo (starts at line 10) but not bar (starts at line 50) or baz (starts at line 80) - expect(result).toContain("10--20 | function foo()") - expect(result).not.toContain("50--60 | function bar()") - expect(result).not.toContain("80--90 | function baz()") - - expect(result).toContain("Note: Showing only 30 of 100 total lines") - }) - - it("should handle truncation when all definitions are beyond the line limit", async () => { - // Setup - all definitions start after maxReadFileLine + it("should suggest using offset/limit for specific sections", async () => { + // Setup const content = "Line 1\nLine 2\nLine 3" const numberedContent = "1 | Line 1\n2 | Line 2\n3 | Line 3" - const fullDefinitions = `# file.txt -50--60 | function foo() { -80--90 | function bar() {` - mockedReadLines.mockResolvedValue(content) - mockedParseSourceCodeDefinitionsForFile.mockResolvedValue(fullDefinitions) - addLineNumbersMock.mockReturnValue(numberedContent) + // Mock readFileContent for partial read with metadata + mockReadFileContent.mockResolvedValue({ + content: numberedContent, + lineCount: 3, + totalLines: 100, + metadata: { + filePath: absoluteFilePath, + totalLinesInFile: 100, + linesReturned: 3, + startLine: 1, + endLine: 3, + hasMoreBefore: false, + hasMoreAfter: true, + linesBeforeStart: 0, + linesAfterEnd: 97, + truncatedByLimit: true, + lineLengthTruncations: [], + }, + }) - // Execute with maxReadFileLine = 30 - const result = await executeReadFileTool({}, { maxReadFileLine: 30, totalLines: 100 }) + // Execute with maxReadFileLine = 3 + const result = await executeReadFileTool({}, { maxReadFileLine: 3, totalLines: 100 }) - // Verify - native format - expect(result).toContain(`File: ${testFilePath}`) - expect(result).toContain(`Lines 1-30:`) - expect(result).toContain(`Code Definitions:`) - expect(result).toContain("# file.txt") - expect(result).not.toContain("50--60 | function foo()") - expect(result).not.toContain("80--90 | function bar()") + // Verify notice includes suggestion about offset/limit + expect(result).toContain(`${testFilePath}`) + expect(result).toContain(``) + expect(result).toContain("Use offset to read more") }) }) @@ -565,26 +532,6 @@ describe("read_file tool with maxReadFileLine setting", () => { expect(typeof result).toBe("string") }) }) - - describe("with range parameters", () => { - it("should honor start_line and end_line when provided", async () => { - // Setup - mockedReadLines.mockResolvedValue("Line 2\nLine 3\nLine 4") - - // Execute using executeReadFileTool with range parameters - const rangeResult = await executeReadFileTool( - {}, - { - start_line: "2", - end_line: "4", - }, - ) - - // Verify - native format - expect(rangeResult).toContain(`File: ${testFilePath}`) - expect(rangeResult).toContain(`Lines 2-4:`) - }) - }) }) describe("read_file tool output structure", () => { diff --git a/src/core/tools/helpers/__tests__/truncateDefinitions.spec.ts b/src/core/tools/helpers/__tests__/truncateDefinitions.spec.ts deleted file mode 100644 index a221b574055..00000000000 --- a/src/core/tools/helpers/__tests__/truncateDefinitions.spec.ts +++ /dev/null @@ -1,160 +0,0 @@ -import { describe, it, expect } from "vitest" -import { truncateDefinitionsToLineLimit } from "../truncateDefinitions" - -describe("truncateDefinitionsToLineLimit", () => { - it("should not truncate when maxReadFileLine is -1 (no limit)", () => { - const definitions = `# test.ts -10--20 | function foo() { -30--40 | function bar() { -50--60 | function baz() {` - - const result = truncateDefinitionsToLineLimit(definitions, -1) - expect(result).toBe(definitions) - }) - - it("should not truncate when maxReadFileLine is 0 (definitions only mode)", () => { - const definitions = `# test.ts -10--20 | function foo() { -30--40 | function bar() { -50--60 | function baz() {` - - const result = truncateDefinitionsToLineLimit(definitions, 0) - expect(result).toBe(definitions) - }) - - it("should truncate definitions beyond the line limit", () => { - const definitions = `# test.ts -10--20 | function foo() { -30--40 | function bar() { -50--60 | function baz() {` - - const result = truncateDefinitionsToLineLimit(definitions, 25) - const expected = `# test.ts -10--20 | function foo() {` - - expect(result).toBe(expected) - }) - - it("should include definitions that start within limit even if they end beyond it", () => { - const definitions = `# test.ts -10--50 | function foo() { -60--80 | function bar() {` - - const result = truncateDefinitionsToLineLimit(definitions, 30) - const expected = `# test.ts -10--50 | function foo() {` - - expect(result).toBe(expected) - }) - - it("should handle single-line definitions", () => { - const definitions = `# test.ts -10 | const foo = 1 -20 | const bar = 2 -30 | const baz = 3` - - const result = truncateDefinitionsToLineLimit(definitions, 25) - const expected = `# test.ts -10 | const foo = 1 -20 | const bar = 2` - - expect(result).toBe(expected) - }) - - it("should preserve header line when all definitions are beyond limit", () => { - const definitions = `# test.ts -100--200 | function foo() {` - - const result = truncateDefinitionsToLineLimit(definitions, 50) - const expected = `# test.ts` - - expect(result).toBe(expected) - }) - - it("should handle empty definitions", () => { - const definitions = `# test.ts` - - const result = truncateDefinitionsToLineLimit(definitions, 50) - expect(result).toBe(definitions) - }) - - it("should handle definitions without header", () => { - const definitions = `10--20 | function foo() { -30--40 | function bar() {` - - const result = truncateDefinitionsToLineLimit(definitions, 25) - const expected = `10--20 | function foo() {` - - expect(result).toBe(expected) - }) - - it("should not preserve empty lines (only definition lines)", () => { - const definitions = `# test.ts -10--20 | function foo() { - -30--40 | function bar() {` - - const result = truncateDefinitionsToLineLimit(definitions, 25) - const expected = `# test.ts -10--20 | function foo() {` - - expect(result).toBe(expected) - }) - - it("should handle mixed single and range definitions", () => { - const definitions = `# test.ts -5 | const x = 1 -10--20 | function foo() { -25 | const y = 2 -30--40 | function bar() {` - - const result = truncateDefinitionsToLineLimit(definitions, 26) - const expected = `# test.ts -5 | const x = 1 -10--20 | function foo() { -25 | const y = 2` - - expect(result).toBe(expected) - }) - - it("should handle definitions at exactly the limit", () => { - const definitions = `# test.ts -10--20 | function foo() { -30--40 | function bar() { -50--60 | function baz() {` - - const result = truncateDefinitionsToLineLimit(definitions, 30) - const expected = `# test.ts -10--20 | function foo() { -30--40 | function bar() {` - - expect(result).toBe(expected) - }) - - it("should handle definitions with leading whitespace", () => { - const definitions = `# test.ts - 10--20 | function foo() { - 30--40 | function bar() { - 50--60 | function baz() {` - - const result = truncateDefinitionsToLineLimit(definitions, 25) - const expected = `# test.ts - 10--20 | function foo() {` - - expect(result).toBe(expected) - }) - - it("should handle definitions with mixed whitespace patterns", () => { - const definitions = `# test.ts -10--20 | function foo() { - 30--40 | function bar() { - 50--60 | function baz() {` - - const result = truncateDefinitionsToLineLimit(definitions, 35) - const expected = `# test.ts -10--20 | function foo() { - 30--40 | function bar() {` - - expect(result).toBe(expected) - }) -}) diff --git a/src/core/tools/helpers/truncateDefinitions.ts b/src/core/tools/helpers/truncateDefinitions.ts deleted file mode 100644 index 7c193ef52a5..00000000000 --- a/src/core/tools/helpers/truncateDefinitions.ts +++ /dev/null @@ -1,44 +0,0 @@ -/** - * Truncate code definitions to only include those within the line limit - * @param definitions - The full definitions string from parseSourceCodeDefinitionsForFile - * @param maxReadFileLine - Maximum line number to include (-1 for no limit, 0 for definitions only) - * @returns Truncated definitions string - */ -export function truncateDefinitionsToLineLimit(definitions: string, maxReadFileLine: number): string { - // If no limit or definitions-only mode (0), return as-is - if (maxReadFileLine <= 0) { - return definitions - } - - const lines = definitions.split("\n") - const result: string[] = [] - let startIndex = 0 - - // Keep the header line (e.g., "# filename.ts") - if (lines.length > 0 && lines[0].startsWith("#")) { - result.push(lines[0]) - startIndex = 1 - } - - // Process definition lines - for (let i = startIndex; i < lines.length; i++) { - const line = lines[i] - - // Match definition format: "startLine--endLine | content" or "lineNumber | content" - // Allow optional leading whitespace to handle indented output or CRLF artifacts - const rangeMatch = line.match(/^\s*(\d+)(?:--(\d+))?\s*\|/) - - if (rangeMatch) { - const startLine = parseInt(rangeMatch[1], 10) - - // Only include definitions that start within the truncated range - if (startLine <= maxReadFileLine) { - result.push(line) - } - } - // Note: We don't preserve empty lines or other non-definition content - // as they're not part of the actual code definitions - } - - return result.join("\n") -} diff --git a/src/i18n/locales/en/tools.json b/src/i18n/locales/en/tools.json index 94e1820249b..25d0eee8699 100644 --- a/src/i18n/locales/en/tools.json +++ b/src/i18n/locales/en/tools.json @@ -1,6 +1,7 @@ { "readFile": { "linesRange": " (lines {{start}}-{{end}})", + "startingFromLine": " (starting from line {{start}})", "definitionsOnly": " (definitions only)", "maxLines": " (max {{max}} lines)", "imageTooLarge": "Image file is too large ({{size}} MB). The maximum allowed size is {{max}} MB.", diff --git a/src/integrations/misc/__tests__/read-file-content.spec.ts b/src/integrations/misc/__tests__/read-file-content.spec.ts new file mode 100644 index 00000000000..69d0a7e5f63 --- /dev/null +++ b/src/integrations/misc/__tests__/read-file-content.spec.ts @@ -0,0 +1,282 @@ +import { promises as fs } from "fs" +import path from "path" +import { readSlice, readIndentationBlock, readFileContent, measureIndent } from "../read-file-content" + +describe("read-file-content", () => { + const testDir = __dirname + + // Helper function to create a temporary file, run a test, and clean up + async function withTempFile(filename: string, content: string, testFn: (filepath: string) => Promise) { + const filepath = path.join(testDir, filename) + await fs.writeFile(filepath, content) + try { + await testFn(filepath) + } finally { + await fs.unlink(filepath) + } + } + + describe("measureIndent", () => { + it("should measure spaces correctly", () => { + expect(measureIndent(" hello")).toBe(4) + expect(measureIndent(" hello")).toBe(2) + expect(measureIndent("hello")).toBe(0) + }) + + it("should treat tabs as 4 spaces", () => { + expect(measureIndent("\thello")).toBe(4) + expect(measureIndent("\t\thello")).toBe(8) + expect(measureIndent(" \thello")).toBe(6) // 2 spaces + 4 for tab + }) + + it("should handle empty lines", () => { + expect(measureIndent("")).toBe(0) + expect(measureIndent(" ")).toBe(3) + }) + }) + + describe("readSlice", () => { + it("should read lines from start by default", async () => { + const content = Array.from({ length: 10 }, (_, i) => `Line ${i + 1}`).join("\n") + await withTempFile("slice-test.txt", content, async (filepath) => { + const result = await readSlice(filepath, 1, 3) + expect(result.lineCount).toBe(3) + expect(result.content).toContain("1 | Line 1") + expect(result.content).toContain("2 | Line 2") + expect(result.content).toContain("3 | Line 3") + }) + }) + + it("should read lines from specified offset", async () => { + const content = Array.from({ length: 10 }, (_, i) => `Line ${i + 1}`).join("\n") + await withTempFile("slice-offset-test.txt", content, async (filepath) => { + const result = await readSlice(filepath, 5, 3) + expect(result.lineCount).toBe(3) + expect(result.content).toContain("5 | Line 5") + expect(result.content).toContain("6 | Line 6") + expect(result.content).toContain("7 | Line 7") + }) + }) + + it("should handle reading beyond file end", async () => { + const content = "Line 1\nLine 2\nLine 3" + await withTempFile("slice-beyond-test.txt", content, async (filepath) => { + const result = await readSlice(filepath, 1, 100) + expect(result.lineCount).toBe(3) + expect(result.totalLines).toBe(3) + }) + }) + + it("should throw error for offset=0", async () => { + const content = "Line 1\nLine 2" + await withTempFile("slice-zero-offset-test.txt", content, async (filepath) => { + await expect(readSlice(filepath, 0, 3)).rejects.toThrow("offset must be a 1-indexed line number") + }) + }) + + it("should throw error for limit=0", async () => { + const content = "Line 1\nLine 2" + await withTempFile("slice-zero-limit-test.txt", content, async (filepath) => { + await expect(readSlice(filepath, 1, 0)).rejects.toThrow("limit must be greater than zero") + }) + }) + + it("should throw error when offset exceeds file length", async () => { + const content = "Line 1\nLine 2" + await withTempFile("slice-past-end-test.txt", content, async (filepath) => { + await expect(readSlice(filepath, 100, 3)).rejects.toThrow("offset exceeds file length") + }) + }) + + it("should truncate long lines", async () => { + const longLine = "x".repeat(600) // Longer than MAX_LINE_LENGTH (500) + await withTempFile("slice-long-line-test.txt", longLine, async (filepath) => { + const result = await readSlice(filepath, 1, 1) + // Line should be truncated to 500 characters + line number prefix + expect(result.content.length).toBeLessThan(600) + }) + }) + + it("should handle files with CRLF line endings", async () => { + const content = "Line 1\r\nLine 2\r\nLine 3" + await withTempFile("slice-crlf-test.txt", content, async (filepath) => { + const result = await readSlice(filepath, 1, 3) + expect(result.lineCount).toBe(3) + expect(result.content).not.toContain("\r") + }) + }) + }) + + describe("readIndentationBlock", () => { + const pythonCode = `def outer(): + x = 1 + def inner(): + y = 2 + return y + return inner() + +def another(): + pass` + + it("should extract a function block with its contents", async () => { + await withTempFile("indent-function-test.py", pythonCode, async (filepath) => { + const result = await readIndentationBlock(filepath, 1, 100, { anchorLine: 3 }) + // Should include "def inner():" and its body + expect(result.content).toContain("def inner():") + expect(result.content).toContain("y = 2") + expect(result.content).toContain("return y") + }) + }) + + it("should respect maxLevels parameter", async () => { + await withTempFile("indent-levels-test.py", pythonCode, async (filepath) => { + const result = await readIndentationBlock(filepath, 4, 50, { + anchorLine: 4, + maxLevels: 1, + }) + // With maxLevels=1, should only go up one level from the anchor + expect(result.content).toContain("y = 2") + }) + }) + + it("should include sibling blocks when includeSiblings is true", async () => { + await withTempFile("indent-siblings-test.py", pythonCode, async (filepath) => { + const result = await readIndentationBlock(filepath, 1, 100, { + anchorLine: 1, + includeSiblings: true, + }) + // Should include both functions + expect(result.content).toContain("def outer():") + expect(result.content).toContain("def another():") + }) + }) + + it("should throw error for anchorLine=0", async () => { + await withTempFile("indent-zero-anchor-test.py", pythonCode, async (filepath) => { + await expect(readIndentationBlock(filepath, 1, 100, { anchorLine: 0 })).rejects.toThrow( + "anchor_line must be a 1-indexed line number", + ) + }) + }) + + it("should throw error when anchorLine exceeds file length", async () => { + await withTempFile("indent-past-end-test.py", pythonCode, async (filepath) => { + await expect(readIndentationBlock(filepath, 1, 100, { anchorLine: 100 })).rejects.toThrow( + "anchor_line exceeds file length", + ) + }) + }) + + it("should handle single line result", async () => { + const content = "single line" + await withTempFile("indent-single-test.txt", content, async (filepath) => { + const result = await readIndentationBlock(filepath, 1, 1, { anchorLine: 1 }) + expect(result.lineCount).toBe(1) + expect(result.content).toContain("single line") + }) + }) + + it("should trim leading/trailing blank lines", async () => { + const content = "\n\ndef foo():\n pass\n\n" + await withTempFile("indent-trim-test.py", content, async (filepath) => { + const result = await readIndentationBlock(filepath, 3, 100, { anchorLine: 3 }) + // Should not start or end with blank lines + const lines = result.content.split("\n").filter((l) => l.trim()) + expect(lines.length).toBeGreaterThan(0) + expect(lines[0]).toContain("def foo():") + }) + }) + + it("should include comment headers when includeHeader is true", async () => { + const codeWithComment = `# This is a comment header +# describing the function +def my_function(): + return 42` + await withTempFile("indent-header-test.py", codeWithComment, async (filepath) => { + const result = await readIndentationBlock(filepath, 3, 100, { + anchorLine: 3, + includeHeader: true, + }) + // Should include the comment header + expect(result.content).toContain("# This is a comment header") + }) + }) + }) + + describe("readFileContent", () => { + it("should use slice mode by default", async () => { + const content = "Line 1\nLine 2\nLine 3" + await withTempFile("mode-default-test.txt", content, async (filepath) => { + const result = await readFileContent({ + filePath: filepath, + offset: 1, + limit: 2, + }) + expect(result.lineCount).toBe(2) + expect(result.content).toContain("1 | Line 1") + expect(result.content).toContain("2 | Line 2") + }) + }) + + it("should use slice mode when explicitly specified", async () => { + const content = "Line 1\nLine 2\nLine 3" + await withTempFile("mode-slice-test.txt", content, async (filepath) => { + const result = await readFileContent({ + filePath: filepath, + offset: 2, + limit: 2, + mode: "slice", + }) + expect(result.lineCount).toBe(2) + expect(result.content).toContain("2 | Line 2") + expect(result.content).toContain("3 | Line 3") + }) + }) + + it("should use indentation mode when specified", async () => { + const pythonCode = `def outer(): + x = 1 + return x` + await withTempFile("mode-indent-test.py", pythonCode, async (filepath) => { + const result = await readFileContent({ + filePath: filepath, + offset: 1, + limit: 100, + mode: "indentation", + indentation: { anchorLine: 2 }, + }) + expect(result.content).toContain("x = 1") + }) + }) + + it("should use default values when not specified", async () => { + const content = Array.from({ length: 10 }, (_, i) => `Line ${i + 1}`).join("\n") + await withTempFile("mode-defaults-test.txt", content, async (filepath) => { + const result = await readFileContent({ + filePath: filepath, + }) + // Should use default offset=1 and default limit + expect(result.content).toContain("1 | Line 1") + }) + }) + + it("should handle empty files gracefully", async () => { + await withTempFile("mode-empty-test.txt", "", async (filepath) => { + await expect(readFileContent({ filePath: filepath })).rejects.toThrow() + }) + }) + + it("should handle Unicode content", async () => { + const content = "Hello 👋\nWorld 🌍\nTest 测试" + await withTempFile("mode-unicode-test.txt", content, async (filepath) => { + const result = await readFileContent({ + filePath: filepath, + mode: "slice", + }) + expect(result.content).toContain("Hello 👋") + expect(result.content).toContain("World 🌍") + expect(result.content).toContain("Test 测试") + }) + }) + }) +}) diff --git a/src/integrations/misc/read-file-content.ts b/src/integrations/misc/read-file-content.ts new file mode 100644 index 00000000000..04822893331 --- /dev/null +++ b/src/integrations/misc/read-file-content.ts @@ -0,0 +1,557 @@ +/** + * Read file content utilities with advanced features: + * - Slice mode: Simple line-by-line reading with offset and limit + * - Indentation mode: Smart extraction of code blocks based on indentation levels + */ +import { createReadStream } from "fs" + +// Configuration constants +const MAX_LINE_LENGTH = 500 // Truncate lines longer than this +const TAB_WIDTH = 4 // Treat tabs as 4 spaces for indentation +const FALLBACK_LIMIT = 500 // Fallback when no limit is specified and maxReadFileLine is not set + +// Comment prefixes for header detection +const COMMENT_PREFIXES = ["#", "//", "--", "/*", "*", "'''", '"""', "