diff --git a/codeflash/benchmarking/trace_benchmarks.py b/codeflash/benchmarking/trace_benchmarks.py index e59b06656..6de415a55 100644 --- a/codeflash/benchmarking/trace_benchmarks.py +++ b/codeflash/benchmarking/trace_benchmarks.py @@ -7,6 +7,7 @@ from codeflash.cli_cmds.console import logger from codeflash.code_utils.compat import SAFE_SYS_EXECUTABLE +from codeflash.code_utils.shell_utils import get_cross_platform_subprocess_run_args def trace_benchmarks_pytest( @@ -17,7 +18,10 @@ def trace_benchmarks_pytest( benchmark_env["PYTHONPATH"] = str(project_root) else: benchmark_env["PYTHONPATH"] += os.pathsep + str(project_root) - result = subprocess.run( + run_args = get_cross_platform_subprocess_run_args( + cwd=project_root, env=benchmark_env, timeout=timeout, check=False, text=True, capture_output=True + ) + result = subprocess.run( # noqa: PLW1510 [ SAFE_SYS_EXECUTABLE, Path(__file__).parent / "pytest_new_process_trace_benchmarks.py", @@ -25,12 +29,7 @@ def trace_benchmarks_pytest( tests_root, trace_file, ], - cwd=project_root, - check=False, - capture_output=True, - text=True, - env=benchmark_env, - timeout=timeout, + **run_args, ) if result.returncode != 0: if "ERROR collecting" in result.stdout: diff --git a/codeflash/cli_cmds/console.py b/codeflash/cli_cmds/console.py index ecf7d3e6d..68a0a5e2f 100644 --- a/codeflash/cli_cmds/console.py +++ b/codeflash/cli_cmds/console.py @@ -58,6 +58,19 @@ ) +class DummyTask: + def __init__(self) -> None: + self.id = 0 + + +class DummyProgress: + def __init__(self) -> None: + pass + + def advance(self, task_id: TaskID, advance: int = 1) -> None: + pass + + def lsp_log(message: LspMessage) -> None: if not is_LSP_enabled(): return @@ -120,10 +133,6 @@ def progress_bar( logger.info(message) # Create a fake task ID since we still need to yield something - class DummyTask: - def __init__(self) -> None: - self.id = 0 - yield DummyTask().id else: progress = Progress( @@ -141,6 +150,13 @@ def __init__(self) -> None: @contextmanager def test_files_progress_bar(total: int, description: str) -> Generator[tuple[Progress, TaskID], None, None]: """Progress bar for test files.""" + if is_LSP_enabled(): + lsp_log(LspTextMessage(text=description, takes_time=True)) + dummy_progress = DummyProgress() + dummy_task = DummyTask() + yield dummy_progress, dummy_task.id + return + with Progress( SpinnerColumn(next(spinners)), TextColumn("[progress.description]{task.description}"), diff --git a/codeflash/code_utils/code_replacer.py b/codeflash/code_utils/code_replacer.py index 47aa2e75f..3b838eb8a 100644 --- a/codeflash/code_utils/code_replacer.py +++ b/codeflash/code_utils/code_replacer.py @@ -447,7 +447,7 @@ def replace_function_definitions_in_module( new_code: str = replace_functions_and_add_imports( # adding the global assignments before replacing the code, not after - # becuase of an "edge case" where the optimized code intoduced a new import and a global assignment using that import + # because of an "edge case" where the optimized code intoduced a new import and a global assignment using that import # and that import wasn't used before, so it was ignored when calling AddImportsVisitor.add_needed_import inside replace_functions_and_add_imports (because the global assignment wasn't added yet) # this was added at https://github.com/codeflash-ai/codeflash/pull/448 add_global_assignments(code_to_apply, source_code) if should_add_global_assignments else source_code, diff --git a/codeflash/code_utils/shell_utils.py b/codeflash/code_utils/shell_utils.py index 60da8e3ba..ee4dfb5df 100644 --- a/codeflash/code_utils/shell_utils.py +++ b/codeflash/code_utils/shell_utils.py @@ -3,6 +3,8 @@ import contextlib import os import re +import subprocess +import sys from pathlib import Path from typing import TYPE_CHECKING, Optional @@ -11,8 +13,11 @@ from codeflash.either import Failure, Success if TYPE_CHECKING: + from collections.abc import Mapping + from codeflash.either import Result + # PowerShell patterns and prefixes POWERSHELL_RC_EXPORT_PATTERN = re.compile( r'^\$env:CODEFLASH_API_KEY\s*=\s*(?:"|\')?(cf-[^\s"\']+)(?:"|\')?\s*$', re.MULTILINE @@ -231,3 +236,24 @@ def save_api_key_to_rc(api_key: str) -> Result[str, str]: f"To ensure your Codeflash API key is automatically loaded into your environment at startup, you can create {shell_rc_path} and add the following line:{LF}" f"{LF}{api_key_line}{LF}" ) + + +def get_cross_platform_subprocess_run_args( + cwd: Path | str | None = None, + env: Mapping[str, str] | None = None, + timeout: Optional[float] = None, + check: bool = False, # noqa: FBT001, FBT002 + text: bool = True, # noqa: FBT001, FBT002 + capture_output: bool = True, # noqa: FBT001, FBT002 (only for non-Windows) +) -> dict[str, str]: + run_args = {"cwd": cwd, "env": env, "text": text, "timeout": timeout, "check": check} + if sys.platform == "win32": + creationflags = subprocess.CREATE_NEW_PROCESS_GROUP + run_args["creationflags"] = creationflags + run_args["stdout"] = subprocess.PIPE + run_args["stderr"] = subprocess.PIPE + run_args["stdin"] = subprocess.DEVNULL + else: + run_args["capture_output"] = capture_output + + return run_args diff --git a/codeflash/discovery/discover_unit_tests.py b/codeflash/discovery/discover_unit_tests.py index 587b972ee..8bb3a3f28 100644 --- a/codeflash/discovery/discover_unit_tests.py +++ b/codeflash/discovery/discover_unit_tests.py @@ -16,7 +16,6 @@ if TYPE_CHECKING: from codeflash.discovery.functions_to_optimize import FunctionToOptimize - from pydantic.dataclasses import dataclass from rich.panel import Panel from rich.text import Text @@ -29,6 +28,7 @@ module_name_from_file_path, ) from codeflash.code_utils.compat import SAFE_SYS_EXECUTABLE, codeflash_cache_db +from codeflash.code_utils.shell_utils import get_cross_platform_subprocess_run_args from codeflash.models.models import CodePosition, FunctionCalledInTest, TestsInFile, TestType if TYPE_CHECKING: @@ -331,7 +331,7 @@ def visit_ImportFrom(self, node: ast.ImportFrom) -> None: # Be conservative except when an alias is used (which requires exact method matching) for target_func in fnames: if "." in target_func: - class_name, method_name = target_func.split(".", 1) + class_name, _method_name = target_func.split(".", 1) if aname == class_name and not alias.asname: self.found_any_target_function = True self.found_qualified_name = target_func @@ -585,7 +585,10 @@ def discover_tests_pytest( tmp_pickle_path = get_run_tmp_file("collected_tests.pkl") with custom_addopts(): - result = subprocess.run( + run_kwargs = get_cross_platform_subprocess_run_args( + cwd=project_root, check=False, text=True, capture_output=True + ) + result = subprocess.run( # noqa: PLW1510 [ SAFE_SYS_EXECUTABLE, Path(__file__).parent / "pytest_new_process_discovery.py", @@ -593,10 +596,7 @@ def discover_tests_pytest( str(tests_root), str(tmp_pickle_path), ], - cwd=project_root, - check=False, - capture_output=True, - text=True, + **run_kwargs, ) try: with tmp_pickle_path.open(mode="rb") as f: diff --git a/codeflash/discovery/functions_to_optimize.py b/codeflash/discovery/functions_to_optimize.py index 3958f40cf..cb00fabad 100644 --- a/codeflash/discovery/functions_to_optimize.py +++ b/codeflash/discovery/functions_to_optimize.py @@ -175,7 +175,7 @@ def qualified_name_with_modules_from_root(self, project_root_path: Path) -> str: def get_functions_to_optimize( optimize_all: str | None, replay_test: list[Path] | None, - file: Path | None, + file: Path | str | None, only_get_this_function: str | None, test_cfg: TestConfig, ignore_paths: list[Path], @@ -202,6 +202,7 @@ def get_functions_to_optimize( elif file is not None: logger.info("!lsp|Finding all functions in the file '%s'…", file) console.rule() + file = Path(file) if isinstance(file, str) else file functions: dict[Path, list[FunctionToOptimize]] = find_all_functions_in_file(file) if only_get_this_function is not None: split_function = only_get_this_function.split(".") diff --git a/codeflash/lsp/beta.py b/codeflash/lsp/beta.py index 8be2c3b03..a38cb1cde 100644 --- a/codeflash/lsp/beta.py +++ b/codeflash/lsp/beta.py @@ -134,7 +134,7 @@ def get_optimizable_functions(params: OptimizableFunctionsParams) -> dict[str, l document_uri = params.textDocument.uri document = server.workspace.get_text_document(document_uri) - file_path = Path(document.path) + file_path = Path(document.path).resolve() if not server.optimizer: return {"status": "error", "message": "optimizer not initialized"} @@ -517,7 +517,7 @@ def initialize_function_optimization(params: FunctionOptimizationInitParams) -> files = [document.path] _, _, original_helpers = server.current_optimization_init_result - files.extend([str(helper_path) for helper_path in original_helpers]) + files.extend([str(helper_path.resolve()) for helper_path in original_helpers]) return {"functionName": params.functionName, "status": "success", "files_inside_context": files} diff --git a/codeflash/lsp/features/perform_optimization.py b/codeflash/lsp/features/perform_optimization.py index 96552f1a8..6c54bca01 100644 --- a/codeflash/lsp/features/perform_optimization.py +++ b/codeflash/lsp/features/perform_optimization.py @@ -63,7 +63,7 @@ def run_generate_optimizations(): # noqa: ANN202 future_tests = function_optimizer.executor.submit(ctx_tests.run, run_generate_tests) future_optimizations = function_optimizer.executor.submit(ctx_opts.run, run_generate_optimizations) - logger.info(f"loading|Generating new tests and optimizations for function '{params.functionName}'...") + logger.info(f"loading|Generating new tests and optimizations for function '{params.functionName}'") concurrent.futures.wait([future_tests, future_optimizations]) test_setup_result = future_tests.result() diff --git a/codeflash/lsp/lsp_message.py b/codeflash/lsp/lsp_message.py index 36b2f3dcc..020c6eef2 100644 --- a/codeflash/lsp/lsp_message.py +++ b/codeflash/lsp/lsp_message.py @@ -11,8 +11,8 @@ json_primitive_types = (str, float, int, bool) max_code_lines_before_collapse = 45 -# \u241F is the message delimiter becuase it can be more than one message sent over the same message, so we need something to separate each message -message_delimiter = "\u241f" +# \\u241F is the message delimiter because it can be more than one message sent over the same message, so we need something to separate each message +message_delimiter = "\\u241F" # allow the client to know which message it is receiving diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 1f55033a7..5111f2f23 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -396,7 +396,7 @@ def __init__( def can_be_optimized(self) -> Result[tuple[bool, CodeOptimizationContext, dict[Path, str]], str]: should_run_experiment = self.experiment_id is not None - logger.info(f"Function Trace ID: {self.function_trace_id}") + logger.info(f"!lsp|Function Trace ID: {self.function_trace_id}") ph("cli-optimize-function-start", {"function_trace_id": self.function_trace_id}) self.cleanup_leftover_test_return_values() file_name_from_test_module_name.cache_clear() @@ -1934,6 +1934,7 @@ def establish_original_code_baseline( instrument_codeflash_capture( self.function_to_optimize, file_path_to_helper_classes, self.test_cfg.tests_root ) + total_looping_time = TOTAL_LOOPING_TIME_EFFECTIVE behavioral_results, coverage_results = self.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, diff --git a/codeflash/optimization/optimizer.py b/codeflash/optimization/optimizer.py index c98d37042..1e1ddefcf 100644 --- a/codeflash/optimization/optimizer.py +++ b/codeflash/optimization/optimizer.py @@ -652,8 +652,8 @@ def mirror_paths_for_worktree_mode(self, worktree_dir: Path) -> None: def mirror_path(path: Path, src_root: Path, dest_root: Path) -> Path: - relative_path = path.relative_to(src_root) - return dest_root / relative_path + relative_path = path.resolve().relative_to(src_root.resolve()) + return Path(dest_root / relative_path) def run_with_args(args: Namespace) -> None: diff --git a/codeflash/verification/test_runner.py b/codeflash/verification/test_runner.py index 44a50af3b..ece39e1e0 100644 --- a/codeflash/verification/test_runner.py +++ b/codeflash/verification/test_runner.py @@ -1,7 +1,9 @@ from __future__ import annotations +import contextlib import shlex import subprocess +import sys from pathlib import Path from typing import TYPE_CHECKING @@ -10,6 +12,7 @@ from codeflash.code_utils.compat import IS_POSIX, SAFE_SYS_EXECUTABLE from codeflash.code_utils.config_consts import TOTAL_LOOPING_TIME_EFFECTIVE from codeflash.code_utils.coverage_utils import prepare_coverage_files +from codeflash.code_utils.shell_utils import get_cross_platform_subprocess_run_args from codeflash.models.models import TestFiles, TestType if TYPE_CHECKING: @@ -23,9 +26,12 @@ def execute_test_subprocess( cmd_list: list[str], cwd: Path, env: dict[str, str] | None, timeout: int = 600 ) -> subprocess.CompletedProcess: """Execute a subprocess with the given command list, working directory, environment variables, and timeout.""" + logger.debug(f"executing test run with command: {' '.join(cmd_list)}") with custom_addopts(): - logger.debug(f"executing test run with command: {' '.join(cmd_list)}") - return subprocess.run(cmd_list, capture_output=True, cwd=cwd, env=env, text=True, timeout=timeout, check=False) + run_args = get_cross_platform_subprocess_run_args( + cwd=cwd, env=env, timeout=timeout, check=False, text=True, capture_output=True + ) + return subprocess.run(cmd_list, **run_args) # noqa: PLW1510 def run_behavioral_tests( @@ -39,6 +45,7 @@ def run_behavioral_tests( pytest_target_runtime_seconds: float = TOTAL_LOOPING_TIME_EFFECTIVE, enable_coverage: bool = False, ) -> tuple[Path, subprocess.CompletedProcess, Path | None, Path | None]: + """Run behavioral tests with optional coverage.""" if test_framework in {"pytest", "unittest"}: test_files: list[str] = [] for file in test_paths.test_files: @@ -53,12 +60,14 @@ def run_behavioral_tests( ) else: test_files.append(str(file.instrumented_behavior_file_path)) + pytest_cmd_list = ( shlex.split(f"{SAFE_SYS_EXECUTABLE} -m pytest", posix=IS_POSIX) if pytest_cmd == "pytest" else [SAFE_SYS_EXECUTABLE, "-m", *shlex.split(pytest_cmd, posix=IS_POSIX)] ) test_files = list(set(test_files)) # remove multiple calls in the same test function + common_pytest_args = [ "--capture=tee-sys", "-q", @@ -85,11 +94,18 @@ def run_behavioral_tests( pytest_test_env["TF_XLA_FLAGS"] = "--tf_xla_auto_jit=0" pytest_test_env["TF_ENABLE_ONEDNN_OPTS"] = str(0) pytest_test_env["JAX_DISABLE_JIT"] = str(0) - cov_erase = execute_test_subprocess( - shlex.split(f"{SAFE_SYS_EXECUTABLE} -m coverage erase"), cwd=cwd, env=pytest_test_env - ) # this cleanup is necessary to avoid coverage data from previous runs, if there are any, - # then the current run will be appended to the previous data, which skews the results - logger.debug(cov_erase) + + is_windows = sys.platform == "win32" + if is_windows: + # On Windows, delete coverage database file directly instead of using 'coverage erase', to avoid locking issues + if coverage_database_file.exists(): + with contextlib.suppress(PermissionError, OSError): + coverage_database_file.unlink() + else: + cov_erase = execute_test_subprocess( + shlex.split(f"{SAFE_SYS_EXECUTABLE} -m coverage erase"), cwd=cwd, env=pytest_test_env, timeout=30 + ) # this cleanup is necessary to avoid coverage data from previous runs, if there are any, then the current run will be appended to the previous data, which skews the results + logger.debug(cov_erase) coverage_cmd = [ SAFE_SYS_EXECUTABLE, "-m", @@ -105,7 +121,6 @@ def run_behavioral_tests( coverage_cmd.extend(shlex.split(pytest_cmd, posix=IS_POSIX)[1:]) blocklist_args = [f"-p no:{plugin}" for plugin in BEHAVIORAL_BLOCKLISTED_PLUGINS if plugin != "cov"] - results = execute_test_subprocess( coverage_cmd + common_pytest_args + blocklist_args + result_args + test_files, cwd=cwd, @@ -118,6 +133,7 @@ def run_behavioral_tests( ) else: blocklist_args = [f"-p no:{plugin}" for plugin in BEHAVIORAL_BLOCKLISTED_PLUGINS] + results = execute_test_subprocess( pytest_cmd_list + common_pytest_args + blocklist_args + result_args + test_files, cwd=cwd, diff --git a/tests/test_function_discovery.py b/tests/test_function_discovery.py index 49a67ba9b..76b3445a1 100644 --- a/tests/test_function_discovery.py +++ b/tests/test_function_discovery.py @@ -411,13 +411,17 @@ def not_in_checkpoint_function(): discovered = find_all_functions_in_file(test_file_path) modified_functions = {test_file_path: discovered[test_file_path]} - filtered, count = filter_functions( - modified_functions, - tests_root=Path("tests"), - ignore_paths=[], - project_root=temp_dir, - module_root=temp_dir, - ) + # Use an absolute path for tests_root that won't match the temp directory + # This avoids path resolution issues in CI where the working directory might differ + tests_root_absolute = (temp_dir.parent / "nonexistent_tests_dir").resolve() + with unittest.mock.patch("codeflash.discovery.functions_to_optimize.get_blocklisted_functions", return_value={}): + filtered, count = filter_functions( + modified_functions, + tests_root=tests_root_absolute, + ignore_paths=[], + project_root=temp_dir, + module_root=temp_dir, + ) function_names = [fn.function_name for fn in filtered.get(test_file_path, [])] assert "propagate_attributes" in function_names assert count == 3