diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py
index 2eedb9fae..c18495899 100644
--- a/codeflash/api/aiservice.py
+++ b/codeflash/api/aiservice.py
@@ -4,6 +4,7 @@
 import os
 import platform
 import time
+from itertools import count
 from typing import TYPE_CHECKING, Any, cast
 
 import requests
@@ -12,7 +13,6 @@
 from codeflash.cli_cmds.console import console, logger
 from codeflash.code_utils.code_replacer import is_zero_diff
 from codeflash.code_utils.code_utils import unified_diff_strings
-from codeflash.code_utils.config_consts import N_CANDIDATES_EFFECTIVE, N_CANDIDATES_LP_EFFECTIVE
 from codeflash.code_utils.env_utils import get_codeflash_api_key
 from codeflash.code_utils.git_utils import get_last_commit_author_if_pr_exists, get_repo_owner_and_name
 from codeflash.code_utils.time_utils import humanize_runtime
@@ -40,6 +40,11 @@ class AiServiceClient:
     def __init__(self) -> None:
         self.base_url = self.get_aiservice_base_url()
         self.headers = {"Authorization": f"Bearer {get_codeflash_api_key()}", "Connection": "close"}
+        self.llm_call_counter = count(1)
+
+    def get_next_sequence(self) -> int:
+        """Get the next LLM call sequence number."""
+        return next(self.llm_call_counter)
 
     def get_aiservice_base_url(self) -> str:
         if os.environ.get("CODEFLASH_AIS_SERVER", default="prod").lower() == "local":
@@ -106,6 +111,7 @@ def _get_valid_candidates(
                     optimization_id=opt["optimization_id"],
                     source=source,
                     parent_id=opt.get("parent_id", None),
+                    model=opt.get("model"),
                 )
             )
         return candidates
@@ -115,7 +121,6 @@ def optimize_python_code(  # noqa: D417
         source_code: str,
         dependency_code: str,
         trace_id: str,
-        num_candidates: int = 10,
        experiment_metadata: ExperimentMetadata | None = None,
         *,
         is_async: bool = False,
@@ -127,21 +132,22 @@ def optimize_python_code(  # noqa: D417
         - source_code (str): The python code to optimize.
         - dependency_code (str): The dependency code used as read-only context for the optimization
         - trace_id (str): Trace id of optimization run
-        - num_candidates (int): Number of optimization variants to generate. Default is 10.
         - experiment_metadata (Optional[ExperimentalMetadata, None]): Any available experiment metadata for this optimization
+        - is_async (bool): Whether the function being optimized is async
 
         Returns
         -------
         - List[OptimizationCandidate]: A list of Optimization Candidates.
 
         """
+        logger.info("Generating optimized candidates…")
+        console.rule()
         start_time = time.perf_counter()
         git_repo_owner, git_repo_name = safe_get_repo_owner_and_name()
         payload = {
             "source_code": source_code,
             "dependency_code": dependency_code,
-            "num_variants": num_candidates,
             "trace_id": trace_id,
             "python_version": platform.python_version(),
             "experiment_metadata": experiment_metadata,
@@ -149,24 +155,26 @@ def optimize_python_code(  # noqa: D417
             "current_username": get_last_commit_author_if_pr_exists(None),
             "repo_owner": git_repo_owner,
             "repo_name": git_repo_name,
-            "n_candidates": N_CANDIDATES_EFFECTIVE,
             "is_async": is_async,
+            "lsp_mode": is_LSP_enabled(),
+            "call_sequence": self.get_next_sequence(),
         }
+        logger.debug(f"Sending optimize request: trace_id={trace_id}, lsp_mode={payload['lsp_mode']}")
 
-        logger.info("!lsp|Generating optimized candidates…")
-        console.rule()
         try:
             response = self.make_ai_service_request("/optimize", payload=payload, timeout=60)
         except requests.exceptions.RequestException as e:
             logger.exception(f"Error generating optimized candidates: {e}")
             ph("cli-optimize-error-caught", {"error": str(e)})
+            console.rule()
             return []
 
         if response.status_code == 200:
             optimizations_json = response.json()["optimizations"]
-            console.rule()
             end_time = time.perf_counter()
             logger.debug(f"!lsp|Generating possible optimizations took {end_time - start_time:.2f} seconds.")
+            logger.info(f"!lsp|Received {len(optimizations_json)} optimization candidates.")
+            console.rule()
             return self._get_valid_candidates(optimizations_json, OptimizedCandidateSource.OPTIMIZE)
         try:
             error = response.json()["error"]
@@ -183,17 +191,16 @@ def optimize_python_code_line_profiler(  # noqa: D417
         dependency_code: str,
         trace_id: str,
         line_profiler_results: str,
-        num_candidates: int = 10,
         experiment_metadata: ExperimentMetadata | None = None,
     ) -> list[OptimizedCandidate]:
-        """Optimize the given python code for performance by making a request to the Django endpoint.
+        """Optimize the given python code for performance using line profiler results.
 
         Parameters
         ----------
         - source_code (str): The python code to optimize.
         - dependency_code (str): The dependency code used as read-only context for the optimization
         - trace_id (str): Trace id of optimization run
-        - num_candidates (int): Number of optimization variants to generate. Default is 10.
+        - line_profiler_results (str): Line profiler output to guide optimization
         - experiment_metadata (Optional[ExperimentalMetadata, None]): Any available experiment metadata for this optimization
 
         Returns
@@ -201,36 +208,36 @@ def optimize_python_code_line_profiler(  # noqa: D417
         - List[OptimizationCandidate]: A list of Optimization Candidates.
 
         """
+        if line_profiler_results == "":
+            logger.info("No LineProfiler results were provided, Skipping optimization.")
+            return []
+
+        logger.info("Generating optimized candidates with line profiler…")
+        console.rule()
+
         payload = {
             "source_code": source_code,
             "dependency_code": dependency_code,
-            "num_variants": num_candidates,
             "line_profiler_results": line_profiler_results,
             "trace_id": trace_id,
             "python_version": platform.python_version(),
             "experiment_metadata": experiment_metadata,
             "codeflash_version": codeflash_version,
             "lsp_mode": is_LSP_enabled(),
-            "n_candidates_lp": N_CANDIDATES_LP_EFFECTIVE,
+            "call_sequence": self.get_next_sequence(),
         }
-        console.rule()
-        if line_profiler_results == "":
-            logger.info("No LineProfiler results were provided, Skipping optimization.")
-            console.rule()
-            return []
 
         try:
             response = self.make_ai_service_request("/optimize-line-profiler", payload=payload, timeout=60)
         except requests.exceptions.RequestException as e:
             logger.exception(f"Error generating optimized candidates: {e}")
             ph("cli-optimize-error-caught", {"error": str(e)})
+            console.rule()
            return []
 
         if response.status_code == 200:
             optimizations_json = response.json()["optimizations"]
-            logger.info(
-                f"!lsp|Generated {len(optimizations_json)} candidate optimizations using line profiler information."
-            )
+            logger.info(f"!lsp|Received {len(optimizations_json)} line profiler optimization candidates.")
             console.rule()
             return self._get_valid_candidates(optimizations_json, OptimizedCandidateSource.OPTIMIZE_LP)
         try:
@@ -268,6 +275,7 @@ def optimize_python_code_refinement(self, request: list[AIServiceRefinerRequest]
                 "trace_id": opt.trace_id,
                 "function_references": opt.function_references,
                 "python_version": platform.python_version(),
+                "call_sequence": self.get_next_sequence(),
             }
             for opt in request
         ]
@@ -402,6 +410,7 @@ def get_new_explanation(  # noqa: D417
             "throughput_improvement": throughput_improvement,
             "function_references": function_references,
             "codeflash_version": codeflash_version,
+            "call_sequence": self.get_next_sequence(),
         }
         logger.info("loading|Generating explanation")
         console.rule()
@@ -564,6 +573,7 @@ def generate_regression_tests(  # noqa: D417
             "python_version": platform.python_version(),
             "codeflash_version": codeflash_version,
             "is_async": function_to_optimize.is_async,
+            "call_sequence": self.get_next_sequence(),
         }
         try:
             response = self.make_ai_service_request("/testgen", payload=payload, timeout=90)
@@ -650,6 +660,7 @@ def get_optimization_review(
             "codeflash_version": codeflash_version,
             "calling_fn_details": calling_fn_details,
             "python_version": platform.python_version(),
+            "call_sequence": self.get_next_sequence(),
         }
         console.rule()
         try:
diff --git a/codeflash/discovery/discover_unit_tests.py b/codeflash/discovery/discover_unit_tests.py
index bc0e2fd67..587b972ee 100644
--- a/codeflash/discovery/discover_unit_tests.py
+++ b/codeflash/discovery/discover_unit_tests.py
@@ -751,6 +751,7 @@ def process_test_files(
     tests_cache = TestsCache(project_root_path)
 
     logger.info("!lsp|Discovering tests and processing unit tests")
+    console.rule()
     with test_files_progress_bar(total=len(file_to_test_map), description="Processing test files") as (
         progress,
         task_id,
diff --git a/codeflash/models/models.py b/codeflash/models/models.py
index 1db09bc12..822ecffab 100644
--- a/codeflash/models/models.py
+++ b/codeflash/models/models.py
@@ -46,6 +46,7 @@ class AIServiceRefinerRequest:
     original_line_profiler_results: str
     optimized_line_profiler_results: str
     function_references: str | None = None
+    call_sequence: int | None = None
 
 
 class TestDiffScope(str, Enum):
@@ -464,6 +465,7 @@ class OptimizedCandidate:
     optimization_id: str
     source: OptimizedCandidateSource
     parent_id: str | None = None
+    model: str | None = None  # Which LLM model generated this candidate
 
 
 @dataclass(frozen=True)
diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
index 50399839d..7a0a5f510 100644
--- a/codeflash/optimization/function_optimizer.py
+++ b/codeflash/optimization/function_optimizer.py
@@ -46,8 +46,6 @@
     COVERAGE_THRESHOLD,
     INDIVIDUAL_TESTCASE_TIMEOUT,
     MAX_REPAIRS_PER_TRACE,
-    N_CANDIDATES_EFFECTIVE,
-    N_CANDIDATES_LP_EFFECTIVE,
     N_TESTS_TO_GENERATE_EFFECTIVE,
     REFINE_ALL_THRESHOLD,
     REFINED_CANDIDATE_RANKING_WEIGHTS,
@@ -146,6 +144,7 @@ def __init__(
         self.candidate_len = len(initial_candidates)
         self.ai_service_client = ai_service_client
         self.executor = executor
+        self.refinement_calls_count = 0
 
         # Initialize queue with initial candidates
         for candidate in initial_candidates:
@@ -155,6 +154,9 @@ def __init__(
         self.all_refinements_data = all_refinements_data
         self.future_all_code_repair = future_all_code_repair
 
+    def get_total_llm_calls(self) -> int:
+        return self.refinement_calls_count
+
     def get_next_candidate(self) -> OptimizedCandidate | None:
         """Get the next candidate from the queue, handling async results as needed."""
         try:
@@ -193,10 +195,12 @@ def refine_optimizations(self, request: list[AIServiceRefinerRequest]) -> concur
     def _process_refinement_results(self) -> OptimizedCandidate | None:
         """Process refinement results and add to queue. We generate a weighted ranking based on the runtime and diff lines and select the best (round of 45%) of valid optimizations to be refined."""
         future_refinements: list[concurrent.futures.Future] = []
+        refinement_call_index = 0
 
         if len(self.all_refinements_data) <= REFINE_ALL_THRESHOLD:
             for data in self.all_refinements_data:
-                future_refinements.append(self.refine_optimizations([data]))  # noqa: PERF401
+                refinement_call_index += 1
+                future_refinements.append(self.refine_optimizations([data]))
         else:
             diff_lens_list = []
             runtimes_list = []
@@ -215,9 +219,13 @@ def _process_refinement_results(self) -> OptimizedCandidate | None:
             top_indecies = sorted(score_dict, key=score_dict.get)[:top_n_candidates]
 
             for idx in top_indecies:
+                refinement_call_index += 1
                 data = self.all_refinements_data[idx]
                 future_refinements.append(self.refine_optimizations([data]))
 
+        # Track total refinement calls made
+        self.refinement_calls_count = refinement_call_index
+
         if future_refinements:
             logger.info("loading|Refining generated code for improved quality and performance...")
 
@@ -237,6 +245,7 @@ def _process_refinement_results(self) -> OptimizedCandidate | None:
             logger.info(
                 f"Added {len(refinement_response)} candidates from refinement, total candidates now: {self.candidate_len}"
             )
+            console.rule()
 
         self.refinement_done = True
         return self.get_next_candidate()
@@ -322,7 +331,7 @@ def __init__(
 
     def can_be_optimized(self) -> Result[tuple[bool, CodeOptimizationContext, dict[Path, str]], str]:
         should_run_experiment = self.experiment_id is not None
-        logger.debug(f"Function Trace ID: {self.function_trace_id}")
+        logger.info(f"Function Trace ID: {self.function_trace_id}")
         ph("cli-optimize-function-start", {"function_trace_id": self.function_trace_id})
         self.cleanup_leftover_test_return_values()
         file_name_from_test_module_name.cache_clear()
@@ -927,7 +936,6 @@ def determine_best_candidate(
                 dependency_code=code_context.read_only_context_code,
                 trace_id=self.get_trace_id(exp_type),
                 line_profiler_results=original_code_baseline.line_profile_results["str_out"],
-                num_candidates=N_CANDIDATES_LP_EFFECTIVE,
                 experiment_metadata=ExperimentMetadata(
                     id=self.experiment_id, group="control" if exp_type == "EXP0" else "experiment"
                 )
@@ -1206,7 +1214,6 @@ def instrument_existing_tests(self, function_to_all_tests: dict[str, set[Functio
         func_qualname = self.function_to_optimize.qualified_name_with_modules_from_root(self.project_root)
         if func_qualname not in function_to_all_tests:
             logger.info(f"Did not find any pre-existing tests for '{func_qualname}', will only use generated tests.")
-            console.rule()
         else:
             test_file_invocation_positions = defaultdict(list)
             for tests_in_file in function_to_all_tests.get(func_qualname):
@@ -1346,7 +1353,8 @@ def generate_tests(
         if concolic_test_str:
             count_tests += 1
 
-        logger.info(f"!lsp|Generated '{count_tests}' tests for '{self.function_to_optimize.function_name}'")
+        logger.info(f"!lsp|Generated {count_tests} tests for '{self.function_to_optimize.function_name}'")
+        console.rule()
 
         generated_tests = GeneratedTestsList(generated_tests=tests)
         return Success((count_tests, generated_tests, function_to_concolic_tests, concolic_test_str))
@@ -1357,15 +1365,12 @@ def generate_optimizations(
         read_only_context_code: str,
         run_experiment: bool = False,  # noqa: FBT001, FBT002
     ) -> Result[tuple[OptimizationSet, str], str]:
-        """Generate optimization candidates for the function."""
-        n_candidates = N_CANDIDATES_EFFECTIVE
-
+        """Generate optimization candidates for the function. Backend handles multi-model diversity."""
         future_optimization_candidates = self.executor.submit(
             self.aiservice_client.optimize_python_code,
             read_writable_code.markdown,
             read_only_context_code,
             self.function_trace_id[:-4] + "EXP0" if run_experiment else self.function_trace_id,
-            n_candidates,
             ExperimentMetadata(id=self.experiment_id, group="control") if run_experiment else None,
             is_async=self.function_to_optimize.is_async,
         )
@@ -1388,7 +1393,6 @@ def generate_optimizations(
                 read_writable_code.markdown,
                 read_only_context_code,
                 self.function_trace_id[:-4] + "EXP1",
-                n_candidates,
                 ExperimentMetadata(id=self.experiment_id, group="experiment"),
                 is_async=self.function_to_optimize.is_async,
             )
@@ -1397,14 +1401,16 @@ def generate_optimizations(
 
         # Wait for optimization futures to complete
         concurrent.futures.wait(futures)
 
-        # Retrieve results
-        candidates: list[OptimizedCandidate] = future_optimization_candidates.result()
-        logger.info(f"!lsp|Generated '{len(candidates)}' candidate optimizations.")
+        # Retrieve results - optimize_python_code returns list of candidates
+        candidates = future_optimization_candidates.result()
         if not candidates:
             return Failure(f"/!\\ NO OPTIMIZATIONS GENERATED for {self.function_to_optimize.function_name}")
 
-        candidates_experiment = future_candidates_exp.result() if future_candidates_exp else None
+        # Handle experiment results
+        candidates_experiment = None
+        if future_candidates_exp:
+            candidates_experiment = future_candidates_exp.result()
         function_references = future_references.result()
 
         return Success((OptimizationSet(control=candidates, experiment=candidates_experiment), function_references))
@@ -2024,6 +2030,7 @@ def run_optimized_candidate(
             return self.get_results_not_matched_error()
 
         logger.info(f"loading|Running performance tests for candidate {optimization_candidate_index}...")
+        console.rule()
 
         # For async functions, instrument at definition site for performance benchmarking
         if self.function_to_optimize.is_async:
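
The thread running through the hunks above is the new per-client call counter: AiServiceClient owns an itertools.count(1) and stamps a monotonically increasing call_sequence into each AI-service payload (/optimize, /optimize-line-profiler, refinement, explanation, /testgen, and optimization review), while the client-side candidate counts (num_candidates, N_CANDIDATES_EFFECTIVE, N_CANDIDATES_LP_EFFECTIVE) are dropped in favor of backend-controlled candidate generation. The snippet below is a minimal standalone sketch of that counter pattern, not code from the patch; SequencedClient and build_payload are illustrative names, and only the count(1)/next() mechanics mirror the actual change.

from itertools import count
from typing import Any


class SequencedClient:
    """Illustrative stand-in for AiServiceClient's call-sequence bookkeeping."""

    def __init__(self) -> None:
        # count(1) yields 1, 2, 3, ... - one number per LLM call, never reused
        self._llm_call_counter = count(1)

    def get_next_sequence(self) -> int:
        # Mirrors AiServiceClient.get_next_sequence() in the patch
        return next(self._llm_call_counter)

    def build_payload(self, **fields: Any) -> dict[str, Any]:
        # Every outgoing request records its position in the call order,
        # so backend logs can be correlated per optimization run.
        return {**fields, "call_sequence": self.get_next_sequence()}


if __name__ == "__main__":
    client = SequencedClient()
    print(client.build_payload(endpoint="/optimize"))  # call_sequence == 1
    print(client.build_payload(endpoint="/testgen"))   # call_sequence == 2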