From 297b856e57a9d228d7853153ffd65c7a8ccae19d Mon Sep 17 00:00:00 2001 From: AlexandreSinger Date: Sat, 16 Aug 2025 10:31:21 -0400 Subject: [PATCH 1/9] [AP][Solver][3D] 3D AP Solver Support Upgraded the solver in the AP flow to support a "Z" dimension representing the layer. This code only kicks in when the architecture has more than one layer, since in the single layer case there is no point in computing the Z dimension. --- .../analytical_place/analytical_solver.cpp | 191 +++++++++++++----- vpr/src/analytical_place/analytical_solver.h | 58 +++++- vpr/src/place/initial_placement.cpp | 8 +- .../strong_ap/basic_3d_ap/config/config.txt | 7 + .../basic_3d_ap/config/golden_results.txt | 62 +++--- .../constraints/mm9a_io_constraint.xml | 70 +++++++ 6 files changed, 314 insertions(+), 82 deletions(-) create mode 100644 vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap/constraints/mm9a_io_constraint.xml diff --git a/vpr/src/analytical_place/analytical_solver.cpp b/vpr/src/analytical_place/analytical_solver.cpp index 93b1766aeda..eb9095b40d9 100644 --- a/vpr/src/analytical_place/analytical_solver.cpp +++ b/vpr/src/analytical_place/analytical_solver.cpp @@ -594,8 +594,15 @@ void B2BSolver::solve(unsigned iteration, PartialPlacement& p_placement) { p_placement.block_x_locs[blk_id] = device_grid_width_ / 2.0; p_placement.block_y_locs[blk_id] = device_grid_height_ / 2.0; } + block_x_locs_solved = p_placement.block_x_locs; block_y_locs_solved = p_placement.block_y_locs; + if (is_multi_die()) { + std::fill(p_placement.block_layer_nums.begin(), + p_placement.block_layer_nums.end(), + device_grid_num_layers_ / 2.0); + block_z_locs_solved = p_placement.block_layer_nums; + } return; } @@ -615,10 +622,16 @@ void B2BSolver::solve(unsigned iteration, PartialPlacement& p_placement) { // Save the legalized solution; we need it for the anchors. 
block_x_locs_legalized = p_placement.block_x_locs; block_y_locs_legalized = p_placement.block_y_locs; + if (is_multi_die()) { + block_z_locs_legalized = p_placement.block_layer_nums; + } // Store last solved position into p_placement for b2b model p_placement.block_x_locs = block_x_locs_solved; p_placement.block_y_locs = block_y_locs_solved; + if (is_multi_die()) { + p_placement.block_layer_nums = block_z_locs_solved; + } } // Run the B2B solver using p_placement as a starting point. @@ -627,27 +640,36 @@ void B2BSolver::solve(unsigned iteration, PartialPlacement& p_placement) { // Store the solved solutions for the next iteration. block_x_locs_solved = p_placement.block_x_locs; block_y_locs_solved = p_placement.block_y_locs; + if (is_multi_die()) { + block_z_locs_solved = p_placement.block_layer_nums; + } } void B2BSolver::initialize_placement_least_dense(PartialPlacement& p_placement) { // Find a gap for the blocks such that each block can fit onto the device // if they were evenly spaced by this gap. - double gap = std::sqrt(device_grid_height_ * device_grid_width_ / static_cast(num_moveable_blocks_)); + size_t num_tiles_per_layer = device_grid_width_ * device_grid_height_; + VTR_ASSERT_SAFE(device_grid_num_layers_ > 0); + unsigned num_blocks_per_layer = num_moveable_blocks_ / device_grid_num_layers_; + double gap = std::sqrt(num_tiles_per_layer / static_cast(num_blocks_per_layer)); // Assuming this gap, get how many columns/rows of blocks there will be. size_t cols = std::ceil(device_grid_width_ / gap); size_t rows = std::ceil(device_grid_height_ / gap); // Spread the blocks at these grid coordinates. 
- for (size_t r = 0; r <= rows; r++) { - for (size_t c = 0; c <= cols; c++) { - size_t i = r * cols + c; - if (i >= num_moveable_blocks_) - break; - APRowId row_id = APRowId(i); - APBlockId blk_id = row_id_to_blk_id_[row_id]; - p_placement.block_x_locs[blk_id] = c * gap; - p_placement.block_y_locs[blk_id] = r * gap; + for (size_t d = 0; d < device_grid_num_layers_; d++) { + for (size_t r = 0; r <= rows; r++) { + for (size_t c = 0; c <= cols; c++) { + size_t i = r * cols + c; + if (i >= num_moveable_blocks_) + break; + APRowId row_id = APRowId(i); + APBlockId blk_id = row_id_to_blk_id_[row_id]; + p_placement.block_x_locs[blk_id] = c * gap; + p_placement.block_y_locs[blk_id] = r * gap; + p_placement.block_layer_nums[blk_id] = d; + } } } @@ -656,6 +678,9 @@ void B2BSolver::initialize_placement_least_dense(PartialPlacement& p_placement) for (APBlockId blk_id : disconnected_blocks_) { p_placement.block_x_locs[blk_id] = device_grid_width_ / 2.0; p_placement.block_y_locs[blk_id] = device_grid_height_ / 2.0; + if (is_multi_die()) { + p_placement.block_layer_nums[blk_id] = device_grid_num_layers_ / 2.0; + } } } @@ -664,11 +689,15 @@ void B2BSolver::b2b_solve_loop(unsigned iteration, PartialPlacement& p_placement // A good guess for B2B is the last solved solution. Eigen::VectorXd x_guess(num_moveable_blocks_); Eigen::VectorXd y_guess(num_moveable_blocks_); + Eigen::VectorXd z_guess(num_moveable_blocks_); for (size_t row_id_idx = 0; row_id_idx < num_moveable_blocks_; row_id_idx++) { APRowId row_id = APRowId(row_id_idx); APBlockId blk_id = row_id_to_blk_id_[row_id]; x_guess(row_id_idx) = p_placement.block_x_locs[blk_id]; y_guess(row_id_idx) = p_placement.block_y_locs[blk_id]; + if (is_multi_die()) { + z_guess(row_id_idx) = p_placement.block_layer_nums[blk_id]; + } } // Create a timer to keep track of how long each part of the solver take. 
@@ -706,30 +735,20 @@ void B2BSolver::b2b_solve_loop(unsigned iteration, PartialPlacement& p_placement // Note: Since we have two different connectivity matrices, we need to // different CG solver objects. float solve_linear_system_start_time = runtime_timer.elapsed_sec(); - Eigen::VectorXd x, y; - Eigen::ConjugateGradient, Eigen::Lower | Eigen::Upper> cg_x; - Eigen::ConjugateGradient, Eigen::Lower | Eigen::Upper> cg_y; - cg_x.compute(A_sparse_x); - cg_y.compute(A_sparse_y); - VTR_ASSERT_SAFE_MSG(cg_x.info() == Eigen::Success, "Conjugate Gradient failed at compute for A_x!"); - VTR_ASSERT_SAFE_MSG(cg_y.info() == Eigen::Success, "Conjugate Gradient failed at compute for A_y!"); - cg_x.setMaxIterations(max_cg_iterations_); - cg_y.setMaxIterations(max_cg_iterations_); - - // Solve the x dimension. - x = cg_x.solveWithGuess(b_x, x_guess); - total_num_cg_iters_ += cg_x.iterations(); - VTR_LOGV(log_verbosity_ >= 20, "\t\tNum CG-x iter: %zu\n", cg_x.iterations()); - - // Solve the y dimension. - y = cg_y.solveWithGuess(b_y, y_guess); - total_num_cg_iters_ += cg_y.iterations(); - VTR_LOGV(log_verbosity_ >= 20, "\t\tNum CG-y iter: %zu\n", cg_y.iterations()); - + Eigen::VectorXd x = solve_linear_system(A_sparse_x, b_x, x_guess); + Eigen::VectorXd y = solve_linear_system(A_sparse_y, b_y, y_guess); + Eigen::VectorXd z; + if (is_multi_die()) { + z = solve_linear_system(A_sparse_z, b_z, z_guess); + } total_time_spent_solving_linear_system_ += runtime_timer.elapsed_sec() - solve_linear_system_start_time; // Save the result into the partial placement object. - store_solution_into_placement(x, y, p_placement); + store_solution_into_placement(x, p_placement.block_x_locs, device_grid_width_); + store_solution_into_placement(y, p_placement.block_y_locs, device_grid_height_); + if (is_multi_die()) { + store_solution_into_placement(z, p_placement.block_layer_nums, device_grid_num_layers_); + } // If the current HPWL is larger than the previous HPWL (i.e. 
the HPWL // got worst since last B2B iter) or the gap between the two solutions @@ -751,6 +770,9 @@ void B2BSolver::b2b_solve_loop(unsigned iteration, PartialPlacement& p_placement // Update the guesses with the most recent answer x_guess = x; y_guess = y; + if (is_multi_die()) { + z_guess = z; + } } // Disconnected blocks are not optimized by the solver. @@ -761,6 +783,9 @@ void B2BSolver::b2b_solve_loop(unsigned iteration, PartialPlacement& p_placement for (APBlockId blk_id : disconnected_blocks_) { p_placement.block_x_locs[blk_id] = device_grid_width_ / 2.0; p_placement.block_y_locs[blk_id] = device_grid_height_ / 2.0; + if (is_multi_die()) { + p_placement.block_layer_nums[blk_id] = device_grid_num_layers_ / 2.0; + } } } else { // If a legalized solution is available (after the first iteration of GP), then @@ -768,10 +793,32 @@ void B2BSolver::b2b_solve_loop(unsigned iteration, PartialPlacement& p_placement for (APBlockId blk_id : disconnected_blocks_) { p_placement.block_x_locs[blk_id] = block_x_locs_legalized[blk_id]; p_placement.block_y_locs[blk_id] = block_y_locs_legalized[blk_id]; + if (is_multi_die()) { + p_placement.block_layer_nums[blk_id] = block_z_locs_legalized[blk_id]; + } } } } +Eigen::VectorXd B2BSolver::solve_linear_system(Eigen::SparseMatrix &A, + Eigen::VectorXd &b, + Eigen::VectorXd &guess) { + // Set up the system of equation solver. + Eigen::ConjugateGradient, Eigen::Lower | Eigen::Upper> cg; + cg.compute(A); + VTR_ASSERT_SAFE_MSG(cg.info() == Eigen::Success, "Conjugate Gradient failed at compute!"); + + // Solve. + cg.setMaxIterations(max_cg_iterations_); + Eigen::VectorXd solution = cg.solveWithGuess(b, guess); + + // Collect some metrics. + total_num_cg_iters_ += cg.iterations(); + VTR_LOGV(log_verbosity_ >= 20, "\t\tNum CG iter: %zu\n", cg.iterations()); + + return solution; +} + namespace { /** * @brief Struct used to hold the bounding blocks of an AP net. 
@@ -785,6 +832,10 @@ struct APNetBounds { APBlockId min_y_blk; /// @brief The top-most block in the net. APBlockId max_y_blk; + /// @brief The lower-most block in the net (lowest-layer). + APBlockId min_z_blk; + /// @brief The upper-most block in the net (upper-most-layer). + APBlockId max_z_blk; }; } // namespace @@ -809,12 +860,15 @@ static inline APNetBounds get_unique_net_bounds(APNetId net_id, double min_x_pos = std::numeric_limits::max(); double max_y_pos = std::numeric_limits::lowest(); double min_y_pos = std::numeric_limits::max(); + double max_z_pos = std::numeric_limits::lowest(); + double min_z_pos = std::numeric_limits::max(); for (APPinId pin_id : netlist.net_pins(net_id)) { // Update the bounds based on the position of the block that has this pin. APBlockId blk_id = netlist.pin_block(pin_id); double x_pos = p_placement.block_x_locs[blk_id]; double y_pos = p_placement.block_y_locs[blk_id]; + double z_pos = p_placement.block_layer_nums[blk_id]; if (x_pos < min_x_pos) { min_x_pos = x_pos; bounds.min_x_blk = blk_id; @@ -823,6 +877,10 @@ static inline APNetBounds get_unique_net_bounds(APNetId net_id, min_y_pos = y_pos; bounds.min_y_blk = blk_id; } + if (z_pos < min_z_pos) { + min_z_pos = z_pos; + bounds.min_z_blk = blk_id; + } if (x_pos > max_x_pos) { max_x_pos = x_pos; bounds.max_x_blk = blk_id; @@ -831,6 +889,10 @@ static inline APNetBounds get_unique_net_bounds(APNetId net_id, max_y_pos = y_pos; bounds.max_y_blk = blk_id; } + if (z_pos > max_z_pos) { + max_z_pos = z_pos; + bounds.max_z_blk = blk_id; + } // In the case of a tie, we do not want to have the same blocks as bounds. // If there is a tie for the max position, and the current min bound is @@ -843,6 +905,10 @@ static inline APNetBounds get_unique_net_bounds(APNetId net_id, max_y_pos = y_pos; bounds.max_y_blk = blk_id; } + if (z_pos == max_z_pos && bounds.min_z_blk != blk_id) { + max_z_pos = z_pos; + bounds.max_z_blk = blk_id; + } } // Ensure the same block is set as the bounds. 
@@ -851,6 +917,7 @@ static inline APNetBounds get_unique_net_bounds(APNetId net_id, // context. VTR_ASSERT_SAFE(bounds.min_x_blk != bounds.max_x_blk); VTR_ASSERT_SAFE(bounds.min_y_blk != bounds.max_y_blk); + VTR_ASSERT_SAFE(bounds.min_z_blk != bounds.max_z_blk); return bounds; } @@ -913,8 +980,8 @@ std::pair B2BSolver::get_delay_derivative(APBlockId driver_blk, // TODO: Handle 3D FPGAs for this method. int layer_num = 0; - VTR_ASSERT_SAFE_MSG(p_placement.block_layer_nums[driver_blk] == layer_num && p_placement.block_layer_nums[sink_blk] == layer_num, - "3D FPGAs not supported yet in the B2B solver"); + VTR_ASSERT_SAFE_MSG(!is_multi_die(), + "Timing-driven AP does not support 3D FPGAs yet"); // Special case: If the distance between the driver and sink is as large as // the device, we cannot take the forward difference (since it will go off @@ -1047,6 +1114,10 @@ std::pair B2BSolver::get_delay_normalization_facs(APBlockId driv // tile. This should be able to remove the units without changing the value // too much. + // TODO: Handle multi-die for this function. + VTR_ASSERT_SAFE_MSG(!is_multi_die(), + "Timing-driven AP does not support 3D FPGAs yet"); + // Similar to calculating the derivative, we want to use the legalized position // of the driver block to try and estimate the delay from that block type. t_physical_tile_loc driver_block_loc(block_x_locs_legalized[driver_blk], @@ -1086,6 +1157,10 @@ void B2BSolver::init_linear_system(PartialPlacement& p_placement, unsigned itera A_sparse_y = Eigen::SparseMatrix(num_moveable_blocks_, num_moveable_blocks_); b_x = Eigen::VectorXd::Zero(num_moveable_blocks_); b_y = Eigen::VectorXd::Zero(num_moveable_blocks_); + if (is_multi_die()) { + A_sparse_z = Eigen::SparseMatrix(num_moveable_blocks_, num_moveable_blocks_); + b_z = Eigen::VectorXd::Zero(num_moveable_blocks_); + } // Create triplet lists to store the sparse positions to update and reserve // space for them. 
@@ -1094,6 +1169,10 @@ void B2BSolver::init_linear_system(PartialPlacement& p_placement, unsigned itera triplet_list_x.reserve(total_num_pins_in_netlist); std::vector> triplet_list_y; triplet_list_y.reserve(total_num_pins_in_netlist); + std::vector> triplet_list_z; + if (is_multi_die()) { + triplet_list_z.resize(total_num_pins_in_netlist); + } for (APNetId net_id : netlist_.nets()) { if (netlist_.net_is_ignored(net_id)) @@ -1125,12 +1204,26 @@ void B2BSolver::init_linear_system(PartialPlacement& p_placement, unsigned itera add_connection_to_system(blk_id, net_bounds.max_y_blk, num_pins, wl_net_w, p_placement.block_y_locs, triplet_list_y, b_y); add_connection_to_system(blk_id, net_bounds.min_y_blk, num_pins, wl_net_w, p_placement.block_y_locs, triplet_list_y, b_y); } + if (is_multi_die() && blk_id != net_bounds.max_z_blk && blk_id != net_bounds.min_z_blk) { + // For multi-die FPGAs, we apply extra weight in the layer dimension + // since moving between layers tends to cost more wiring than moving + // within the same layer. + double multidie_net_w = wl_net_w * layer_distance_cost_fac_; + add_connection_to_system(blk_id, net_bounds.max_z_blk, num_pins, multidie_net_w, p_placement.block_layer_nums, triplet_list_z, b_z); + add_connection_to_system(blk_id, net_bounds.min_z_blk, num_pins, multidie_net_w, p_placement.block_layer_nums, triplet_list_z, b_z); + } } // Connect the bounds to each other. Its just easier to put these here // instead of in the for loop above. add_connection_to_system(net_bounds.max_x_blk, net_bounds.min_x_blk, num_pins, wl_net_w, p_placement.block_x_locs, triplet_list_x, b_x); add_connection_to_system(net_bounds.max_y_blk, net_bounds.min_y_blk, num_pins, wl_net_w, p_placement.block_y_locs, triplet_list_y, b_y); + if (is_multi_die()) { + // See comment above. For multi-die FPGAs, we apply an extra factor + // to the cost. 
+ double multidie_net_w = wl_net_w * layer_distance_cost_fac_; + add_connection_to_system(net_bounds.max_z_blk, net_bounds.min_z_blk, num_pins, multidie_net_w, p_placement.block_layer_nums, triplet_list_z, b_z); + } // ==================================================================== // Timing Connections @@ -1140,6 +1233,8 @@ void B2BSolver::init_linear_system(PartialPlacement& p_placement, unsigned itera // positions to compute the delay derivative, which do not exist until // the next iteration. Its fine to do one wirelength driven iteration first. if (pre_cluster_timing_manager_.is_valid() && iteration != 0) { + VTR_ASSERT_SAFE_MSG(!is_multi_die(), + "Timing-driven AP does not support 3D FPGAs yet"); // Create connections from each driver pin to each of it's sink pins. // This will incentivize shrinking the distance from drivers to sinks // of connections which would improve the timing. @@ -1201,6 +1296,9 @@ void B2BSolver::init_linear_system(PartialPlacement& p_placement, unsigned itera // Build the sparse connectivity matrices from the triplets. A_sparse_x.setFromTriplets(triplet_list_x.begin(), triplet_list_x.end()); A_sparse_y.setFromTriplets(triplet_list_y.begin(), triplet_list_y.end()); + if (is_multi_die()) { + A_sparse_z.setFromTriplets(triplet_list_z.begin(), triplet_list_z.end()); + } } // This function adds anchors for legalized solution. 
Anchors are treated as fixed node, @@ -1223,12 +1321,18 @@ void B2BSolver::update_linear_system_with_anchors(unsigned iteration) { A_sparse_y.coeffRef(row_id_idx, row_id_idx) += pseudo_w_y; b_x(row_id_idx) += pseudo_w_x * block_x_locs_legalized[blk_id]; b_y(row_id_idx) += pseudo_w_y * block_y_locs_legalized[blk_id]; + + if (is_multi_die()) { + double pseudo_w_z = coeff_pseudo_anchor * 2.0; + A_sparse_z.coeffRef(row_id_idx, row_id_idx) += pseudo_w_z; + b_z(row_id_idx) += pseudo_w_z * block_z_locs_legalized[blk_id]; + } } } -void B2BSolver::store_solution_into_placement(Eigen::VectorXd& x_soln, - Eigen::VectorXd& y_soln, - PartialPlacement& p_placement) { +void B2BSolver::store_solution_into_placement(Eigen::VectorXd &dim_soln, + vtr::vector &block_dim_locs, + double dim_max_pos) { for (size_t row_id_idx = 0; row_id_idx < num_moveable_blocks_; row_id_idx++) { // Since we are capping the number of iterations, the solver may not // have enough time to converge on a solution that is on the device. @@ -1240,19 +1344,14 @@ void B2BSolver::store_solution_into_placement(Eigen::VectorXd& x_soln, // TODO: Should handle this better. If the solution is very negative // it may indicate a bug. 
double epsilon = 0.0001; - if (x_soln[row_id_idx] < epsilon) - x_soln[row_id_idx] = epsilon; - if (x_soln[row_id_idx] >= device_grid_width_) - x_soln[row_id_idx] = device_grid_width_ - epsilon; - if (y_soln[row_id_idx] < epsilon) - y_soln[row_id_idx] = epsilon; - if (y_soln[row_id_idx] >= device_grid_height_) - y_soln[row_id_idx] = device_grid_height_ - epsilon; + if (dim_soln[row_id_idx] < epsilon) + dim_soln[row_id_idx] = epsilon; + if (dim_soln[row_id_idx] >= dim_max_pos) + dim_soln[row_id_idx] = dim_max_pos - epsilon; APRowId row_id = APRowId(row_id_idx); APBlockId blk_id = row_id_to_blk_id_[row_id]; - p_placement.block_x_locs[blk_id] = x_soln[row_id_idx]; - p_placement.block_y_locs[blk_id] = y_soln[row_id_idx]; + block_dim_locs[blk_id] = dim_soln[row_id_idx]; } } diff --git a/vpr/src/analytical_place/analytical_solver.h b/vpr/src/analytical_place/analytical_solver.h index faf5a9c2362..e3662c1e261 100644 --- a/vpr/src/analytical_place/analytical_solver.h +++ b/vpr/src/analytical_place/analytical_solver.h @@ -531,6 +531,16 @@ class B2BSolver : public AnalyticalSolver { /// number, the solver will focus more on timing and less on wirelength. static constexpr double timing_slope_fac_ = 0.75; + /// @brief For most FPGA architectures, the cost of moving horizontally is + /// equivalent to the cost moving vertically (i.e. moving in increasing + /// x-dimension has the same cost as moving the same amount in the + /// y-dimension). However, for 3D FPGAs, moving between layers is + /// much more expensive than moving in the x or y dimension. We account + /// for this by adding a cost penalty factor to the "z"-dimension. + /// TODO: This cost factor was randomly selected because it felt ok. Should + /// choose a better factor that is chosen empirically. 
+ static constexpr double layer_distance_cost_fac_ = 10.0; + public: B2BSolver(const APNetlist& ap_netlist, const DeviceGrid& device_grid, @@ -699,15 +709,41 @@ class B2BSolver : public AnalyticalSolver { void update_linear_system_with_anchors(unsigned iteration); /** - * @brief Store the x and y solutions in Eigen's vectors into the partial - * placement object. - * - * Note: The x_soln and y_soln may be modified if it is found that the - * solution is imposible (i.e. has negative positions). + * @brief Solves the linear system of equations using the connectivity + * matrix (A), the constant vector (b), and a guess for the solution. */ - void store_solution_into_placement(Eigen::VectorXd& x_soln, - Eigen::VectorXd& y_soln, - PartialPlacement& p_placement); + Eigen::VectorXd solve_linear_system(Eigen::SparseMatrix &A, + Eigen::VectorXd &b, + Eigen::VectorXd &guess); + + /** + * @brief Store the solutions from the linear system into the partial + * placement object for the given dimension. + * + * Note: The dim_soln may be modified if it is found that the solution is + * impossible (e.g. has negative positions). + * + * @param dim_soln + * The solution of the linear system for a given dimension. + * @param block_dim_locs + * The block locations in the partial placement for the dimension. + * @param dim_max_pos + * The maximum position allowed for the dimension. For example, for the + * x-dimension, this would be the width of the device. This is used to + * ensure that the positions do not go off device. + */ + void store_solution_into_placement(Eigen::VectorXd &dim_soln, + vtr::vector &block_dim_locs, + double dim_max_pos); + + /** + * @brief Does the FPGA that the AP flow is currently targeting have more + * than one die. Having multiple dies would imply that the solver + * needs to add another dimension to solve for. 
+ */ + inline bool is_multi_die() const { + return device_grid_num_layers_ > 1; + } // The following are variables used to store the system of equations to be // solved in the x and y dimensions. The equations are of the form: @@ -720,22 +756,28 @@ class B2BSolver : public AnalyticalSolver { Eigen::SparseMatrix A_sparse_x; /// @brief The coefficient / connectivity matrix for the y dimension. Eigen::SparseMatrix A_sparse_y; + /// @brief The coefficient / connectivity matrix for the z dimension (layer dimension). + Eigen::SparseMatrix A_sparse_z; /// @brief The constant vector in the x dimension. Eigen::VectorXd b_x; /// @brief The constant vector in the y dimension. Eigen::VectorXd b_y; + /// @brief The constant vector in the z dimension (layer dimension). + Eigen::VectorXd b_z; // The following is the solution of the previous iteration of this solver. // They are updated at the end of solve() and are used as the starting point // for the next call to solve. vtr::vector block_x_locs_solved; vtr::vector block_y_locs_solved; + vtr::vector block_z_locs_solved; // The following are the legalized solution coming into the analytical solver // (other than the first iteration). These are stored to be used as anchor // blocks during the solver. vtr::vector block_x_locs_legalized; vtr::vector block_y_locs_legalized; + vtr::vector block_z_locs_legalized; /// @brief The total number of CG iterations that this solver has performed /// so far. This can be a useful metric for the amount of work the diff --git a/vpr/src/place/initial_placement.cpp b/vpr/src/place/initial_placement.cpp index 0bacfcac783..7b25359f526 100644 --- a/vpr/src/place/initial_placement.cpp +++ b/vpr/src/place/initial_placement.cpp @@ -653,8 +653,6 @@ static t_flat_pl_loc find_centroid_loc_from_flat_placement(const t_pl_macro& pl_ // and save the closest of all regions. 
t_flat_pl_loc best_projected_pos = centroid; float best_distance = std::numeric_limits::max(); - VTR_ASSERT_MSG(centroid.layer == 0, - "3D FPGAs not supported for this part of the code yet"); for (const Region& region : head_pr.get_regions()) { const vtr::Rect& rect = region.get_rect(); // Note: We add 0.999 here since the partition region is in grid @@ -662,12 +660,16 @@ static t_flat_pl_loc find_centroid_loc_from_flat_placement(const t_pl_macro& pl_ // they really are 1x1. float proj_x = std::clamp(centroid.x, rect.xmin(), rect.xmax() + 0.999); float proj_y = std::clamp(centroid.y, rect.ymin(), rect.ymax() + 0.999); + float proj_layer = std::clamp(centroid.layer, region.get_layer_range().first, + region.get_layer_range().second + 0.999); float dx = std::abs(proj_x - centroid.x); float dy = std::abs(proj_y - centroid.y); - float dist = dx + dy; + float dlayer = std::abs(proj_layer - centroid.layer); + float dist = dx + dy + dlayer; if (dist < best_distance) { best_projected_pos.x = proj_x; best_projected_pos.y = proj_y; + best_projected_pos.layer = proj_layer; best_distance = dist; } } diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap/config/config.txt index 80d8b5b770b..276f206608b 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap/config/config.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap/config/config.txt @@ -27,6 +27,11 @@ circuit_list_add=mm9b.blif circuit_list_add=styr.blif circuit_list_add=s953.blif +# Constrain the IOs +# TODO: Should create a unique config file that tests fixed blocks for 3D AP. +# - For now, just add one so we can test the solver effectively. 
+circuit_constraint_list_add=(mm9a.blif, constraints=../../../../constraints/mm9a_io_constraint.xml) + # Parse info and how to parse parse_file=vpr_fixed_chan_width.txt @@ -42,4 +47,6 @@ script_params_common=-starting_stage vpr -track_memory_usage --analytical_place script_params_list_add=--ap_analytical_solver identity --ap_partial_legalizer none # Force unrelated clustering on. script_params_list_add=--ap_analytical_solver identity --ap_partial_legalizer none --allow_unrelated_clustering on +# Test that the solver will work with 3D +script_params_list_add=--ap_partial_legalizer none --allow_unrelated_clustering on diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap/config/golden_results.txt index d9874bc4dfb..382609acdf0 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap/config/golden_results.txt @@ -1,25 +1,37 @@ -arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time initial_placed_wirelength_est placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time initial_placed_CPD_est placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est 
place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time routed_wirelength avg_routed_wirelength routed_wiresegment avg_routed_wiresegment total_nets_routed total_connections_routed total_heap_pushes total_heap_pops logic_block_area_total logic_block_area_used routing_area_total routing_area_per_tile crit_path_route_success_iteration num_rr_graph_nodes num_rr_graph_edges collapsed_nodes critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS create_rr_graph_time create_intra_cluster_rr_graph_time adding_internal_edges route_mem crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time router_lookahead_mem tile_lookahead_computation_time router_lookahead_computation_time -3d_k4_N4_90nm.xml s820.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none 0.41 vpr 67.45 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 35 19 -1 -1 success v8.0.0-13596-g7ff2fdbdb-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-08-20T21:23:20 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 69068 19 19 136 0 1 99 73 10 10 200 -1 FPGA3D -1 -1 871.641 751 681 57 398 226 67.4 MiB 0.12 0.00 3.372 3.04604 -44.3501 -3.04604 3.04604 0.00 0.000256444 0.000213235 0.00238126 0.00211997 67.4 MiB 0.12 67.4 MiB 0.09 844 8.61224 844 8.61224 475 1406 401495 170476 142676 78026.2 975514. 
4877.57 10 30528 128848 -1 3.15206 3.15206 -50.6277 -3.15206 0 0 0.17 -1 -1 67.4 MiB 0.07 0.0125619 0.0113107 28.5 MiB -1 0.02 -3d_k4_N4_90nm.xml s820.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none_--allow_unrelated_clustering_on 0.44 vpr 67.45 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 34 19 -1 -1 success v8.0.0-13596-g7ff2fdbdb-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-08-20T21:23:20 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 69064 19 19 136 0 1 99 72 10 10 200 -1 FPGA3D -1 -1 879.713 754 817 82 480 255 67.4 MiB 0.12 0.00 3.372 3.00456 -44.8378 -3.00456 3.00456 0.00 0.000283054 0.000240054 0.00277318 0.002473 67.4 MiB 0.12 67.4 MiB 0.09 843 8.60204 843 8.60204 462 1338 564593 271588 142676 75796.9 975514. 4877.57 11 30528 128848 -1 3.1757 3.1757 -50.7348 -3.1757 0 0 0.17 -1 -1 67.4 MiB 0.10 0.0131351 0.0117164 28.5 MiB -1 0.02 -3d_k4_N4_90nm.xml s838.1.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none 0.37 vpr 67.06 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 31 35 -1 -1 success v8.0.0-13596-g7ff2fdbdb-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-08-20T21:23:20 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 68668 35 1 129 0 1 108 67 10 10 200 -1 FPGA3D -1 -1 829.372 592 1699 155 1001 543 67.1 MiB 0.13 0.00 5.56099 4.9431 -98.8807 -4.9431 4.9431 0.00 0.000260731 0.00022283 0.00434779 0.00379044 67.1 MiB 0.13 67.1 MiB 0.09 647 6.04673 647 6.04673 420 831 175142 62634 142676 69108.9 975514. 
4877.57 7 30528 128848 -1 5.41618 5.41618 -101.368 -5.41618 0 0 0.17 -1 -1 67.1 MiB 0.03 0.0119957 0.0106859 28.4 MiB -1 0.02 -3d_k4_N4_90nm.xml s838.1.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none_--allow_unrelated_clustering_on 0.39 vpr 67.20 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 30 35 -1 -1 success v8.0.0-13596-g7ff2fdbdb-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-08-20T21:23:20 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 68816 35 1 129 0 1 108 66 10 10 200 -1 FPGA3D -1 -1 831.372 588 1396 138 801 457 67.2 MiB 0.12 0.00 5.56099 4.96986 -98.2942 -4.96986 4.96986 0.00 0.000285683 0.000248542 0.00394139 0.0034509 67.2 MiB 0.12 67.2 MiB 0.09 635 5.93458 635 5.93458 433 911 201419 73665 142676 66879.6 975514. 4877.57 13 30528 128848 -1 5.04154 5.04154 -98.9333 -5.04154 0 0 0.18 -1 -1 67.2 MiB 0.04 0.0145015 0.0127822 28.4 MiB -1 0.02 -3d_k4_N4_90nm.xml bw.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none 0.43 vpr 67.57 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 37 5 -1 -1 success v8.0.0-13596-g7ff2fdbdb-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-08-20T21:23:20 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 69196 5 28 137 0 0 95 70 10 10 200 -1 FPGA3D -1 -1 811.663 751 358 32 170 156 67.6 MiB 0.12 0.00 3.83992 3.52385 -67.1587 -3.52385 nan 0.00 0.000261393 0.000216879 0.00197599 0.00179054 67.6 MiB 0.12 67.6 MiB 0.09 791 8.32632 791 8.32632 422 999 445501 228266 142676 82484.8 975514. 
4877.57 8 30528 128848 -1 3.59016 nan -75.0344 -3.59016 0 0 0.17 -1 -1 67.6 MiB 0.08 0.0113797 0.0102712 28.6 MiB -1 0.02 -3d_k4_N4_90nm.xml bw.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none_--allow_unrelated_clustering_on 0.43 vpr 67.03 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 36 5 -1 -1 success v8.0.0-13596-g7ff2fdbdb-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-08-20T21:23:20 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 68640 5 28 137 0 0 95 69 10 10 200 -1 FPGA3D -1 -1 815.277 716 774 65 382 327 67.0 MiB 0.13 0.00 3.83992 2.81327 -62.7158 -2.81327 nan 0.00 0.000267235 0.000222035 0.00277126 0.00244752 67.0 MiB 0.13 67.0 MiB 0.10 771 8.11579 771 8.11579 396 963 453652 232363 142676 80255.5 975514. 4877.57 11 30528 128848 -1 3.21552 nan -71.8798 -3.21552 0 0 0.17 -1 -1 67.0 MiB 0.09 0.0136186 0.0121416 28.1 MiB -1 0.02 -3d_k4_N4_90nm.xml rd84.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none 0.43 vpr 67.13 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 48 8 -1 -1 success v8.0.0-13596-g7ff2fdbdb-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-08-20T21:23:20 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 68744 8 4 165 0 0 122 60 10 10 200 -1 FPGA3D -1 -1 1159.54 968 1464 166 1059 239 67.1 MiB 0.14 0.00 4.49381 3.68783 -12.7612 -3.68783 nan 0.00 0.00029022 0.000240743 0.00533876 0.00462497 67.1 MiB 0.14 67.1 MiB 0.10 1146 9.39344 1146 9.39344 694 2071 441536 150803 142676 107007 975514. 
4877.57 12 30528 128848 -1 4.14058 nan -13.8699 -4.14058 0 0 0.17 -1 -1 67.1 MiB 0.08 0.0179728 0.0158994 28.1 MiB -1 0.02 -3d_k4_N4_90nm.xml rd84.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none_--allow_unrelated_clustering_on 0.44 vpr 67.46 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 48 8 -1 -1 success v8.0.0-13596-g7ff2fdbdb-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-08-20T21:23:20 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 69084 8 4 165 0 0 122 60 10 10 200 -1 FPGA3D -1 -1 1159.54 968 1464 166 1059 239 67.5 MiB 0.14 0.00 4.49381 3.68783 -12.7612 -3.68783 nan 0.00 0.000307288 0.000256225 0.00559788 0.00484354 67.5 MiB 0.14 67.5 MiB 0.10 1146 9.39344 1146 9.39344 694 2071 441536 150803 142676 107007 975514. 4877.57 12 30528 128848 -1 4.14058 nan -13.8699 -4.14058 0 0 0.18 -1 -1 67.5 MiB 0.08 0.0184545 0.0163003 28.5 MiB -1 0.02 -3d_k4_N4_90nm.xml s832.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none 0.42 vpr 67.20 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 40 19 -1 -1 success v8.0.0-13596-g7ff2fdbdb-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-08-20T21:23:20 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 68812 19 19 161 0 1 113 78 10 10 200 -1 FPGA3D -1 -1 1035.88 805 1904 218 1090 596 67.2 MiB 0.13 0.00 3.56182 3.05406 -45.7438 -3.05406 3.05406 0.00 0.000305934 0.000255416 0.00470479 0.00409149 67.2 MiB 0.13 67.2 MiB 0.10 886 7.91071 886 7.91071 534 1697 391552 158224 142676 89172.8 975514. 
4877.57 12 30528 128848 -1 3.2145 3.2145 -50.1138 -3.2145 0 0 0.17 -1 -1 67.2 MiB 0.07 0.0166514 0.0147489 28.1 MiB -1 0.02 -3d_k4_N4_90nm.xml s832.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none_--allow_unrelated_clustering_on 0.45 vpr 67.51 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 39 19 -1 -1 success v8.0.0-13596-g7ff2fdbdb-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-08-20T21:23:20 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 69132 19 19 161 0 1 113 77 10 10 200 -1 FPGA3D -1 -1 1033.88 826 1544 157 892 495 67.5 MiB 0.13 0.00 3.56182 3.21504 -46.5131 -3.21504 3.21504 0.00 0.000316625 0.000267708 0.00430746 0.0037363 67.5 MiB 0.13 67.5 MiB 0.10 965 8.61607 965 8.61607 548 1651 574859 233525 142676 86943.5 975514. 4877.57 10 30528 128848 -1 3.27694 3.27694 -52.9059 -3.27694 0 0 0.17 -1 -1 67.5 MiB 0.10 0.0161037 0.0142779 28.5 MiB -1 0.02 -3d_k4_N4_90nm.xml mm9a.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none 0.44 vpr 67.39 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 43 13 -1 -1 success v8.0.0-13596-g7ff2fdbdb-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-08-20T21:23:20 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 69004 13 9 165 0 1 116 65 10 10 200 -1 FPGA3D -1 -1 1058.49 813 1755 173 1219 363 67.4 MiB 0.15 0.00 10.3774 8.83659 -159.14 -8.83659 8.83659 0.00 0.000385324 0.000335514 0.00680145 0.00598277 67.4 MiB 0.15 67.4 MiB 0.10 959 8.33913 959 8.33913 536 1578 390106 170128 142676 95860.8 975514. 
4877.57 9 30528 128848 -1 9.76606 9.76606 -166.142 -9.76606 0 0 0.17 -1 -1 67.4 MiB 0.07 0.019374 0.0173704 28.4 MiB -1 0.02 -3d_k4_N4_90nm.xml mm9a.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none_--allow_unrelated_clustering_on 0.44 vpr 67.26 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 43 13 -1 -1 success v8.0.0-13596-g7ff2fdbdb-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-08-20T21:23:20 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 68876 13 9 165 0 1 116 65 10 10 200 -1 FPGA3D -1 -1 1056.49 816 1495 134 1026 335 67.3 MiB 0.14 0.00 10.3774 8.88521 -154.293 -8.88521 8.88521 0.00 0.000344076 0.00029067 0.00593383 0.00518801 67.3 MiB 0.14 67.3 MiB 0.10 971 8.44348 971 8.44348 611 1928 403295 155537 142676 95860.8 975514. 4877.57 12 30528 128848 -1 9.01678 9.01678 -161.147 -9.01678 0 0 0.18 -1 -1 67.3 MiB 0.08 0.0194851 0.0172898 28.4 MiB -1 0.02 -3d_k4_N4_90nm.xml alu2.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none 0.44 vpr 67.31 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 59 10 -1 -1 success v8.0.0-13596-g7ff2fdbdb-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-08-20T21:23:20 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 68928 10 6 207 0 0 147 75 10 10 200 -1 FPGA3D -1 -1 1462.31 1206 1813 181 1251 381 67.3 MiB 0.15 0.00 7.15091 6.33663 -20.2299 -6.33663 nan 0.00 0.000370933 0.000310403 0.00618125 0.00539142 67.3 MiB 0.15 67.3 MiB 0.11 1358 9.23810 1358 9.23810 787 2247 365299 124151 142676 131530 975514. 
4877.57 11 30528 128848 -1 6.64292 nan -21.9088 -6.64292 0 0 0.17 -1 -1 67.3 MiB 0.07 0.0214792 0.019147 28.4 MiB -1 0.02 -3d_k4_N4_90nm.xml alu2.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none_--allow_unrelated_clustering_on 0.46 vpr 67.41 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 58 10 -1 -1 success v8.0.0-13596-g7ff2fdbdb-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-08-20T21:23:20 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 69032 10 6 207 0 0 147 74 10 10 200 -1 FPGA3D -1 -1 1464.8 1177 1779 184 1243 352 67.4 MiB 0.15 0.00 7.39668 6.29249 -21.2096 -6.29249 nan 0.00 0.000365545 0.000304455 0.00642313 0.00559458 67.4 MiB 0.15 67.4 MiB 0.11 1379 9.38095 1379 9.38095 817 2430 432630 150203 142676 129301 975514. 4877.57 14 30528 128848 -1 6.8716 nan -23.6985 -6.8716 0 0 0.17 -1 -1 67.4 MiB 0.08 0.0242575 0.0215874 28.5 MiB -1 0.02 -3d_k4_N4_90nm.xml x1.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none 0.52 vpr 67.51 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 40 51 -1 -1 success v8.0.0-13596-g7ff2fdbdb-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-08-20T21:23:20 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 69128 51 35 187 0 0 146 126 10 10 200 -1 FPGA3D -1 -1 1431.65 1102 2646 284 1001 1361 67.5 MiB 0.13 0.00 3.43373 2.37137 -53.5604 -2.37137 nan 0.00 0.000308386 0.000256935 0.00405197 0.00352503 67.5 MiB 0.13 67.5 MiB 0.10 1245 8.52740 1245 8.52740 741 2178 977720 428367 142676 89172.8 975514. 
4877.57 13 30528 128848 -1 2.85242 nan -62.0834 -2.85242 0 0 0.18 -1 -1 67.5 MiB 0.16 0.0174491 0.0155081 28.6 MiB -1 0.02 -3d_k4_N4_90nm.xml x1.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none_--allow_unrelated_clustering_on 0.51 vpr 67.14 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 39 51 -1 -1 success v8.0.0-13596-g7ff2fdbdb-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-08-20T21:23:20 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 68748 51 35 187 0 0 146 125 10 10 200 -1 FPGA3D -1 -1 1431.75 1138 3557 364 1478 1715 67.1 MiB 0.14 0.00 3.43373 2.34 -52.6482 -2.34 nan 0.00 0.000304644 0.00025537 0.00500613 0.00434258 67.1 MiB 0.14 67.1 MiB 0.10 1232 8.43836 1232 8.43836 757 2156 870465 396526 142676 86943.5 975514. 4877.57 12 30528 128848 -1 2.78998 nan -62.3332 -2.78998 0 0 0.18 -1 -1 67.1 MiB 0.15 0.0174681 0.0154681 28.4 MiB -1 0.02 -3d_k4_N4_90nm.xml t481.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none 0.62 vpr 67.26 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 55 16 -1 -1 success v8.0.0-13596-g7ff2fdbdb-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-08-20T21:23:20 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 68876 16 1 230 0 0 164 72 10 10 200 -1 FPGA3D -1 -1 1719.09 1423 1711 164 1284 263 67.3 MiB 0.23 0.00 5.26477 4.78086 -4.78086 -4.78086 nan 0.00 0.000371499 0.000308278 0.00660479 0.0057026 67.3 MiB 0.23 67.3 MiB 0.19 1908 11.6341 1908 11.6341 1096 4201 1095477 388933 142676 122613 975514. 
4877.57 14 30528 128848 -1 5.1579 nan -5.1579 -5.1579 0 0 0.17 -1 -1 67.3 MiB 0.17 0.0253407 0.0224414 28.4 MiB -1 0.02 -3d_k4_N4_90nm.xml t481.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none_--allow_unrelated_clustering_on 0.62 vpr 67.30 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 55 16 -1 -1 success v8.0.0-13596-g7ff2fdbdb-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-08-20T21:23:20 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 68912 16 1 230 0 0 164 72 10 10 200 -1 FPGA3D -1 -1 1719.09 1423 1711 164 1284 263 67.3 MiB 0.23 0.00 5.26477 4.78086 -4.78086 -4.78086 nan 0.00 0.000375177 0.000312127 0.00675769 0.00585125 67.3 MiB 0.23 67.3 MiB 0.19 1908 11.6341 1908 11.6341 1096 4201 1095477 388933 142676 122613 975514. 4877.57 14 30528 128848 -1 5.1579 nan -5.1579 -5.1579 0 0 0.17 -1 -1 67.3 MiB 0.16 0.0249796 0.0220727 28.4 MiB -1 0.02 -3d_k4_N4_90nm.xml mm9b.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none 0.56 vpr 67.40 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 64 13 -1 -1 success v8.0.0-13596-g7ff2fdbdb-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-08-20T21:23:20 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 69020 13 9 229 0 1 170 86 10 10 200 -1 FPGA3D -1 -1 1838.97 1301 2732 263 2032 437 67.4 MiB 0.23 0.00 11.9172 10.0937 -200.226 -10.0937 10.0937 0.00 0.000447109 0.000375185 0.00923795 0.00805715 67.4 MiB 0.23 67.4 MiB 0.18 1561 9.23669 1561 9.23669 945 3035 598090 221479 142676 142676 975514. 
4877.57 11 30528 128848 -1 10.6349 10.6349 -208.117 -10.6349 0 0 0.17 -1 -1 67.4 MiB 0.11 0.027256 0.0241761 28.6 MiB -1 0.02 -3d_k4_N4_90nm.xml mm9b.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none_--allow_unrelated_clustering_on 0.51 vpr 67.52 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 64 13 -1 -1 success v8.0.0-13596-g7ff2fdbdb-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-08-20T21:23:20 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 69144 13 9 229 0 1 170 86 10 10 200 -1 FPGA3D -1 -1 1838.97 1301 2732 263 2032 437 67.5 MiB 0.17 0.00 11.9172 10.0937 -200.226 -10.0937 10.0937 0.00 0.000491731 0.000415896 0.00979358 0.00856623 67.5 MiB 0.17 67.5 MiB 0.12 1561 9.23669 1561 9.23669 945 3035 598090 221479 142676 142676 975514. 4877.57 11 30528 128848 -1 10.6349 10.6349 -208.117 -10.6349 0 0 0.17 -1 -1 67.5 MiB 0.11 0.027452 0.0244516 28.5 MiB -1 0.02 -3d_k4_N4_90nm.xml styr.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none 0.57 vpr 67.02 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 60 10 -1 -1 success v8.0.0-13596-g7ff2fdbdb-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-08-20T21:23:20 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 68628 10 10 247 0 1 155 80 10 10 200 -1 FPGA3D -1 -1 1546.92 1190 2316 309 1659 348 67.0 MiB 0.23 0.00 3.91624 3.0935 -39.105 -3.0935 3.0935 0.00 0.000412332 0.000337749 0.00784731 0.00673918 67.0 MiB 0.23 67.0 MiB 0.18 1571 10.2013 1571 10.2013 766 2600 607719 233715 142676 133759 975514. 
4877.57 13 30528 128848 -1 3.38176 3.38176 -42.7981 -3.38176 0 0 0.17 -1 -1 67.0 MiB 0.11 0.0269385 0.0237335 28.1 MiB -1 0.02 -3d_k4_N4_90nm.xml styr.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none_--allow_unrelated_clustering_on 0.57 vpr 67.15 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 60 10 -1 -1 success v8.0.0-13596-g7ff2fdbdb-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-08-20T21:23:20 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 68760 10 10 247 0 1 155 80 10 10 200 -1 FPGA3D -1 -1 1546.92 1190 2316 309 1659 348 67.1 MiB 0.23 0.00 3.91624 3.0935 -39.105 -3.0935 3.0935 0.00 0.000442532 0.000367869 0.00831213 0.0072054 67.1 MiB 0.23 67.1 MiB 0.18 1571 10.2013 1571 10.2013 766 2600 607719 233715 142676 133759 975514. 4877.57 13 30528 128848 -1 3.38176 3.38176 -42.7981 -3.38176 0 0 0.18 -1 -1 67.1 MiB 0.11 0.0279652 0.0248269 28.3 MiB -1 0.02 -3d_k4_N4_90nm.xml s953.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none 0.56 vpr 67.66 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 63 17 -1 -1 success v8.0.0-13596-g7ff2fdbdb-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-08-20T21:23:20 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 69288 17 23 231 0 1 178 103 10 10 200 -1 FPGA3D -1 -1 1847 1427 3477 390 2178 909 67.7 MiB 0.18 0.00 4.10356 3.10287 -80.5632 -3.10287 3.10287 0.00 0.00047445 0.000394252 0.00998331 0.00858358 67.7 MiB 0.18 67.7 MiB 0.12 1761 9.94915 1761 9.94915 964 3371 900421 327204 142676 140447 975514. 
4877.57 13 30528 128848 -1 3.27694 3.27694 -87.685 -3.27694 0 0 0.18 -1 -1 67.7 MiB 0.15 0.0295839 0.02608 28.8 MiB -1 0.02 -3d_k4_N4_90nm.xml s953.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none_--allow_unrelated_clustering_on 0.57 vpr 67.16 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 63 17 -1 -1 success v8.0.0-13596-g7ff2fdbdb-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-08-20T21:23:20 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 68776 17 23 231 0 1 178 103 10 10 200 -1 FPGA3D -1 -1 1842.29 1437 3477 463 2076 938 67.2 MiB 0.17 0.00 4.10356 3.2145 -80.7092 -3.2145 3.2145 0.00 0.000514829 0.000435857 0.00935591 0.00811848 67.2 MiB 0.17 67.2 MiB 0.12 1837 10.3785 1837 10.3785 953 3269 974047 381889 142676 140447 975514. 4877.57 13 30528 128848 -1 3.41672 3.41672 -87.9469 -3.41672 0 0 0.17 -1 -1 67.2 MiB 0.16 0.0290759 0.0257941 28.3 MiB -1 0.02 + arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time initial_placed_wirelength_est placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time initial_placed_CPD_est placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time 
place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time routed_wirelength avg_routed_wirelength routed_wiresegment avg_routed_wiresegment total_nets_routed total_connections_routed total_heap_pushes total_heap_pops logic_block_area_total logic_block_area_used routing_area_total routing_area_per_tile crit_path_route_success_iteration num_rr_graph_nodes num_rr_graph_edges collapsed_nodes critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS create_rr_graph_time create_intra_cluster_rr_graph_time adding_internal_edges route_mem crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time router_lookahead_mem tile_lookahead_computation_time router_lookahead_computation_time + 3d_k4_N4_90nm.xml s820.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none 0.51 vpr 67.29 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 35 19 -1 -1 success v8.0.0-13874-g7a4092251b-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-09-04T17:49:03 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 68908 19 19 136 0 1 99 73 10 10 200 -1 FPGA3D -1 -1 871.641 751 681 57 398 226 67.3 MiB 0.13 0.00 3.372 3.04604 -44.3501 -3.04604 3.04604 0.00 0.00026651 0.000223768 0.00248161 0.00222504 67.3 MiB 0.13 67.3 MiB 0.10 844 8.61224 844 8.61224 475 1406 401495 170476 142676 78026.2 975514. 
4877.57 10 30528 128848 -1 3.15206 3.15206 -50.6277 -3.15206 0 0 0.25 -1 -1 67.3 MiB 0.07 0.0120498 0.0107952 28.4 MiB -1 0.02 + 3d_k4_N4_90nm.xml s820.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none_--allow_unrelated_clustering_on 0.55 vpr 67.40 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 34 19 -1 -1 success v8.0.0-13874-g7a4092251b-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-09-04T17:49:03 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 69020 19 19 136 0 1 99 72 10 10 200 -1 FPGA3D -1 -1 879.713 754 817 82 480 255 67.4 MiB 0.12 0.00 3.372 3.00456 -44.8378 -3.00456 3.00456 0.00 0.000258055 0.000216319 0.00266788 0.0023755 67.4 MiB 0.12 67.4 MiB 0.10 843 8.60204 843 8.60204 462 1338 564593 271588 142676 75796.9 975514. 4877.57 11 30528 128848 -1 3.1757 3.1757 -50.7348 -3.1757 0 0 0.26 -1 -1 67.4 MiB 0.11 0.0132497 0.011807 28.5 MiB -1 0.02 + 3d_k4_N4_90nm.xml s820.blif common_--ap_partial_legalizer_none_--allow_unrelated_clustering_on 0.55 vpr 67.33 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 34 19 -1 -1 success v8.0.0-13874-g7a4092251b-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-09-04T17:49:03 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 68948 19 19 136 0 1 99 72 10 10 200 -1 FPGA3D -1 -1 879.713 754 817 82 480 255 67.3 MiB 0.13 0.00 3.372 3.00456 -44.8378 -3.00456 3.00456 0.00 0.000266531 0.000224127 0.00276856 0.00247158 67.3 MiB 0.13 67.3 MiB 0.10 843 8.60204 843 8.60204 462 1338 564593 271588 142676 75796.9 975514. 
4877.57 11 30528 128848 -1 3.1757 3.1757 -50.7348 -3.1757 0 0 0.26 -1 -1 67.3 MiB 0.11 0.0132202 0.0118082 28.4 MiB -1 0.02 + 3d_k4_N4_90nm.xml s838.1.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none 0.48 vpr 67.44 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 31 35 -1 -1 success v8.0.0-13874-g7a4092251b-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-09-04T17:49:03 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 69060 35 1 129 0 1 108 67 10 10 200 -1 FPGA3D -1 -1 829.372 592 1699 155 1001 543 67.4 MiB 0.13 0.00 5.56099 4.9431 -98.8807 -4.9431 4.9431 0.00 0.000261732 0.000224686 0.004759 0.00420488 67.4 MiB 0.13 67.4 MiB 0.09 647 6.04673 647 6.04673 420 831 175142 62634 142676 69108.9 975514. 4877.57 7 30528 128848 -1 5.41618 5.41618 -101.368 -5.41618 0 0 0.26 -1 -1 67.4 MiB 0.03 0.0126339 0.0113223 28.5 MiB -1 0.02 + 3d_k4_N4_90nm.xml s838.1.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none_--allow_unrelated_clustering_on 0.64 vpr 67.38 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 30 35 -1 -1 success v8.0.0-13874-g7a4092251b-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-09-04T17:49:03 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 68992 35 1 129 0 1 108 66 10 10 200 -1 FPGA3D -1 -1 831.372 588 1396 138 801 457 67.4 MiB 0.19 0.00 5.56099 4.96986 -98.2942 -4.96986 4.96986 0.00 0.000267303 0.00023176 0.00596728 0.00528898 67.4 MiB 0.19 67.4 MiB 0.14 635 5.93458 635 5.93458 433 911 201419 73665 142676 66879.6 975514. 
4877.57 13 30528 128848 -1 5.04154 5.04154 -98.9333 -5.04154 0 0 0.32 -1 -1 67.4 MiB 0.05 0.0182751 0.0162105 28.4 MiB -1 0.03 + 3d_k4_N4_90nm.xml s838.1.blif common_--ap_partial_legalizer_none_--allow_unrelated_clustering_on 0.67 vpr 67.69 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 30 35 -1 -1 success v8.0.0-13874-g7a4092251b-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-09-04T17:49:03 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 69316 35 1 129 0 1 108 66 10 10 200 -1 FPGA3D -1 -1 831.372 588 1396 138 801 457 67.7 MiB 0.19 0.00 5.56099 4.96986 -98.2942 -4.96986 4.96986 0.00 0.000654946 0.000576059 0.00507022 0.00445766 67.7 MiB 0.19 67.7 MiB 0.14 635 5.93458 635 5.93458 433 911 201419 73665 142676 66879.6 975514. 4877.57 13 30528 128848 -1 5.04154 5.04154 -98.9333 -5.04154 0 0 0.34 -1 -1 67.7 MiB 0.07 0.0209473 0.0186984 28.6 MiB -1 0.03 + 3d_k4_N4_90nm.xml bw.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none 0.76 vpr 67.24 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 37 5 -1 -1 success v8.0.0-13874-g7a4092251b-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-09-04T17:49:03 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 68856 5 28 137 0 0 95 70 10 10 200 -1 FPGA3D -1 -1 811.663 751 358 32 170 156 67.2 MiB 0.18 0.00 3.83992 3.52385 -67.1587 -3.52385 nan 0.00 0.000485806 0.000417259 0.00335419 0.00304458 67.2 MiB 0.18 67.2 MiB 0.13 791 8.32632 791 8.32632 422 999 445501 228266 142676 82484.8 975514. 
4877.57 8 30528 128848 -1 3.59016 nan -75.0344 -3.59016 0 0 0.33 -1 -1 67.2 MiB 0.18 0.0209877 0.0191361 28.5 MiB -1 0.03 + 3d_k4_N4_90nm.xml bw.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none_--allow_unrelated_clustering_on 0.77 vpr 67.46 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 36 5 -1 -1 success v8.0.0-13874-g7a4092251b-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-09-04T17:49:03 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 69076 5 28 137 0 0 95 69 10 10 200 -1 FPGA3D -1 -1 815.277 716 774 65 382 327 67.5 MiB 0.19 0.00 3.83992 2.81327 -62.7158 -2.81327 nan 0.00 0.000519246 0.000447366 0.0049669 0.00445837 67.5 MiB 0.19 67.5 MiB 0.14 771 8.11579 771 8.11579 396 963 453652 232363 142676 80255.5 975514. 4877.57 11 30528 128848 -1 3.21552 nan -71.8798 -3.21552 0 0 0.33 -1 -1 67.5 MiB 0.17 0.0245888 0.0222217 28.5 MiB -1 0.03 + 3d_k4_N4_90nm.xml bw.blif common_--ap_partial_legalizer_none_--allow_unrelated_clustering_on 0.81 vpr 67.19 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 36 5 -1 -1 success v8.0.0-13874-g7a4092251b-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-09-04T17:49:03 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 68800 5 28 137 0 0 95 69 10 10 200 -1 FPGA3D -1 -1 815.277 716 774 65 382 327 67.2 MiB 0.19 0.00 3.83992 2.81327 -62.7158 -2.81327 nan 0.00 0.000498947 0.000429189 0.00487588 0.00437872 67.2 MiB 0.19 67.2 MiB 0.14 771 8.11579 771 8.11579 396 963 453652 232363 142676 80255.5 975514. 
4877.57 11 30528 128848 -1 3.21552 nan -71.8798 -3.21552 0 0 0.34 -1 -1 67.2 MiB 0.17 0.0243808 0.0218322 28.5 MiB -1 0.03 + 3d_k4_N4_90nm.xml rd84.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none 0.77 vpr 67.52 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 48 8 -1 -1 success v8.0.0-13874-g7a4092251b-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-09-04T17:49:03 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 69136 8 4 165 0 0 122 60 10 10 200 -1 FPGA3D -1 -1 1159.54 968 1464 166 1059 239 67.5 MiB 0.22 0.00 4.49381 3.68783 -12.7612 -3.68783 nan 0.00 0.000569462 0.000491331 0.0104769 0.00928904 67.5 MiB 0.22 67.5 MiB 0.16 1146 9.39344 1146 9.39344 694 2071 441536 150803 142676 107007 975514. 4877.57 12 30528 128848 -1 4.14058 nan -13.8699 -4.14058 0 0 0.33 -1 -1 67.5 MiB 0.14 0.0334846 0.0301007 28.5 MiB -1 0.03 + 3d_k4_N4_90nm.xml rd84.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none_--allow_unrelated_clustering_on 0.79 vpr 67.28 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 48 8 -1 -1 success v8.0.0-13874-g7a4092251b-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-09-04T17:49:03 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 68896 8 4 165 0 0 122 60 10 10 200 -1 FPGA3D -1 -1 1159.54 968 1464 166 1059 239 67.3 MiB 0.22 0.00 4.49381 3.68783 -12.7612 -3.68783 nan 0.00 0.000657191 0.000564156 0.0101478 0.00896815 67.3 MiB 0.22 67.3 MiB 0.16 1146 9.39344 1146 9.39344 694 2071 441536 150803 142676 107007 975514. 
4877.57 12 30528 128848 -1 4.14058 nan -13.8699 -4.14058 0 0 0.34 -1 -1 67.3 MiB 0.15 0.0335371 0.0301413 28.4 MiB -1 0.03 + 3d_k4_N4_90nm.xml rd84.blif common_--ap_partial_legalizer_none_--allow_unrelated_clustering_on 0.77 vpr 67.39 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 48 8 -1 -1 success v8.0.0-13874-g7a4092251b-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-09-04T17:49:03 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 69008 8 4 165 0 0 122 60 10 10 200 -1 FPGA3D -1 -1 1159.54 968 1464 166 1059 239 67.4 MiB 0.22 0.00 4.49381 3.68783 -12.7612 -3.68783 nan 0.00 0.00065527 0.00056949 0.0106895 0.00942782 67.4 MiB 0.22 67.4 MiB 0.16 1146 9.39344 1146 9.39344 694 2071 441536 150803 142676 107007 975514. 4877.57 12 30528 128848 -1 4.14058 nan -13.8699 -4.14058 0 0 0.32 -1 -1 67.4 MiB 0.14 0.0333618 0.0299277 28.5 MiB -1 0.03 + 3d_k4_N4_90nm.xml s832.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none 0.79 vpr 67.52 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 40 19 -1 -1 success v8.0.0-13874-g7a4092251b-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-09-04T17:49:03 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 69136 19 19 161 0 1 113 78 10 10 200 -1 FPGA3D -1 -1 1035.88 805 1904 218 1090 596 67.5 MiB 0.21 0.00 3.56182 3.05406 -45.7438 -3.05406 3.05406 0.00 0.000540229 0.000460706 0.00851952 0.00752026 67.5 MiB 0.21 67.5 MiB 0.15 886 7.91071 886 7.91071 534 1697 391552 158224 142676 89172.8 975514. 
4877.57 12 30528 128848 -1 3.2145 3.2145 -50.1138 -3.2145 0 0 0.35 -1 -1 67.5 MiB 0.14 0.0297952 0.0267563 28.5 MiB -1 0.03 + 3d_k4_N4_90nm.xml s832.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none_--allow_unrelated_clustering_on 0.84 vpr 67.37 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 39 19 -1 -1 success v8.0.0-13874-g7a4092251b-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-09-04T17:49:03 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 68984 19 19 161 0 1 113 77 10 10 200 -1 FPGA3D -1 -1 1033.88 826 1544 157 892 495 67.4 MiB 0.21 0.00 3.56182 3.21504 -46.5131 -3.21504 3.21504 0.00 0.000633393 0.000547443 0.00830199 0.00736219 67.4 MiB 0.21 67.4 MiB 0.15 965 8.61607 965 8.61607 548 1651 574859 233525 142676 86943.5 975514. 4877.57 10 30528 128848 -1 3.27694 3.27694 -52.9059 -3.27694 0 0 0.35 -1 -1 67.4 MiB 0.20 0.0281771 0.0253975 28.5 MiB -1 0.03 + 3d_k4_N4_90nm.xml s832.blif common_--ap_partial_legalizer_none_--allow_unrelated_clustering_on 0.80 vpr 67.64 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 39 19 -1 -1 success v8.0.0-13874-g7a4092251b-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-09-04T17:49:03 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 69264 19 19 161 0 1 113 77 10 10 200 -1 FPGA3D -1 -1 1033.88 826 1544 157 892 495 67.6 MiB 0.21 0.00 3.56182 3.21504 -46.5131 -3.21504 3.21504 0.00 0.000543619 0.000469028 0.00743386 0.00660528 67.6 MiB 0.21 67.6 MiB 0.15 965 8.61607 965 8.61607 548 1651 574859 233525 142676 86943.5 975514. 
4877.57 10 30528 128848 -1 3.27694 3.27694 -52.9059 -3.27694 0 0 0.32 -1 -1 67.6 MiB 0.19 0.0275229 0.0247701 28.8 MiB -1 0.03 + 3d_k4_N4_90nm.xml mm9a.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none 0.74 vpr 67.52 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 43 13 -1 -1 success v8.0.0-13874-g7a4092251b-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-09-04T17:49:03 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 69136 13 9 165 20 1 117 65 10 10 200 -1 FPGA3D -1 -1 1076.73 834 1625 115 999 511 67.5 MiB 0.23 0.00 10.4399 8.66873 -158.897 -8.66873 8.66873 0.00 0.000648464 0.000559001 0.0109084 0.00967592 67.5 MiB 0.23 67.5 MiB 0.16 912 7.86207 912 7.86207 520 1534 213386 77023 142676 95860.8 975514. 4877.57 10 30528 128848 -1 9.39142 9.39142 -165.062 -9.39142 0 0 0.32 -1 -1 67.5 MiB 0.09 0.0336913 0.0303441 28.5 MiB -1 0.03 + 3d_k4_N4_90nm.xml mm9a.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none_--allow_unrelated_clustering_on 0.76 vpr 67.65 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 43 13 -1 -1 success v8.0.0-13874-g7a4092251b-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-09-04T17:49:03 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 69272 13 9 165 20 1 117 65 10 10 200 -1 FPGA3D -1 -1 1076.73 834 1625 115 999 511 67.6 MiB 0.23 0.00 10.4399 8.66873 -158.897 -8.66873 8.66873 0.00 0.000651696 0.000565793 0.0112284 0.00994744 67.6 MiB 0.23 67.6 MiB 0.16 912 7.86207 912 7.86207 520 1534 213386 77023 142676 95860.8 975514. 
4877.57 10 30528 128848 -1 9.39142 9.39142 -165.062 -9.39142 0 0 0.34 -1 -1 67.6 MiB 0.09 0.0341363 0.0307423 28.8 MiB -1 0.03 + 3d_k4_N4_90nm.xml mm9a.blif common_--ap_partial_legalizer_none_--allow_unrelated_clustering_on 0.75 vpr 67.40 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 43 13 -1 -1 success v8.0.0-13874-g7a4092251b-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-09-04T17:49:03 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 69020 13 9 165 20 1 117 65 10 10 200 -1 FPGA3D -1 -1 1118.26 813 1755 118 1106 531 67.4 MiB 0.24 0.00 10.6921 8.6069 -159.896 -8.6069 8.6069 0.00 0.000757211 0.000664655 0.0123339 0.0109456 67.4 MiB 0.24 67.4 MiB 0.15 908 7.82759 908 7.82759 574 1761 231097 87700 142676 95860.8 975514. 4877.57 15 30528 128848 -1 9.39142 9.39142 -168.371 -9.39142 0 0 0.32 -1 -1 67.4 MiB 0.10 0.0399335 0.0357478 28.5 MiB -1 0.03 + 3d_k4_N4_90nm.xml alu2.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none 0.79 vpr 67.32 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 59 10 -1 -1 success v8.0.0-13874-g7a4092251b-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-09-04T17:49:03 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 68936 10 6 207 0 0 147 75 10 10 200 -1 FPGA3D -1 -1 1462.31 1206 1813 181 1251 381 67.3 MiB 0.24 0.00 7.15091 6.33663 -20.2299 -6.33663 nan 0.00 0.00110478 0.000997696 0.0127198 0.0113753 67.3 MiB 0.24 67.3 MiB 0.17 1358 9.23810 1358 9.23810 787 2247 365299 124151 142676 131530 975514. 
4877.57 11 30528 128848 -1 6.64292 nan -21.9088 -6.64292 0 0 0.32 -1 -1 67.3 MiB 0.13 0.0403634 0.0365584 28.4 MiB -1 0.03 + 3d_k4_N4_90nm.xml alu2.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none_--allow_unrelated_clustering_on 0.82 vpr 67.69 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 58 10 -1 -1 success v8.0.0-13874-g7a4092251b-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-09-04T17:49:03 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 69312 10 6 207 0 0 147 74 10 10 200 -1 FPGA3D -1 -1 1464.8 1177 1779 184 1243 352 67.7 MiB 0.24 0.00 7.39668 6.29249 -21.2096 -6.29249 nan 0.00 0.000831938 0.000725917 0.0135877 0.0120921 67.7 MiB 0.24 67.7 MiB 0.17 1379 9.38095 1379 9.38095 817 2430 432630 150203 142676 129301 975514. 4877.57 14 30528 128848 -1 6.8716 nan -23.6985 -6.8716 0 0 0.33 -1 -1 67.7 MiB 0.16 0.0482147 0.0437696 28.8 MiB -1 0.03 + 3d_k4_N4_90nm.xml alu2.blif common_--ap_partial_legalizer_none_--allow_unrelated_clustering_on 0.83 vpr 67.45 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 58 10 -1 -1 success v8.0.0-13874-g7a4092251b-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-09-04T17:49:03 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 69064 10 6 207 0 0 147 74 10 10 200 -1 FPGA3D -1 -1 1464.8 1177 1779 184 1243 352 67.4 MiB 0.24 0.00 7.39668 6.29249 -21.2096 -6.29249 nan 0.00 0.000741889 0.000641315 0.0130648 0.0116154 67.4 MiB 0.24 67.4 MiB 0.17 1379 9.38095 1379 9.38095 817 2430 432630 150203 142676 129301 975514. 
4877.57 14 30528 128848 -1 6.8716 nan -23.6985 -6.8716 0 0 0.33 -1 -1 67.4 MiB 0.15 0.0430977 0.0387797 28.6 MiB -1 0.03 + 3d_k4_N4_90nm.xml x1.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none 0.94 vpr 67.17 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 40 51 -1 -1 success v8.0.0-13874-g7a4092251b-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-09-04T17:49:03 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 68780 51 35 187 0 0 146 126 10 10 200 -1 FPGA3D -1 -1 1431.65 1102 2646 284 1001 1361 67.2 MiB 0.21 0.00 3.43373 2.37137 -53.5604 -2.37137 nan 0.00 0.000641153 0.00055829 0.00791904 0.00708407 67.2 MiB 0.21 67.2 MiB 0.15 1245 8.52740 1245 8.52740 741 2178 977720 428367 142676 89172.8 975514. 4877.57 13 30528 128848 -1 2.85242 nan -62.0834 -2.85242 0 0 0.32 -1 -1 67.2 MiB 0.31 0.0311118 0.0279211 28.5 MiB -1 0.03 + 3d_k4_N4_90nm.xml x1.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none_--allow_unrelated_clustering_on 0.91 vpr 66.95 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 39 51 -1 -1 success v8.0.0-13874-g7a4092251b-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-09-04T17:49:03 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 68552 51 35 187 0 0 146 125 10 10 200 -1 FPGA3D -1 -1 1431.75 1138 3557 364 1478 1715 66.9 MiB 0.22 0.00 3.43373 2.34 -52.6482 -2.34 nan 0.00 0.00066412 0.000576648 0.00964263 0.00853702 66.9 MiB 0.22 66.9 MiB 0.15 1232 8.43836 1232 8.43836 757 2156 870465 396526 142676 86943.5 975514. 
4877.57 12 30528 128848 -1 2.78998 nan -62.3332 -2.78998 0 0 0.32 -1 -1 66.9 MiB 0.29 0.0313874 0.028147 28.4 MiB -1 0.03 + 3d_k4_N4_90nm.xml x1.blif common_--ap_partial_legalizer_none_--allow_unrelated_clustering_on 0.92 vpr 67.30 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 39 51 -1 -1 success v8.0.0-13874-g7a4092251b-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-09-04T17:49:03 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 68912 51 35 187 0 0 146 125 10 10 200 -1 FPGA3D -1 -1 1431.75 1138 3557 364 1478 1715 67.3 MiB 0.22 0.00 3.43373 2.34 -52.6482 -2.34 nan 0.00 0.000675893 0.000587686 0.00985462 0.00874785 67.3 MiB 0.22 67.3 MiB 0.15 1232 8.43836 1232 8.43836 757 2156 870465 396526 142676 86943.5 975514. 4877.57 12 30528 128848 -1 2.78998 nan -62.3332 -2.78998 0 0 0.32 -1 -1 67.3 MiB 0.29 0.0322044 0.0289236 28.6 MiB -1 0.03 + 3d_k4_N4_90nm.xml t481.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none 1.22 vpr 67.55 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 55 16 -1 -1 success v8.0.0-13874-g7a4092251b-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-09-04T17:49:03 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 69168 16 1 230 0 0 164 72 10 10 200 -1 FPGA3D -1 -1 1719.09 1423 1711 164 1284 263 67.5 MiB 0.42 0.00 5.26477 4.78086 -4.78086 -4.78086 nan 0.00 0.000725217 0.000622851 0.0134039 0.010933 67.5 MiB 0.42 67.5 MiB 0.34 1908 11.6341 1908 11.6341 1096 4201 1095477 388933 142676 122613 975514. 
4877.57 14 30528 128848 -1 5.1579 nan -5.1579 -5.1579 0 0 0.32 -1 -1 67.5 MiB 0.38 0.048473 0.0427232 28.6 MiB -1 0.03 + 3d_k4_N4_90nm.xml t481.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none_--allow_unrelated_clustering_on 1.15 vpr 67.45 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 55 16 -1 -1 success v8.0.0-13874-g7a4092251b-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-09-04T17:49:03 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 69064 16 1 230 0 0 164 72 10 10 200 -1 FPGA3D -1 -1 1719.09 1423 1711 164 1284 263 67.4 MiB 0.39 0.00 5.26477 4.78086 -4.78086 -4.78086 nan 0.00 0.000761928 0.000663587 0.0130892 0.0116305 67.4 MiB 0.39 67.4 MiB 0.31 1908 11.6341 1908 11.6341 1096 4201 1095477 388933 142676 122613 975514. 4877.57 14 30528 128848 -1 5.1579 nan -5.1579 -5.1579 0 0 0.32 -1 -1 67.4 MiB 0.35 0.0522662 0.0474837 28.5 MiB -1 0.03 + 3d_k4_N4_90nm.xml t481.blif common_--ap_partial_legalizer_none_--allow_unrelated_clustering_on 1.13 vpr 67.55 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 55 16 -1 -1 success v8.0.0-13874-g7a4092251b-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-09-04T17:49:03 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 69176 16 1 230 0 0 164 72 10 10 200 -1 FPGA3D -1 -1 1719.09 1423 1711 164 1284 263 67.6 MiB 0.38 0.00 5.26477 4.78086 -4.78086 -4.78086 nan 0.00 0.000864939 0.000758278 0.0143228 0.012737 67.6 MiB 0.38 67.6 MiB 0.31 1908 11.6341 1908 11.6341 1096 4201 1095477 388933 142676 122613 975514. 
4877.57 14 30528 128848 -1 5.1579 nan -5.1579 -5.1579 0 0 0.32 -1 -1 67.6 MiB 0.33 0.0476867 0.0428633 28.5 MiB -1 0.03 + 3d_k4_N4_90nm.xml mm9b.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none 1.03 vpr 67.39 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 64 13 -1 -1 success v8.0.0-13874-g7a4092251b-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-09-04T17:49:03 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 69012 13 9 229 0 1 170 86 10 10 200 -1 FPGA3D -1 -1 1838.97 1301 2732 263 2032 437 67.4 MiB 0.38 0.00 11.9172 10.0937 -200.226 -10.0937 10.0937 0.00 0.000985134 0.00085996 0.0176605 0.015696 67.4 MiB 0.38 67.4 MiB 0.29 1561 9.23669 1561 9.23669 945 3035 598090 221479 142676 142676 975514. 4877.57 11 30528 128848 -1 10.6349 10.6349 -208.117 -10.6349 0 0 0.33 -1 -1 67.4 MiB 0.20 0.0493917 0.0445339 28.5 MiB -1 0.03 + 3d_k4_N4_90nm.xml mm9b.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none_--allow_unrelated_clustering_on 0.91 vpr 67.43 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 64 13 -1 -1 success v8.0.0-13874-g7a4092251b-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-09-04T17:49:03 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 69044 13 9 229 0 1 170 86 10 10 200 -1 FPGA3D -1 -1 1838.97 1301 2732 263 2032 437 67.4 MiB 0.29 0.00 11.9172 10.0937 -200.226 -10.0937 10.0937 0.00 0.00099508 0.00086712 0.018114 0.0160967 67.4 MiB 0.29 67.4 MiB 0.19 1561 9.23669 1561 9.23669 945 3035 598090 221479 142676 142676 975514. 
4877.57 11 30528 128848 -1 10.6349 10.6349 -208.117 -10.6349 0 0 0.33 -1 -1 67.4 MiB 0.20 0.0497734 0.0448781 28.4 MiB -1 0.03 + 3d_k4_N4_90nm.xml mm9b.blif common_--ap_partial_legalizer_none_--allow_unrelated_clustering_on 0.91 vpr 67.40 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 64 13 -1 -1 success v8.0.0-13874-g7a4092251b-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-09-04T17:49:03 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 69020 13 9 229 0 1 170 86 10 10 200 -1 FPGA3D -1 -1 1838.97 1301 2732 263 2032 437 67.4 MiB 0.29 0.00 11.9172 10.0937 -200.226 -10.0937 10.0937 0.00 0.00101423 0.000888511 0.017951 0.0159986 67.4 MiB 0.29 67.4 MiB 0.20 1561 9.23669 1561 9.23669 945 3035 598090 221479 142676 142676 975514. 4877.57 11 30528 128848 -1 10.6349 10.6349 -208.117 -10.6349 0 0 0.32 -1 -1 67.4 MiB 0.20 0.0497906 0.0450357 28.4 MiB -1 0.03 + 3d_k4_N4_90nm.xml styr.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none 1.00 vpr 67.65 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 60 10 -1 -1 success v8.0.0-13874-g7a4092251b-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-09-04T17:49:03 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 69276 10 10 247 0 1 155 80 10 10 200 -1 FPGA3D -1 -1 1546.92 1190 2316 309 1659 348 67.7 MiB 0.36 0.00 3.91624 3.0935 -39.105 -3.0935 3.0935 0.00 0.000814645 0.000689878 0.015677 0.0138138 67.7 MiB 0.36 67.7 MiB 0.28 1571 10.2013 1571 10.2013 766 2600 607719 233715 142676 133759 975514. 
4877.57 13 30528 128848 -1 3.38176 3.38176 -42.7981 -3.38176 0 0 0.32 -1 -1 67.7 MiB 0.22 0.0503481 0.0451644 28.8 MiB -1 0.03 + 3d_k4_N4_90nm.xml styr.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none_--allow_unrelated_clustering_on 1.02 vpr 67.53 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 60 10 -1 -1 success v8.0.0-13874-g7a4092251b-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-09-04T17:49:03 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 69148 10 10 247 0 1 155 80 10 10 200 -1 FPGA3D -1 -1 1546.92 1190 2316 309 1659 348 67.5 MiB 0.37 0.00 3.91624 3.0935 -39.105 -3.0935 3.0935 0.00 0.000909059 0.000777363 0.01567 0.0138127 67.5 MiB 0.37 67.5 MiB 0.29 1571 10.2013 1571 10.2013 766 2600 607719 233715 142676 133759 975514. 4877.57 13 30528 128848 -1 3.38176 3.38176 -42.7981 -3.38176 0 0 0.33 -1 -1 67.5 MiB 0.23 0.0500646 0.0448722 28.5 MiB -1 0.03 + 3d_k4_N4_90nm.xml styr.blif common_--ap_partial_legalizer_none_--allow_unrelated_clustering_on 1.05 vpr 67.53 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 60 10 -1 -1 success v8.0.0-13874-g7a4092251b-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-09-04T17:49:03 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 69152 10 10 247 0 1 155 80 10 10 200 -1 FPGA3D -1 -1 1546.92 1190 2316 309 1659 348 67.5 MiB 0.38 0.00 3.91624 3.0935 -39.105 -3.0935 3.0935 0.00 0.000799016 0.000681231 0.0146814 0.0129403 67.5 MiB 0.38 67.5 MiB 0.29 1571 10.2013 1571 10.2013 766 2600 607719 233715 142676 133759 975514. 
4877.57 13 30528 128848 -1 3.38176 3.38176 -42.7981 -3.38176 0 0 0.35 -1 -1 67.5 MiB 0.22 0.0495672 0.0445067 28.6 MiB -1 0.03 + 3d_k4_N4_90nm.xml s953.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none 0.99 vpr 67.16 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 63 17 -1 -1 success v8.0.0-13874-g7a4092251b-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-09-04T17:49:03 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 68776 17 23 231 0 1 178 103 10 10 200 -1 FPGA3D -1 -1 1847 1427 3477 390 2178 909 67.2 MiB 0.28 0.00 4.10356 3.10287 -80.5632 -3.10287 3.10287 0.00 0.00095887 0.000823436 0.0188062 0.016582 67.2 MiB 0.28 67.2 MiB 0.18 1761 9.94915 1761 9.94915 964 3371 900421 327204 142676 140447 975514. 4877.57 13 30528 128848 -1 3.27694 3.27694 -87.685 -3.27694 0 0 0.33 -1 -1 67.2 MiB 0.28 0.0537744 0.0482236 28.3 MiB -1 0.03 + 3d_k4_N4_90nm.xml s953.blif common_--ap_analytical_solver_identity_--ap_partial_legalizer_none_--allow_unrelated_clustering_on 1.03 vpr 67.66 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 63 17 -1 -1 success v8.0.0-13874-g7a4092251b-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-09-04T17:49:03 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 69288 17 23 231 0 1 178 103 10 10 200 -1 FPGA3D -1 -1 1842.29 1437 3477 463 2076 938 67.7 MiB 0.28 0.00 4.10356 3.2145 -80.7092 -3.2145 3.2145 0.00 0.000963823 0.000836172 0.0171937 0.0151427 67.7 MiB 0.28 67.7 MiB 0.19 1837 10.3785 1837 10.3785 953 3269 974047 381889 142676 140447 975514. 
4877.57 13 30528 128848 -1 3.41672 3.41672 -87.9469 -3.41672 0 0 0.33 -1 -1 67.7 MiB 0.32 0.0521802 0.0467707 28.8 MiB -1 0.03 + 3d_k4_N4_90nm.xml s953.blif common_--ap_partial_legalizer_none_--allow_unrelated_clustering_on 1.07 vpr 67.17 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 63 17 -1 -1 success v8.0.0-13874-g7a4092251b-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-09-04T17:49:03 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap 68780 17 23 231 0 1 178 103 10 10 200 -1 FPGA3D -1 -1 1842.29 1437 3477 463 2076 938 67.2 MiB 0.30 0.00 4.10356 3.2145 -80.7092 -3.2145 3.2145 0.00 0.00083829 0.000713563 0.0184137 0.0163433 67.2 MiB 0.30 67.2 MiB 0.19 1837 10.3785 1837 10.3785 953 3269 974047 381889 142676 140447 975514. 4877.57 13 30528 128848 -1 3.41672 3.41672 -87.9469 -3.41672 0 0 0.33 -1 -1 67.2 MiB 0.34 0.0531794 0.0477529 28.3 MiB -1 0.03 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap/constraints/mm9a_io_constraint.xml b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap/constraints/mm9a_io_constraint.xml new file mode 100644 index 00000000000..69caad14394 --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap/constraints/mm9a_io_constraint.xml @@ -0,0 +1,70 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From 743a02cca72a15c076907779d80982532ce4c66a Mon Sep 17 00:00:00 2001 From: AlexandreSinger Date: Fri, 19 Dec 2025 10:46:56 -0500 Subject: [PATCH 2/9] [AP] Updated Solver Docs and Fixed Typo --- vpr/src/analytical_place/analytical_solver.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vpr/src/analytical_place/analytical_solver.h b/vpr/src/analytical_place/analytical_solver.h index d713ca26066..179d3072f6b 100644 --- 
a/vpr/src/analytical_place/analytical_solver.h +++ b/vpr/src/analytical_place/analytical_solver.h @@ -721,7 +721,7 @@ class B2BSolver : public AnalyticalSolver { * placement object for the given dimension. * * Note: The dim_soln may be modified if it is found that the solution is - * imposible (e.g. has negative positions). + * impossible (e.g. has negative positions). * * @param dim_soln * The solution of the linear system for a given dimension. @@ -770,6 +770,7 @@ class B2BSolver : public AnalyticalSolver { // for the next call to solve. vtr::vector block_x_locs_solved; vtr::vector block_y_locs_solved; + // NOTE: For speed, this vector is unused if a device has only one die. vtr::vector block_z_locs_solved; // The following are the legalized solution coming into the analytical solver @@ -777,6 +778,7 @@ class B2BSolver : public AnalyticalSolver { // blocks during the solver. vtr::vector block_x_locs_legalized; vtr::vector block_y_locs_legalized; + // NOTE: For speed, this vector is unused if a device has only one die. 
vtr::vector block_z_locs_legalized; /// @brief The total number of CG iterations that this solver has performed From a7511e7883b503703def772538d89279f9b7d79f Mon Sep 17 00:00:00 2001 From: AlexandreSinger Date: Fri, 19 Dec 2025 10:49:06 -0500 Subject: [PATCH 3/9] Make format --- vpr/src/analytical_place/analytical_solver.cpp | 10 +++++----- vpr/src/analytical_place/analytical_solver.h | 10 +++++----- vpr/src/place/initial_placement.cpp | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/vpr/src/analytical_place/analytical_solver.cpp b/vpr/src/analytical_place/analytical_solver.cpp index 29dc14c6990..fbc70172e15 100644 --- a/vpr/src/analytical_place/analytical_solver.cpp +++ b/vpr/src/analytical_place/analytical_solver.cpp @@ -800,9 +800,9 @@ void B2BSolver::b2b_solve_loop(unsigned iteration, PartialPlacement& p_placement } } -Eigen::VectorXd B2BSolver::solve_linear_system(Eigen::SparseMatrix &A, - Eigen::VectorXd &b, - Eigen::VectorXd &guess) { +Eigen::VectorXd B2BSolver::solve_linear_system(Eigen::SparseMatrix& A, + Eigen::VectorXd& b, + Eigen::VectorXd& guess) { // Set up the system of equation solver. 
Eigen::ConjugateGradient, Eigen::Lower | Eigen::Upper> cg; cg.compute(A); @@ -1330,8 +1330,8 @@ void B2BSolver::update_linear_system_with_anchors(unsigned iteration) { } } -void B2BSolver::store_solution_into_placement(Eigen::VectorXd &dim_soln, - vtr::vector &block_dim_locs, +void B2BSolver::store_solution_into_placement(Eigen::VectorXd& dim_soln, + vtr::vector& block_dim_locs, double dim_max_pos) { for (size_t row_id_idx = 0; row_id_idx < num_moveable_blocks_; row_id_idx++) { // Since we are capping the number of iterations, the solver may not diff --git a/vpr/src/analytical_place/analytical_solver.h b/vpr/src/analytical_place/analytical_solver.h index 179d3072f6b..7244dd43d7c 100644 --- a/vpr/src/analytical_place/analytical_solver.h +++ b/vpr/src/analytical_place/analytical_solver.h @@ -712,9 +712,9 @@ class B2BSolver : public AnalyticalSolver { * @brief Solves the linear system of equations using the connectivity * matrix (A), the constant vector (b), and a guess for the solution. */ - Eigen::VectorXd solve_linear_system(Eigen::SparseMatrix &A, - Eigen::VectorXd &b, - Eigen::VectorXd &guess); + Eigen::VectorXd solve_linear_system(Eigen::SparseMatrix& A, + Eigen::VectorXd& b, + Eigen::VectorXd& guess); /** * @brief Store the solutions from the linear system into the partial @@ -732,8 +732,8 @@ class B2BSolver : public AnalyticalSolver { * x-dimension, this would be the width of the device. This is used to * ensure that the positions do not go off device. 
*/ - void store_solution_into_placement(Eigen::VectorXd &dim_soln, - vtr::vector &block_dim_locs, + void store_solution_into_placement(Eigen::VectorXd& dim_soln, + vtr::vector& block_dim_locs, double dim_max_pos); /** diff --git a/vpr/src/place/initial_placement.cpp b/vpr/src/place/initial_placement.cpp index 542c4346c06..c41c27d1912 100644 --- a/vpr/src/place/initial_placement.cpp +++ b/vpr/src/place/initial_placement.cpp @@ -661,7 +661,7 @@ static t_flat_pl_loc find_centroid_loc_from_flat_placement(const t_pl_macro& pl_ float proj_x = std::clamp(centroid.x, rect.xmin(), rect.xmax() + 0.999); float proj_y = std::clamp(centroid.y, rect.ymin(), rect.ymax() + 0.999); float proj_layer = std::clamp(centroid.layer, region.get_layer_range().first, - region.get_layer_range().second + 0.999); + region.get_layer_range().second + 0.999); float dx = std::abs(proj_x - centroid.x); float dy = std::abs(proj_y - centroid.y); float dlayer = std::abs(proj_layer - centroid.layer); From 045c4e51d101b53c55fb2cc41cfb3403fe72a7af Mon Sep 17 00:00:00 2001 From: AlexandreSinger Date: Fri, 19 Dec 2025 15:23:59 -0500 Subject: [PATCH 4/9] [Place] Separated Out the Channel Cost Factor Code The channel cost factor code is useful outside of the net cost handler code. Separated the code out into its own class so it can be used in AP. 
--- vpr/src/place/chan_cost_handler.cpp | 90 +++++++++++++++++++ vpr/src/place/chan_cost_handler.h | 124 +++++++++++++++++++++++++++ vpr/src/place/net_cost_handler.cpp | 128 +++------------------------- vpr/src/place/net_cost_handler.h | 77 +---------------- 4 files changed, 229 insertions(+), 190 deletions(-) create mode 100644 vpr/src/place/chan_cost_handler.cpp create mode 100644 vpr/src/place/chan_cost_handler.h diff --git a/vpr/src/place/chan_cost_handler.cpp b/vpr/src/place/chan_cost_handler.cpp new file mode 100644 index 00000000000..68edd5d3e4e --- /dev/null +++ b/vpr/src/place/chan_cost_handler.cpp @@ -0,0 +1,90 @@ +/** + * @file + * @author Alex Singer + * @date December 2025 + * @brief The definition of the channel cost handler class. + */ + +#include "chan_cost_handler.h" +#include "rr_graph_view.h" + +ChanCostHandler::ChanCostHandler(const std::vector& rr_chanx_width, + const std::vector& rr_chany_width, + const RRGraphView& rr_graph, + const DeviceGrid& grid) { + + // These arrays contain accumulative channel width between channel zero and + // the channel specified by the given index. The accumulated channel width + // is inclusive, meaning that it includes both channel zero and channel `idx`. + // To compute the total channel width between channels 'low' and 'high', use the + // following formula: + // acc_chan?_width_[high] - acc_chan?_width_[low - 1] + // This returns the total number of tracks between channels 'low' and 'high', + // including tracks in these channels. + acc_chanx_width_ = vtr::PrefixSum1D(grid.height(), [&](size_t y) noexcept { + int chan_x_width = rr_chanx_width[y]; + + // If the number of tracks in a channel is zero, two consecutive elements take the same + // value. This can lead to a division by zero in get_chanxy_cost_fac_(). To avoid this + // potential issue, we assume that the channel width is at least 1. 
+ if (chan_x_width == 0) { + return 1; + } + + return chan_x_width; + }); + + acc_chany_width_ = vtr::PrefixSum1D(grid.width(), [&](size_t x) noexcept { + int chan_y_width = rr_chany_width[x]; + + // to avoid a division by zero + if (chan_y_width == 0) { + return 1; + } + + return chan_y_width; + }); + + // If this is a multi-layer (3D) architecture, compute Z-channel cost term. + if (grid.get_num_layers() > 1) { + vtr::NdMatrix tile_num_inter_die_conn({grid.width(), grid.height()}, 0.); + + /* + * Step 1: iterate over the rr-graph, recording how many edges go between layers at each (x,y) location + * in the device. We count all these edges, regardless of which layers they connect. Then we divide by + * the number of layers - 1 to get the average cross-layer edge count per (x,y) location -- this mirrors + * what we do for the horizontal and vertical channels where we assume the channel width doesn't change + * along the length of the channel. It lets us be more memory-efficient for 3D devices, and could be revisited + * if someday we have architectures with widely varying connectivity between different layers in a stack. + */ + + /* To calculate the accumulative number of inter-die connections we first need to get the number of + * inter-die connection per location. To be able to work for the cases that RR Graph is read instead + * of being made from the architecture file, we calculate this number by iterating over the RR graph. Once + * tile_num_inter_die_conn is populated, we can start populating acc_tile_num_inter_die_conn_. 
+ */ + + for (const RRNodeId node : rr_graph.nodes()) { + if (rr_graph.node_type(node) == e_rr_type::CHANZ) { + int x = rr_graph.node_xlow(node); + int y = rr_graph.node_ylow(node); + VTR_ASSERT_SAFE(x == rr_graph.node_xhigh(node) && y == rr_graph.node_yhigh(node)); + tile_num_inter_die_conn[x][y]++; + } + } + + int num_layers = grid.get_num_layers(); + for (size_t x = 0; x < grid.width(); x++) { + for (size_t y = 0; y < grid.height(); y++) { + tile_num_inter_die_conn[x][y] /= (num_layers - 1); + } + } + + // Step 2: Calculate prefix sum of the inter-die connectivity up to and including the channel at (x, y). + acc_tile_num_inter_die_conn_ = vtr::PrefixSum2D(grid.width(), + grid.height(), + [&](size_t x, size_t y) { + return (int)tile_num_inter_die_conn[x][y]; + }); + } +} diff --git a/vpr/src/place/chan_cost_handler.h b/vpr/src/place/chan_cost_handler.h new file mode 100644 index 00000000000..2755015f5a6 --- /dev/null +++ b/vpr/src/place/chan_cost_handler.h @@ -0,0 +1,124 @@ +#pragma once +/** + * @file + * @author Alex Singer + * @date December 2025 + * @brief Declaration of the channel cost handler class. + */ + +#include "vpr_types.h" +#include "vtr_prefix_sum.h" + +// Forward declarations. +class DeviceGrid; +class RRGraphView; + +/** + * @brief Manager class for computing the cost factors for channels in different + * dimensions. + */ +class ChanCostHandler { + public: + ChanCostHandler() = delete; + + /** + * @brief Constructor for the ChanCostHandler class. + * + * This will pre-compute prefix sum data structures which will make getting + * the x, y, and z chan cost factors more efficient. + * + * @param rr_chanx_width + * The horizontal channel width distribution across the device grid. + * @param rr_chany_width + * The vertical channel width distribution across the device grid. + * @param rr_graph + * The Routing Resource Graph of the device. + * @param grid + * The device grid. 
+ */ + ChanCostHandler(const std::vector& rr_chanx_width, + const std::vector& rr_chany_width, + const RRGraphView& rr_graph, + const DeviceGrid& grid); + + /** + * @brief Computes the inverse of average channel width for horizontal + * channels within a bounding box. + * + * @tparam BBT This can be either t_bb or t_2D_bb. + * @param bb The bounding box for which the inverse of average channel width + * within the bounding box is computed. + * @return The inverse of average channel width for horizontal channels. + */ + template + inline double get_chanx_cost_fac(const BBT& bb) const { + int total_chanx_width = acc_chanx_width_.get_sum(bb.ymin, bb.ymax); + double inverse_average_chanx_width = (bb.ymax - bb.ymin + 1.0) / total_chanx_width; + return inverse_average_chanx_width; + } + + /** + * @brief Computes the inverse of average channel width for vertical + * channels within a bounding box. + * + * @tparam BBT This can be either t_bb or t_2D_bb. + * @param bb The bounding box for which the inverse of average channel width + * within the bounding box is computed. + * @return The inverse of average channel width for vertical channels. + */ + template + inline double get_chany_cost_fac(const BBT& bb) const { + int total_chany_width = acc_chany_width_.get_sum(bb.xmin, bb.xmax); + double inverse_average_chany_width = (bb.xmax - bb.xmin + 1.0) / total_chany_width; + return inverse_average_chany_width; + } + + /** + * @brief Calculate the chanz cost factor based on the inverse of the + * average number of inter-die connections in the given bounding box. + * + * This cost factor increases the placement cost for blocks that require + * inter-layer connections in areas with, on average, fewer inter-die + * connections. If inter-die connections are evenly distributed across + * tiles, the cost factor will be the same for all bounding boxes, but it + * will still weight z-directed vs. x- and y-directed connections appropriately. 
+ * + * @param bb Bounding box of the net which chanz cost factor is to be calculated + * @return ChanZ cost factor + */ + inline double get_chanz_cost_fac(const t_bb& bb) const { + int num_inter_dir_conn = acc_tile_num_inter_die_conn_.get_sum(bb.xmin, + bb.ymin, + bb.xmax, + bb.ymax); + + if (num_inter_dir_conn == 0) + return 1.0; + + int bb_num_tiles = (bb.xmax - bb.xmin + 1) * (bb.ymax - bb.ymin + 1); + return bb_num_tiles / static_cast(num_inter_dir_conn); + } + + private: + /** + * @brief Matrices below are used to precompute the inverse of the average + * number of tracks per channel between [subhigh] and [sublow]. Access + * them as chan?_place_cost_fac(subhigh, sublow). They are used to + * speed up the computation of the cost function that takes the length + * of the net bounding box in each dimension, divided by the average + * number of tracks in that direction; for other cost functions they + * will never be used. + */ + vtr::PrefixSum1D acc_chanx_width_; // [0..grid_height-1] + vtr::PrefixSum1D acc_chany_width_; // [0..grid_width-1] + + /** + * @brief The matrix below is used to calculate a chanz_place_cost_fac based on the average channel width in + * the cross-die-layer direction over a 2D (x,y) region. We don't assume the inter-die connectivity is the same at all (x,y) locations, so we + * can't compute the full chanz_place_cost_fac for all possible (xlow,ylow)(xhigh,yhigh) without a 4D array, which would + * be too big: O(n^2) in circuit size. Instead we compute a prefix sum that stores the number of inter-die connections per layer from + * (x=0,y=0) to (x,y). 
Given this, we can compute the average number of inter-die connections over a (xlow,ylow) to (xhigh,yhigh) + * region in O(1) (by adding and subtracting 4 entries) + */ + vtr::PrefixSum2D acc_tile_num_inter_die_conn_; // [0..grid_width-1][0..grid_height-1] +}; diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 012e4a8e281..227bd259436 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -90,7 +90,11 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts, bool cube_bb) : cube_bb_(cube_bb) , placer_state_(placer_state) - , placer_opts_(placer_opts) { + , placer_opts_(placer_opts) + , chan_cost_handler_(g_vpr_ctx.device().rr_chanx_width, + g_vpr_ctx.device().rr_chany_width, + g_vpr_ctx.device().rr_graph, + g_vpr_ctx.device().grid) { const int num_layers = g_vpr_ctx.device().grid.get_num_layers(); const size_t num_nets = g_vpr_ctx.clustering().clb_nlist.nets().size(); @@ -133,99 +137,6 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts, * cost has been recomputed. proposed_net_cost[inet] < 0 means net's cost hasn't * been recomputed. */ bb_update_status_.resize(num_nets, NetUpdateState::NOT_UPDATED_YET); - - alloc_and_load_chan_w_factors_for_place_cost_(); -} - -void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_() { - const auto& device_ctx = g_vpr_ctx.device(); - - const size_t grid_height = device_ctx.grid.height(); - const size_t grid_width = device_ctx.grid.width(); - - // These arrays contain accumulative channel width between channel zero and - // the channel specified by the given index. The accumulated channel width - // is inclusive, meaning that it includes both channel zero and channel `idx`. 
- // To compute the total channel width between channels 'low' and 'high', use the - // following formula: - // acc_chan?_width_[high] - acc_chan?_width_[low - 1] - // This returns the total number of tracks between channels 'low' and 'high', - // including tracks in these channels. - acc_chanx_width_ = vtr::PrefixSum1D(grid_height, [&](size_t y) noexcept { - int chan_x_width = device_ctx.rr_chanx_width[y]; - - // If the number of tracks in a channel is zero, two consecutive elements take the same - // value. This can lead to a division by zero in get_chanxy_cost_fac_(). To avoid this - // potential issue, we assume that the channel width is at least 1. - if (chan_x_width == 0) { - return 1; - } - - return chan_x_width; - }); - - acc_chany_width_ = vtr::PrefixSum1D(grid_width, [&](size_t x) noexcept { - int chan_y_width = device_ctx.rr_chany_width[x]; - - // to avoid a division by zero - if (chan_y_width == 0) { - return 1; - } - - return chan_y_width; - }); - - if (is_multi_layer_) { - alloc_and_load_for_fast_vertical_cost_update_(); - } -} - -void NetCostHandler::alloc_and_load_for_fast_vertical_cost_update_() { - const auto& device_ctx = g_vpr_ctx.device(); - const auto& rr_graph = device_ctx.rr_graph; - - const size_t grid_height = device_ctx.grid.height(); - const size_t grid_width = device_ctx.grid.width(); - - vtr::NdMatrix tile_num_inter_die_conn({grid_width, grid_height}, 0.); - - /* - * Step 1: iterate over the rr-graph, recording how many edges go between layers at each (x,y) location - * in the device. We count all these edges, regardless of which layers they connect. Then we divide by - * the number of layers - 1 to get the average cross-layer edge count per (x,y) location -- this mirrors - * what we do for the horizontal and vertical channels where we assume the channel width doesn't change - * along the length of the channel. 
It lets us be more memory-efficient for 3D devices, and could be revisited - * if someday we have architectures with widely varying connectivity between different layers in a stack. - */ - - /* To calculate the accumulative number of inter-die connections we first need to get the number of - * inter-die connection per location. To be able to work for the cases that RR Graph is read instead - * of being made from the architecture file, we calculate this number by iterating over the RR graph. Once - * tile_num_inter_die_conn is populated, we can start populating acc_tile_num_inter_die_conn_. - */ - - for (const RRNodeId node : rr_graph.nodes()) { - if (rr_graph.node_type(node) == e_rr_type::CHANZ) { - int x = rr_graph.node_xlow(node); - int y = rr_graph.node_ylow(node); - VTR_ASSERT_SAFE(x == rr_graph.node_xhigh(node) && y == rr_graph.node_yhigh(node)); - tile_num_inter_die_conn[x][y]++; - } - } - - int num_layers = device_ctx.grid.get_num_layers(); - for (size_t x = 0; x < device_ctx.grid.width(); x++) { - for (size_t y = 0; y < device_ctx.grid.height(); y++) { - tile_num_inter_die_conn[x][y] /= (num_layers - 1); - } - } - - // Step 2: Calculate prefix sum of the inter-die connectivity up to and including the channel at (x, y). - acc_tile_num_inter_die_conn_ = vtr::PrefixSum2D(grid_width, - grid_height, - [&](size_t x, size_t y) { - return (int)tile_num_inter_die_conn[x][y]; - }); } std::pair NetCostHandler::comp_bb_cost(e_cost_methods method) const { @@ -1336,12 +1247,13 @@ double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) { * chan?_place_cost_fac_ objects can handle -1 indices internally. 
*/ - double ncost; - const auto [chanx_cost_fac, chany_cost_fac] = get_chanxy_cost_fac_(bb); - ncost = (bb.xmax - bb.xmin + 1) * chanx_cost_fac; + double chanx_cost_fac = chan_cost_handler_.get_chanx_cost_fac(bb); + double chany_cost_fac = chan_cost_handler_.get_chany_cost_fac(bb); + double ncost = (bb.xmax - bb.xmin + 1) * chanx_cost_fac; ncost += (bb.ymax - bb.ymin + 1) * chany_cost_fac; if (is_multi_layer_) { - ncost += (bb.layer_max - bb.layer_min) * get_chanz_cost_factor_(bb); + double chanz_cost_fac = chan_cost_handler_.get_chanz_cost_fac(bb); + ncost += (bb.layer_max - bb.layer_min) * chanz_cost_fac; } ncost *= crossing; @@ -1381,7 +1293,8 @@ double NetCostHandler::get_net_per_layer_bb_cost_(ClusterNetId net_id, bool use_ * chan?_place_cost_fac_ objects can handle -1 indices internally. */ - const auto [chanx_cost_fac, chany_cost_fac] = get_chanxy_cost_fac_(bb[layer_num]); + double chanx_cost_fac = chan_cost_handler_.get_chanx_cost_fac(bb[layer_num]); + double chany_cost_fac = chan_cost_handler_.get_chany_cost_fac(bb[layer_num]); ncost += (bb[layer_num].xmax - bb[layer_num].xmin + 1) * chanx_cost_fac; ncost += (bb[layer_num].ymax - bb[layer_num].ymin + 1) * chany_cost_fac; ncost *= crossing; @@ -1442,23 +1355,6 @@ double NetCostHandler::get_net_wirelength_from_layer_bb_(ClusterNetId net_id) co return ncost; } -float NetCostHandler::get_chanz_cost_factor_(const t_bb& bb) { - int num_inter_dir_conn = acc_tile_num_inter_die_conn_.get_sum(bb.xmin, - bb.ymin, - bb.xmax, - bb.ymax); - - float z_cost_factor; - if (num_inter_dir_conn == 0) { - return 1.0f; - } else { - int bb_num_tiles = (bb.xmax - bb.xmin + 1) * (bb.ymax - bb.ymin + 1); - z_cost_factor = bb_num_tiles / static_cast(num_inter_dir_conn); - } - - return z_cost_factor; -} - double NetCostHandler::recompute_bb_cost_() { double cost = 0; diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index a87abd108fc..c306378a445 100644 --- a/vpr/src/place/net_cost_handler.h +++ 
b/vpr/src/place/net_cost_handler.h @@ -5,10 +5,10 @@ * For more details on the overall algorithm, refer to the comment at the top of the net_cost_handler.cpp */ +#include "chan_cost_handler.h" #include "place_delay_model.h" #include "move_transactions.h" #include "place_util.h" -#include "vtr_prefix_sum.h" #include @@ -232,27 +232,8 @@ class NetCostHandler { vtr::vector proposed_net_cost_; vtr::vector bb_update_status_; - /** - * @brief Matrices below are used to precompute the inverse of the average - * number of tracks per channel between [subhigh] and [sublow]. Access - * them as chan?_place_cost_fac(subhigh, sublow). They are used to - * speed up the computation of the cost function that takes the length - * of the net bounding box in each dimension, divided by the average - * number of tracks in that direction; for other cost functions they - * will never be used. - */ - vtr::PrefixSum1D acc_chanx_width_; // [0..device_ctx.grid.width()-1] - vtr::PrefixSum1D acc_chany_width_; // [0..device_ctx.grid.height()-1] - - /** - * @brief The matrix below is used to calculate a chanz_place_cost_fac based on the average channel width in - * the cross-die-layer direction over a 2D (x,y) region. We don't assume the inter-die connectivity is the same at all (x,y) locations, so we - * can't compute the full chanz_place_cost_fac for all possible (xlow,ylow)(xhigh,yhigh) without a 4D array, which would - * be too big: O(n^2) in circuit size. Instead we compute a prefix sum that stores the number of inter-die connections per layer from - * (x=0,y=0) to (x,y). Given this, we can compute the average number of inter-die connections over a (xlow,ylow) to (xhigh,yhigh) - * region in O(1) (by adding and subtracting 4 entries) - */ - vtr::PrefixSum2D acc_tile_num_inter_die_conn_; // [0..grid_width-1][0..grid_height-1] + // @brief Handler object that manages the cost factors in the x, y, and z dimensions. 
+ ChanCostHandler chan_cost_handler_; private: /** @@ -291,25 +272,6 @@ class NetCostHandler { */ void set_bb_delta_cost_(double& bb_delta_c); - /** - * @brief Allocates and loads the chanx_place_cost_fac and chany_place_cost_fac arrays with the inverse of - * the average number of tracks per channel between [subhigh] and [sublow]. - * - * @details This is only useful for the cost function that takes the length of the net bounding box in each - * dimension divided by the average number of tracks in that direction. For other cost functions, you don't - * have to bother calling this routine; when using the cost function described above, however, you must always - * call this routine before you do any placement cost determination. - */ - void alloc_and_load_chan_w_factors_for_place_cost_(); - - /** - * @brief Allocates and loads acc_tile_num_inter_die_conn_ which contains the accumulative number of inter-die - * connections. - * - * @details This is only useful for multi-die FPGAs. - */ - void alloc_and_load_for_fast_vertical_cost_update_(); - /** * @brief Calculate the new connection delay and timing cost of all the * sink pins affected by moving a specific pin to a new location. Also @@ -544,39 +506,6 @@ class NetCostHandler { */ double get_net_per_layer_bb_cost_(ClusterNetId net_id, bool use_ts); - /** - * @brief Computes the inverse of average channel width for horizontal and - * vertical channels within a bounding box. - * @tparam BBT This can be either t_bb or t_2D_bb. - * @param bb The bounding box for which the inverse of average channel width - * within the bounding box is computed. - * @return std::pair - * first -> The inverse of average channel width for horizontal channels. - * second -> The inverse of average channel width for vertical channels. 
- */ - template - std::pair get_chanxy_cost_fac_(const BBT& bb) { - const int total_chanx_width = acc_chanx_width_.get_sum(bb.ymin, bb.ymax); - const double inverse_average_chanx_width = (bb.ymax - bb.ymin + 1.0) / total_chanx_width; - - const int total_chany_width = acc_chany_width_.get_sum(bb.xmin, bb.xmax); - const double inverse_average_chany_width = (bb.xmax - bb.xmin + 1.0) / total_chany_width; - - return {inverse_average_chanx_width, inverse_average_chany_width}; - } - - /** - * @brief Calculate the chanz cost factor based on the inverse of the average number of inter-die connections - * in the given bounding box. This cost factor increases the placement cost for blocks that require inter-layer - * connections in areas with, on average, fewer inter-die connections. If inter-die connections are evenly - * distributed across tiles, the cost factor will be the same for all bounding boxes, but it will still - * weight z-directed vs. x- and y-directed connections appropriately. - * - * @param bb Bounding box of the net which chanz cost factor is to be calculated - * @return ChanZ cost factor - */ - float get_chanz_cost_factor_(const t_bb& bb); - /** * @brief Given the 3D BB, calculate the wire-length estimate of the net * @param net_id ID of the net which wirelength estimate is requested From de9334e21a0ff5ef5819ad0858c2587f1de48ae3 Mon Sep 17 00:00:00 2001 From: AlexandreSinger Date: Fri, 19 Dec 2025 16:23:47 -0500 Subject: [PATCH 5/9] [AP] Added Per-Dimension Channel Cost Based on Routing Demand --- .../analytical_place/analytical_solver.cpp | 56 +++++++++++++------ vpr/src/analytical_place/analytical_solver.h | 21 ++++--- 2 files changed, 48 insertions(+), 29 deletions(-) diff --git a/vpr/src/analytical_place/analytical_solver.cpp b/vpr/src/analytical_place/analytical_solver.cpp index fbc70172e15..3ca9e9d3b1c 100644 --- a/vpr/src/analytical_place/analytical_solver.cpp +++ b/vpr/src/analytical_place/analytical_solver.cpp @@ -1190,6 +1190,33 @@ void 
B2BSolver::init_linear_system(PartialPlacement& p_placement, unsigned itera // Find the bounding blocks APNetBounds net_bounds = get_unique_net_bounds(net_id, p_placement, netlist_); + // Compute the channel cost factors due to routing damand. + double chanx_cost_fac = 1.0; + double chany_cost_fac = 1.0; + double chanz_cost_fac = 1.0; + if (iteration != 0) { + // Create a bounding box around the net using the net bounds. This is + // use to get the per-dimension cost terms. + // TODO: Investigate using the legalized solution from the prior iteration. + t_bb net_bb; + net_bb.xmin = p_placement.block_x_locs[net_bounds.min_x_blk]; + net_bb.xmax = p_placement.block_x_locs[net_bounds.max_x_blk]; + net_bb.ymin = p_placement.block_y_locs[net_bounds.min_y_blk]; + net_bb.ymax = p_placement.block_y_locs[net_bounds.max_y_blk]; + net_bb.layer_min = p_placement.block_layer_nums[net_bounds.min_z_blk]; + net_bb.layer_min = p_placement.block_layer_nums[net_bounds.max_z_blk]; + + chanx_cost_fac = chan_cost_handler_.get_chanx_cost_fac(net_bb); + chany_cost_fac = chan_cost_handler_.get_chany_cost_fac(net_bb); + if (is_multi_die()) + chanz_cost_fac = chan_cost_handler_.get_chanz_cost_fac(net_bb); + } + + // Get the per-dimension, wirelength net weights. + double wl_net_w_x = wl_net_w * chanx_cost_fac; + double wl_net_w_y = wl_net_w * chany_cost_fac; + double wl_net_w_z = wl_net_w * chanz_cost_fac; + // Add an edge from every block to their bounds (ignoring the bounds // themselves for now). 
// FIXME: If one block has multiple pins, it may connect to the bounds @@ -1197,32 +1224,25 @@ void B2BSolver::init_linear_system(PartialPlacement& p_placement, unsigned itera for (APPinId pin_id : netlist_.net_pins(net_id)) { APBlockId blk_id = netlist_.pin_block(pin_id); if (blk_id != net_bounds.max_x_blk && blk_id != net_bounds.min_x_blk) { - add_connection_to_system(blk_id, net_bounds.max_x_blk, num_pins, wl_net_w, p_placement.block_x_locs, triplet_list_x, b_x); - add_connection_to_system(blk_id, net_bounds.min_x_blk, num_pins, wl_net_w, p_placement.block_x_locs, triplet_list_x, b_x); + add_connection_to_system(blk_id, net_bounds.max_x_blk, num_pins, wl_net_w_x, p_placement.block_x_locs, triplet_list_x, b_x); + add_connection_to_system(blk_id, net_bounds.min_x_blk, num_pins, wl_net_w_x, p_placement.block_x_locs, triplet_list_x, b_x); } if (blk_id != net_bounds.max_y_blk && blk_id != net_bounds.min_y_blk) { - add_connection_to_system(blk_id, net_bounds.max_y_blk, num_pins, wl_net_w, p_placement.block_y_locs, triplet_list_y, b_y); - add_connection_to_system(blk_id, net_bounds.min_y_blk, num_pins, wl_net_w, p_placement.block_y_locs, triplet_list_y, b_y); + add_connection_to_system(blk_id, net_bounds.max_y_blk, num_pins, wl_net_w_y, p_placement.block_y_locs, triplet_list_y, b_y); + add_connection_to_system(blk_id, net_bounds.min_y_blk, num_pins, wl_net_w_y, p_placement.block_y_locs, triplet_list_y, b_y); } if (is_multi_die() && blk_id != net_bounds.max_z_blk && blk_id != net_bounds.min_z_blk) { - // For multi-die FPGAs, we apply extra weight in the layer dimension - // since moving between layers tends to cost more wiring than moving - // within the same layer. 
- double multidie_net_w = wl_net_w * layer_distance_cost_fac_; - add_connection_to_system(blk_id, net_bounds.max_z_blk, num_pins, multidie_net_w, p_placement.block_layer_nums, triplet_list_z, b_z); - add_connection_to_system(blk_id, net_bounds.min_z_blk, num_pins, multidie_net_w, p_placement.block_layer_nums, triplet_list_z, b_z); + add_connection_to_system(blk_id, net_bounds.max_z_blk, num_pins, wl_net_w_z, p_placement.block_layer_nums, triplet_list_z, b_z); + add_connection_to_system(blk_id, net_bounds.min_z_blk, num_pins, wl_net_w_z, p_placement.block_layer_nums, triplet_list_z, b_z); } } // Connect the bounds to each other. Its just easier to put these here // instead of in the for loop above. - add_connection_to_system(net_bounds.max_x_blk, net_bounds.min_x_blk, num_pins, wl_net_w, p_placement.block_x_locs, triplet_list_x, b_x); - add_connection_to_system(net_bounds.max_y_blk, net_bounds.min_y_blk, num_pins, wl_net_w, p_placement.block_y_locs, triplet_list_y, b_y); + add_connection_to_system(net_bounds.max_x_blk, net_bounds.min_x_blk, num_pins, wl_net_w_x, p_placement.block_x_locs, triplet_list_x, b_x); + add_connection_to_system(net_bounds.max_y_blk, net_bounds.min_y_blk, num_pins, wl_net_w_y, p_placement.block_y_locs, triplet_list_y, b_y); if (is_multi_die()) { - // See comment above. For multi-die FPGAs, we apply an extra factor - // to the cost. 
- double multidie_net_w = wl_net_w * layer_distance_cost_fac_; - add_connection_to_system(net_bounds.max_z_blk, net_bounds.min_z_blk, num_pins, multidie_net_w, p_placement.block_layer_nums, triplet_list_z, b_z); + add_connection_to_system(net_bounds.max_z_blk, net_bounds.min_z_blk, num_pins, wl_net_w_z, p_placement.block_layer_nums, triplet_list_z, b_z); } // ==================================================================== @@ -1283,11 +1303,11 @@ void B2BSolver::init_linear_system(PartialPlacement& p_placement, unsigned itera double timing_net_w = ap_timing_tradeoff_ * net_weights_[net_id] * timing_slope_fac_ * (1.0 + crit); add_connection_to_system(driver_blk, sink_blk, - 2 /*num_pins*/, timing_net_w * d_delay_x * delay_x_norm, + 2 /*num_pins*/, timing_net_w * d_delay_x * delay_x_norm * chanx_cost_fac, p_placement.block_x_locs, triplet_list_x, b_x); add_connection_to_system(driver_blk, sink_blk, - 2 /*num_pins*/, timing_net_w * d_delay_y * delay_y_norm, + 2 /*num_pins*/, timing_net_w * d_delay_y * delay_y_norm * chany_cost_fac, p_placement.block_y_locs, triplet_list_y, b_y); } } diff --git a/vpr/src/analytical_place/analytical_solver.h b/vpr/src/analytical_place/analytical_solver.h index 7244dd43d7c..3a2c733e2d7 100644 --- a/vpr/src/analytical_place/analytical_solver.h +++ b/vpr/src/analytical_place/analytical_solver.h @@ -10,7 +10,9 @@ #include #include "ap_flow_enums.h" #include "ap_netlist.h" +#include "chan_cost_handler.h" #include "device_grid.h" +#include "globals.h" #include "place_delay_model.h" #include "vtr_strong_id.h" #include "vtr_vector.h" @@ -531,16 +533,6 @@ class B2BSolver : public AnalyticalSolver { /// number, the solver will focus more on timing and less on wirelength. static constexpr double timing_slope_fac_ = 0.75; - /// @brief For most FPGA architectures, the cost of moving horizontally is - /// equivalent to the cost moving vertically (i.e. 
moving in increasing - /// x-dimension has the same cost as moving the same amount in the - /// y-dimension). However, for 3D FPGAs, moving between layers is - /// much more expensive than moving in the x or y dimension. We account - /// for this by adding a cost penalty factor to the "z"-dimension. - /// TODO: This cost factor was randomly selected because it felt ok. Should - /// choose a better factor that is chosen empirically. - static constexpr double layer_distance_cost_fac_ = 10.0; - public: B2BSolver(const APNetlist& ap_netlist, const DeviceGrid& device_grid, @@ -555,7 +547,11 @@ class B2BSolver : public AnalyticalSolver { ap_timing_tradeoff, log_verbosity) , pre_cluster_timing_manager_(pre_cluster_timing_manager) - , place_delay_model_(place_delay_model) {} + , place_delay_model_(place_delay_model) + , chan_cost_handler_(g_vpr_ctx.device().rr_chanx_width, + g_vpr_ctx.device().rr_chany_width, + g_vpr_ctx.device().rr_graph, + device_grid) {} /** * @brief Perform an iteration of the B2B solver, storing the result into @@ -804,6 +800,9 @@ class B2BSolver : public AnalyticalSolver { /// @brief The place delay model used for calculating the delay between /// two tiles on the FPGA. Used for computing the timing terms. std::shared_ptr place_delay_model_; + + /// @brief Manager class for getting the cost factors in the x, y, and z dimensions. 
+ ChanCostHandler chan_cost_handler_; }; #endif // EIGEN_INSTALLED From f2054f2de382a243a8f2edd2836efa6ba9762dbe Mon Sep 17 00:00:00 2001 From: AlexandreSinger Date: Fri, 19 Dec 2025 18:05:00 -0500 Subject: [PATCH 6/9] [AP][Solver] Normalized the Channel Factors --- .../analytical_place/analytical_solver.cpp | 53 ++++++++++++------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/vpr/src/analytical_place/analytical_solver.cpp b/vpr/src/analytical_place/analytical_solver.cpp index 3ca9e9d3b1c..12a0ac8224b 100644 --- a/vpr/src/analytical_place/analytical_solver.cpp +++ b/vpr/src/analytical_place/analytical_solver.cpp @@ -19,6 +19,7 @@ #include "atom_netlist_fwd.h" #include "device_grid.h" #include "flat_placement_types.h" +#include "net_cost_handler.h" #include "partial_placement.h" #include "ap_netlist.h" #include "place_delay_model.h" @@ -1174,6 +1175,22 @@ void B2BSolver::init_linear_system(PartialPlacement& p_placement, unsigned itera triplet_list_z.resize(total_num_pins_in_netlist); } + // Compute a normalization term for the per-dimension channel factors. This + // is simply the inverse of the average of the per-dimension channel factors + // for a bounding box the size of the device. This resolves to being the + // average channel width of the entire device. + // TODO: This can be moved to the constructor. 
+ t_bb device_bb; + device_bb.xmin = 0; + device_bb.xmax = device_grid_width_ - 1; + device_bb.ymin = 0; + device_bb.ymax = device_grid_height_ - 1; + device_bb.layer_min = 0; + device_bb.layer_max = device_grid_num_layers_ - 1; + double chan_fac_norm_x = 1.0 / chan_cost_handler_.get_chanx_cost_fac(device_bb); + double chan_fac_norm_y = 1.0 / chan_cost_handler_.get_chany_cost_fac(device_bb); + double chan_fac_norm = (chan_fac_norm_x + chan_fac_norm_y) / 2.0; + for (APNetId net_id : netlist_.nets()) { if (netlist_.net_is_ignored(net_id)) continue; @@ -1185,32 +1202,28 @@ void B2BSolver::init_linear_system(PartialPlacement& p_placement, unsigned itera // ==================================================================== // In the objective there is are wirelength connections and timing // connections, trade-off between the weight of each type of connection. - double wl_net_w = (1.0f - ap_timing_tradeoff_) * net_weights_[net_id]; + double wl_net_w = (1.0f - ap_timing_tradeoff_) * net_weights_[net_id] * wirelength_crossing_count(num_pins); // Find the bounding blocks APNetBounds net_bounds = get_unique_net_bounds(net_id, p_placement, netlist_); + // Create a bounding box around the net using the net bounds. This is + // used to get the per-dimension cost terms. + // TODO: Investigate using the legalized solution from the prior iteration. + t_bb net_bb; + net_bb.xmin = p_placement.block_x_locs[net_bounds.min_x_blk]; + net_bb.xmax = p_placement.block_x_locs[net_bounds.max_x_blk]; + net_bb.ymin = p_placement.block_y_locs[net_bounds.min_y_blk]; + net_bb.ymax = p_placement.block_y_locs[net_bounds.max_y_blk]; + net_bb.layer_min = p_placement.block_layer_nums[net_bounds.min_z_blk]; + net_bb.layer_min = p_placement.block_layer_nums[net_bounds.max_z_blk]; + // Compute the channel cost factors due to routing damand. 
- double chanx_cost_fac = 1.0; - double chany_cost_fac = 1.0; + double chanx_cost_fac = chan_cost_handler_.get_chanx_cost_fac(net_bb) * chan_fac_norm; + double chany_cost_fac = chan_cost_handler_.get_chany_cost_fac(net_bb) * chan_fac_norm; double chanz_cost_fac = 1.0; - if (iteration != 0) { - // Create a bounding box around the net using the net bounds. This is - // use to get the per-dimension cost terms. - // TODO: Investigate using the legalized solution from the prior iteration. - t_bb net_bb; - net_bb.xmin = p_placement.block_x_locs[net_bounds.min_x_blk]; - net_bb.xmax = p_placement.block_x_locs[net_bounds.max_x_blk]; - net_bb.ymin = p_placement.block_y_locs[net_bounds.min_y_blk]; - net_bb.ymax = p_placement.block_y_locs[net_bounds.max_y_blk]; - net_bb.layer_min = p_placement.block_layer_nums[net_bounds.min_z_blk]; - net_bb.layer_min = p_placement.block_layer_nums[net_bounds.max_z_blk]; - - chanx_cost_fac = chan_cost_handler_.get_chanx_cost_fac(net_bb); - chany_cost_fac = chan_cost_handler_.get_chany_cost_fac(net_bb); - if (is_multi_die()) - chanz_cost_fac = chan_cost_handler_.get_chanz_cost_fac(net_bb); - } + if (is_multi_die()) + chanz_cost_fac = chan_cost_handler_.get_chanz_cost_fac(net_bb) * chan_fac_norm; // Get the per-dimension, wirelength net weights. 
double wl_net_w_x = wl_net_w * chanx_cost_fac; From 69e29b66eaa8ab2ebaaf7a71e53af591303448a7 Mon Sep 17 00:00:00 2001 From: AlexandreSinger Date: Fri, 19 Dec 2025 18:51:54 -0500 Subject: [PATCH 7/9] [AP][Solver] Clamped the Chan Cost Factor BB --- vpr/src/analytical_place/analytical_solver.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/vpr/src/analytical_place/analytical_solver.cpp b/vpr/src/analytical_place/analytical_solver.cpp index 12a0ac8224b..1fc9aefd7fb 100644 --- a/vpr/src/analytical_place/analytical_solver.cpp +++ b/vpr/src/analytical_place/analytical_solver.cpp @@ -1211,12 +1211,15 @@ void B2BSolver::init_linear_system(PartialPlacement& p_placement, unsigned itera // used to get the per-dimension cost terms. // TODO: Investigate using the legalized solution from the prior iteration. t_bb net_bb; - net_bb.xmin = p_placement.block_x_locs[net_bounds.min_x_blk]; - net_bb.xmax = p_placement.block_x_locs[net_bounds.max_x_blk]; - net_bb.ymin = p_placement.block_y_locs[net_bounds.min_y_blk]; - net_bb.ymax = p_placement.block_y_locs[net_bounds.max_y_blk]; - net_bb.layer_min = p_placement.block_layer_nums[net_bounds.min_z_blk]; - net_bb.layer_min = p_placement.block_layer_nums[net_bounds.max_z_blk]; + net_bb.xmin = std::clamp(p_placement.block_x_locs[net_bounds.min_x_blk], 0, device_grid_width_ - 1); + net_bb.xmax = std::clamp(p_placement.block_x_locs[net_bounds.max_x_blk], 0, device_grid_width_ - 1); + net_bb.ymin = std::clamp(p_placement.block_y_locs[net_bounds.min_y_blk], 0, device_grid_height_ - 1); + net_bb.ymax = std::clamp(p_placement.block_y_locs[net_bounds.max_y_blk], 0, device_grid_height_ - 1); + net_bb.layer_min = std::clamp(p_placement.block_layer_nums[net_bounds.min_z_blk], 0, device_grid_num_layers_ - 1); + net_bb.layer_max = std::clamp(p_placement.block_layer_nums[net_bounds.max_z_blk], 0, device_grid_num_layers_ - 1); + VTR_ASSERT_SAFE(net_bb.xmin <= net_bb.xmax); + VTR_ASSERT_SAFE(net_bb.ymin <= net_bb.ymax); 
+ VTR_ASSERT_SAFE(net_bb.layer_min <= net_bb.layer_max); // Compute the channel cost factors due to routing damand. double chanx_cost_fac = chan_cost_handler_.get_chanx_cost_fac(net_bb) * chan_fac_norm; From a06777bac90960c0283045e3986b6f19bfa70f8c Mon Sep 17 00:00:00 2001 From: AlexandreSinger Date: Fri, 19 Dec 2025 19:45:23 -0500 Subject: [PATCH 8/9] Updated Golden Results --- .../vtr_reg_strong/basic_ap/config/golden_results.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/config/golden_results.txt index 010a8069213..cf0d52b4430 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/config/golden_results.txt @@ -1,5 +1,5 @@ arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time initial_placed_wirelength_est placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time initial_placed_CPD_est placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time 
min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time - fixed_k6_frac_N8_22nm.xml single_wire.v common 1.28 vpr 81.23 MiB -1 -1 0.06 28072 1 0.01 -1 -1 33176 -1 -1 0 1 0 0 success v8.0.0-13568-gf1bde671c release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-08-10T11:21:08 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong 83180 1 1 0 2 0 1 2 17 17 289 -1 unnamed_device -1 -1 2 2 3 0 0 3 81.2 MiB 0.46 0.00 0.2714 0.2714 -0.2714 -0.2714 nan 0.00 7.854e-06 5.036e-06 6.1826e-05 4.2744e-05 81.2 MiB 0.46 81.2 MiB 0.07 8 18 1 6.79088e+06 0 166176. 575.005 0.14 0.00152934 0.00118657 20206 45088 -1 18 1 1 1 110 40 0.7726 nan -0.7726 -0.7726 0 0 202963. 
702.294 0.01 0.00 0.04 -1 -1 0.01 0.00148055 0.00115173 - fixed_k6_frac_N8_22nm.xml single_ff.v common 1.44 vpr 81.41 MiB -1 -1 0.06 28204 1 0.02 -1 -1 33460 -1 -1 1 2 0 0 success v8.0.0-13568-gf1bde671c release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-08-10T11:21:08 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong 83364 2 1 2 3 1 3 4 17 17 289 -1 unnamed_device -1 -1 20 20 9 0 2 7 81.4 MiB 0.46 0.00 0.942216 0.942216 -1.43836 -0.942216 0.942216 0.00 1.1579e-05 8.109e-06 7.777e-05 5.6357e-05 81.4 MiB 0.46 81.4 MiB 0.07 20 36 1 6.79088e+06 13472 414966. 1435.87 0.24 0.000947028 0.000860897 22510 95286 -1 27 1 2 2 75 22 0.942216 0.942216 -1.31306 -0.942216 0 0 503264. 1741.40 0.03 0.00 0.08 -1 -1 0.03 0.000905843 0.000830609 - fixed_k6_frac_N8_22nm.xml ch_intrinsics.v common 2.71 vpr 81.18 MiB -1 -1 0.19 29612 3 0.07 -1 -1 37056 -1 -1 67 99 1 0 success v8.0.0-13568-gf1bde671c release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-08-10T11:21:08 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong 83128 99 130 239 229 1 230 297 17 17 289 -1 unnamed_device -1 -1 888.525 833 8217 1122 526 6569 81.2 MiB 0.62 0.00 1.6707 1.6707 -125.805 -1.6707 1.6707 0.00 0.000598826 0.000527051 0.00729429 0.00656048 81.2 MiB 0.62 81.2 MiB 0.12 32 2022 20 6.79088e+06 1.45062e+06 586450. 2029.24 0.84 0.153546 0.136612 24814 144142 -1 1719 13 609 998 66105 19764 2.0466 2.0466 -142.961 -2.0466 -0.16867 -0.16867 744469. 
2576.02 0.04 0.04 0.12 -1 -1 0.04 0.0572559 0.0515521 - fixed_k6_frac_N8_22nm.xml diffeq1.v common 8.53 vpr 83.88 MiB -1 -1 0.25 33708 15 0.28 -1 -1 37808 -1 -1 60 162 0 5 success v8.0.0-13568-gf1bde671c release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-63-generic x86_64 2025-08-10T11:21:08 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong 85896 162 96 805 258 1 689 323 17 17 289 -1 unnamed_device -1 -1 6738.34 6654 2537 23 682 1832 83.9 MiB 1.17 0.01 20.6638 20.6211 -1599.88 -20.6211 20.6211 0.00 0.00209772 0.00184968 0.0141792 0.0132053 83.9 MiB 1.17 83.9 MiB 0.25 50 12799 32 6.79088e+06 2.78832e+06 902133. 3121.57 4.81 0.987018 0.898351 27982 213445 -1 11365 17 3437 7668 955734 254879 20.1934 20.1934 -1634.46 -20.1934 0 0 1.08113e+06 3740.92 0.05 0.25 0.19 -1 -1 0.05 0.275593 0.25395 + fixed_k6_frac_N8_22nm.xml single_wire.v common 1.26 vpr 81.65 MiB -1 -1 0.06 27316 1 0.01 -1 -1 33172 -1 -1 0 1 0 0 success v8.0.0-14549-gf2054f2de-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-79-generic x86_64 2025-12-19T18:51:24 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong 83608 1 1 0 2 0 1 2 17 17 289 -1 unnamed_device -1 -1 2 2 3 0 0 3 81.6 MiB 0.42 0.00 0.271506 0.271506 -0.271506 -0.271506 nan 0.00 1.3018e-05 8.476e-06 7.1367e-05 4.9579e-05 81.6 MiB 0.42 81.6 MiB 0.05 8 18 1 6.79088e+06 0 166176. 575.005 0.15 0.000951022 0.000865704 20206 45088 -1 18 1 1 1 110 40 0.7726 nan -0.7726 -0.7726 0 0 202963. 
702.294 0.01 0.00 0.04 -1 -1 0.01 0.000925113 0.00086261 + fixed_k6_frac_N8_22nm.xml single_ff.v common 1.42 vpr 81.66 MiB -1 -1 0.06 27568 1 0.02 -1 -1 33680 -1 -1 1 2 0 0 success v8.0.0-14549-gf2054f2de-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-79-generic x86_64 2025-12-19T18:51:24 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong 83616 2 1 2 3 1 3 4 17 17 289 -1 unnamed_device -1 -1 20 20 9 0 2 7 81.7 MiB 0.41 0.00 0.942322 0.942322 -1.43857 -0.942322 0.942322 0.00 1.1848e-05 8.305e-06 8.3312e-05 6.1998e-05 81.7 MiB 0.41 81.7 MiB 0.05 20 36 1 6.79088e+06 13472 414966. 1435.87 0.28 0.00134032 0.00124132 22510 95286 -1 27 1 2 2 75 22 0.942216 0.942216 -1.31306 -0.942216 0 0 503264. 1741.40 0.03 0.00 0.08 -1 -1 0.03 0.000959865 0.000884526 + fixed_k6_frac_N8_22nm.xml ch_intrinsics.v common 2.43 vpr 82.34 MiB -1 -1 0.22 29492 3 0.07 -1 -1 36928 -1 -1 67 99 1 0 success v8.0.0-14549-gf2054f2de-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-79-generic x86_64 2025-12-19T18:51:24 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong 84320 99 130 239 229 1 227 297 17 17 289 -1 unnamed_device -1 -1 910.525 872 2277 256 239 1782 82.3 MiB 0.61 0.00 1.67081 1.67081 -126.373 -1.67081 1.67081 0.00 0.000605405 0.000531936 0.00371381 0.00341873 82.3 MiB 0.61 82.3 MiB 0.11 32 2005 30 6.79088e+06 1.45062e+06 586450. 2029.24 0.56 0.140082 0.124872 24814 144142 -1 1757 13 615 1048 67625 20224 2.0466 2.0466 -143.795 -2.0466 -0.04337 -0.04337 744469. 
2576.02 0.04 0.04 0.11 -1 -1 0.04 0.0581488 0.0522428 + fixed_k6_frac_N8_22nm.xml diffeq1.v common 9.91 vpr 84.64 MiB -1 -1 0.28 33456 15 0.28 -1 -1 37940 -1 -1 45 162 0 5 success v8.0.0-14549-gf2054f2de-dirty release VTR_ASSERT_LEVEL=3 GNU 13.3.0 on Linux-6.8.0-79-generic x86_64 2025-12-19T18:51:24 srivatsan-Precision-Tower-5810 /home/alex/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong 86672 162 96 805 258 1 663 308 17 17 289 -1 unnamed_device -1 -1 6953.87 6362 25244 343 6897 18004 84.6 MiB 1.34 0.01 21.7264 21.1502 -1597.5 -21.1502 21.1502 0.00 0.00203318 0.00178736 0.0619178 0.0552945 84.6 MiB 1.34 84.6 MiB 0.22 58 11855 39 6.79088e+06 2.58624e+06 997811. 3452.63 5.65 1.19073 1.07675 29710 251250 -1 10504 26 3184 6817 1034862 316168 20.6154 20.6154 -1488.55 -20.6154 0 0 1.25153e+06 4330.55 0.07 0.33 0.21 -1 -1 0.07 0.346336 0.31634 From 17635d73f7c83938e424806bd098567b7b9a780a Mon Sep 17 00:00:00 2001 From: AlexandreSinger Date: Fri, 19 Dec 2025 23:28:35 -0500 Subject: [PATCH 9/9] Resolved Assorted PR Comments --- vpr/src/analytical_place/analytical_solver.cpp | 9 +++++---- vpr/src/place/chan_cost_handler.cpp | 3 ++- vpr/src/place/chan_cost_handler.h | 2 +- .../basic_3d_ap/constraints/mm9a_io_constraint.xml | 4 ++-- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/vpr/src/analytical_place/analytical_solver.cpp b/vpr/src/analytical_place/analytical_solver.cpp index 1fc9aefd7fb..b9243dd7e9c 100644 --- a/vpr/src/analytical_place/analytical_solver.cpp +++ b/vpr/src/analytical_place/analytical_solver.cpp @@ -659,17 +659,18 @@ void B2BSolver::initialize_placement_least_dense(PartialPlacement& p_placement) size_t rows = std::ceil(device_grid_height_ / gap); // Spread the blocks at these grid coordinates. 
+ size_t current_row_idx = 0; for (size_t d = 0; d < device_grid_num_layers_; d++) { for (size_t r = 0; r <= rows; r++) { for (size_t c = 0; c <= cols; c++) { - size_t i = r * cols + c; - if (i >= num_moveable_blocks_) + if (current_row_idx >= num_moveable_blocks_) break; - APRowId row_id = APRowId(i); + APRowId row_id = APRowId(current_row_idx); APBlockId blk_id = row_id_to_blk_id_[row_id]; p_placement.block_x_locs[blk_id] = c * gap; p_placement.block_y_locs[blk_id] = r * gap; p_placement.block_layer_nums[blk_id] = d; + current_row_idx++; } } } @@ -1172,7 +1173,7 @@ void B2BSolver::init_linear_system(PartialPlacement& p_placement, unsigned itera triplet_list_y.reserve(total_num_pins_in_netlist); std::vector> triplet_list_z; if (is_multi_die()) { - triplet_list_z.resize(total_num_pins_in_netlist); + triplet_list_z.reserve(total_num_pins_in_netlist); } // Compute a normalization term for the per-dimension channel factors. This diff --git a/vpr/src/place/chan_cost_handler.cpp b/vpr/src/place/chan_cost_handler.cpp index 68edd5d3e4e..61e31ab53d6 100644 --- a/vpr/src/place/chan_cost_handler.cpp +++ b/vpr/src/place/chan_cost_handler.cpp @@ -6,6 +6,7 @@ */ #include "chan_cost_handler.h" +#include #include "rr_graph_view.h" ChanCostHandler::ChanCostHandler(const std::vector& rr_chanx_width, @@ -84,7 +85,7 @@ ChanCostHandler::ChanCostHandler(const std::vector& rr_chanx_width, acc_tile_num_inter_die_conn_ = vtr::PrefixSum2D(grid.width(), grid.height(), [&](size_t x, size_t y) { - return (int)tile_num_inter_die_conn[x][y]; + return static_cast(std::round(tile_num_inter_die_conn[x][y])); }); } } diff --git a/vpr/src/place/chan_cost_handler.h b/vpr/src/place/chan_cost_handler.h index 2755015f5a6..bca57705812 100644 --- a/vpr/src/place/chan_cost_handler.h +++ b/vpr/src/place/chan_cost_handler.h @@ -96,7 +96,7 @@ class ChanCostHandler { return 1.0; int bb_num_tiles = (bb.xmax - bb.xmin + 1) * (bb.ymax - bb.ymin + 1); - return bb_num_tiles / static_cast(num_inter_dir_conn); + 
return static_cast(bb_num_tiles) / num_inter_dir_conn; } private: diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap/constraints/mm9a_io_constraint.xml b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap/constraints/mm9a_io_constraint.xml index 69caad14394..653f170b11e 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap/constraints/mm9a_io_constraint.xml +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_ap/basic_3d_ap/constraints/mm9a_io_constraint.xml @@ -58,8 +58,8 @@ - - + +