1+
2+ #include " initial_noc_placment.h"
3+ #include " initial_placement.h"
4+ #include " noc_place_utils.h"
5+ #include " noc_place_checkpoint.h"
6+
/**
 * @brief Evaluates whether a NoC router swap should be accepted or not.
 * If the delta cost is non-positive, the move is always accepted. If the cost
 * has increased, the move is accepted with probability prob.
 *
 * @param delta_cost Specifies how much the total cost would change if
 * the proposed swap is accepted.
 * @param prob The probability with which a router swap that increases
 * the cost is accepted. The passed value should be in the range [0, 1].
 *
 * @return true if the proposed swap is accepted, false if not.
 */
19+ static bool accept_noc_swap (double delta_cost, double prob);
20+
/**
 * @brief Places a constrained NoC router within its partition region.
 *
 * @param router_blk_id NoC router cluster block ID.
 */
26+ static void place_constrained_noc_router (ClusterBlockId router_blk_id);
27+
/**
 * @brief Randomly places unconstrained NoC routers.
 *
 * @param unfixed_routers Contains the cluster block IDs of all unconstrained
 * NoC routers.
 * @param seed Used for shuffling NoC routers.
 */
35+ static void place_noc_routers_randomly (std::vector<ClusterBlockId>& unfixed_routers, int seed);
36+
/**
 * @brief Runs a simulated annealing optimizer for NoC routers.
 *
 * @param noc_opts Contains weighting factors for NoC cost terms.
 */
42+ static void noc_routers_anneal (const t_noc_opts& noc_opts);
43+
44+ static bool accept_noc_swap (double delta_cost, double prob) {
45+ if (delta_cost <= 0.0 ) {
46+ return true ;
47+ }
48+
49+ if (prob == 0.0 ) {
50+ return false ;
51+ }
52+
53+ float random_num = vtr::frand ();
54+ if (random_num < prob) {
55+ return true ;
56+ } else {
57+ return false ;
58+ }
59+ }
60+
61+ static void place_constrained_noc_router (ClusterBlockId router_blk_id)
62+ {
63+ auto & cluster_ctx = g_vpr_ctx.clustering ();
64+ const auto & floorplanning_ctx = g_vpr_ctx.floorplanning ();
65+
66+ auto block_type = cluster_ctx.clb_nlist .block_type (router_blk_id);
67+ const PartitionRegion& pr = floorplanning_ctx.cluster_constraints [router_blk_id];
68+
69+ // Create a macro with a single member
70+ t_pl_macro_member macro_member;
71+ macro_member.blk_index = router_blk_id;
72+ macro_member.offset = t_pl_offset (0 , 0 , 0 );
73+ t_pl_macro pl_macro;
74+ pl_macro.members .push_back (macro_member);
75+
76+ bool macro_placed = false ;
77+ for (int i_try = 0 ; i_try < MAX_NUM_TRIES_TO_PLACE_MACROS_RANDOMLY && !macro_placed; i_try++) {
78+ macro_placed = try_place_macro_randomly (pl_macro, pr, block_type, FREE);
79+ }
80+
81+ if (!macro_placed) {
82+ macro_placed = try_place_macro_exhaustively (pl_macro, pr, block_type, FREE);
83+ }
84+
85+ if (!macro_placed) {
86+ VPR_FATAL_ERROR (VPR_ERROR_PLACE, " Could not place a router cluster within its constrained region" );
87+ }
88+ }
89+
90+ static void place_noc_routers_randomly (std::vector<ClusterBlockId>& unfixed_routers, int seed)
91+ {
92+ auto & place_ctx = g_vpr_ctx.placement ();
93+ auto & noc_ctx = g_vpr_ctx.noc ();
94+ auto & cluster_ctx = g_vpr_ctx.clustering ();
95+ auto & device_ctx = g_vpr_ctx.device ();
96+
97+ /*
98+ * Unconstrained NoC routers are placed randomly, then NoC cost is optimized using simulated annealing.
99+ * For random placement, physical NoC routers are shuffled, the logical NoC routers are assigned
100+ * to shuffled physical routers. This is equivalent to placing each logical NoC router at a
101+ * randomly selected physical router. The only difference is that an occupied physical NoC router
102+ * might be selected multiple times. Shuffling makes sure that each physical NoC router is evaluated
103+ * only once.
104+ */
105+
106+ // Make a copy of NoC physical routers because we want to change its order
107+ vtr::vector<NocRouterId, NocRouter> noc_phy_routers = noc_ctx.noc_model .get_noc_routers ();
108+
109+ // Shuffle physical NoC routers
110+ vtr::RandState rand_state = seed;
111+ vtr::shuffle (noc_phy_routers.begin (), noc_phy_routers.end (), rand_state);
112+
113+ // Get the logical block type for router
114+ const auto router_block_type = cluster_ctx.clb_nlist .block_type (noc_ctx.noc_traffic_flows_storage .get_router_clusters_in_netlist ()[0 ]);
115+
116+ // Get the compressed grid for NoC
117+ const auto & compressed_noc_grid = place_ctx.compressed_block_grids [router_block_type->index ];
118+
119+ // Iterate over shuffled physical routers to place logical routers
120+ // Since physical routers are shuffled, router placement would be random
121+ for (const auto & phy_router : noc_phy_routers) {
122+ t_physical_tile_loc router_phy_loc = phy_router.get_router_physical_location ();
123+
124+ // Find a compatible sub-tile
125+ const auto & phy_type = device_ctx.grid .get_physical_type (router_phy_loc);
126+ const auto & compatible_sub_tiles = compressed_noc_grid.compatible_sub_tiles_for_tile .at (phy_type->index );
127+ int sub_tile = compatible_sub_tiles[vtr::irand ((int )compatible_sub_tiles.size () - 1 )];
128+
129+ t_pl_loc loc (router_phy_loc, sub_tile);
130+
131+ if (place_ctx.grid_blocks .is_sub_tile_empty (router_phy_loc, sub_tile)) {
132+ // Pick one of the unplaced routers
133+ auto logical_router_bid = unfixed_routers.back ();
134+ unfixed_routers.pop_back ();
135+
136+ // Create a macro with a single member
137+ t_pl_macro_member macro_member;
138+ macro_member.blk_index = logical_router_bid;
139+ macro_member.offset = t_pl_offset (0 , 0 , 0 );
140+ t_pl_macro pl_macro;
141+ pl_macro.members .push_back (macro_member);
142+
143+ bool legal = try_place_macro (pl_macro, loc);
144+ if (!legal) {
145+ VPR_FATAL_ERROR (VPR_ERROR_PLACE, " Could not place a router cluster into an empty physical router." );
146+ }
147+
148+ // When all router clusters are placed, stop iterating over remaining physical routers
149+ if (unfixed_routers.empty ()) {
150+ break ;
151+ }
152+ }
153+ } // end for of random router placement
154+ }
155+
156+ static void noc_routers_anneal (const t_noc_opts& noc_opts)
157+ {
158+ auto & noc_ctx = g_vpr_ctx.noc ();
159+
160+ // Only NoC related costs are considered
161+ t_placer_costs costs;
162+
163+ // Initialize NoC-related costs
164+ costs.noc_aggregate_bandwidth_cost = comp_noc_aggregate_bandwidth_cost ();
165+ costs.noc_latency_cost = comp_noc_latency_cost (noc_opts);
166+ update_noc_normalization_factors (costs);
167+ costs.cost = calculate_noc_cost (costs, noc_opts);
168+
169+ // Maximum distance in each direction that a router can travel in a move
170+ // It is assumed that NoC routers are organized in a square grid.
171+ // Each router can initially move within the entire grid with a single swap.
172+ const size_t n_physical_routers = noc_ctx.noc_model .get_noc_routers ().size ();
173+ const float max_r_lim = ceilf (sqrtf ((float )n_physical_routers));
174+
175+ // At most, two routers are swapped
176+ t_pl_blocks_to_be_moved blocks_affected (2 );
177+
178+ // Total number of moves grows linearly with the number of logical NoC routers.
179+ // The constant factor was selected experimentally by running the algorithm on
180+ // synthetic benchmarks. NoC-related metrics did not improve after increasing
181+ // the constant factor above 35000.
182+ // Get all the router clusters and figure out how many of them exist
183+ const int num_router_clusters = noc_ctx.noc_traffic_flows_storage .get_router_clusters_in_netlist ().size ();
184+ const int N_MOVES = num_router_clusters * 35000 ;
185+
186+ const double starting_prob = 0.5 ;
187+ const double prob_step = starting_prob / N_MOVES;
188+
189+ // The checkpoint stored the placement with the lowest cost.
190+ NoCPlacementCheckpoint checkpoint;
191+
192+ /* Algorithm overview:
193+ * In each iteration, one logical NoC router and a physical NoC router are selected randomly.
194+ * If the selected physical NoC router is occupied, two logical NoC routers are swapped.
195+ * If not, the selected logical NoC router is moved to the vacant physical router.
196+ * Then, the cost difference of this swap is computed. If the swap reduces the cost,
197+ * it is always accepted. Swaps that increase the cost are accepted with a
198+ * gradually decreasing probability. The placement with the lowest cost is saved
199+ * as a checkpoint. When the annealing is over, if the checkpoint has a better
200+ * cost than the current placement, the checkpoint is restored.
201+ * Range limit and the probability of accepting swaps with positive delta cost
202+ * decrease linearly as more swaps are evaluated. Late in the annealing,
203+ * NoC routers are swapped only with their neighbors as the range limit approaches 1.
204+ */
205+
206+ // Generate and evaluate router moves
207+ for (int i_move = 0 ; i_move < N_MOVES; i_move++) {
208+ e_create_move create_move_outcome = e_create_move::ABORT;
209+ clear_move_blocks (blocks_affected);
210+ // Shrink the range limit over time
211+ float r_lim_decayed = 1 .0f + (N_MOVES - i_move) * (max_r_lim / N_MOVES);
212+ create_move_outcome = propose_router_swap (blocks_affected, r_lim_decayed);
213+
214+ if (create_move_outcome != e_create_move::ABORT) {
215+ apply_move_blocks (blocks_affected);
216+
217+ double noc_aggregate_bandwidth_delta_c = 0.0 ;
218+ double noc_latency_delta_c = 0.0 ;
219+ find_affected_noc_routers_and_update_noc_costs (blocks_affected, noc_aggregate_bandwidth_delta_c, noc_latency_delta_c, noc_opts);
220+ double delta_cost = (noc_opts.noc_placement_weighting ) * (noc_latency_delta_c * costs.noc_latency_cost_norm + noc_aggregate_bandwidth_delta_c * costs.noc_aggregate_bandwidth_cost_norm );
221+
222+ double prob = starting_prob - i_move * prob_step;
223+ bool move_accepted = accept_noc_swap (delta_cost, prob);
224+
225+ if (move_accepted) {
226+ costs.cost += delta_cost;
227+ commit_move_blocks (blocks_affected);
228+ commit_noc_costs ();
229+ costs.noc_aggregate_bandwidth_cost += noc_aggregate_bandwidth_delta_c;
230+ costs.noc_latency_cost += noc_latency_delta_c;
231+ if (costs.cost < checkpoint.get_cost () || !checkpoint.is_valid ()) {
232+ checkpoint.save_checkpoint (costs.cost );
233+ }
234+ } else { // The proposed move is rejected
235+ revert_move_blocks (blocks_affected);
236+ revert_noc_traffic_flow_routes (blocks_affected);
237+ }
238+ }
239+ }
240+
241+ if (checkpoint.get_cost () < costs.cost ) {
242+ checkpoint.restore_checkpoint (noc_opts, costs);
243+ }
244+ }
245+
246+ void initial_noc_placement (const t_noc_opts& noc_opts, int seed) {
247+ auto & noc_ctx = g_vpr_ctx.noc ();
248+
249+ // Get all the router clusters
250+ const std::vector<ClusterBlockId>& router_blk_ids = noc_ctx.noc_traffic_flows_storage .get_router_clusters_in_netlist ();
251+
252+ // Holds all the routers that are not fixed into a specific location by constraints
253+ std::vector<ClusterBlockId> unfixed_routers;
254+
255+ // Check for floorplanning constraints and place constrained NoC routers
256+ for (auto router_blk_id : router_blk_ids) {
257+ // The block is fixed and was placed in mark_fixed_blocks()
258+ if (is_block_placed ((router_blk_id))) {
259+ continue ;
260+ }
261+
262+ if (is_cluster_constrained (router_blk_id)) {
263+ place_constrained_noc_router (router_blk_id);
264+ } else {
265+ unfixed_routers.push_back (router_blk_id);
266+ }
267+ }
268+
269+ // Place unconstrained NoC routers randomly
270+ place_noc_routers_randomly (unfixed_routers,seed);
271+
272+ // populate internal data structures to maintain route, bandwidth usage, and latencies
273+ initial_noc_routing ();
274+
275+ // Run the simulated annealing optimizer for NoC routers
276+ noc_routers_anneal (noc_opts);
277+ }
0 commit comments