Skip to content

Commit 81224e3

Browse files
Applied the comments.
1 parent d2d3904 commit 81224e3

File tree

16 files changed

+520
-489
lines changed

16 files changed

+520
-489
lines changed

vpr/src/base/vpr_types.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -216,11 +216,11 @@ class t_ext_pin_util_targets {
216216
class t_pack_high_fanout_thresholds {
217217
public:
218218
t_pack_high_fanout_thresholds() = default;
219-
t_pack_high_fanout_thresholds(int threshold);
220-
t_pack_high_fanout_thresholds(const std::vector<std::string>& specs);
219+
explicit t_pack_high_fanout_thresholds(int threshold);
220+
explicit t_pack_high_fanout_thresholds(const std::vector<std::string>& specs);
221221
t_pack_high_fanout_thresholds& operator=(t_pack_high_fanout_thresholds&& other) noexcept;
222222

223-
///@brief Returns the high fanout threshold of the specified block
223+
///@brief Returns the high fanout threshold of the specified block
224224
int get_threshold(const std::string& block_type_name) const;
225225

226226
///@brief Returns a string describing high fanout thresholds for different block types

vpr/src/noc/noc_traffic_flows.cpp

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -89,28 +89,6 @@ void NocTrafficFlows::finished_noc_traffic_flows_setup(void) {
8989
int number_of_traffic_flows = noc_traffic_flows.size();
9090
traffic_flow_routes.resize(number_of_traffic_flows);
9191

92-
const int num_flows = get_number_of_traffic_flows();
93-
double bandwidth_sum = 0.0;
94-
double inverse_latency_sum = 0.0;
95-
96-
// Iterate over all flows and calculate bandwidth and inverse latency sums
97-
for (const auto& flow_id : noc_traffic_flows_ids) {
98-
const auto& flow = get_single_noc_traffic_flow(flow_id);
99-
bandwidth_sum += flow.traffic_flow_bandwidth;
100-
inverse_latency_sum += 1.0 / flow.max_traffic_flow_latency;
101-
}
102-
103-
double bandwidth_norm_factor = bandwidth_sum / num_flows;
104-
double inverse_latency_norm_factor = inverse_latency_sum / num_flows;
105-
106-
// Iterate over all flows and assign their scores
107-
for (const auto& flow_id : noc_traffic_flows_ids) {
108-
auto& flow = noc_traffic_flows[flow_id];
109-
double normalized_bandwidth = flow.traffic_flow_bandwidth / bandwidth_norm_factor;
110-
double normalized_inverse_latency = 1.0 / (flow.max_traffic_flow_latency * inverse_latency_norm_factor);
111-
flow.score = flow.traffic_flow_priority * normalized_bandwidth * normalized_inverse_latency;
112-
}
113-
11492
return;
11593
}
11694

vpr/src/noc/noc_traffic_flows.h

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,9 +66,6 @@ struct t_noc_traffic_flow {
6666
/** Indicates the importance of the traffic flow. Higher priority traffic flows will have more importance and will be more likely to have their latency reduced and constraints met. Range: [0-inf) */
6767
int traffic_flow_priority;
6868

69-
/** When a weighted average is computed over flows or their properties, this score can be used as the contributing weight for its corresponding flow */
70-
double score;
71-
7269
/** Constructor initializes all variables*/
7370
t_noc_traffic_flow(std::string source_router_name, std::string sink_router_name, ClusterBlockId source_router_id, ClusterBlockId sink_router_id, double flow_bandwidth, double max_flow_latency, int flow_priority)
7471
: source_router_module_name(std::move(source_router_name))
@@ -78,7 +75,7 @@ struct t_noc_traffic_flow {
7875
, traffic_flow_bandwidth(flow_bandwidth)
7976
, max_traffic_flow_latency(max_flow_latency)
8077
, traffic_flow_priority(flow_priority)
81-
, score(0.0) {}
78+
{}
8279
};
8380

8481
class NocTrafficFlows {

vpr/src/pack/pack.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#include <unordered_set>
44
#include <unordered_map>
55
#include <fstream>
6-
#include <stdlib.h>
6+
#include <cstdlib>
77
#include <sstream>
88

99
#include "vtr_assert.h"
@@ -30,9 +30,9 @@
3030
static bool try_size_device_grid(const t_arch& arch, const std::map<t_logical_block_type_ptr, size_t>& num_type_instances, float target_device_utilization, std::string device_layout_name);
3131

3232
/**
33-
* @brief Counts the total number of models
33+
* @brief Counts the total number of logic models that the architecture can implement.
3434
*
35-
* @param user_models A linked list of models
35+
* @param user_models A linked list of logic models.
3636
* @return int The total number of models in the linked list
3737
*/
3838
static int count_models(const t_model* user_models);
Lines changed: 277 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,277 @@
1+
2+
#include "initial_noc_placment.h"
3+
#include "initial_placement.h"
4+
#include "noc_place_utils.h"
5+
#include "noc_place_checkpoint.h"
6+
7+
/**
8+
* @brief Evaluates whether a NoC router swap should be accepted or not.
9+
* If delta cost is non-positive, the move is always accepted. If the cost
10+
* has increased, the probability of accepting the move is prob.
11+
*
12+
* @param delta_cost Specifies how much the total cost would change if
13+
* the proposed swap is accepted.
14+
* @param prob The probability by which a router swap that increases
15+
* the cost is accepted. The passed value should be in range [0, 1].
16+
*
17+
* @return true if the proposed swap is accepted, false if not.
18+
*/
19+
static bool accept_noc_swap(double delta_cost, double prob);
20+
21+
/**
22+
* @brief Places a constrained NoC router within its partition region.
23+
*
24+
* @param router_blk_id NoC router cluster block ID
25+
*/
26+
static void place_constrained_noc_router(ClusterBlockId router_blk_id);
27+
28+
/**
29+
* @brief Randomly places unconstrained NoC routers.
30+
*
31+
* @param unfixed_routers Contains the cluster block ID for all unconstrained
32+
* NoC routers.
33+
* @param seed Used for shuffling NoC routers.
34+
*/
35+
static void place_noc_routers_randomly (std::vector<ClusterBlockId>& unfixed_routers, int seed);
36+
37+
/**
38+
* @brief Runs a simulated annealing optimizer for NoC routers.
39+
*
40+
* @param noc_opts Contains weighting factors for NoC cost terms.
41+
*/
42+
static void noc_routers_anneal(const t_noc_opts& noc_opts);
43+
44+
static bool accept_noc_swap(double delta_cost, double prob) {
45+
if (delta_cost <= 0.0) {
46+
return true;
47+
}
48+
49+
if (prob == 0.0) {
50+
return false;
51+
}
52+
53+
float random_num = vtr::frand();
54+
if (random_num < prob) {
55+
return true;
56+
} else {
57+
return false;
58+
}
59+
}
60+
61+
/**
 * @brief Places one constrained NoC router cluster inside its partition region.
 *
 * The router is wrapped in a single-member macro. Random placement is
 * attempted up to MAX_NUM_TRIES_TO_PLACE_MACROS_RANDOMLY times; if none of
 * the attempts succeeds, exhaustive placement is tried. A fatal error is
 * raised when the exhaustive attempt fails as well.
 *
 * @param router_blk_id Cluster block ID of the NoC router to place.
 */
static void place_constrained_noc_router(ClusterBlockId router_blk_id) {
    auto& cluster_ctx = g_vpr_ctx.clustering();
    const auto& floorplanning_ctx = g_vpr_ctx.floorplanning();

    auto block_type = cluster_ctx.clb_nlist.block_type(router_blk_id);
    const PartitionRegion& pr = floorplanning_ctx.cluster_constraints[router_blk_id];

    // The macro placement routines are reused by wrapping the router in a one-member macro
    t_pl_macro_member sole_member;
    sole_member.blk_index = router_blk_id;
    sole_member.offset = t_pl_offset(0, 0, 0);
    t_pl_macro router_macro;
    router_macro.members.push_back(sole_member);

    // Random attempts, bounded by the maximum number of tries
    bool placed = false;
    int attempt = 0;
    while (!placed && attempt < MAX_NUM_TRIES_TO_PLACE_MACROS_RANDOMLY) {
        placed = try_place_macro_randomly(router_macro, pr, block_type, FREE);
        ++attempt;
    }

    // Fall back to the exhaustive search only when every random attempt failed
    if (!placed && !try_place_macro_exhaustively(router_macro, pr, block_type, FREE)) {
        VPR_FATAL_ERROR(VPR_ERROR_PLACE, "Could not place a router cluster within its constrained region");
    }
}
89+
90+
static void place_noc_routers_randomly (std::vector<ClusterBlockId>& unfixed_routers, int seed)
91+
{
92+
auto& place_ctx = g_vpr_ctx.placement();
93+
auto& noc_ctx = g_vpr_ctx.noc();
94+
auto& cluster_ctx = g_vpr_ctx.clustering();
95+
auto& device_ctx = g_vpr_ctx.device();
96+
97+
/*
98+
* Unconstrained NoC routers are placed randomly, then NoC cost is optimized using simulated annealing.
99+
* For random placement, physical NoC routers are shuffled, the logical NoC routers are assigned
100+
* to shuffled physical routers. This is equivalent to placing each logical NoC router at a
101+
* randomly selected physical router. The only difference is that an occupied physical NoC router
102+
* might be selected multiple times. Shuffling makes sure that each physical NoC router is evaluated
103+
* only once.
104+
*/
105+
106+
// Make a copy of NoC physical routers because we want to change its order
107+
vtr::vector<NocRouterId, NocRouter> noc_phy_routers = noc_ctx.noc_model.get_noc_routers();
108+
109+
// Shuffle physical NoC routers
110+
vtr::RandState rand_state = seed;
111+
vtr::shuffle(noc_phy_routers.begin(), noc_phy_routers.end(), rand_state);
112+
113+
// Get the logical block type for router
114+
const auto router_block_type = cluster_ctx.clb_nlist.block_type(noc_ctx.noc_traffic_flows_storage.get_router_clusters_in_netlist()[0]);
115+
116+
// Get the compressed grid for NoC
117+
const auto& compressed_noc_grid = place_ctx.compressed_block_grids[router_block_type->index];
118+
119+
// Iterate over shuffled physical routers to place logical routers
120+
// Since physical routers are shuffled, router placement would be random
121+
for (const auto& phy_router : noc_phy_routers) {
122+
t_physical_tile_loc router_phy_loc = phy_router.get_router_physical_location();
123+
124+
// Find a compatible sub-tile
125+
const auto& phy_type = device_ctx.grid.get_physical_type(router_phy_loc);
126+
const auto& compatible_sub_tiles = compressed_noc_grid.compatible_sub_tiles_for_tile.at(phy_type->index);
127+
int sub_tile = compatible_sub_tiles[vtr::irand((int)compatible_sub_tiles.size() - 1)];
128+
129+
t_pl_loc loc(router_phy_loc, sub_tile);
130+
131+
if (place_ctx.grid_blocks.is_sub_tile_empty(router_phy_loc, sub_tile)) {
132+
// Pick one of the unplaced routers
133+
auto logical_router_bid = unfixed_routers.back();
134+
unfixed_routers.pop_back();
135+
136+
// Create a macro with a single member
137+
t_pl_macro_member macro_member;
138+
macro_member.blk_index = logical_router_bid;
139+
macro_member.offset = t_pl_offset(0, 0, 0);
140+
t_pl_macro pl_macro;
141+
pl_macro.members.push_back(macro_member);
142+
143+
bool legal = try_place_macro(pl_macro, loc);
144+
if (!legal) {
145+
VPR_FATAL_ERROR(VPR_ERROR_PLACE, "Could not place a router cluster into an empty physical router.");
146+
}
147+
148+
// When all router clusters are placed, stop iterating over remaining physical routers
149+
if (unfixed_routers.empty()) {
150+
break;
151+
}
152+
}
153+
} // end for of random router placement
154+
}
155+
156+
static void noc_routers_anneal(const t_noc_opts& noc_opts)
157+
{
158+
auto& noc_ctx = g_vpr_ctx.noc();
159+
160+
// Only NoC related costs are considered
161+
t_placer_costs costs;
162+
163+
// Initialize NoC-related costs
164+
costs.noc_aggregate_bandwidth_cost = comp_noc_aggregate_bandwidth_cost();
165+
costs.noc_latency_cost = comp_noc_latency_cost(noc_opts);
166+
update_noc_normalization_factors(costs);
167+
costs.cost = calculate_noc_cost(costs, noc_opts);
168+
169+
// Maximum distance in each direction that a router can travel in a move
170+
// It is assumed that NoC routers are organized in a square grid.
171+
// Each router can initially move within the entire grid with a single swap.
172+
const size_t n_physical_routers = noc_ctx.noc_model.get_noc_routers().size();
173+
const float max_r_lim = ceilf(sqrtf((float)n_physical_routers));
174+
175+
// At most, two routers are swapped
176+
t_pl_blocks_to_be_moved blocks_affected(2);
177+
178+
// Total number of moves grows linearly with the number of logical NoC routers.
179+
// The constant factor was selected experimentally by running the algorithm on
180+
// synthetic benchmarks. NoC-related metrics did not improve after increasing
181+
// the constant factor above 35000.
182+
// Get all the router clusters and figure out how many of them exist
183+
const int num_router_clusters = noc_ctx.noc_traffic_flows_storage.get_router_clusters_in_netlist().size();
184+
const int N_MOVES = num_router_clusters * 35000;
185+
186+
const double starting_prob = 0.5;
187+
const double prob_step = starting_prob / N_MOVES;
188+
189+
// The checkpoint stored the placement with the lowest cost.
190+
NoCPlacementCheckpoint checkpoint;
191+
192+
/* Algorithm overview:
193+
* In each iteration, one logical NoC router and a physical NoC router are selected randomly.
194+
* If the selected physical NoC router is occupied, two logical NoC routers are swapped.
195+
* If not, the selected logical NoC router is moved to the vacant physical router.
196+
* Then, the cost difference of this swap is computed. If the swap reduces the cost,
197+
* it is always accepted. Swaps that increase the cost are accepted with a
198+
* gradually decreasing probability. The placement with the lowest cost is saved
199+
* as a checkpoint. When the annealing is over, if the checkpoint has a better
200+
* cost than the current placement, the checkpoint is restored.
201+
* Range limit and the probability of accepting swaps with positive delta cost
202+
* decrease linearly as more swaps are evaluated. Late in the annealing,
203+
* NoC routers are swapped only with their neighbors as the range limit approaches 1.
204+
*/
205+
206+
// Generate and evaluate router moves
207+
for (int i_move = 0; i_move < N_MOVES; i_move++) {
208+
e_create_move create_move_outcome = e_create_move::ABORT;
209+
clear_move_blocks(blocks_affected);
210+
// Shrink the range limit over time
211+
float r_lim_decayed = 1.0f + (N_MOVES - i_move) * (max_r_lim / N_MOVES);
212+
create_move_outcome = propose_router_swap(blocks_affected, r_lim_decayed);
213+
214+
if (create_move_outcome != e_create_move::ABORT) {
215+
apply_move_blocks(blocks_affected);
216+
217+
double noc_aggregate_bandwidth_delta_c = 0.0;
218+
double noc_latency_delta_c = 0.0;
219+
find_affected_noc_routers_and_update_noc_costs(blocks_affected, noc_aggregate_bandwidth_delta_c, noc_latency_delta_c, noc_opts);
220+
double delta_cost = (noc_opts.noc_placement_weighting) * (noc_latency_delta_c * costs.noc_latency_cost_norm + noc_aggregate_bandwidth_delta_c * costs.noc_aggregate_bandwidth_cost_norm);
221+
222+
double prob = starting_prob - i_move * prob_step;
223+
bool move_accepted = accept_noc_swap(delta_cost, prob);
224+
225+
if (move_accepted) {
226+
costs.cost += delta_cost;
227+
commit_move_blocks(blocks_affected);
228+
commit_noc_costs();
229+
costs.noc_aggregate_bandwidth_cost += noc_aggregate_bandwidth_delta_c;
230+
costs.noc_latency_cost += noc_latency_delta_c;
231+
if (costs.cost < checkpoint.get_cost() || !checkpoint.is_valid()) {
232+
checkpoint.save_checkpoint(costs.cost);
233+
}
234+
} else { // The proposed move is rejected
235+
revert_move_blocks(blocks_affected);
236+
revert_noc_traffic_flow_routes(blocks_affected);
237+
}
238+
}
239+
}
240+
241+
if (checkpoint.get_cost() < costs.cost) {
242+
checkpoint.restore_checkpoint(noc_opts, costs);
243+
}
244+
}
245+
246+
void initial_noc_placement(const t_noc_opts& noc_opts, int seed) {
247+
auto& noc_ctx = g_vpr_ctx.noc();
248+
249+
// Get all the router clusters
250+
const std::vector<ClusterBlockId>& router_blk_ids = noc_ctx.noc_traffic_flows_storage.get_router_clusters_in_netlist();
251+
252+
// Holds all the routers that are not fixed into a specific location by constraints
253+
std::vector<ClusterBlockId> unfixed_routers;
254+
255+
// Check for floorplanning constraints and place constrained NoC routers
256+
for (auto router_blk_id : router_blk_ids) {
257+
// The block is fixed and was placed in mark_fixed_blocks()
258+
if (is_block_placed((router_blk_id))) {
259+
continue;
260+
}
261+
262+
if (is_cluster_constrained(router_blk_id)) {
263+
place_constrained_noc_router(router_blk_id);
264+
} else {
265+
unfixed_routers.push_back(router_blk_id);
266+
}
267+
}
268+
269+
// Place unconstrained NoC routers randomly
270+
place_noc_routers_randomly(unfixed_routers,seed);
271+
272+
// populate internal data structures to maintain route, bandwidth usage, and latencies
273+
initial_noc_routing();
274+
275+
// Run the simulated annealing optimizer for NoC routers
276+
noc_routers_anneal(noc_opts);
277+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
2+
#ifndef VTR_INITIAL_NOC_PLACMENT_H
3+
#define VTR_INITIAL_NOC_PLACMENT_H
4+
5+
#include "vpr_types.h"
6+
7+
/**
8+
* @brief Randomly places NoC routers, then runs a quick simulated annealing
9+
* to minimize NoC costs.
10+
*
11+
* @param noc_opts NoC-related options. Used to calculate NoC-related costs.
* @param seed Seed for the random shuffle used when placing unconstrained NoC routers.
12+
*/
13+
void initial_noc_placement(const t_noc_opts& noc_opts, int seed);
14+
15+
#endif //VTR_INITIAL_NOC_PLACMENT_H

0 commit comments

Comments
 (0)