@@ -17,27 +17,27 @@ class KArmedBanditAgent {
     virtual ~KArmedBanditAgent() {}
 
     /**
-     * @brief Choose a move type to perform and a block type that move should be performed with based on Q-table
-     *
-     * @return A move type and a block type as a "t_propose_action" struct
-     * If the agent is set to only propose move type, then block type index in the struct will be set to -1
-     */
+     * @brief Choose a move type to perform, and the block type to perform it with, based on the Q-table.
+     *
+     * @return The move type and block type as a "t_propose_action" struct.
+     *         If the agent is set to propose only the move type, the block type index in the struct is set to -1.
+     */
     virtual t_propose_action propose_action() = 0;
 
     /**
-     * @brief Update the agent Q-table based on the reward received by the SA algorithm
-     *
-     * @param reward A double value calculated in "place.cpp" file showing how placement cost was affected by the prior action taken
-     * @param reward_func The reward function used by the agent, detail explanation can be found on "directed_moves_util.h" file
-     */
+     * @brief Update the agent's Q-table based on the reward received from the SA algorithm.
+     *
+     * @param reward A double value, calculated in "place.cpp", indicating how the prior action affected the placement cost.
+     * @param reward_func The reward function used by the agent; a detailed explanation can be found in "directed_moves_util.h".
+     */
     void process_outcome(double, e_reward_function);
 
     /**
-     * @brief write all agent internal information (Q-table, reward for each performed action, ...) to a file (agent_info_file_)
-     *
-     * @param last_action Last action performed by the RL-agent
-     * @param reward A double value calculated in "place.cpp" file showing how placement cost was affected by the prior action taken
-     */
+     * @brief Write all of the agent's internal information (Q-table, reward for each performed action, ...) to a file (agent_info_file_).
+     *
+     * @param last_action The last action performed by the RL agent.
+     * @param reward A double value, calculated in "place.cpp", indicating how the prior action affected the placement cost.
+     */
     void write_agent_info(int last_action, double reward);
 
   protected:
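For context, this interface is consumed in a propose/reward loop: the annealer calls propose_action(), applies the resulting move, and reports the outcome through process_outcome(). Below is a minimal sketch of the kind of incremental Q-score update process_outcome() might perform; the fixed step size alpha and the helper name update_q are assumptions for illustration, not VPR's actual code.

    #include <cstddef>
    #include <vector>

    // Illustrative only: a generic k-armed bandit update of the kind
    // process_outcome() might perform. The exponential-recency weighting
    // (a fixed step size alpha) is an assumption, not VPR's exact rule.
    void update_q(std::vector<float>& q, std::size_t action, float reward, float alpha) {
        // Move the chosen action's Q-score a fraction alpha toward the new
        // reward, so the influence of older rewards decays geometrically.
        q[action] += alpha * (reward - q[action]);
    }

With a fixed alpha, older rewards are forgotten at a geometric rate, which matches the "memory decay" role the gamma parameter plays in set_step() below.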
@@ -72,32 +72,31 @@ class EpsilonGreedyAgent : public KArmedBanditAgent {
     t_propose_action propose_action() override; // Returns the type of the next action as well as the block type the agent wishes to perform
 
   public:
-
     /**
-     * @brief Set the user-specified epsilon for the E-greedy agent
-     *
-     * @param epsilon Epsilon value for the agent, can be specified by the command-line option "--place_agent_epsilon"
-     * Epsilon default value is 0.3.
-     */
+     * @brief Set the user-specified epsilon for the E-greedy agent.
+     *
+     * @param epsilon Epsilon value for the agent; can be specified with the command-line option "--place_agent_epsilon".
+     *                The default epsilon value is 0.3.
+     */
     void set_epsilon(float epsilon);
 
     /**
-     * @brief Set equal action probability to all available actions.
-     */
+     * @brief Set an equal action probability for all available actions.
+     */
     void set_epsilon_action_prob();
 
     /**
-     * @brief Set step size for q-table updates
-     *
-     * @param gamma Controls how quickly the agent's memory decays, can be specified by the command-line option "--place_agent_gamma"
-     * Gamma default value is 0.05.
-     * @param move_lim Number of moves per temperature
-     */
+     * @brief Set the step size for Q-table updates.
+     *
+     * @param gamma Controls how quickly the agent's memory decays; can be specified with the command-line option "--place_agent_gamma".
+     *              The default gamma value is 0.05.
+     * @param move_lim Number of moves per temperature.
+     */
     void set_step(float gamma, int move_lim);
 
     /**
-     * @brief Initialize agent's Q-table and internal variable to zero (RL-agent learns everything throughout the placement run and has no prior knowledge)
-     */
+     * @brief Initialize the agent's Q-table and internal variables to zero (the RL agent learns everything during the placement run and has no prior knowledge).
+     */
     void init_q_scores();
 
   private:
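A minimal sketch of the epsilon-greedy policy this class implements: explore a uniformly random action with probability epsilon, otherwise exploit the action with the best Q-score. The flat Q-table and the helper name epsilon_greedy_pick are assumptions for illustration; the real agent selects a move type and, optionally, a block type.

    #include <algorithm>
    #include <cstddef>
    #include <iterator>
    #include <random>
    #include <vector>

    // Hypothetical helper: with probability epsilon pick any action uniformly
    // at random (explore), otherwise pick the highest-scoring one (exploit).
    std::size_t epsilon_greedy_pick(const std::vector<float>& q, float epsilon, std::mt19937& rng) {
        std::uniform_real_distribution<float> coin(0.0f, 1.0f);
        if (coin(rng) < epsilon) {
            std::uniform_int_distribution<std::size_t> any(0, q.size() - 1);
            return any(rng); // explore
        }
        // exploit: index of the maximum Q-score
        return static_cast<std::size_t>(std::distance(q.begin(), std::max_element(q.begin(), q.end())));
    }

A larger epsilon means more exploration; the 0.3 default noted above keeps roughly a third of the moves exploratory.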
@@ -122,32 +121,30 @@ class SoftmaxAgent : public KArmedBanditAgent {
     t_propose_action propose_action() override; // Returns the type of the next action as well as the block type the agent wishes to perform
 
   public:
-
     /**
-     * @brief Calculate the fraction of total netlist blocks for each agent block type and will be used by the "set_action_prob" function.
-     */
+     * @brief Calculate the fraction of total netlist blocks for each agent block type; this is used by the "set_action_prob" function.
+     */
     void set_block_ratio();
 
-
     /**
-     * @brief Set action probability for all available actions.
-     * If agent only proposes move type, the action probabilities would be equal for all move types at the beginning.
-     * If agent proposes both move and block type, the action_prob for each action would be based on its block type count in the netlist.
-     */
+     * @brief Set the action probability for all available actions.
+     *        If the agent proposes only the move type, the action probabilities start out equal for all move types.
+     *        If the agent proposes both move and block type, the action_prob of each action is based on its block type's count in the netlist.
+     */
     void set_action_prob();
 
     /**
-     * @brief Set step size for q-table updates
-     *
-     * @param gamma Controls how quickly the agent's memory decays, can be specified by the command-line option "--place_agent_gamma"
-     * Gamma default value is 0.05.
-     * @param move_lim Number of moves per temperature
-     */
+     * @brief Set the step size for Q-table updates.
+     *
+     * @param gamma Controls how quickly the agent's memory decays; can be specified with the command-line option "--place_agent_gamma".
+     *              The default gamma value is 0.05.
+     * @param move_lim Number of moves per temperature.
+     */
     void set_step(float gamma, int move_lim);
 
     /**
-     * @brief Initialize agent's Q-table and internal variable to zero (RL-agent learns everything throughout the placement run and has no prior knowledge)
-     */
+     * @brief Initialize the agent's Q-table and internal variables to zero (the RL agent learns everything during the placement run and has no prior knowledge).
+     */
     void init_q_scores();
 
   private:
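For comparison with the E-greedy agent, here is a sketch of the standard softmax (Boltzmann) conversion from Q-scores to the kind of action probabilities set_action_prob() maintains. The helper name softmax_probs is hypothetical, and the block-ratio weighting described above is deliberately omitted; subtracting max(q) before exp() is a common trick to avoid floating-point overflow.

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <vector>

    // Hypothetical helper: p_i = exp(q_i) / sum_j exp(q_j), shifted by
    // max(q) for numerical stability. Higher Q-scores get exponentially
    // larger selection probabilities, but every action keeps some chance.
    std::vector<double> softmax_probs(const std::vector<double>& q) {
        double max_q = *std::max_element(q.begin(), q.end());
        double sum = 0.0;
        std::vector<double> p(q.size());
        for (std::size_t i = 0; i < q.size(); ++i) {
            p[i] = std::exp(q[i] - max_q);
            sum += p[i];
        }
        for (double& v : p)
            v /= sum; // normalize into a probability distribution
        return p;
    }

Unlike epsilon-greedy, which explores uniformly, softmax biases exploration toward actions whose Q-scores are already close to the best one.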