import numpy as np
-import scipy
-from scipy import integrate
+
from .env import Env

-class TwoWheeledConstEnv(Env):
-    """ Two wheeled robot with constant goal Env
+def step_two_wheeled_env(curr_x, u, dt, method="Euler"):
+    """ step two wheeled environment
+
+    Args:
+        curr_x (numpy.ndarray): current state, shape(state_size, )
+        u (numpy.ndarray): input, shape(input_size, )
+        dt (float): sampling time
+    Returns:
+        next_x (numpy.ndarray): next state, shape(state_size, )
+
+    Notes:
+        TODO: deal with another method, like Runge-Kutta
    """
-    def __init__(self):
-        """
-        """
-        self.config = {"state_size" : 3,\
-                       "input_size" : 2,\
-                       "dt" : 0.01,\
-                       "max_step" : 500,\
-                       "input_lower_bound" : [-1.5, -3.14],\
-                       "input_upper_bound" : [1.5, 3.14],
-                       }
-
-        super(TwoWheeledEnv, self).__init__(self.config)
+    B = np.array([[np.cos(curr_x[-1]), 0.],
+                  [np.sin(curr_x[-1]), 0.],
+                  [0., 1.]])

-    def reset(self, init_x=None):
-        """ reset state
-        Returns:
-            init_x (numpy.ndarray): initial state, shape(state_size, )
-            info (dict): information
-        """
-        self.step_count = 0
-
-        self.curr_x = np.zeros(self.config["state_size"])
-
-        if init_x is not None:
-            self.curr_x = init_x
-
-        # goal
-        self.goal_state = np.array([0., 0, -2., 3.])
-
-        # clear memory
-        self.history_x = []
-        self.history_g_x = []
-
-        return self.curr_x, {"goal_state" : self.goal_state}
-
-    def step(self, u):
-        """
-        Args:
-            u (numpy.ndarray): input, shape(input_size, )
-        Returns:
-            next_x (numpy.ndarray): next state, shape(state_size, )
-            cost (float): costs
-            done (bool): end the simulation or not
-            info (dict): information
-        """
-        # clip action
-        u = np.clip(u,
-                    self.config["input_lower_bound"],
-                    self.config["input_lower_bound"])
+    x_dot = np.matmul(B, u[:, np.newaxis])

-        # step
-        next_x = np.matmul(self.A, self.curr_x[:, np.newaxis]) \
-                 + np.matmul(self.B, u[:, np.newaxis])
-
-        # TODO: implement costs
+    next_x = x_dot.flatten() * dt + curr_x

-        # save history
-        self.history_x.append(next_x.flatten())
-        self.history_g_x.append(self.goal_state.flatten())
-
-        # update
-        self.curr_x = next_x.flatten()
-        # update costs
-        self.step_count += 1
+    return next_x

-        return next_x.flatten(), 0., self.step_count > self.config["max_step"], {"goal_state" : self.goal_state}
-
-class TwoWheeledEnv(Env):
-    """ Two wheeled robot Env
+class TwoWheeledConstEnv(Env):
+    """ Two wheeled robot with constant goal Env
    """
    def __init__(self):
        """
        """
        self.config = {"state_size" : 3,\
                       "input_size" : 2,\
                       "dt" : 0.01,\
-                       "max_step" : 500,\
+                       "max_step" : 1000,\
                       "input_lower_bound" : [-1.5, -3.14],\
                       "input_upper_bound" : [1.5, 3.14],
                       }

-        super(TwoWheeledEnv, self).__init__(self.config)
+        super(TwoWheeledConstEnv, self).__init__(self.config)

    def reset(self, init_x=None):
        """ reset state
+
        Returns:
            init_x (numpy.ndarray): initial state, shape(state_size, )
            info (dict): information
@@ -103,16 +56,17 @@ def reset(self, init_x=None):
            self.curr_x = init_x

        # goal
-        self.goal_state = np.array([0., 0, -2., 3.])
+        self.g_x = np.array([5., 5., 0.])

        # clear memory
        self.history_x = []
        self.history_g_x = []

-        return self.curr_x, {"goal_state" : self.goal_state}
+        return self.curr_x, {"goal_state" : self.g_x}

    def step(self, u):
-        """
+        """ step environment
+
        Args:
            u (numpy.ndarray): input, shape(input_size, )
        Returns:
@@ -124,22 +78,25 @@ def step(self, u):
        # clip action
        u = np.clip(u,
                    self.config["input_lower_bound"],
-                    self.config["input_lower_bound"])
+                    self.config["input_upper_bound"])

        # step
-        next_x = np.matmul(self.A, self.curr_x[:, np.newaxis]) \
-                 + np.matmul(self.B, u[:, np.newaxis])
+        next_x = step_two_wheeled_env(self.curr_x, u, self.config["dt"])

-        # TODO: implement costs
+        # TODO: costs
+        costs = 0.
+        costs += 0.1 * np.sum(u**2)
+        costs += np.sum((self.curr_x - self.g_x)**2)

        # save history
        self.history_x.append(next_x.flatten())
-        self.history_g_x.append(self.goal_state.flatten())
+        self.history_g_x.append(self.g_x.flatten())

        # update
        self.curr_x = next_x.flatten()
        # update costs
        self.step_count += 1

-        return next_x.flatten(), 0., self.step_count > self.config["max_step"], {"goal_state" : self.goal_state}
-
+        return next_x.flatten(), costs, \
+               self.step_count > self.config["max_step"], \
+               {"goal_state" : self.g_x}
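
For reference, a minimal usage sketch of the new environment, assuming TwoWheeledConstEnv is importable from this module:

import numpy as np

env = TwoWheeledConstEnv()
curr_x, info = env.reset()

done = False
while not done:
    # constant forward velocity and yaw rate; step() clips to the input bounds
    u = np.array([1.0, 0.1])
    curr_x, cost, done, info = env.step(u)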