Skip to content

Commit 4e01264

Browse files
author
Shunichi09
committed
Add: Environments.md and MPPIWilliamns
1 parent a36a8bc commit 4e01264

File tree

20 files changed

+669
-30
lines changed

20 files changed

+669
-30
lines changed

Environments.md

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Environments
2+
3+
| Name | Linear | Nonlinear | State Size | Input size |
4+
|:----------|:---------------:|:----------------:|:----------------:|:----------------:|
5+
| First Order Lag System | ✓ | x | 4 | 2 |
6+
| Two wheeled System (Constant Goal) | x | ✓ | 3 | 2 |
7+
| Two wheeled System (Moving Goal) (Coming soon) | x | ✓ | 3 | 2 |
8+
| Cartpole (Swing up) | x | ✓ | 4 | 1 |
9+
10+
## FirstOrderLagEnv
11+
12+
System equations.
13+
14+
<img src="assets/firstorderlag.png" width="550">
15+
16+
You can set an arbitrary time constant, tau. The default is 0.63 s.
17+
18+
## TwoWheeledEnv
19+
20+
System equations.
21+
22+
<img src="assets/twowheeled.png" width="300">
23+
24+
## CartpoleEnv (Swing up)
25+
26+
System equations.
27+
28+
<img src="assets/cartpole.png" width="600">
29+
30+
You can set arbitrary parameters, mc, mp, l and g.
31+
32+
Default settings are as follows:
33+
34+
mc = 1, mp = 0.2, l = 0.5, g = 9.8
Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1 @@
11
import numpy as np
2-

PythonLinearNonlinearControl/configs/first_order_lag.py

Lines changed: 113 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ class FirstOrderLagConfigModule():
55
ENV_NAME = "FirstOrderLag-v0"
66
TYPE = "Linear"
77
TASK_HORIZON = 1000
8-
PRED_LEN = 10
8+
PRED_LEN = 50
99
STATE_SIZE = 4
1010
INPUT_SIZE = 2
1111
DT = 0.05
@@ -43,8 +43,33 @@ def __init__(self):
4343
"kappa": 0.9,
4444
"noise_sigma": 0.5,
4545
},
46+
"MPPIWilliams":{
47+
"popsize": 5000,
48+
"lambda": 1.,
49+
"noise_sigma": 0.9,
50+
},
4651
"MPC":{
47-
}
52+
},
53+
"iLQR":{
54+
"max_iter": 500,
55+
"init_mu": 1.,
56+
"mu_min": 1e-6,
57+
"mu_max": 1e10,
58+
"init_delta": 2.,
59+
"threshold": 1e-6,
60+
},
61+
"DDP":{
62+
"max_iter": 500,
63+
"init_mu": 1.,
64+
"mu_min": 1e-6,
65+
"mu_max": 1e10,
66+
"init_delta": 2.,
67+
"threshold": 1e-6,
68+
},
69+
"NMPC-CGMRES":{
70+
},
71+
"NMPC-Newton":{
72+
},
4873
}
4974

5075
@staticmethod
@@ -86,4 +111,89 @@ def terminal_state_cost_fn(terminal_x, terminal_g_x):
86111
shape(pop_size, pred_len)
87112
"""
88113
return ((terminal_x - terminal_g_x)**2) \
89-
* np.diag(FirstOrderLagConfigModule.Sf)
114+
* np.diag(FirstOrderLagConfigModule.Sf)
115+
116+
@staticmethod
def gradient_cost_fn_with_state(x, g_x, terminal=False):
    """ gradient of the state cost with respect to the state

    Args:
        x (numpy.ndarray): state, shape(pred_len, state_size)
            (for the terminal step a single state is expected)
        g_x (numpy.ndarray): goal state, same shape as x
        terminal (bool): when True the terminal weight Sf is used
            instead of the stage weight Q and a leading axis is added

    Returns:
        l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size)
            or shape(1, state_size)
    """
    error = x - g_x

    if terminal:
        # terminal step: weight with diag(Sf) and prepend a time axis
        terminal_weights = np.diag(FirstOrderLagConfigModule.Sf)
        return (2. * error * terminal_weights)[np.newaxis, :]

    # stage cost: weight with diag(Q)
    stage_weights = np.diag(FirstOrderLagConfigModule.Q)
    return 2. * error * stage_weights
133+
134+
@staticmethod
def gradient_cost_fn_with_input(x, u):
    """ gradient of the input cost with respect to the input

    Args:
        x (numpy.ndarray): state, shape(pred_len, state_size)
            (not used by this cost)
        u (numpy.ndarray): input, shape(pred_len, input_size)

    Returns:
        l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size)
    """
    doubled_input = 2. * u
    # only the diagonal of R enters the element-wise product
    input_weights = np.diag(FirstOrderLagConfigModule.R)
    return doubled_input * input_weights
146+
147+
@staticmethod
def hessian_cost_fn_with_state(x, g_x, terminal=False):
    """ hessian of the state cost with respect to the state

    The state cost is quadratic in (x - g_x), so its hessian is the
    constant matrix 2 * Q (2 * Sf for the terminal step). It does not
    depend on x or g_x; g_x is kept only for interface compatibility.

    Args:
        x (numpy.ndarray): state, shape(pred_len, state_size)
            (only its leading dimension is read, and only when
            terminal is False)
        g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
            (unused)
        terminal (bool): when True return the terminal hessian built
            from Sf instead of the stage hessian built from Q

    Returns:
        l_xx (numpy.ndarray): hessian of cost,
            shape(pred_len, state_size, state_size) or
            shape(1, state_size, state_size)
    """
    if not terminal:
        (pred_len, _) = x.shape
        # same constant hessian for every step of the horizon
        return np.tile(2.*FirstOrderLagConfigModule.Q, (pred_len, 1, 1))

    # terminal step: single hessian with a leading axis of length 1
    return np.tile(2.*FirstOrderLagConfigModule.Sf, (1, 1, 1))
167+
168+
@staticmethod
def hessian_cost_fn_with_input(x, u):
    """ hessian of the input cost with respect to the input

    Args:
        x (numpy.ndarray): state, shape(pred_len, state_size)
            (not used by this cost)
        u (numpy.ndarray): input, shape(pred_len, input_size)

    Returns:
        l_uu (numpy.ndarray): hessian of cost,
            shape(pred_len, input_size, input_size)
    """
    horizon, _ = u.shape

    # the hessian is the constant 2 * R, repeated for every step
    per_step_hessian = 2. * FirstOrderLagConfigModule.R
    return np.tile(per_step_hessian, (horizon, 1, 1))
183+
184+
@staticmethod
def hessian_cost_fn_with_input_state(x, u):
    """ mixed hessian of the cost with respect to the input and state

    The result is always zero: this function only allocates a zero
    array of the right shape.

    Args:
        x (numpy.ndarray): state, shape(pred_len, state_size)
        u (numpy.ndarray): input, shape(pred_len, input_size)

    Returns:
        l_ux (numpy.ndarray): hessian of cost,
            shape(pred_len, input_size, state_size)
    """
    _, n_states = x.shape
    horizon, n_inputs = u.shape

    out_shape = (horizon, n_inputs, n_states)
    return np.zeros(out_shape)

PythonLinearNonlinearControl/configs/two_wheeled.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,11 @@ def __init__(self):
3939
"kappa": 0.9,
4040
"noise_sigma": 0.5,
4141
},
42+
"MPPIWilliams":{
43+
"popsize": 5000,
44+
"lambda": 1,
45+
"noise_sigma": 1.,
46+
},
4247
"iLQR":{
4348
"max_iter": 500,
4449
"init_mu": 1.,

PythonLinearNonlinearControl/controllers/ddp.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,6 @@ def __init__(self, config, model):
2323
"""
2424
super(DDP, self).__init__(config, model)
2525

26-
if config.TYPE != "Nonlinear":
27-
raise ValueError("{} could be not applied to \
28-
this controller".format(model))
29-
3026
# model
3127
self.model = model
3228

@@ -296,6 +292,7 @@ def _calc_gradient_hessian_cost(self, pred_xs, g_x, sol):
296292

297293
def backward(self, f_x, f_u, f_xx, f_ux, f_uu, l_x, l_xx, l_u, l_uu, l_ux):
298294
""" backward step of iLQR
295+
299296
Args:
300297
f_x (numpy.ndarray): gradient of model with respecto to state,
301298
shape(pred_len+1, state_size, state_size)
@@ -317,7 +314,6 @@ def backward(self, f_x, f_u, f_xx, f_ux, f_uu, l_x, l_xx, l_u, l_uu, l_ux):
317314
shape(pred_len, input_size, input_size)
318315
l_ux (numpy.ndarray): hessian of cost with respect
319316
to state and input, shape(pred_len, input_size, state_size)
320-
321317
Returns:
322318
k (numpy.ndarray): gain, shape(pred_len, input_size)
323319
K (numpy.ndarray): gain, shape(pred_len, input_size, state_size)
@@ -353,7 +349,8 @@ def backward(self, f_x, f_u, f_xx, f_ux, f_uu, l_x, l_xx, l_u, l_uu, l_ux):
353349

354350
def _Q(self, f_x, f_u, f_xx, f_ux, f_uu,
355351
l_x, l_u, l_xx, l_ux, l_uu, V_x, V_xx):
356-
"""Computes second order expansion.
352+
""" compute Q function valued
353+
357354
Args:
358355
f_x (numpy.ndarray): gradient of model with respecto to state,
359356
shape(state_size, state_size)

PythonLinearNonlinearControl/controllers/ilqr.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,6 @@ def __init__(self, config, model):
2121
"""
2222
super(iLQR, self).__init__(config, model)
2323

24-
if config.TYPE != "Nonlinear":
25-
raise ValueError("{} could be not applied to \
26-
this controller".format(model))
27-
2824
# model
2925
self.model = model
3026

PythonLinearNonlinearControl/controllers/make_controllers.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from .cem import CEM
33
from .random import RandomShooting
44
from .mppi import MPPI
5+
from .mppi_williams import MPPIWilliams
56
from .ilqr import iLQR
67
from .ddp import DDP
78

@@ -15,6 +16,8 @@ def make_controller(args, config, model):
1516
return RandomShooting(config, model)
1617
elif args.controller_type == "MPPI":
1718
return MPPI(config, model)
19+
elif args.controller_type == "MPPIWilliams":
20+
return MPPIWilliams(config, model)
1821
elif args.controller_type == "iLQR":
1922
return iLQR(config, model)
2023
elif args.controller_type == "DDP":
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
from logging import getLogger
2+
3+
import numpy as np
4+
import scipy.stats as stats
5+
6+
from .controller import Controller
7+
from ..envs.cost import calc_cost
8+
9+
logger = getLogger(__name__)
10+
11+
class MPPIWilliams(Controller):
    """ Model Predictive Path Integral for linear and nonlinear method

    Sampling based controller: perturbs the previous input sequence with
    Gaussian noise, evaluates every perturbed sequence with the model and
    updates the sequence with an exponentially weighted average of the noise.

    Attributes:
        history_u (list[numpy.ndarray]): time history of optimal input
    Ref:
        G. Williams et al., "Information theoretic MPC
        for model-based reinforcement learning,"
        2017 IEEE International Conference on Robotics and Automation (ICRA),
        Singapore, 2017, pp. 1714-1721.
    """
    def __init__(self, config, model):
        """
        Args:
            config: configuration object; PRED_LEN, INPUT_SIZE,
                INPUT_UPPER_BOUND, INPUT_LOWER_BOUND,
                opt_config["MPPIWilliams"] and the cost functions
                are read from it
            model: model object; its predict_traj(curr_x, samples)
                is used to roll out the sampled inputs
        """
        super(MPPIWilliams, self).__init__(config, model)

        # model
        self.model = model

        # general parameters
        self.pred_len = config.PRED_LEN
        self.input_size = config.INPUT_SIZE

        # mppi parameters
        self.pop_size = config.opt_config["MPPIWilliams"]["popsize"]
        self.lam = config.opt_config["MPPIWilliams"]["lambda"]
        self.noise_sigma = config.opt_config["MPPIWilliams"]["noise_sigma"]
        self.opt_dim = self.input_size * self.pred_len

        # get bound
        self.input_upper_bounds = np.tile(config.INPUT_UPPER_BOUND,
                                          (self.pred_len, 1))
        self.input_lower_bounds = np.tile(config.INPUT_LOWER_BOUND,
                                          (self.pred_len, 1))

        # get cost func
        self.state_cost_fn = config.state_cost_fn
        self.terminal_state_cost_fn = config.terminal_state_cost_fn
        self.input_cost_fn = config.input_cost_fn

        # init mean: midpoint of the bounds, repeated over the horizon
        self.prev_sol = np.tile((config.INPUT_UPPER_BOUND
                                 + config.INPUT_LOWER_BOUND) / 2.,
                                self.pred_len)
        self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size)

        # save
        self.history_u = [np.zeros(self.input_size)]

    def clear_sol(self):
        """ clear prev sol

        Resets the stored input sequence to the midpoint of the bounds.
        """
        logger.debug("Clear Solution")
        self.prev_sol = \
            (self.input_upper_bounds + self.input_lower_bounds) / 2.
        self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size)

    def calc_cost(self, curr_x, samples, g_xs):
        """ calculate the cost of input samples by using MPPI's eq

        Only the state and terminal state costs are evaluated here
        (None is passed as the input cost function); the control cost
        term is added by obtain_sol.

        Args:
            curr_x (numpy.ndarray): shape(state_size),
                current robot position
            samples (numpy.ndarray): input samples; obtain_sol passes
                shape(pop_size, pred_len, input_size)
            g_xs (numpy.ndarray): shape(pred_len, state_size),
                goal states
        Returns:
            costs (numpy.ndarray): shape(pop_size, )
        """
        # get size
        pop_size = samples.shape[0]
        g_xs = np.tile(g_xs, (pop_size, 1, 1))

        # calc cost, pred_xs.shape = (pop_size, pred_len+1, state_size)
        pred_xs = self.model.predict_traj(curr_x, samples)

        # get particle cost
        costs = calc_cost(pred_xs, samples, g_xs,
                          self.state_cost_fn, None,
                          self.terminal_state_cost_fn)

        return costs

    def obtain_sol(self, curr_x, g_xs):
        """ calculate the optimal inputs

        Also shifts the stored input sequence by one step (the last
        input is repeated) and appends the returned input to history_u.

        Args:
            curr_x (numpy.ndarray): current state, shape(state_size, )
            g_xs (numpy.ndarray): goal trajectory, shape(plan_len, state_size)
        Returns:
            opt_input (numpy.ndarray): optimal input, shape(input_size, )
        """
        # get noised inputs, noise ~ N(0, noise_sigma^2)
        noise = np.random.normal(
            loc=0, scale=1.0, size=(self.pop_size, self.pred_len,
                                    self.input_size)) * self.noise_sigma

        noised_inputs = self.prev_sol + noise

        # clip actions
        noised_inputs = np.clip(
            noised_inputs, self.input_lower_bounds, self.input_upper_bounds)

        # calc cost
        costs = self.calc_cost(curr_x, noised_inputs, g_xs)

        # control cost term lambda * u^T Sigma^{-1} eps of the reference.
        # noise is already scaled by noise_sigma, so with
        # Sigma = noise_sigma^2 * I the inverse covariance is
        # 1 / noise_sigma^2 (not 1 / noise_sigma).
        costs += np.sum(np.sum(
            self.lam * self.prev_sol * noise / (self.noise_sigma ** 2),
            axis=-1), axis=-1)

        # mppi update
        # subtract the minimum cost so that the largest exponent is 0
        beta = np.min(costs)
        exp_costs = np.exp(- 1. / self.lam * (costs - beta))

        # normalization constant: sum over the samples (one value when
        # costs is 1-D); the small offset guards the division below
        eta = np.sum(exp_costs, axis=0) + 1e-10

        # weight of each sample
        weights = exp_costs / eta

        # update inputs with the weighted average of the noise
        sol = self.prev_sol \
            + np.sum(weights[:, np.newaxis, np.newaxis] * noise, axis=0)

        # update: warm start the next call with the shifted solution
        self.prev_sol[:-1] = sol[1:]
        self.prev_sol[-1] = sol[-1]  # last use the terminal input

        # log
        self.history_u.append(sol[0])

        return sol[0]

    def __str__(self):
        return "MPPIWilliams"

0 commit comments

Comments
 (0)