Shunichi09
diff --git a/‎Environments.md‎
Lines changed: 34 additions & 0 deletions b/‎Environments.md‎
Lines changed: 34 additions & 0 deletions
diff --git a/‎PythonLinearNonlinearControl/common/utils.py‎
Lines changed: 0 additions & 1 deletion b/‎PythonLinearNonlinearControl/common/utils.py‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎PythonLinearNonlinearControl/configs/first_order_lag.py‎
Lines changed: 113 additions & 3 deletions b/‎PythonLinearNonlinearControl/configs/first_order_lag.py‎
Lines changed: 113 additions & 3 deletions
diff --git a/‎PythonLinearNonlinearControl/configs/two_wheeled.py‎
Lines changed: 5 additions & 0 deletions b/‎PythonLinearNonlinearControl/configs/two_wheeled.py‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎PythonLinearNonlinearControl/controllers/ddp.py‎
Lines changed: 3 additions & 6 deletions b/‎PythonLinearNonlinearControl/controllers/ddp.py‎
Lines changed: 3 additions & 6 deletions
diff --git a/‎PythonLinearNonlinearControl/controllers/ilqr.py‎
Lines changed: 0 additions & 4 deletions b/‎PythonLinearNonlinearControl/controllers/ilqr.py‎
Lines changed: 0 additions & 4 deletions
diff --git a/‎PythonLinearNonlinearControl/controllers/make_controllers.py‎
Lines changed: 3 additions & 0 deletions b/‎PythonLinearNonlinearControl/controllers/make_controllers.py‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎PythonLinearNonlinearControl/controllers/mppi_williams.py‎
Lines changed: 143 additions & 0 deletions b/‎PythonLinearNonlinearControl/controllers/mppi_williams.py‎
Lines changed: 143 additions & 0 deletions
@@ -0,0 +1,34 @@
+# Environments
+
+| Name | Linear | Nonlinear | State Size | Input size |
+|:----------|:---------------:|:----------------:|:----------------:|:----------------:|
+| First Order Lag System | ✓ | x | 4 | 2 | 
+| Two wheeled System (Constant Goal) | x | ✓ | 3 | 2 | 
+| Two wheeled System (Moving Goal) (Coming soon) | x | ✓ | 3 | 2 | 
+| Cartpole (Swing up) | x | ✓ | 4 | 1 | 
+
+## FirstOrderLagEnv
+
+System equations.
+
+<img src="assets/firstorderlag.png" width="550">
+
+You can set an arbitrary time constant, tau. The default is 0.63 s.
+
+## TwoWheeledEnv
+
+System equations.
+
+<img src="assets/twowheeled.png" width="300">
+
+## CartpoleEnv (Swing up)
+
+System equations.
+
+<img src="assets/cartpole.png" width="600">
+
+You can set arbitrary parameters: mc, mp, l and g.
+
+Default settings are as follows:
+
+mc = 1, mp = 0.2, l = 0.5, g = 9.8
@@ -1,2 +1 @@
 import numpy as np
-
@@ -5,7 +5,7 @@ class FirstOrderLagConfigModule():
     ENV_NAME = "FirstOrderLag-v0"
     TYPE = "Linear"
     TASK_HORIZON = 1000
-    PRED_LEN = 10
+    PRED_LEN = 50
     STATE_SIZE = 4
     INPUT_SIZE = 2
     DT = 0.05
@@ -43,8 +43,33 @@ def __init__(self):
                 "kappa": 0.9,
                 "noise_sigma": 0.5,
             },
+            "MPPIWilliams":{
+                "popsize": 5000,
+                "lambda": 1.,
+                "noise_sigma": 0.9,
+            },
            "MPC":{
-           }
+           },
+           "iLQR":{
+                "max_iter": 500,
+                "init_mu": 1.,
+                "mu_min": 1e-6,
+                "mu_max": 1e10,
+                "init_delta": 2.,
+                "threshold": 1e-6,
+           },
+           "DDP":{
+                "max_iter": 500,
+                "init_mu": 1.,
+                "mu_min": 1e-6,
+                "mu_max": 1e10,
+                "init_delta": 2.,
+                "threshold": 1e-6,
+           },
+           "NMPC-CGMRES":{
+           },
+           "NMPC-Newton":{
+           },
         }   
 
     @staticmethod
@@ -86,4 +111,89 @@ def terminal_state_cost_fn(terminal_x, terminal_g_x):
                 shape(pop_size, pred_len)
         """
         return ((terminal_x - terminal_g_x)**2) \
-                * np.diag(FirstOrderLagConfigModule.Sf)
+                * np.diag(FirstOrderLagConfigModule.Sf)
+    
+    @staticmethod
+    def gradient_cost_fn_with_state(x, g_x, terminal=False):
+        """ gradient of the quadratic state cost with respect to the state
+
+        Args:
+            x (numpy.ndarray): state, shape(pred_len, state_size)
+            g_x (numpy.ndarray): goal state, shape(pred_len, state_size)
+            terminal (bool): if True, the error is weighted with the
+                diagonal of Sf instead of Q and a leading axis is
+                prepended to the result
+
+        Returns:
+            l_x (numpy.ndarray): gradient of cost, shape(pred_len, state_size)
+                or shape(1, state_size)
+        """
+        doubled_error = 2. * (x - g_x)
+
+        if terminal:
+            # terminal cost uses Sf and gains a leading axis
+            sf_diag = np.diag(FirstOrderLagConfigModule.Sf)
+            return (doubled_error * sf_diag)[np.newaxis, :]
+
+        return doubled_error * np.diag(FirstOrderLagConfigModule.Q)
+
+    @staticmethod
+    def gradient_cost_fn_with_input(x, u):
+        """ gradient of the quadratic input cost with respect to the input
+
+        Args:
+            x (numpy.ndarray): state, shape(pred_len, state_size),
+                not read by this function
+            u (numpy.ndarray): input, shape(pred_len, input_size)
+
+        Returns:
+            l_u (numpy.ndarray): gradient of cost, shape(pred_len, input_size)
+        """
+        doubled_u = 2. * u
+        r_diag = np.diag(FirstOrderLagConfigModule.R)
+
+        # element-wise weighting by the diagonal of R
+        return doubled_u * r_diag
+
+    @staticmethod
+    def hessian_cost_fn_with_state(x, g_x, terminal=False):
+        """ hessian of the quadratic state cost with respect to the state
+
+        The state cost is quadratic in (x - g_x), weighted by Q for the
+        stage cost and by Sf for the terminal cost, so its hessian is the
+        constant 2Q (or 2Sf) and does not depend on x or g_x.
+
+        Args:
+            x (numpy.ndarray): state, shape(pred_len, state_size);
+                only its leading dimension is read, and only when
+                terminal is False
+            g_x (numpy.ndarray): goal state, shape(pred_len, state_size),
+                not read by this function
+            terminal (bool): if True, return the hessian of the terminal
+                cost (weight Sf) with a single leading entry
+
+        Returns:
+            l_xx (numpy.ndarray): hessian of cost,
+                shape(pred_len, state_size, state_size) or
+                shape(1, state_size, state_size)
+        """
+        if not terminal:
+            (pred_len, _) = x.shape
+            # one copy of 2Q per prediction step
+            return np.tile(2.*FirstOrderLagConfigModule.Q, (pred_len, 1, 1))
+
+        return np.tile(2.*FirstOrderLagConfigModule.Sf, (1, 1, 1))
+
+    @staticmethod
+    def hessian_cost_fn_with_input(x, u):
+        """ hessian of the quadratic input cost with respect to the input
+
+        Args:
+            x (numpy.ndarray): state, shape(pred_len, state_size),
+                not read by this function
+            u (numpy.ndarray): input, shape(pred_len, input_size)
+
+        Returns:
+            l_uu (numpy.ndarray): hessian of cost,
+                shape(pred_len, input_size, input_size)
+        """
+        horizon, _ = u.shape
+        doubled_r = 2.*FirstOrderLagConfigModule.R
+
+        # one copy of 2R per prediction step
+        return np.tile(doubled_r, (horizon, 1, 1))
+    
+    @staticmethod
+    def hessian_cost_fn_with_input_state(x, u):
+        """ mixed hessian of the cost with respect to the input and state
+
+        Always returns zeros: no cross term between state and input
+        is produced here.
+
+        Args:
+            x (numpy.ndarray): state, shape(pred_len, state_size)
+            u (numpy.ndarray): input, shape(pred_len, input_size)
+
+        Returns:
+            l_ux (numpy.ndarray): hessian of cost,
+                shape(pred_len, input_size, state_size)
+        """
+        _, n_states = x.shape
+        n_steps, n_inputs = u.shape
+        out_shape = (n_steps, n_inputs, n_states)
+
+        return np.zeros(out_shape)
@@ -39,6 +39,11 @@ def __init__(self):
                 "kappa": 0.9,
                 "noise_sigma": 0.5,
             },
+            "MPPIWilliams":{
+                "popsize": 5000,
+                "lambda": 1,
+                "noise_sigma": 1.,
+            },
            "iLQR":{
                 "max_iter": 500,
                 "init_mu": 1.,
 
@@ -23,10 +23,6 @@ def __init__(self, config, model):
         """
         super(DDP, self).__init__(config, model)
 
-        if config.TYPE != "Nonlinear":
-            raise ValueError("{} could be not applied to \
-                              this controller".format(model))
-
         # model
         self.model = model
 
@@ -296,6 +292,7 @@ def _calc_gradient_hessian_cost(self, pred_xs, g_x, sol):
 
     def backward(self, f_x, f_u, f_xx, f_ux, f_uu, l_x, l_xx, l_u, l_uu, l_ux):
         """ backward step of iLQR
+
         Args:
             f_x (numpy.ndarray): gradient of model with respecto to state,
                 shape(pred_len+1, state_size, state_size)
@@ -317,7 +314,6 @@ def backward(self, f_x, f_u, f_xx, f_ux, f_uu, l_x, l_xx, l_u, l_uu, l_ux):
                 shape(pred_len, input_size, input_size)
             l_ux (numpy.ndarray): hessian of cost with respect
                 to state and input, shape(pred_len, input_size, state_size)
-        
         Returns:
             k (numpy.ndarray): gain, shape(pred_len, input_size)
             K (numpy.ndarray): gain, shape(pred_len, input_size, state_size)
@@ -353,7 +349,8 @@ def backward(self, f_x, f_u, f_xx, f_ux, f_uu, l_x, l_xx, l_u, l_uu, l_ux):
 
     def _Q(self, f_x, f_u, f_xx, f_ux, f_uu,
            l_x, l_u, l_xx, l_ux, l_uu, V_x, V_xx):
-        """Computes second order expansion.
+        """ compute Q function valued
+
         Args:
             f_x (numpy.ndarray): gradient of model with respecto to state,
                 shape(state_size, state_size)
 
@@ -21,10 +21,6 @@ def __init__(self, config, model):
         """
         super(iLQR, self).__init__(config, model)
 
-        if config.TYPE != "Nonlinear":
-            raise ValueError("{} could be not applied to \
-                              this controller".format(model))
-
         # model
         self.model = model
 
 
@@ -2,6 +2,7 @@
 from .cem import CEM
 from .random import RandomShooting
 from .mppi import MPPI
+from .mppi_williams import MPPIWilliams
 from .ilqr import iLQR
 from .ddp import DDP
 
@@ -15,6 +16,8 @@ def make_controller(args, config, model):
         return RandomShooting(config, model)
     elif args.controller_type == "MPPI":
         return MPPI(config, model)
+    elif args.controller_type == "MPPIWilliams":
+        return MPPIWilliams(config, model)
     elif args.controller_type == "iLQR":
         return iLQR(config, model)
     elif args.controller_type == "DDP":
 
@@ -0,0 +1,143 @@
+from logging import getLogger
+
+import numpy as np
+import scipy.stats as stats
+
+from .controller import Controller
+from ..envs.cost import calc_cost
+
+logger = getLogger(__name__)
+
+class MPPIWilliams(Controller):
+    """ Model Predictive Path Integral for linear and nonlinear method
+
+    Sampling-based controller: each call to obtain_sol perturbs the
+    previous input sequence with Gaussian noise, scores the perturbed
+    sequences, and updates the sequence with an exponentially weighted
+    average of the noise.
+
+    Attributes:
+        history_u (list[numpy.ndarray]): time history of optimal input
+    Ref:
+        G. Williams et al., "Information theoretic MPC
+        for model-based reinforcement learning,"
+        2017 IEEE International Conference on Robotics and Automation (ICRA),
+        Singapore, 2017, pp. 1714-1721.
+    """
+    def __init__(self, config, model):
+        """ read the horizon, bounds, cost functions and MPPI parameters
+
+        Args:
+            config: configuration object; this method reads PRED_LEN,
+                INPUT_SIZE, INPUT_UPPER_BOUND, INPUT_LOWER_BOUND,
+                opt_config["MPPIWilliams"] and the three cost functions
+            model: prediction model; calc_cost calls its predict_traj
+        """
+        super(MPPIWilliams, self).__init__(config, model)
+
+        # model
+        self.model = model
+
+        # general parameters
+        self.pred_len = config.PRED_LEN
+        self.input_size = config.INPUT_SIZE
+
+        # mppi parameters
+        self.pop_size = config.opt_config["MPPIWilliams"]["popsize"]
+        self.lam = config.opt_config["MPPIWilliams"]["lambda"]
+        self.noise_sigma = config.opt_config["MPPIWilliams"]["noise_sigma"]
+        # not read anywhere else in this class
+        self.opt_dim = self.input_size * self.pred_len
+
+        # get bound, repeated for every step of the horizon
+        self.input_upper_bounds = np.tile(config.INPUT_UPPER_BOUND,
+                                          (self.pred_len, 1))
+        self.input_lower_bounds = np.tile(config.INPUT_LOWER_BOUND,
+                                          (self.pred_len, 1))
+
+        # get cost func
+        self.state_cost_fn = config.state_cost_fn
+        self.terminal_state_cost_fn = config.terminal_state_cost_fn
+        # stored, but calc_cost below passes None for the input cost
+        self.input_cost_fn = config.input_cost_fn
+
+        # init mean: midpoint of the input bounds at every step,
+        # reshaped to (pred_len, input_size)
+        self.prev_sol = np.tile((config.INPUT_UPPER_BOUND \
+                                 + config.INPUT_LOWER_BOUND) / 2.,
+                                self.pred_len)
+        self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size)
+
+        # save
+        self.history_u = [np.zeros(self.input_size)]
+    
+    def clear_sol(self):
+        """ clear prev sol
+
+        Resets prev_sol to the midpoint of the input bounds.
+        """
+        logger.debug("Clear Solution")
+        self.prev_sol = \
+            (self.input_upper_bounds + self.input_lower_bounds) / 2.
+        self.prev_sol = self.prev_sol.reshape(self.pred_len, self.input_size)
+    
+    def calc_cost(self, curr_x, samples, g_xs):
+        """ calculate the cost of input samples by using MPPI's eq
+
+        Args:
+            curr_x (numpy.ndarray): shape(state_size),
+                current robot position
+            samples (numpy.ndarray): shape(pop_size, pred_len, input_size)
+                as passed by obtain_sol, input samples
+            g_xs (numpy.ndarray): shape(pred_len, state_size),
+                goal states
+        Returns:
+            costs (numpy.ndarray): shape(pop_size, )
+        """
+        # get size
+        pop_size = samples.shape[0]
+        # repeat the goal trajectory once per sample
+        g_xs = np.tile(g_xs, (pop_size, 1, 1))
+
+        # calc cost, pred_xs.shape = (pop_size, pred_len+1, state_size)
+        pred_xs = self.model.predict_traj(curr_x, samples)
+        
+        # get particle cost
+        # None is passed in the input-cost slot; obtain_sol adds its own
+        # input-dependent term to the returned costs
+        costs = calc_cost(pred_xs, samples, g_xs,
+                          self.state_cost_fn, None, \
+                          self.terminal_state_cost_fn)
+        
+        return costs
+
+    def obtain_sol(self, curr_x, g_xs):
+        """ calculate the optimal inputs
+
+        Also shifts prev_sol by one step for the next call and appends
+        the returned input to history_u.
+
+        Args:
+            curr_x (numpy.ndarray): current state, shape(state_size, )
+            g_xs (numpy.ndarray): goal trajectory, shape(plan_len, state_size)
+        Returns:
+            opt_input (numpy.ndarray): optimal input, shape(input_size, )
+        """
+        # get noised inputs
+        # noise.shape = (pop_size, pred_len, input_size)
+        noise = np.random.normal(
+                loc=0, scale=1.0, size=(self.pop_size, self.pred_len,
+                                        self.input_size)) * self.noise_sigma
+            
+        noised_inputs = self.prev_sol + noise
+
+        # clip actions
+        # only the inputs used for the rollout are clipped; the
+        # unclipped noise is used in the terms below
+        noised_inputs = np.clip(
+            noised_inputs, self.input_lower_bounds, self.input_upper_bounds)
+
+        # calc cost
+        costs = self.calc_cost(curr_x, noised_inputs, g_xs)
+
+        # input-dependent term, summed over input and time axes
+        # NOTE(review): this divides by noise_sigma, not its square;
+        # confirm against the control-cost term of the referenced paper
+        costs += np.sum(np.sum(
+            self.lam * self.prev_sol * noise / self.noise_sigma,
+            axis=-1), axis=-1)
+
+        # mppi update
+        # shift by the minimum cost so every exponent is <= 0
+        beta = np.min(costs)
+        eta = np.sum(np.exp(- 1. / self.lam * (costs - beta)), axis=0) \
+              + 1e-10
+
+        # weight
+        # eta is the sum over the sample axis (the normalizer);
+        # weights has one entry per sample
+        weights = np.exp(- 1. / self.lam * (costs - beta)) / eta
+
+        # update inputs
+        # weighted average of the noise over the sample axis
+        sol = self.prev_sol \
+              + np.sum(weights[:, np.newaxis, np.newaxis] * noise, axis=0)
+
+        # update
+        # shift the sequence forward one step for the next call
+        self.prev_sol[:-1] = sol[1:]
+        self.prev_sol[-1] = sol[-1]  # last use the terminal input
+
+        # log
+        self.history_u.append(sol[0])
+
+        return sol[0]
+
+    def __str__(self):
+        """ name of this controller
+        """
+        return "MPPIWilliams"