Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

unconverge #2

Open
TianrongChen opened this issue Aug 22, 2019 · 1 comment
Open

unconverge #2

TianrongChen opened this issue Aug 22, 2019 · 1 comment

Comments

@TianrongChen
Copy link

TianrongChen commented Aug 22, 2019

Hi! I'm learning DDP method recently and also upvoted your brilliant implementation. It seems like you are using the MPC version of ilqr? I change it into normal version but it does not converged any more.... Could you please exaplain a little bit for me.
But anyway thanks for your implementation!!
The following is my code which is modified from your code!

import gym
import env
from autograd import grad, jacobian
import autograd.numpy as np

class ILqr:
def init(self, next_state, running_cost, final_cost,
umax, state_dim, pred_time=500):
self.pred_time = pred_time
self.umax = umax
self.v = [0.0 for _ in range(pred_time + 1)]
self.v_x = [np.zeros(state_dim) for _ in range(pred_time + 1)]
self.v_xx = [np.zeros((state_dim, state_dim)) for _ in range(pred_time + 1)]
self.f = next_state
self.lf = final_cost
self.lf_x = grad(self.lf)
self.lf_xx = jacobian(self.lf_x)
self.l_x = grad(running_cost, 0)
self.l_u = grad(running_cost, 1)
self.l_xx = jacobian(self.l_x, 0)
self.l_uu = jacobian(self.l_u, 1)
self.l_ux = jacobian(self.l_u, 0)
self.f_x = jacobian(self.f, 0)
self.f_u = jacobian(self.f, 1)

def backward(self, x_seq, u_seq):
    self.v[-1] = self.lf(x_seq[-1])
    self.v_x[-1] = self.lf_x(x_seq[-1])
    self.v_xx[-1] = self.lf_xx(x_seq[-1])
    k_seq = []
    kk_seq = []
    for t in range(self.pred_time - 1, -1, -1):
        f_x_t = self.f_x(x_seq[t], u_seq[t])
        f_u_t = self.f_u(x_seq[t], u_seq[t])
        q_x = self.l_x(x_seq[t], u_seq[t]) + np.matmul(f_x_t.T, self.v_x[t + 1])
        q_u = self.l_u(x_seq[t], u_seq[t]) + np.matmul(f_u_t.T, self.v_x[t + 1])
        q_xx = self.l_xx(x_seq[t], u_seq[t]) + \
          np.matmul(np.matmul(f_x_t.T, self.v_xx[t + 1]), f_x_t) 
        tmp = np.matmul(f_u_t.T, self.v_xx[t + 1])
        q_uu = self.l_uu(x_seq[t], u_seq[t]) + np.matmul(tmp, f_u_t) 
        q_ux = self.l_ux(x_seq[t], u_seq[t]) + np.matmul(tmp, f_x_t) 
        inv_q_uu = np.linalg.inv(q_uu)
        k = -np.matmul(inv_q_uu, q_u)
        kk = -np.matmul(inv_q_uu, q_ux)
        dv = 0.5 * np.matmul(q_u, k)
        self.v[t] += dv
        self.v_x[t] = q_x - np.matmul(np.matmul(q_u, inv_q_uu), q_ux)
        self.v_xx[t] = q_xx + np.matmul(q_ux.T, kk)
        k_seq.append(k)
        kk_seq.append(kk)
    k_seq.reverse()
    kk_seq.reverse()
    return k_seq, kk_seq

def forward(self, x_seq, u_seq, k_seq, kk_seq):
    x_seq_hat = np.array(x_seq)
    u_seq_hat = np.array(u_seq)
    for t in range(len(u_seq)):
        control = k_seq[t] + np.matmul(kk_seq[t], (x_seq_hat[t] - x_seq[t]))
        u_seq_hat[t] = np.clip(u_seq[t] + control, -self.umax, self.umax)


    return  u_seq_hat

env = gym.make('CartPoleContinuous-v0').env
obs = env.reset()
ilqr = ILqr(lambda x, u: env._state_eq(x, u),
lambda x, u: 0.5 * np.sum(np.square(u)),
lambda x: 0.5 * (np.square(1.0 - np.cos(x[2])) + np.square(x[1]) + np.square(x[3])),
env.max_force,
env.observation_space.shape[0])
u_seq = [np.zeros(1) for _ in range(ilqr.pred_time)]
x_seq = [obs.copy()]
for t in range(ilqr.pred_time):
x_seq.append(env._state_eq(x_seq[-1], u_seq[t]))

cnt = 0
while True:
env.reset()

k_seq, kk_seq = ilqr.backward(x_seq, u_seq)
u_seq = ilqr.forward(x_seq, u_seq, k_seq, kk_seq)


cost=0
x_seq = [env.reset()]
for t in range(ilqr.pred_time):
    obs, _, _, _ = env.step(u_seq[t])
    x_seq.append(obs)
    cost=cost+0.5 * np.sum(np.square(u_seq[t]))
x=x_seq[-1]
cost=cost+0.5 * (np.square(1.0 - np.cos(x[2])) + np.square(x[1]) + np.square(x[3]))
cnt += 1
print(cost)
if cost<15:
    break
@neka-nat
Copy link
Owner

HI @TianrongChen,

Thank you for your reporting.
If possible, can you push the changes to your forked repository?
I'll clone it from there and examine it.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants