Solving the HJB equation with state constraints (source code)

From TomWiki
(Redirected from HJB)
Jump to navigationJump to search

This code is based on collocation using PROPT, and the SNOPT nonlinear solver.

For more information see, Solving the Hamilton-Jacobi-Bellman equation for a stochastic system with state constraints by P. Rutquist et al, in Procedings from the 53rd IEEE Conference on Decision and Control, or the technical report with the same name in the Chalmers Publication Library.

%% Solving the HJB equation for a stochastic system with state constraints

% Problem definition
% xdot = A*x + G*(u+w)
% cost = u'*R*u + x'*Q*x
% noise covariance = W
% xmin < x < xmax

% For more info:
% http://publications.lib.chalmers.se/publication/195410-solving-the-hamilton-jacobi-bellman-equation-for-a-stochastic-system-with-state-constraints

%% Define the system

A = [0 0; 1 0];
G = [1 0; 0 1];
R = [1 0; 0 4];
Q = [1 0; 0 1];
W = [1 0; 0 1];
xmin = [-1; -1];
xmax = [1; 1];       

Nd = size(A,1);
Nu = size(G,2);

assert(Nd<=9) % Or names might collide, and probably too big to solve anyway.

% The number of collocation points in each dimension:
N = [6 6];

%% Set up symbolic variables

lambda = tom('lambda');

xc = cell(1,Nd);
pc = cell(1,Nd);
cc = cell(1,Nd);
iic = cell(1,Nd);
for i=1:Nd
    xc{i} = tom(['x' num2str(i)]);
    pc{i} = tomPhase(['p' num2str(i)],xc{i},xmin(i),xmax(i)-xmin(i),N(i));
    cc{i} = collocate(pc{i},xc{i});
    iic{i} = [xmin(i); cc{i}; xmax(i)];
end

x = vertcat(xc{:});

Zp = tom('Zp',prod(N),1);
Z = Zp;

Kpc = cell(Nd,Nd);
Kc = cell(Nd,Nd);

if size(G,2)==1
    QW = W*R*eye(size(G,1)); % Works better
else
    QW = G*W*R/G;
end

for i=1:Nd
    for j=1:Nd
        Kpc{i,j} = tom(['K' num2str(10*i+j) 'p'],prod(N),1);
        Kc{i,j} = Kpc{i,j};
    end
end

for k=1:Nd
    Z = interp1p(iic{k},[zeros(1,length(Z)/N(k)); reshape(Z, N(k), length(Z)/N(k)); zeros(1,length(Z)/N(k))], xc{k});
    for i=1:Nd
        for j=1:Nd
            Kc{i,j} = interp1p(iic{k},[zeros(1,length(Kc{i,j})/N(k)); reshape(Kc{i,j}, N(k), length(Kc{i,j})/N(k)); zeros(1,length(Kc{i,j})/N(k))], xc{k});
        end
    end
end
for i=1:Nd
    Kc{i,i} = QW(i,i) + Kc{i,i};
end

K = reshape(vertcat(Kc{:}),Nd,Nd);

%% Set up equations

warning off propt:SlowInterp1pJ2

gradZ = derivative(Z,x);
hessZ = derivative(gradZ,x);

gradV = -2*(gradZ*K)./Z;
hessV = derivative(gradV,x);

% The nonlinear term must equal zero:
eq0 = gradZ*K*G*inv(R)*G'*K'*gradZ' - gradZ*G*W*G'*K'*gradZ' == 0;

% The Hessian of V must be symmetric:
eqH = cell(Nd-1,Nd-1);
for i=1:Nd-1
    for j=i+1:Nd
        resH = hessV(i,j)-hessV(j,i);
        eqH{i,j-1} = ( Z^2*resH == 0 );
    end
end

% The original HJB residual (for testing purpouses):
resHJB = -lambda + (x'*Q*x) - 0.25*gradV*G*inv(R)*G'*gradV' ...
    + gradV*(A*x) + 0.5*sum(vec(hessV'.*(G*W*G')));

% The transformed HJB equation:
dK = derivative(K,x)*(G*W*G');
trkron = 0;
for i=1:Nd
    trkron = trkron + gradZ*dK((i-1)*Nd+(1:Nd),i);
end
eqHJB = lambda*Z == (x'*Q*x)*Z - 2*gradZ*K*(A*x) ...
    - trkron - sum(vec(hessZ.*(K*(G*W*G'))'));

% Collocate PDE:s
PDEs = {eq0, eqH, eqHJB};
for i=Nd:-1:1
    PDEs = collocate(pc{i}, PDEs);
end

% Norming constraint
nrm = Z.^2;
for i=Nd:-1:1
    nrm = integrate(pc{i},nrm);
end
nrmEq = ( nrm == 1 );

% positivity, and positive definiteness constraints
pos = ( Zp >= 1e-6 );

%% Diffusing minimization objective

obj = 0;
for i=1:Nd
    for j=1:Nd
        obj = obj + sum(derivative(Kc{i,j},x).^2);
    end
end
for i=Nd:-1:1
    obj = integrate(pc{i},obj);
end

%% Initial guess

Kgc = cell(Nd,Nd);
for i=1:Nd
    for j=1:Nd
        Kgc{i,j} = ( Kpc{i,j} == 0 );  % K11p is the offset that is added to QW(1,1)
    end
end
evp = subs(PDEs{3}, Kgc);
AA = sym2eig(lambda, evp);
[VV,DD] = eigs(0.5*(AA+AA'),1,'sa');
VV = VV.*sign(VV(ceil(end/2)));
nrms = sqrt(subs(nrm, Zp == VV));
guess = {
    Kgc
    Zp == VV./nrms
    lambda == DD
    };

%% Solve the problem

options = struct;
options.solver = 'snopt';
options.Prob.SOL.moremem = 1e6;

solution = ezsolve(obj,{PDEs, nrmEq},guess,options);

%% We now have a solution. 
%
% The remainder of this script is a stochastic simulation 
% using the computed optimal control policy.

%% Compute the control signal
u = (1/Z)*inv(R)*G'*(derivative(Z,x)*K)';

% Turn the symbolic control into a matlab function, 
% since we will call it many times
ufun = 'ufun';
ufile = [ufun '.m'];
udata = mfile(subs(u,solution),ufile);
rehash

%% Do stochastic simulation

x_0 = zeros(length(x),1);

dt = 0.001; % time step
t_0 = 0; % initial time
t_f = 10; % final time

t = (t_0:dt:t_f);
Nt = length(t);
disp(['Simulating ' num2str(Nt-1) ' timesteps...']);

sqrtW = W^0.5; % Matrix square root (not element-wise).

% pre-allocate arrays
xv = zeros(length(x),Nt);
uv = zeros(length(u),Nt);

xv(:,1) = x_0; % initial state

xx = cell(1,length(x));
for i=1:Nt-1
    for k=1:length(x)
        xx{k} = xv(k,i);
    end
    uv(:,i) = feval(ufun, xx{:}, udata);
    xv(:,i+1) = xv(:,i) + dt*(A*xv(:,i) + G*uv(:,i)) + sqrt(dt)*G*sqrtW*randn(size(W,1),1);
    % The following assertation may fail due to a combination of bad luck
    % and large dt
    assert(all(xmin<=xv(:,i+1)) && all(xv(:,i+1)<=xmax));
end

%% Plot results

subplot(2,1,1);
plot(t',xv');
xl = cell(1,length(x));
for k=1:length(x)
    xl{k} = ['x_' num2str(k)];
end
legend(xl);

subplot(2,1,2);
plot(t',uv');
ul = cell(1,length(u));
for k=1:length(u)
    ul{k} = ['u_' num2str(k)];
end
legend(ul);

%% Compute cost
% The cost will be different each time, because the noise is generated at
% random, but for small dt and  large t_f, the cost should approach lambda.

cst = zeros(1,Nt-1);
for i=1:Nt-1
    cst(i) = xv(:,i)'*Q*xv(:,i) + uv(:,i)'*R*uv(:,i);
end

disp(['mean cost = ' num2str(mean(cst))]);
disp(['lambda = ' num2str(solution.lambda)]);

%% Delete temporary m-code file
delete(ufile);