Solving the HJB equation with state constraints (source code)
From TomWiki
(Redirected from Hjb)
Jump to navigationJump to search
This code is based on collocation using PROPT, and the SNOPT nonlinear solver.
For more information see, Solving the Hamilton-Jacobi-Bellman equation for a stochastic system with state constraints by P. Rutquist et al, in Procedings from the 53rd IEEE Conference on Decision and Control, or the technical report with the same name in the Chalmers Publication Library.
%% Solving the HJB equation for a stochastic system with state constraints
% Problem definition
% xdot = A*x + G*(u+w)
% cost = u'*R*u + x'*Q*x
% noise covariance = W
% xmin < x < xmax
% For more info:
% http://publications.lib.chalmers.se/publication/195410-solving-the-hamilton-jacobi-bellman-equation-for-a-stochastic-system-with-state-constraints
%% Define the system
A = [0 0; 1 0];
G = [1 0; 0 1];
R = [1 0; 0 4];
Q = [1 0; 0 1];
W = [1 0; 0 1];
xmin = [-1; -1];
xmax = [1; 1];
Nd = size(A,1);
Nu = size(G,2);
assert(Nd<=9) % Or names might collide, and probably too big to solve anyway.
% The number of collocation points in each dimension:
N = [6 6];
%% Set up symbolic variables
lambda = tom('lambda');
xc = cell(1,Nd);
pc = cell(1,Nd);
cc = cell(1,Nd);
iic = cell(1,Nd);
for i=1:Nd
xc{i} = tom(['x' num2str(i)]);
pc{i} = tomPhase(['p' num2str(i)],xc{i},xmin(i),xmax(i)-xmin(i),N(i));
cc{i} = collocate(pc{i},xc{i});
iic{i} = [xmin(i); cc{i}; xmax(i)];
end
x = vertcat(xc{:});
Zp = tom('Zp',prod(N),1);
Z = Zp;
Kpc = cell(Nd,Nd);
Kc = cell(Nd,Nd);
if size(G,2)==1
QW = W*R*eye(size(G,1)); % Works better
else
QW = G*W*R/G;
end
for i=1:Nd
for j=1:Nd
Kpc{i,j} = tom(['K' num2str(10*i+j) 'p'],prod(N),1);
Kc{i,j} = Kpc{i,j};
end
end
for k=1:Nd
Z = interp1p(iic{k},[zeros(1,length(Z)/N(k)); reshape(Z, N(k), length(Z)/N(k)); zeros(1,length(Z)/N(k))], xc{k});
for i=1:Nd
for j=1:Nd
Kc{i,j} = interp1p(iic{k},[zeros(1,length(Kc{i,j})/N(k)); reshape(Kc{i,j}, N(k), length(Kc{i,j})/N(k)); zeros(1,length(Kc{i,j})/N(k))], xc{k});
end
end
end
for i=1:Nd
Kc{i,i} = QW(i,i) + Kc{i,i};
end
K = reshape(vertcat(Kc{:}),Nd,Nd);
%% Set up equations
warning off propt:SlowInterp1pJ2
gradZ = derivative(Z,x);
hessZ = derivative(gradZ,x);
gradV = -2*(gradZ*K)./Z;
hessV = derivative(gradV,x);
% The nonlinear term must equal zero:
eq0 = gradZ*K*G*inv(R)*G'*K'*gradZ' - gradZ*G*W*G'*K'*gradZ' == 0;
% The Hessian of V must be symmetric:
eqH = cell(Nd-1,Nd-1);
for i=1:Nd-1
for j=i+1:Nd
resH = hessV(i,j)-hessV(j,i);
eqH{i,j-1} = ( Z^2*resH == 0 );
end
end
% The original HJB residual (for testing purpouses):
resHJB = -lambda + (x'*Q*x) - 0.25*gradV*G*inv(R)*G'*gradV' ...
+ gradV*(A*x) + 0.5*sum(vec(hessV'.*(G*W*G')));
% The transformed HJB equation:
dK = derivative(K,x)*(G*W*G');
trkron = 0;
for i=1:Nd
trkron = trkron + gradZ*dK((i-1)*Nd+(1:Nd),i);
end
eqHJB = lambda*Z == (x'*Q*x)*Z - 2*gradZ*K*(A*x) ...
- trkron - sum(vec(hessZ.*(K*(G*W*G'))'));
% Collocate PDE:s
PDEs = {eq0, eqH, eqHJB};
for i=Nd:-1:1
PDEs = collocate(pc{i}, PDEs);
end
% Norming constraint
nrm = Z.^2;
for i=Nd:-1:1
nrm = integrate(pc{i},nrm);
end
nrmEq = ( nrm == 1 );
% positivity, and positive definiteness constraints
pos = ( Zp >= 1e-6 );
%% Diffusing minimization objective
obj = 0;
for i=1:Nd
for j=1:Nd
obj = obj + sum(derivative(Kc{i,j},x).^2);
end
end
for i=Nd:-1:1
obj = integrate(pc{i},obj);
end
%% Initial guess
Kgc = cell(Nd,Nd);
for i=1:Nd
for j=1:Nd
Kgc{i,j} = ( Kpc{i,j} == 0 ); % K11p is the offset that is added to QW(1,1)
end
end
evp = subs(PDEs{3}, Kgc);
AA = sym2eig(lambda, evp);
[VV,DD] = eigs(0.5*(AA+AA'),1,'sa');
VV = VV.*sign(VV(ceil(end/2)));
nrms = sqrt(subs(nrm, Zp == VV));
guess = {
Kgc
Zp == VV./nrms
lambda == DD
};
%% Solve the problem
options = struct;
options.solver = 'snopt';
options.Prob.SOL.moremem = 1e6;
solution = ezsolve(obj,{PDEs, nrmEq},guess,options);
%% We now have a solution.
%
% The remainder of this script is a stochastic simulation
% using the computed optimal control policy.
%% Compute the control signal
u = (1/Z)*inv(R)*G'*(derivative(Z,x)*K)';
% Turn the symbolic control into a matlab function,
% since we will call it many times
ufun = 'ufun';
ufile = [ufun '.m'];
udata = mfile(subs(u,solution),ufile);
rehash
%% Do stochastic simulation
x_0 = zeros(length(x),1);
dt = 0.001; % time step
t_0 = 0; % initial time
t_f = 10; % final time
t = (t_0:dt:t_f);
Nt = length(t);
disp(['Simulating ' num2str(Nt-1) ' timesteps...']);
sqrtW = W^0.5; % Matrix square root (not element-wise).
% pre-allocate arrays
xv = zeros(length(x),Nt);
uv = zeros(length(u),Nt);
xv(:,1) = x_0; % initial state
xx = cell(1,length(x));
for i=1:Nt-1
for k=1:length(x)
xx{k} = xv(k,i);
end
uv(:,i) = feval(ufun, xx{:}, udata);
xv(:,i+1) = xv(:,i) + dt*(A*xv(:,i) + G*uv(:,i)) + sqrt(dt)*G*sqrtW*randn(size(W,1),1);
% The following assertation may fail due to a combination of bad luck
% and large dt
assert(all(xmin<=xv(:,i+1)) && all(xv(:,i+1)<=xmax));
end
%% Plot results
subplot(2,1,1);
plot(t',xv');
xl = cell(1,length(x));
for k=1:length(x)
xl{k} = ['x_' num2str(k)];
end
legend(xl);
subplot(2,1,2);
plot(t',uv');
ul = cell(1,length(u));
for k=1:length(u)
ul{k} = ['u_' num2str(k)];
end
legend(ul);
%% Compute cost
% The cost will be different each time, because the noise is generated at
% random, but for small dt and large t_f, the cost should approach lambda.
cst = zeros(1,Nt-1);
for i=1:Nt-1
cst(i) = xv(:,i)'*Q*xv(:,i) + uv(:,i)'*R*uv(:,i);
end
disp(['mean cost = ' num2str(mean(cst))]);
disp(['lambda = ' num2str(solution.lambda)]);
%% Delete temporary m-code file
delete(ufile);