function H = vi( beta, inf, aname, A, N, R, T )
%VI  Finite-horizon value iteration for a tabular MDP.
%
%   H = vi( beta, inf, aname, A, N, R, T )
%
%   Performs synchronous Bellman backups for a fixed number of sweeps:
%
%       V(s,1) = R(s)
%       V(s,i) = R(s) + beta * max_a EV_s(a)
%       EV_s(a) = sum_s2 T(s,s2,a) * V(s2,i-1)
%
%   Inputs
%     beta   discount factor multiplying the best expected next value
%     inf    number of value columns to compute (column 1 is R itself).
%            NOTE: this name shadows the builtin INF inside this function;
%            it is kept only so the signature stays unchanged.
%     aname  action labels, indexed as aname(a); the selected label is
%            passed through char() for printing (presumably a cell array
%            of strings -- confirm against the caller)
%     A      number of actions
%     N      number of states
%     R      per-state reward; elements 1..N are used
%     T      transition array indexed T(s, s2, a): weight of moving from
%            state s to state s2 under action a
%
%   Output
%     H      N-by-inf matrix; column i holds the value estimate after
%            i-1 backups.
%
%   Side effect: during the last sweep (only when inf >= 2) one line per
%   state is printed: "<state> <greedy action label> <value>".
%
%   No convergence test is performed; exactly inf-1 sweeps are run.
%
%   Example (assumes model.mat defines beta, aname, A, N, R and T):
%       load( 'model.mat' );
%       H = vi( beta, 100, aname, A, N, R, T );

    numIter = inf;                       % local alias; avoids reading "inf" as infinity below
    reward  = reshape( R(1:N), [], 1 );  % column vector regardless of R's orientation

    V = zeros( N, numIter );
    V(:,1) = reward;

    for iter = 2:numIter
        prevV = V(:,iter-1);

        % EV(s,a) = sum_s2 T(s,s2,a) * V(s2,iter-1), one mat-vec per action.
        EV = zeros( N, A );
        for a = 1:A
            EV(:,a) = T(1:N,1:N,a) * prevV;
        end

        % Row-wise max: best expected value and the (first) action achieving it.
        [best, action] = max( EV, [], 2 );
        V(:,iter) = reward + beta * best;

        % Display the greedy policy once, on the final sweep.
        if iter == numIter
            for s = 1:N
                fprintf( '%s %s %s\n', num2str( s ), ...
                         char( aname( action(s) ) ), num2str( V(s,iter) ) );
            end
        end
    end

    H = V;
end