# UPDATE THE TABLE OF THE Q FUNCTION.
#
# Arguments:
#   s        state in which the action was taken (row index into Q)
#   a        action taken in state s (column index into Q)
#   r        reward obtained the next time step
#   sn       new state entered (row index into Q)
#   Q        the old Q table, indexed as Q[state,action]
#   gamma    the discount to apply to rewards
#   alpha    the learning rate
#   epsilon  the probability of taking a random action
#
# Only the entry Q[s,a] is changed.  It is moved a fraction alpha of the way
# towards the target  r + gamma * V(sn),  where V(sn) mixes the mean of row
# sn of Q (weight epsilon, the value of an action picked at random) with the
# maximum of that row (weight 1-epsilon, the value of the greedy action).
#
# Returns the updated Q table.  R passes Q by value, so the caller must
# assign the result in order to keep the update.

Qlearn = function (s, a, r, sn, Q, gamma, alpha, epsilon)
{
  # Action values of the new state; read once and used for both terms below.
  next.values = Q[sn,]

  # Value of the new state under the epsilon-weighted mix of a random action
  # and the greedy action.
  next.value = epsilon*mean(next.values) + (1-epsilon)*max(next.values)

  Q[s,a] = (1-alpha) * Q[s,a] + alpha * (r + gamma * next.value)

  Q
}

# NOTE(review): the line below is the collapsed, truncated start of simulate();
# it is left untouched because its body is not fully visible from here.
# SIMULATE Q-LEARNING IN SOME WORLD. Arguments are a function generating an # initial state of the world (init), the function defining the transitions # and rewards (world), the discount rate for rewards (gamma), the learning # rate (alpha), the probability of taking a random action (epsilon), and the # number of steps to simulate (steps). # # The global variables n.states and n.actions should contain the number of # states (labelled from 1 up ) and the number of actions (labelled from 1 up). # # Returns a list containing a matrix with the history of the simulation, one # row per time step, and the final Q function table. simulate = function (init, world, gamma, alpha, epsilon, steps) { history = matrix(NA,steps,6) colnames(history) = c("t","s","a","r","rs","sn") Q = matrix(0,n.states,n.actions) s = init() for (t in 1:steps) { if (runif(1)