% Simulation driver: `num` agents repeatedly travel between cell 1 and
% randomly chosen cells of a reward grid, for `time_limit` ticks.
num = 10;
time_limit = 10000;

% One row per agent. Every agent starts on linear index 1 and is scheduled
% to act at t = 1; prev_loc and collected start at zero.
agent = table(ones(num,1), zeros(num,1), ones(num,1), zeros(num,1), ...
    'VariableNames', {'loc','prev_loc','loc_time','collected'});

% 10x10 grid of integer rewards in 1..10; the first cell holds no reward.
env = randi(10, 10, 10);
env(1) = 0;

tic
for t = 1:time_limit
    for i = 1:num
        % An agent only acts on the tick at which it reaches its target.
        if agent.loc_time(i) ~= t
            continue
        end

        here = agent.loc(i);
        if env(here) > 0
            % Reward available on the current cell: take it and go back.
            [agent(i,:), env] = return_with_reward(agent(i,:), env);
        else
            % Nothing here: head for another randomly chosen cell.
            [agent(i,:), env] = find_reward(agent(i,:), env);
        end
    end
end
toc
function [agent,env] = find_reward(agent,env)
%FIND_REWARD Send an agent to a randomly chosen cell of the grid.
%   [agent,env] = FIND_REWARD(agent,env) records the agent's current
%   location in agent.prev_loc, draws a new target uniformly from the
%   linear indices 2..numel(env), and pushes agent.loc_time forward by the
%   travel time, measured as the absolute difference of linear indices.
%
%   agent - record with fields/variables loc, prev_loc, loc_time.
%   env   - reward grid; returned unchanged.
%
%   The draw may return the cell the agent is already standing on. The
%   travel time would then be zero and loc_time would stay equal to the
%   current tick, so a driver that only acts when loc_time equals the
%   current tick would never schedule this agent again. The delay is
%   therefore clamped to at least one tick.
agent.prev_loc = agent.loc;
agent.loc = randi([2, numel(env)], 1);
travel_time = max(1, abs(agent.loc - agent.prev_loc));
agent.loc_time = agent.loc_time + travel_time;
end
function [agent,env] = return_with_reward(agent,env)
%RETURN_WITH_REWARD Take one unit of reward and send the agent to cell 1.
%   [agent,env] = RETURN_WITH_REWARD(agent,env) removes one unit from the
%   cell the agent is on (never going below zero), credits the agent,
%   records the cell in agent.prev_loc, sets agent.loc to 1 and pushes
%   agent.loc_time forward by the travel time, measured as the absolute
%   difference of linear indices.
%
%   agent - record with fields/variables loc, prev_loc, loc_time, collected.
%   env   - reward grid; returned with the visited cell decremented.
%
%   A unit is credited only when the cell actually holds one, so that
%   agent.collected stays consistent with what was removed from env.
loc = agent.loc;
taken = double(env(loc) > 0);          % 1 if a unit is available, else 0
env(loc) = max(0, env(loc) - 1);
agent.collected = agent.collected + taken;
agent.prev_loc = loc;
agent.loc = 1;
agent.loc_time = agent.loc_time + abs(agent.loc - agent.prev_loc);
end