% MBParameters=struct(alpha_MB,Valpha_MB,…
% gamma_MB,Vgamma_MB,…
% lambda_MB,Vlambda_MB,…
% StoppingPathThresh,VStoppingPathThresh,…
% StoppingPathLength,VStoppingPathLength,…
% StoppingPathThreshReward,VStoppingPathThreshReward,…
% stoppingThresh,VstoppingThresh,…
% MaxItr,VMaxItr,…
% StopSim,VStopSim,…
% SelectActionSim,VSelectActionSim,…
% stopOnUncertaintyVal,VstopOnUncertaintyVal);
function updateQtable_IntegratedWithSimulatedPathTDLambda(nextStateSim, rewardSim, currentStateSim,actionSim,Model,MBParameters,environment,stateActionVisitCounts)
% updateQtable_IntegratedWithSimulatedPathTDLambda
%   Applies one TD(lambda)-style update to the global table
%   Qtable_Integrated_loc.mean using a single simulated transition
%   (currentStateSim, actionSim) -> (nextStateSim, rewardSim).
%
%   Inputs
%     nextStateSim           row index into Qtable_Integrated_loc.mean of the
%                            state reached by the simulated step
%     rewardSim              reward of the simulated step
%     currentStateSim        row index of the state the step started from
%     actionSim              column index of the action taken in currentStateSim
%     Model, environment,
%     stateActionVisitCounts only forwarded to selectActionSim
%     MBParameters           struct; fields read here: alpha_MB (step size),
%                            gamma_MB (discount), lambda_MB (trace decay)
%
%   No return value. Side effects: modifies the global
%   Qtable_Integrated_loc.mean and the persistent eligibility trace, which
%   survives across calls (clear it with "clear <this function name>").

%% init
persistent elegibilityTraceSim;
global Qtable_Integrated_loc;

% (Re)create the trace whenever it does not exist yet or no longer matches
% the Q-table shape; a stale trace of a different size would make the
% element-wise update below fail or apply credit to the wrong entries.
qTableSize = size(Qtable_Integrated_loc.mean);
if isempty(elegibilityTraceSim) || ~isequal(size(elegibilityTraceSim), qTableSize)
    elegibilityTraceSim = zeros(qTableSize);
end

%% getNextAction
% The bootstrap action must be chosen in the state we bootstrap from, i.e.
% nextStateSim; the value Q(nextStateSim, nextAct) is read right below.
% NOTE(review): assumes the first argument of selectActionSim is the state
% in which the action is selected -- confirm against its definition.
nextAct = selectActionSim(nextStateSim, Model, MBParameters, Qtable_Integrated_loc, environment, stateActionVisitCounts);

%% TD error
qNext    = Qtable_Integrated_loc.mean(nextStateSim, nextAct);
qCurrent = Qtable_Integrated_loc.mean(currentStateSim, actionSim);
tdError  = rewardSim + MBParameters.gamma_MB * qNext - qCurrent;

%% update
if MBParameters.gamma_MB > 0
    % Decay every trace by lambda*gamma, then set the trace of the visited
    % pair to 1 (replacing trace) and spread the TD error over all pairs.
    elegibilityTraceSim = MBParameters.lambda_MB * MBParameters.gamma_MB * elegibilityTraceSim;
    elegibilityTraceSim(currentStateSim, actionSim) = 1;
    Qtable_Integrated_loc.mean = Qtable_Integrated_loc.mean + MBParameters.alpha_MB * elegibilityTraceSim * tdError;
else
    % With gamma_MB <= 0 only the visited entry is updated; the persistent
    % trace is left untouched in this branch.
    Qtable_Integrated_loc.mean(currentStateSim, actionSim) = qCurrent + MBParameters.alpha_MB * tdError;
end
end