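% Expected layout of the struct inputs of runInternalSimulation, written as
% (field name, value) pairs:
% MBParameters=struct(alpha_MB,Valpha_MB,...
% StoppingPathThresh,VStoppingPathThresh,...
% StoppingPathLength,VStoppingPathLength,...
% StoppingPathThreshReward,VStoppingPathThreshReward,...
% stoppingThresh,VstoppingThresh,...
% MaxItr,VMaxItr,...
% StopSim,VStopSim,...
% SelectActionSim,VSelectActionSim,...
% stopOnUncertaintyVal,VstopOnUncertaintyVal);
% QTablePerm=struct(QTablePermMean,VQTablePermMean,...
% QTableVar,VQTableVar);
% NOTE(review): the function below reads MBParameters.MaxItrMB, which is not
% in this list (only MaxItr is) - confirm which field name is current.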
function [Qtable_Integrated,N_itr]=runInternalSimulation(QTablePerm,currentState,Model,MBParameters,resetSim)
%RUNINTERNALSIMULATION Update a Q-table by simulating short paths in the model.
%
%   [Qtable_Integrated,N_itr]=runInternalSimulation(QTablePerm,currentState,
%   Model,MBParameters,resetSim) starts from a copy of QTablePerm and runs
%   simulated paths that each begin at currentState. Every simulated step
%   selects an action with selectActionSim, simulates it with
%   doActionInModel and folds the outcome into the table with
%   updateQTablePerm. A path stops after MAX_PATH_STEPS steps, or as soon as
%   doActionInModel reports the step as not valid.
%
%   Inputs
%     QTablePerm    - initial Q-table; passed through to the helpers unchanged.
%     currentState  - state every simulated path starts from.
%     Model         - model struct; this function reads Model.Num_States and
%                     Model.Num_Actions and forwards Model to the helpers.
%     MBParameters  - parameter struct; this function reads
%                     MBParameters.MaxItrMB (loop bound on the number of
%                     paths) and forwards the struct to the helpers.
%     resetSim      - when true, the persistent visit counts are zeroed; the
%                     value is also forwarded to updateQTablePerm on the
%                     first step of every path.
%
%   Outputs
%     Qtable_Integrated - Q-table after all simulated updates.
%     N_itr             - value of the path counter when the loop exits, i.e.
%                         one more than the number of paths that were run.
%                         NOTE(review): callers expecting the plain number of
%                         iterations should subtract one - confirm intent.
%
%   The state/action visit counts are kept in a persistent variable, so they
%   accumulate across calls until resetSim is true or the function is cleared.

persistent stateActionVisitCounts;
if isempty(stateActionVisitCounts) || resetSim
    stateActionVisitCounts=zeros(Model.Num_States,Model.Num_Actions);
else
    % The counts survive between calls, so the model may have grown since
    % they were allocated. Pad with zeros (keeping the existing counts) so
    % that indexing a newly added state/action below cannot run out of bounds.
    [nRows,nCols]=size(stateActionVisitCounts);
    if nRows<Model.Num_States || nCols<Model.Num_Actions
        grown=zeros(max(nRows,Model.Num_States),max(nCols,Model.Num_Actions));
        grown(1:nRows,1:nCols)=stateActionVisitCounts;
        stateActionVisitCounts=grown;
    end
end

% Fixed length of one simulated path.
% NOTE(review): the header comment of this file lists a StoppingPathLength
% parameter; this constant may be meant to come from it - confirm.
MAX_PATH_STEPS=6;

N_itr=1;
Qtable_Integrated=QTablePerm;

%% simulation iteration loop: one pass per simulated path
while(N_itr<=MBParameters.MaxItrMB)
    currentStateSim=currentState;
    % The reset flag is re-armed for every path and cleared after the first
    % successful update of that path.
    reset=resetSim;
    path_step=0;
    path_end=0;

    %% path simulation loop: one pass per simulated step
    while(path_end==0)
        actionSim=selectActionSim(currentStateSim,Model,MBParameters,Qtable_Integrated,stateActionVisitCounts);
        [nextStateSim,rewardSim,valid]=doActionInModel(actionSim,Model,currentStateSim,MBParameters);
        if ~valid
            % The model cannot simulate this step: abandon the current path
            % without counting the visit or updating the table.
            break;
        end

        stateActionVisitCounts(currentStateSim,actionSim)=stateActionVisitCounts(currentStateSim,actionSim)+1;

        %% update
        Qtable_Integrated=updateQTablePerm(Qtable_Integrated,rewardSim,nextStateSim,actionSim,currentStateSim,stateActionVisitCounts,MBParameters,reset);

        currentStateSim=nextStateSim;
        reset=0;
        path_step=path_step+1;
        path_end=path_step>=MAX_PATH_STEPS;
    end

    N_itr=N_itr+1;
end
end