function Out = RLdecayStayGoStateValues(num_trial,RLparas,decay_rate,DAdep_paras)

%-----
% This file is associated with the following article, which has been provisionally accepted for publication in PLOS Computational Biology
% (initially submitted on May 11, 2016, and provisionally accepted on Sep 14, 2016):
% Authors: Ayaka Kato (1) & Kenji Morita (2)
% Affiliations:
%  (1) Department of Biological Sciences, Graduate School of Science, The University of Tokyo, Tokyo, Japan
%  (2) Physical and Health Education, Graduate School of Education, The University of Tokyo, Tokyo, Japan
% Title: Forgetting in Reinforcement Learning Links Sustained Dopamine Signals to Motivation
% Short title: Dynamic Equilibrium in Reinforcement Learning
% Correspondence: Kenji Morita (morita@p.u-tokyo.ac.jp)
%-----
% Out = RLdecayStayGoStateValues(num_trial,RLparas,decay_rate,DAdep_paras);
%
% <input variables>
%   num_trial: number of trials
%   RLparas: [p_alpha (learning rate), p_beta (inverse temperature), p_gamma (time discount factor)]
%   decay_rate: rate of the decay of the learned values per time-step, e.g., 0.01
%   DAdep_paras: [DAdep_factor (multiplicative), DAdep_start_trial], e.g., [0.25, 251]
% <output variable>
%   Out.TDs: TD error at each time step
%   Out.States: transitions of states (state visited at each time step)
%   Out.goalsteps: indices of the time steps at which the goal state is reached (one per trial)
%   Out.Vs_whole: learned values of all the states evaluated at the end of each trial
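%
% <usage example>
%   (illustrative call only; the RL parameter values [0.5, 5, 1] below are assumed
%   for demonstration and are not necessarily those used in the article)
%   Out = RLdecayStayGoStateValues(500, [0.5, 5, 1], 0.01, [0.25, 251]);
%   plot(Out.Vs_whole(end,:)); % learned values of all the states after the last trial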

% number of states
num_state = 7;

% pre-generate random numbers used for action selection
rands_for_action = rand(1,num_trial*num_state*100);

% RL parameters
p_alpha = RLparas(1);
p_beta = RLparas(2);
p_gamma = RLparas(3);

% DA depletion parameters
DAdep_factor = DAdep_paras(1); % the size of value update becomes DAdep_factor * p_alpha * TDerror
DAdep_start_trial = DAdep_paras(2); % index of trial from which DA depletion starts

% reward
Rs = [zeros(1,num_state-1), 1];  % reward at each state (time step); a reward of 1 is given only at the goal state, on every trial

% variables to save
TDs = zeros(1,num_trial*num_state*100); % TD error at each time step for all the trials, initialization
States = zeros(1,num_trial*num_state*100); % transitions of states (state visited at each time step), initialization
goalsteps = []; % indices of the time steps at which the goal state is reached, initialization
Vs_whole = []; % learned values of all the states evaluated at the end of each trial, initialization

% run simulation
current_state = 1; % initialization
Vs_latest = zeros(1,num_state); % latest (i.e., updated at each time step) learned values of all the states, initialization
for k_tstep = 1:num_trial*num_state*100
    current_trial = size(Vs_whole,1)+1; % index of the current trial (Vs_whole gains one row per completed trial)
    States(k_tstep) = current_state;
    if current_state ~= num_state
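        % softmax ("Stay"/"Go") action selection: "Stay" (NoGo) keeps the current state
        % (value Vs_latest(current_state)) and "Go" moves to the next state
        % (value Vs_latest(current_state+1))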
        prob_chooseNoGo = exp(p_beta * Vs_latest(current_state)) / ...
            (exp(p_beta * Vs_latest(current_state)) + exp(p_beta * Vs_latest(current_state+1)));
        next_state = current_state + 1 - (rands_for_action(k_tstep) <= prob_chooseNoGo);
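        % TD error: immediate reward plus time-discounted value of the next state,
        % minus the value of the current state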
        TDs(k_tstep) = Rs(current_state) + p_gamma * Vs_latest(next_state) - Vs_latest(current_state);
        if current_trial >= DAdep_start_trial
            tmpTDeffective = TDs(k_tstep) * DAdep_factor;
        else
            tmpTDeffective = TDs(k_tstep);
        end
        Vs_latest(current_state) = Vs_latest(current_state) + p_alpha * tmpTDeffective;
        Vs_latest = Vs_latest * (1 - decay_rate); % decay of learned values
        current_state = next_state;
    else % when reaching the goal
        goalsteps = [goalsteps; k_tstep];
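        % TD error at the goal state: there is no subsequent state, so its value is taken to be 0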
        TDs(k_tstep) = Rs(current_state) + p_gamma * 0 - Vs_latest(current_state);
        if current_trial >= DAdep_start_trial
            tmpTDeffective = TDs(k_tstep) * DAdep_factor;
        else
            tmpTDeffective = TDs(k_tstep);
        end
        Vs_latest(current_state) = Vs_latest(current_state) + p_alpha * tmpTDeffective;
        Vs_latest = Vs_latest * (1 - decay_rate); % decay of learned values
        Vs_whole = [Vs_whole; Vs_latest];
        if current_trial == num_trial
            TDs = TDs(1:k_tstep);
            States = States(1:k_tstep);
            break;
        end
        current_state = 1;
    end
end
if length(goalsteps) < num_trial
    error('the planned number of trials has not been completed');
else
    Out.TDs = TDs;
    Out.States = States;
    Out.goalsteps = goalsteps;
    Out.Vs_whole = Vs_whole;
end