function  RunExperimentLearning96( maxepisodes,experimentname,varargin)
%RunExperiments, the main function of the experiment
%maxepisodes: maximum number of episodes to run the experiment

parser = inputParser;
defaultMethod='QLearning';
expectedMethods= {'QLearning','DP','KTD','MF+MB','FWBW_NoLearning','FW_NoLearning','Q_FW','random','optimal'};


addRequired(parser,'maxepisodes',@isnumeric);
addRequired(parser,'experimentname',@isstr);
addOptional(parser,'method',defaultMethod,...
    @(x) any(validatestring(x,expectedMethods)));

addOptional(parser,'Display',false);
addOptional(parser,'useBayesianValueFunction',false);

addOptional(parser,'fileId',0,@isnumeric);

parse(parser,maxepisodes,experimentname,varargin{:});

method=parser.Results.method;

fileId=parser.Results.fileId;
if(strcmp(method,'MF+MB'))
    [balanceFac, alphaMF, alphaMB, therapymethod]=ind2sub([4,2,2,3],(mod(fileId,4*4*3)+1));
    %balanceFac=balanceFac
    alphaMF=0.1;
    alphaMB=0.2;
    mfFactor=0.5;
    mbFactor=0.5;
elseif (strcmp(method,'QLearning'))
    [alphaMF1, temp1]=ind2sub([10,3],(mod(fileId,30)+1));
    mfFactor=1;
    mbFactor=0;
    alphaMF=power(0.5,alphaMF1/2);
    softmaxft=2/(2+temp1);
    alphaMB=0.2;
    GenotypeStr=strcat('MF_T',num2str(softmaxft),'_AMF',num2str(alphaMF));
    balanceFac=1;
    therapymethod=1;
else
    %UCTKs=[0.001 0.01];
    %UCTKs=[0.1 1 5 10];
    MBFs=[0 0.05 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 0.95 1.0];
    alphaMFs=[0.01 0.05 0.1];
    MTSSS=[ 50 75];
    %[genotype1,UCTKidx]=ind2sub([length(MBFs),length(alphaMFs)],(mod(fileId,length(MBFs)*length(UCTKs))+1));
    %[mbFactorF,alphaMFF,therapymethod]=ind2sub([4,3,3],(mod(fileId,4*3*3)+1));
    [mbFactorIDX,alphaMFIDX,therapymethod]=ind2sub([length(MBFs),length(alphaMFs),3],(mod(fileId,length(MBFs)*length(alphaMFs)*3)+1));
    
    
    balanceFac=1;
    softmaxft=0.5;
    MFEF=0.1;
    alphaMB=0.1;
    UCTK=5;%UCTKs(UCTKidx);
    %alphaMFIDX=4;
    MaxTotalSimSteps=50;%MTSSS(alphaMFIDX);%500+250*maxsteps;
    mbsoftmaxft=1;%* (temp1*temp1)/9;
    
    
    mbFactor=MBFs(mbFactorIDX);
    alphaMF=alphaMFs(alphaMFIDX);
    

    mfFactor=min(max(1-mbFactor,0),1);
    
    GenotypeStr=strcat('MBF',num2str(mbFactor),'_AMF',num2str(alphaMF));
    %     ?alphaMB=0;%1/10;
end

if therapymethod == 1
    resetModelFactor=0.99;
    resetPolicyFactor=0.99;
    therapyModeLF=2;
    therapyMFLFF=1;
        
    tpy='TPY-BAL';
elseif therapymethod==2
    resetModelFactor=0.99;
    resetPolicyFactor=0.99*0.8;
    therapyModeLF=2;
    therapyMFLFF=0.01;
    
    tpy='TPY-Cog';
elseif therapymethod==3
    resetModelFactor=0.99*0.8;
    resetPolicyFactor=0.99;
    therapyModeLF=0.1;
    therapyMFLFF=1;
    
    tpy='TPY-Bhv';
end



useBayesianValueFunction=parser.Results.useBayesianValueFunction;

parser.Results

clc
start       = 5;

environmentParameters=struct(...
    'punishmentOutsideLine',-0.3,...
    'sides',[4,4],...
    'n_healthy_goals',1,...
    'rew_Goals',[1],...
    'p_GetRewardGoals',0.5,...
    'p_GetRewardDrug',0.5,...
    'n_drug_goals',15,...
    'rew_DG',10,...
    'rew_DGV',[1,0.5],...
    'pun_DG',-4,...
    'pun_DGV',[-0.5,-0.75],...
    'pDG',[0.75],...
    'pDGV',[0.5,0.25],...
    'escaLation_factor_DG',0.5,...
    'n_base_states',6,...
    'deterministic',false,...
    'autoGen',1,...
    'reducedPunishmentF',0.3,...
    'minFactor',0.001); %#ok<NBRAK>

Environment=CreateEnvironmentMultiStepDeterministic7(environmentParameters);

displayEnvironment(Environment);






%Model       = BuildModel(Environments ); % the Qtable


maxsteps    = 2600;  % maximum number of steps per episode

initDrugStartSteps=50;%maxsteps/200;
therapyStartSteps=1000;ceil(maxsteps/2);
therapyEndSteps=therapyStartSteps+1000;%ceil(6*maxsteps/10);
simulatedTherapy=false;


%alpha       = 0.1;   % learning rate
gamma       = 0.9;  % discount factor
lambda=.9;
epsilon     = 0.1;   % probability of a random action selection

maxInternalNoise=parser.Results.maxepisodes;
plot_graphs=true;


MFParameters=struct(...
    'alpha',alphaMF,...
    'gamma',gamma,...%gamma,...
    'lambda',lambda,...
    'explorationFactor',MFEF,...
    'SelectAction','DYNA',...
    'randExpl',true,...
    'softMax',false,...
    'softMax_t',1,...
    'changeLearningFactorWithCounts',false,...
    'updateQTablePerm',0,...
    'useKTD',useBayesianValueFunction);

MBParameters=struct('alpha',alphaMB,...
    'StoppingPathThreshMB',1e-6,...
    'computePolicyWithDP',0,...
    'stoppingThreshMB',20,...
    'StoppingPathLengthMB',12,...
    'useTrace',false,...
    'MaxItrMB',200,...
    'softMax_t',mbsoftmaxft,...
    'MaxTotalSimSteps',MaxTotalSimSteps,...
    'StopSimMB',5,...
    'knownTransitions',0,...
    'pStopPath',0.05,...
    'stopOnUncertaintyVal',1,...
    'sigma_square_noise_external',0.000001,...
    'noiseVal',0.000001,...
    'mixMFMBPolicies',true,...
    'UseMFToBoothMBFinalVAl',false,...
    'mf_factor',mfFactor,...
    'mb_factor',mbFactor,...
    'noiceVar',0.000001,...
    'runInternalSimulation',0,...
    'updateModel',0,...
    'useMFToDriveMB',false,...
    'computeStatistics',false,...
    'modelLearningFactor',1,...
    'periodToComputeStatistics',50,...
    'SelectAction','UCT',...
    'statisticsStepsPerState',50, ...
    'MBMethod','DPBound',...
    'modelDecay',0.01,...
    'UCTK',UCTK, ...
    'greedy',false, ...
    'mixmode','+',...
    'softMaxMix',false ...
    );
%%

MBReplayParameters = struct(...
    'sigma_square_noise_external',MBParameters.sigma_square_noise_external,...
    'noiseVal',MBParameters.noiseVal,...
    'P_starting_point_high_R',balanceFac,...
    'P_starting_point_Low_R',0.5,...
    'P_starting_point_recent_change',0.1,...
    'restart_sweep_Prob',0.3,... %'frequency',2,...
    'sweeps',4,...
    'sweepsDepth',4,...
    'stepsTotal',8,...
    'P_update_After_Reward',2,....
    'internalReplay',0);

if strcmp(method,'QLearning')
    MFParameters.updateQTablePerm=1;
elseif strcmp(method,'MF+MB')
    MFParameters.updateQTablePerm=1;
    MBParameters.runInternalSimulation=1;
    MBParameters.updateModel=1;
    MBReplayParameters.internalReplay=1;
elseif strcmp(method,'Q_FW')
    MFParameters.updateQTablePerm=1;
    MBParameters.runInternalSimulation=1;
    MBParameters.updateModel=1;
    MBReplayParameters.internalReplay=0;
elseif strcmp(method,'FWBW_NoLearning')
    MBParameters.runInternalSimulation=1;
    MBParameters.updateModel=0;
    MFParameters.updateQTablePerm=0;
    MBReplayParameters.internalReplay=1;
elseif strcmp(method,'FW_NoLearning')
    MBParameters.runInternalSimulation=1;
    MBParameters.updateModel=0;
    MFParameters.updateQTablePerm=0;
    MBReplayParameters.internalReplay=0;
elseif strcmp(method,'random')
    MBParameters.runInternalSimulation=0;
    MBParameters.updateModel=0;
    MFParameters.updateQTablePerm=0;
    MBReplayParameters.internalReplay=0;
elseif strcmp(method,'optimal')
    MBParameters.runInternalSimulation=0;
    MBParameters.updateModel=0;
    MFParameters.updateQTablePerm=0;
    MBReplayParameters.internalReplay=0;
    MBParameters.computePolicyWithDP=1;
    MFParameters.softmax=false;
    MFParameters.explorationFactor=0;
    MFParameters.randExpl=true;
end



MBParameters=setstructfields(MFParameters,MBParameters);

MBReplayParameters=setstructfields(MBParameters,MBReplayParameters);


display('MFparameters')
display(MFParameters)

display('MBFWparameters')
display(MBParameters)

display('MBBWparameters')
display(MBReplayParameters)




for internalNoiseIdx=1:maxInternalNoise
    
    %% init random generator
    currentTime=rem(now,1) %#ok<NOPRT>
    seed=(currentTime*3000000+fileId)%#ok<NOPRT> %679
    rng(seed,'twister')
    randomGeNstruct=rng(seed)  %#ok<NOPRT>
    x = rand(1,5) %#ok<NASGU,NOPRT>
    
    %% run simulation
    display('start')
    
    %     [total_reward,steps,Q,Model,last_actions,last_states,last_reward,last_Q,lastK,lastVar,lastDreward,lastMA_noise_n,last_maxD,last_meanD  ] = ...
    %         Episode_Dim_UCT_UCRL_with_reset( maxsteps, Environment,start,MBReplayParameters);
    %
    
    if  strcmp(method,'DP')
        
        %Environment=changeToTherapyReward(Environment);
        
        [~,~,~,Model,last_actions,last_states,last_reward,last_Q,~,~,~,~,~,~  ] = ...
            Episode_DP( maxsteps,Environment,start,MBParameters);
        arguments=[];
        displayQValueMean(squeeze(last_Q(maxsteps,:,:)),Environment);
        displayEnvironment(Environment);
    else
        
        inputVals.maxsteps=maxsteps;
        inputVals.initDrugStartSteps=initDrugStartSteps;
        inputVals.therapyStartSteps=therapyStartSteps;
        inputVals.therapyEndSteps=therapyEndSteps;
        inputVals.simulatedTherapy=simulatedTherapy;
        inputVals.resetModelFactor=resetModelFactor;
        inputVals.resetPolicyFactor=resetPolicyFactor;
        inputVals.Environment=Environment;
        inputVals.start=start;
        inputVals.parametersMF=MFParameters;
        inputVals.parametersMBFW=MBParameters;
        inputVals.parametersMBBW=MBReplayParameters;
        inputVals.therapyModelLF=therapyModeLF;
        inputVals.therapyMFLFF=therapyMFLFF;
        
        Results=...
            Episode_WithReset_And_Statistics(inputVals);
        last_reward=Results.last_reward;
        last_Q=Results.last_Q;
        last_states=Results.last_states;
        last_actions=Results.last_actions;
        arguments=parser.Results;
        Model=Results.Model;
        displayQValueMean(squeeze(last_Q(maxsteps,:,:)),Environment);
%         displayEnvironment(Environment);
        
    end
    
    
    display('end')
    
    
    %% store and plot results
    if (plot_graphs)
        %                 figure; plot(1:maxsteps,smooth(lastK,50));title('Max Kalman Gain');
        %                 figure;plot(1:maxsteps,smooth(lastVar,50));title('Max Q.var');
        %                 figure;plot(1:maxsteps,smooth(abs(lastDreward),50));title('Max lastDreward');
        %                 figure;plot(1:maxsteps,smooth(abs(lastMA_noise_n),50));title('Max lastMA_noise_n');
        %figure;plot(1:maxsteps,smooth(abs(last_maxD),50),'LineWidth',2);title('Max Q up');
        %figure;plot(1:maxsteps,smooth(abs(last_meanD),50),'LineWidth',2);title('Mean Q up');
        formatOut = 'yy-mm-dd-HH-MM-SS-FFF';
        if (strcmp(method,'MF+MB'))
            dirname=strcat(experimentname,'/data/',GenotypeStr,'-',tpy,'-Met-',method,'-balFac-',num2str(MBReplayParameters.P_starting_point_high_R),'-MFalpha-',num2str(MFarameters.alpha),'-MBalpha-',num2str(MBParameters.alpha),'-tpy-',num2str(therapymethod),'-drg-',num2str(environmentParameters.n_drug_goals),'/',method,'-',datestr(now,formatOut),'-',num2str(fileId));
        else
            dirname=strcat(experimentname,'/data/',GenotypeStr,'-',tpy,'-Met-',method,'-MFfactor-',num2str(MBParameters.mf_factor),'-MBfactor-',num2str(MBParameters.mb_factor),'-MFalpha-',num2str(MFParameters.alpha),'-MBalpha-',num2str(MBParameters.alpha),'-tpy-',num2str(therapymethod),'-drg-',num2str(environmentParameters.n_drug_goals),'/',method,'-',datestr(now,formatOut),'-',num2str(fileId));
        end
        
        d=pwd;
        mkdir(dirname);
        cd(dirname);
        
        % plotUpdates(maxsteps,last_maxD,last_meanD,MFParameters,internalNoiseIdx,maxInternalNoise,noise);
        save 'lastreward.mat' last_reward
        %plotReward(maxsteps,last_reward,MFParameters,internalNoiseIdx,maxInternalNoise,noise);
        save 'lastQ.mat' last_Q
        %PlotQEvolution(maxsteps,MFParameters,Environment,last_Q,internalNoiseIdx,maxInternalNoise,noise);
        save 'last_states.mat' last_states
        %plotVisits(maxsteps,last_states,Environment,MFParameters,internalNoiseIdx,maxInternalNoise,noise);
        save 'lastActions.mat' last_actions
        %plotActionSelection(MFParameters,Environment,maxsteps,last_states,last_actions,internalNoiseIdx,maxInternalNoise,noise);
        save 'Model.mat' Model
        
        save 'Environment.mat' Environment
        save 'timing.mat' maxsteps initDrugStartSteps therapyStartSteps therapyEndSteps
        
        
        qt(:,:)=last_Q(maxsteps,:,:);
        
        save 'q_final.mat' qt
        save 'arguments.mat' arguments
        save 'EnvPar.mat' environmentParameters
        save 'MBBWparameters.mat' MBReplayParameters
        save 'MBFWparameters.mat' MBParameters
        save 'MFparameters.mat' MFParameters
        save 'randomGeNstruct.mat' randomGeNstruct
        cd(d);
        
    end
    
end



end