package grsnc.binb; import lnsc.*; import lnsc.page.*; import java.util.Random; /** *

* Title: BG Math Model (Francois Rivest, 10 Mar 2006)
* Description: Agent based on a Mathematical Model of the Basal Ganglia
* Copyright: Copyright (c) 2004
* Company: UdeM
* * Note: Eligibility traces are bounded between -1 and 1. (Actors still untraced) * *

* * *

Summary. In this model, the critic uses the standard TD formula, * while the actor uses a natural gradient that gives a biological three-synaptic * update rule. Although the critic part is not totally biologically plausible, * it uses the same equations as the Suri&Schultz1999Model. It would be interesting * to find a biologically plausible equivalent formula.

* *

* Implementation details: *

Si(t) is stimuli given by StateRepresentation *
r(t) is the primary reward *
Unless the state is final, the returnReward value is processed in the next call to * requestAction. If it is final, it is processed in endEpisode. *
Critic: *
- Wik are the weights between stimuli Si(t) and reward predictions Pk(t) *
- Pk(t) = sum(Wik*Si(t)) is reward prediction k *
- P(t) = sum(Pk(t)) *
- e(t) = r(t) + gamma*P(t) - P(t-1) *
- eti(t) = lambda*eti(t-1) + Si(t-1) *
- gamma = .98 (discounting factor) *
- learning rule: * Wik(t) = Wik(t-1) + etac*e(t)*eti(t) *
- initialisation: * Wik(t=0) > 0 *
*
Actor: *
- Wij is weights between stimuli Si(t) and action Aj *
- Aj(t) = sum(Wij*Si(t)) is actor activity j *
- Aj'(t) = 1 if Aj(t) > 0 and Aj(t) > Al(t) for all l not j *
- learning rule: * Wij(t) = Wij(t-1) + etaa*e(t)*Aj'(t-1)*Si(t-1) *
- initialisation: * Wij(t=0) > 0 *
*
* *
* Assumptions:

*
*
newEpisode & first requestAction have the same state *
requestAction & next returnReward have the same state *
last returnReward and endEpisode have the same state * * * @author François Rivest * @version 1.1 */ public class Rivest06 extends AbstractObservableAgent { /*********************************************************************/ //Serial Version UID /** Serial version UID. */ static final long serialVersionUID = 3615829507271034044L; /**********************************************************************/ //Private fields protected Random rnd = new Random(); /** Indicates whether or not the model agent should be in evaluation mode * only (no learning). */ //protected boolean m_EvalMode; /** StateRepresentation converting state into real-valued vector. */ protected StateRepresentation m_StateRep; /** Number of actor neurons. */ protected int m_ActorCount; /** Number of critic neurons. */ protected int m_CriticCount; /** Stimuli to actor weights. */ protected double[][] m_Wa; /** Stimuli to critic weights. */ protected double[][] m_Wc; /** Previous stimuli activty. */ protected transient double[] m_PrevStimuli; /** Previous critics activity (useless). */ protected transient double[] m_PrevCritics; /** Previous prediction activity. */ protected transient double m_PrevPrediction; /** Previous actor activity. */ protected transient double[] m_PrevAction; /** Previous eligibility trace (for critic only). */ protected transient double[] m_PrevETraces; /** Reward. */ protected transient double m_Reward; /** Indicate no first state yet. */ protected transient boolean m_Reset; /** Discounting factor. */ protected double m_Gamma = 0.98; /** Eligibility trace discount factor. */ protected double m_Lambda = .9; /** Actor learning rate. */ protected double m_Etaa = .01; /** Critic learning rate. */ protected double m_Etac = .01; /** Initialization weight factor. */ protected double m_InitWeightFactor = .1; /** Description DataSet for toDataSet (updated at the end of processContext). 
*/ protected transient DataSet m_Description; /**********************************************************************/ //Constructors /** Construct an agent based on Francois Rivest May 17 BG Math Model. * @param newActionCount Number of action neurons * @param newCriticCount Number of critic neurons. * @param newStateRepresentation State representation or stimuli. * @param newLearningRate Actor & Critic learning rates. * @param newInitWeightFactor Initialization weight factor. */ public Rivest06(int newActorCount, int newCriticCount, StateRepresentation newStateRep, double newLearningRate, double newInitWeightFactor) { m_IsEvaluable = true; m_ActorCount = newActorCount; m_CriticCount = newCriticCount; m_StateRep = newStateRep; m_Etaa = newLearningRate; m_Etac = newLearningRate; m_InitWeightFactor = newInitWeightFactor; m_Wa = new double[m_ActorCount][m_StateRep.getOutputCount()]; m_Wc = new double[m_CriticCount][m_StateRep.getOutputCount()]; initWeights(m_Wa); initWeights(m_Wc); } /**********************************************************************/ //Agent interface implementation /** Starts by filling previous stimuli, prediction and action. */ public void newEpisode(State newState) { //Save previous activity m_PrevStimuli = new double[m_StateRep.getOutputCount()]; m_PrevPrediction = 0.0; m_PrevAction = new double[m_ActorCount]; m_PrevCritics = new double[m_CriticCount]; m_PrevETraces = new double[m_StateRep.getOutputCount()]; //Initial reward = 0 m_Reward = 0; m_Reset = true; m_StateRep.reset(); } /** Computes actors and critics activities with no-reward given at time t.*/ public Action requestAction(State currentState) { int actionIndex = processContext(currentState, m_Reward); return currentState.getActions()[actionIndex];//Assume actions are always the same. } /** Save reward. 
*/ public void returnReward(State resultState, double reward) { if (resultState.isFinal()) { // int actionIndex = processContext(resultState, reward); } else { m_Reward = reward; } } /** Complete processContext. */ public void endEpisode(State finalState) { processContext(finalState, m_Reward); } //public boolean getEvalMode() {return m_EvalMode;} //public void setEvalMode(boolean newEvalMode) { // m_EvalMode = newEvalMode; //} //public boolean isEvaluable() {return false;} //public boolean isAdaptive() {return true;} /**********************************************************************/ //Helper function /** Initialize a weight matrices with values higher then 0 (1/aORc_count).*/ protected void initWeights(double[][] w) { double v = m_InitWeightFactor/(double)(w.length); for (int i=0; i