package lnsc.page;

/** Runs a single-agent environment for a given number of steps or until success.
 *
 * <P>The reward given to the agent after each action can be based on the
 * action cost (negative), on the resulting state value (positive), on given
 * values for normal and goal-leading actions (an action leading to a final
 * state), or on any sum of these options.</P>
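 *
 * <P>Concretely, the reward for one step combines the enabled options as
 * follows (pseudocode mirroring the computation in {@link #go}):</P>
 * <PRE>
 * r = (costReward  ? -action.getCost() : 0)
 *   + (valueReward ?  state.getValue() : 0)
 *   + (state.isFinal() ? successReward : reward)
 * </PRE>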
 *
 * <P>The state should generate a single state in return for an action (hence
 * technically have isDeterministic = true), not multiple states as in a
 * search procedure. It should also return a single observable state on
 * request (which may be the state itself) and not many (since there is a
 * single agent).</P>
 *
 * <P>Agents may require the observable state to provide actions in a specific
 * format, but the observable state does not need to implement getNextStates
 * or getObservableStates, nor does the State need to implement getActions or
 * getActionsEnumerator.</P>
 *
 * <P>If there are observers, a toDataSet description of each state is sent
 * to them initially and after the processing of every action (i.e. after
 * {@link State#getNextStates}).</P>
 *
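 * <P>A minimal usage sketch; {@code GridState} and {@code RandomAgent} are
 * hypothetical stand-ins for concrete State and Agent implementations:</P>
 * <PRE>
 * //-1 per normal action, +100 for a goal-leading action, at most 10000 steps
 * SingleAgentEnvironment env = new SingleAgentEnvironment(-1.0, 100.0, 10000);
 * Agent agent = new RandomAgent();
 * State start = new GridState(0, 0);
 * double[] stats = env.go(agent, start);
 * System.out.println("steps=" + stats[0]
 *                    + " cost=" + stats[1]
 *                    + " reward=" + stats[2]);
 * </PRE>
 *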
 * @author Francois Rivest
 * @version 1.1
 */
public class SingleAgentEnvironment extends AbstractObservableEnvironment {

	/*********************************************************************/
	//Protected fields

	/** Normal action reward. */
	protected double m_Reward = 0.0;

	/** Successful (goal-leading) action reward. */
	protected double m_SuccessReward = 0.0;

	/** Indicates to use action-cost as reward instead of other values. */
	protected boolean m_ActionCostReward = false;

	/** Indicates to use state-value as reward instead of other values. */
	protected boolean m_StateValueReward = false;

	/** Maximum number of steps. */
	protected int m_MaxSteps = 100000;

	/** Indicates whether to print a progress dot every 1000 steps.
	 * Default is true. */
	public boolean showDots = true;

	/*********************************************************************/
	//Constructors

	/** Constructs a server for a single agent with specific rewards.
	 * @param   newReward          Normal action reward.
	 * @param   newSuccessReward   Successful (goal-leading) action reward.
	 * @param   newMaxSteps        Maximum number of steps.
	 */
	public SingleAgentEnvironment(double newReward,
	                              double newSuccessReward,
	                              int newMaxSteps)
	{
		m_Reward = newReward;
		m_SuccessReward = newSuccessReward;
		m_MaxSteps = newMaxSteps;
	}

	/** Constructs a server for a single agent with action or state based reward.
	 * @param   costReward         Reward -= Action.Cost (if true)
	 * @param   valueReward        Reward += ResultingState.Value (if true)
	 * @param   newMaxSteps        Maximum number of steps.
	 */
	public SingleAgentEnvironment(boolean costReward,
	                              boolean valueReward,
	                              int newMaxSteps)
	{
		m_ActionCostReward = costReward;
		m_StateValueReward = valueReward;
		m_MaxSteps = newMaxSteps;
	}

	/** Constructs a server for a single agent specifying all reward details.
	 * @param   costReward         Reward -= Action.Cost (if true)
	 * @param   valueReward        Reward += ResultingState.Value (if true)
	 * @param   newReward          Normal action reward.
	 * @param   newSuccessReward   Successful (goal-leading) action reward.
	 * @param   newMaxSteps        Maximum number of steps.
	 */
	public SingleAgentEnvironment(boolean costReward,
	                              boolean valueReward,
	                              double newReward,
	                              double newSuccessReward,
	                              int newMaxSteps)
	{
		m_ActionCostReward = costReward;
		m_StateValueReward = valueReward;
		m_Reward = newReward;
		m_SuccessReward = newSuccessReward;
		m_MaxSteps = newMaxSteps;
	}

	/*********************************************************************/
	//

	/** Runs an agent on the task once.
	 * @param     agent     The agent to run on the task.
	 * @param     initState Initial state (must not be null).
	 * @return    Number of steps taken, total action cost, and total reward. */
	public double[] go(Agent agent, State initState)
	{
	   int actionCount = 0;
	   double actionCost = 0;
	   double totalReward = 0;

	   //initial state
	   State s = initState;
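	   //single observable state on request, since there is only one agent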
	   State os = s.getObservableStates()[0];
	   setChanged();
	   notifyObservers(s.toDataSet());
	   agent.newEpisode(os);

	   //until its done
	   while (!s.isFinal() && (actionCount < m_MaxSteps))
	   {

		   //request action
		   Action a = agent.requestAction(os);
		   actionCount++;
		   actionCost += a.getCost();
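		   //deterministic transition: getNextStates returns a single state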
		   s = s.getNextStates(a)[0];
		   os = s.getObservableStates()[0];
		   setChanged();
		   notifyObservers(s.toDataSet());

		   //return reward
		   double r = 0;
		   if (m_ActionCostReward) {
			   r -= a.getCost();
		   }
		   if (m_StateValueReward) {
			   r += s.getValue();
		   }
		   if (!s.isFinal()) {
			   r += m_Reward;
		   } else {
			   r += m_SuccessReward;
		   }
		   agent.returnReward(os, r);
		   totalReward += r;

		   //dots
		   if (showDots && (actionCount % 1000 == 0)) {System.out.print(".");}

	   }

	   //done
	   agent.endEpisode(os);

	   return new double[] {actionCount, actionCost, totalReward};
	}


}