# -*- coding: utf-8 -*-
"""
Created on Wed Aug 5 10:28:21 2020
@author: kblackw1
"""
import numpy as np
class Environment:
    """Minimal reinforcement-learning environment with purely random dynamics.

    Both the reward and the state transition are drawn at random and do not
    depend on the action taken.
    """

    def __init__(self, nstate, naction):
        """Create a new environment.

        Args:
            nstate: number of discrete states; states are ``0 .. nstate - 1``.
            naction: number of discrete actions.
        """
        self.Ns = nstate  # number of states
        self.Na = naction  # number of actions
        # Defined up front so visual() is safe to call before start()/step().
        self.state = None
        self.reward = None

    def start(self):
        """Start an episode and return the randomly chosen initial state."""
        self.state = np.random.randint(self.Ns)
        return self.state

    def step(self, action):
        """Advance one step and return ``(reward, state)``.

        Args:
            action: the agent's action; accepted for interface compatibility
                but not used, since reward and transition are random.

        Returns:
            Tuple ``(reward, state)``: reward is a float drawn by
            ``np.random.random()`` and state is a random integer in
            ``[0, Ns)``.
        """
        # random reward in the half-open interval [0, 1)
        self.reward = np.random.random()
        # random state transition, independent of the previous state
        self.state = np.random.randint(self.Ns)
        return self.reward, self.state

    def visual(self):
        """Print the most recent reward and state (``None`` if not yet set)."""
        print("r =", self.reward, "; s = ", self.state)
class Agent:
    """Minimal reinforcement-learning agent that acts uniformly at random.

    The agent does no learning: the reward and state it receives are ignored.
    """

    def __init__(self, nstate, naction):
        """Create a new agent.

        Args:
            nstate: number of discrete states in the environment.
            naction: number of discrete actions; actions are
                ``0 .. naction - 1``.
        """
        self.Ns = nstate  # number of states
        self.Na = naction  # number of actions
        # Defined up front so visual() is safe to call before start()/step().
        self.action = None

    def start(self, state):
        """Choose the first action of an episode, without reward feedback.

        Args:
            state: initial environment state; currently unused.

        Returns:
            A random integer action in ``[0, Na)``.
        """
        self.action = np.random.randint(self.Na)
        return self.action

    def step(self, reward, state):
        """Receive a reward and a new state, then choose the next action.

        Args:
            reward: reward from the previous step; currently ignored.
            state: current environment state; currently ignored.

        Returns:
            A random integer action in ``[0, Na)``.
        """
        # No learning from the reward; the action is not constrained by state.
        self.action = np.random.randint(self.Na)
        return self.action

    def visual(self):
        """Print the most recently chosen action (``None`` if not yet set)."""
        print("a =", self.action)