added entropy graph
main.py
@@ -163,13 +163,12 @@ class A2C(nn.Module):
         self.actor.eval()


-fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(15,5))
+fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(15,5))
 fig.suptitle(
     f"training plots for the Simple Reference environment"
 )

 def drawPlots():
-    rolling_length = 20

     agent0_average = []
     agent1_average = []
@@ -177,7 +176,9 @@ def drawPlots():
     agent0_average_aloss = []
     agent1_average_closs = []
     agent1_average_aloss = []
-    window = 20
+    agent0_average_ent = []
+    agent1_average_ent = []
+    window = 100
     for ind in range(len(agent0_rewards) - window + 1):
         agent0_average.append(np.mean(agent0_rewards[ind:ind+window]))
     for ind in range(len(agent1_rewards) - window + 1):
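The rolling averages above are built with an explicit slice-and-mean loop. An equivalent, more compact formulation is sketched below; it is not part of this commit, and the helper name rolling_mean is made up for illustration.

import numpy as np

def rolling_mean(values, window=100):
    # Mean over each length-`window` slice; same output as the explicit loop.
    values = np.asarray(values, dtype=float)
    if len(values) < window:
        return np.array([])
    return np.convolve(values, np.ones(window) / window, mode="valid")

# e.g. agent0_average = rolling_mean(agent0_rewards, window=100)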
@@ -190,23 +191,35 @@ def drawPlots():
         agent1_average_closs.append(np.mean(agent1_critic_loss[ind:ind+window]))
     for ind in range(len(agent1_actor_loss) - window + 1):
         agent1_average_aloss.append(np.mean(agent1_actor_loss[ind:ind+window]))
-    axs[0].cla()
-    axs[0].plot(agent0_average, label="Agent 0")
-    axs[0].plot(agent1_average, label="Agent 1")
-    axs[0].legend()
-    axs[0].set_title("Rewards over Tme")
+    for ind in range(len(agent0_entropy) - window + 1):
+        agent0_average_ent.append(np.mean(agent0_entropy[ind:ind+window]))
+    for ind in range(len(agent1_entropy) - window + 1):
+        agent1_average_ent.append(np.mean(agent1_entropy[ind:ind+window]))

-    axs[1].cla()
-    axs[1].plot(agent0_average_closs, label="Agent 0")
-    axs[1].plot(agent1_average_closs, label="Agent 1")
-    axs[1].legend()
-    axs[1].set_title("Critic Loss over Tme")
+    axs[0][0].cla()
+    axs[0][0].plot(agent0_average, label="Agent 0")
+    axs[0][0].plot(agent1_average, label="Agent 1")
+    axs[0][0].legend()
+    axs[0][0].set_title("Rewards over Tme")

-    axs[2].cla()
-    axs[2].plot(agent0_average_aloss, label="Agent 0")
-    axs[2].plot(agent1_average_aloss, label="Agent 1")
-    axs[2].legend()
-    axs[2].set_title("Actor Loss over Tme")
+    axs[1][0].cla()
+    axs[1][0].plot(agent0_average_closs, label="Agent 0")
+    axs[1][0].plot(agent1_average_closs, label="Agent 1")
+    axs[1][0].legend()
+    axs[1][0].set_title("Critic Loss over Tme")

+    axs[1][1].cla()
+    axs[1][1].plot(agent0_average_aloss, label="Agent 0")
+    axs[1][1].plot(agent1_average_aloss, label="Agent 1")
+    axs[1][1].legend()
+    axs[1][1].set_title("Actor Loss over Tme")
+
+    axs[0][1].cla()
+    axs[0][1].ticklabel_format(style='plain')
+    axs[0][1].plot(agent0_average_ent, label="Agent 0")
+    axs[0][1].plot(agent1_average_ent, label="Agent 1")
+    axs[0][1].legend()
+    axs[0][1].set_title("Actor Entropy over Tme")
+
 agent0_critic_loss = []
 agent0_actor_loss = []
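For reference, the switch from a 1x3 to a 2x2 layout earlier in this diff is what makes the two-index axs[row][col] addressing above work: with nrows=2 and ncols=2, plt.subplots returns a 2-D array of Axes. A minimal standalone illustration follows; only the panel titles are taken from the diff, everything else is an editorial sketch.

import matplotlib.pyplot as plt

# A 2x2 grid returns a 2-D ndarray of Axes, addressed as axs[row][col].
fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(15, 5))
axs[0][0].set_title("Rewards")        # top-left
axs[0][1].set_title("Actor Entropy")  # top-right
axs[1][0].set_title("Critic Loss")    # bottom-left
axs[1][1].set_title("Actor Loss")     # bottom-right
plt.close(fig)  # layout illustration only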
@@ -214,6 +227,8 @@ agent1_critic_loss = []
 agent1_actor_loss = []
 agent0_rewards = []
 agent1_rewards = []
+agent0_entropy = []
+agent1_entropy = []

 def train(n_episodes, gamma, ent_coef, actor_lr, critic_lr):
     global agent0_critic_loss
@@ -222,12 +237,16 @@ def train(n_episodes, gamma, ent_coef, actor_lr, critic_lr):
     global agent1_actor_loss
     global agent0_rewards
     global agent1_rewards
+    global agent0_entropy
+    global agent1_entropy
     agent0_critic_loss = []
     agent0_actor_loss = []
     agent1_critic_loss = []
     agent1_actor_loss = []
     agent0_rewards = []
     agent1_rewards = []
+    agent0_entropy = []
+    agent1_entropy = []
     env = simple_reference_v3.parallel_env(max_cycles = 50, render_mode="rgb_array")
     #obs_space
     #action_space
@@ -282,13 +301,13 @@ def train(n_episodes, gamma, ent_coef, actor_lr, critic_lr):
            agent_0_rewards.append(rewards["agent_0"])
            agent_0_probs.append(agent_0_log_probs)
            agent_0_pred.append(agent_0_state_val)
-           agent_0_ents.append(agent_0_ent)
+           agent_0_ents.append(agent_0_ent.item())
            agent_0_mask.append( 1 if env.agents else 0)

            agent_1_rewards.append(rewards["agent_1"])
            agent_1_probs.append(agent_1_log_probs)
            agent_1_pred.append(agent_1_state_val)
-           agent_1_ents.append(agent_1_ent)
+           agent_1_ents.append(agent_1_ent.item())
            agent_1_mask.append( 1 if env.agents else 0)
            #eve_closs, eve_aloss = eve.get_losses([rewards["eve_0"]], eve_log_probs, eve_state_val, eve_ent, [1], gamma, ent_coef)
            #print("Eve: Critic Loss: " + str(eve_closs.item()) + " Actor Loss: " + str(eve_aloss.item()))
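The change to .item() suggests agent_0_ent and agent_1_ent are scalar torch tensors (per-step policy entropies); .item() converts them to plain Python floats before they are appended, so the later np.array(...).sum() operates on numbers rather than tensors. The following is a hypothetical sketch of how such a value could be produced; the actor, logits, and action-space size are assumptions and are not shown in this diff.

import torch
from torch.distributions import Categorical

logits = torch.randn(1, 50)          # assumed: discrete action logits from the actor
dist = Categorical(logits=logits)    # categorical policy over discrete actions
action = dist.sample()
log_prob = dist.log_prob(action)     # kept as a tensor for backpropagation
agent_0_ent = dist.entropy().mean()  # scalar tensor: policy entropy at this step
entropy_value = agent_0_ent.item()   # plain float, safe to append and sum with NumPy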
@@ -307,6 +326,9 @@ def train(n_episodes, gamma, ent_coef, actor_lr, critic_lr):

        agent0_rewards.append(np.array(agent_0_rewards).sum())
        agent1_rewards.append(np.array(agent_1_rewards).sum())
+       #print(np.array(agent_0_ents).sum())
+       agent0_entropy.append(np.array(agent_0_ents).sum())
+       agent1_entropy.append(np.array(agent_1_ents).sum())


@@ -321,7 +343,7 @@ def train(n_episodes, gamma, ent_coef, actor_lr, critic_lr):

 #environment hyperparams
 n_episodes = 1000
-train(10000, 0.999, 0, 0.0001, 0.0001)
+train(10000, 0.999, 0.01, 0.0001, 0.0005)
 best = 1
 for gamma in np.arange(0.999, 0.99, -0.1):
     for ent_coef in np.arange(0, 0.1, 0.01):
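Reading the changed call against the signature defined earlier in this file, train(n_episodes, gamma, ent_coef, actor_lr, critic_lr), the new hyperparameters are as annotated below; the comments are editorial, not part of the commit.

train(
    10000,    # n_episodes
    0.999,    # gamma: discount factor
    0.01,     # ent_coef: entropy bonus weight, previously 0
    0.0001,   # actor_lr
    0.0005,   # critic_lr, previously 0.0001
)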