added entropy graph

2025-08-31 23:35:06 -06:00
parent fc04bdcd97
commit 35e90ad016

main.py (64 changed lines)

@@ -163,13 +163,12 @@ class A2C(nn.Module):
         self.actor.eval()
-fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(15,5))
+fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(15,5))
 fig.suptitle(
     f"training plots for the Simple Reference environment"
 )
 def drawPlots():
     rolling_length = 20
     agent0_average = []
     agent1_average = []
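Note on the subplot change above: with nrows=2 and ncols=2, plt.subplots returns axs as a 2-D array of axes instead of the 1-D array a single row produces, which is why the plotting calls further down move from axs[i] to axs[row][col]. A minimal sketch:

import matplotlib.pyplot as plt

fig, axs = plt.subplots(nrows=1, ncols=3)
print(axs.shape)  # (3,)  -> flat indexing: axs[0] ... axs[2]

fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(15, 5))
print(axs.shape)  # (2, 2) -> grid indexing: axs[0][0] ... axs[1][1]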
@@ -177,7 +176,9 @@ def drawPlots():
     agent0_average_aloss = []
     agent1_average_closs = []
     agent1_average_aloss = []
-    window = 20
+    agent0_average_ent = []
+    agent1_average_ent = []
+    window = 100
     for ind in range(len(agent0_rewards) - window + 1):
         agent0_average.append(np.mean(agent0_rewards[ind:ind+window]))
     for ind in range(len(agent1_rewards) - window + 1):
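Side note on the windowed means above: each slice-and-append loop is O(n * window), while np.convolve in 'valid' mode returns the same len(values) - window + 1 points in one vectorized call. A sketch under the same window semantics:

import numpy as np

def rolling_mean(values, window):
    # One mean per full window, matching the append loops
    # (len(values) - window + 1 points).
    return np.convolve(values, np.ones(window) / window, mode="valid")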
@@ -190,23 +191,35 @@ def drawPlots():
         agent1_average_closs.append(np.mean(agent1_critic_loss[ind:ind+window]))
     for ind in range(len(agent1_actor_loss) - window + 1):
         agent1_average_aloss.append(np.mean(agent1_actor_loss[ind:ind+window]))
-    axs[0].cla()
-    axs[0].plot(agent0_average, label="Agent 0")
-    axs[0].plot(agent1_average, label="Agent 1")
-    axs[0].legend()
-    axs[0].set_title("Rewards over Time")
+    for ind in range(len(agent0_entropy) - window + 1):
+        agent0_average_ent.append(np.mean(agent0_entropy[ind:ind+window]))
+    for ind in range(len(agent1_entropy) - window + 1):
+        agent1_average_ent.append(np.mean(agent1_entropy[ind:ind+window]))
-    axs[1].cla()
-    axs[1].plot(agent0_average_closs, label="Agent 0")
-    axs[1].plot(agent1_average_closs, label="Agent 1")
-    axs[1].legend()
-    axs[1].set_title("Critic Loss over Time")
+    axs[0][0].cla()
+    axs[0][0].plot(agent0_average, label="Agent 0")
+    axs[0][0].plot(agent1_average, label="Agent 1")
+    axs[0][0].legend()
+    axs[0][0].set_title("Rewards over Time")
-    axs[2].cla()
-    axs[2].plot(agent0_average_aloss, label="Agent 0")
-    axs[2].plot(agent1_average_aloss, label="Agent 1")
-    axs[2].legend()
-    axs[2].set_title("Actor Loss over Time")
+    axs[1][0].cla()
+    axs[1][0].plot(agent0_average_closs, label="Agent 0")
+    axs[1][0].plot(agent1_average_closs, label="Agent 1")
+    axs[1][0].legend()
+    axs[1][0].set_title("Critic Loss over Time")
+    axs[1][1].cla()
+    axs[1][1].plot(agent0_average_aloss, label="Agent 0")
+    axs[1][1].plot(agent1_average_aloss, label="Agent 1")
+    axs[1][1].legend()
+    axs[1][1].set_title("Actor Loss over Time")
+    axs[0][1].cla()
+    axs[0][1].ticklabel_format(style='plain')
+    axs[0][1].plot(agent0_average_ent, label="Agent 0")
+    axs[0][1].plot(agent1_average_ent, label="Agent 1")
+    axs[0][1].legend()
+    axs[0][1].set_title("Actor Entropy over Time")
 agent0_critic_loss = []
 agent0_actor_loss = []
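The four panels above repeat the same cla/plot/legend/set_title sequence. A hypothetical refactor (not part of this commit) drives all four from one table; note that ticklabel_format has to run after cla(), since cla() resets the axis formatters:

panels = [
    (axs[0][0], agent0_average,       agent1_average,       "Rewards over Time"),
    (axs[0][1], agent0_average_ent,   agent1_average_ent,   "Actor Entropy over Time"),
    (axs[1][0], agent0_average_closs, agent1_average_closs, "Critic Loss over Time"),
    (axs[1][1], agent0_average_aloss, agent1_average_aloss, "Actor Loss over Time"),
]
for ax, a0, a1, title in panels:
    ax.cla()
    ax.plot(a0, label="Agent 0")
    ax.plot(a1, label="Agent 1")
    ax.legend()
    ax.set_title(title)
axs[0][1].ticklabel_format(style='plain')  # after cla(), which resets formatters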
@@ -214,6 +227,8 @@ agent1_critic_loss = []
 agent1_actor_loss = []
 agent0_rewards = []
 agent1_rewards = []
+agent0_entropy = []
+agent1_entropy = []
 def train(n_episodes, gamma, ent_coef, actor_lr, critic_lr):
     global agent0_critic_loss
@@ -222,12 +237,16 @@ def train(n_episodes, gamma, ent_coef, actor_lr, critic_lr):
     global agent1_actor_loss
     global agent0_rewards
     global agent1_rewards
+    global agent0_entropy
+    global agent1_entropy
     agent0_critic_loss = []
     agent0_actor_loss = []
     agent1_critic_loss = []
     agent1_actor_loss = []
     agent0_rewards = []
     agent1_rewards = []
+    agent0_entropy = []
+    agent1_entropy = []
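Aside: the eight module-level metric lists and their matching global statements could collapse into a single dict mutated in place; a hypothetical sketch, not what this commit does:

metrics = {name: [] for name in (
    "agent0_critic_loss", "agent0_actor_loss",
    "agent1_critic_loss", "agent1_actor_loss",
    "agent0_rewards", "agent1_rewards",
    "agent0_entropy", "agent1_entropy",
)}

def reset_metrics():
    for values in metrics.values():
        values.clear()  # in-place, so no 'global' declarations needed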
     env = simple_reference_v3.parallel_env(max_cycles=50, render_mode="rgb_array")
     #obs_space
     #action_space
@@ -282,13 +301,13 @@ def train(n_episodes, gamma, ent_coef, actor_lr, critic_lr):
             agent_0_rewards.append(rewards["agent_0"])
             agent_0_probs.append(agent_0_log_probs)
             agent_0_pred.append(agent_0_state_val)
-            agent_0_ents.append(agent_0_ent)
+            agent_0_ents.append(agent_0_ent.item())
             agent_0_mask.append(1 if env.agents else 0)
             agent_1_rewards.append(rewards["agent_1"])
             agent_1_probs.append(agent_1_log_probs)
             agent_1_pred.append(agent_1_state_val)
-            agent_1_ents.append(agent_1_ent)
+            agent_1_ents.append(agent_1_ent.item())
             agent_1_mask.append(1 if env.agents else 0)
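Note on the two .item() changes above: np.array(...) over a list of tensors that require grad raises at the per-episode sum further down (NumPy cannot convert a grad-tracking tensor), and storing the tensor also keeps its autograd graph reachable for the whole episode. A sketch of the pattern, with Categorical as a stand-in for the actual policy head (an assumption, not code from this repo):

import torch

dist = torch.distributions.Categorical(logits=torch.randn(5))  # hypothetical policy head
ent = dist.entropy()     # 0-dim tensor
ents = []
ents.append(ent.item())  # plain Python float: np-friendly, no graph retained
print(sum(ents))         # what the per-episode sum below consumes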
         #eve_closs, eve_aloss = eve.get_losses([rewards["eve_0"]], eve_log_probs, eve_state_val, eve_ent, [1], gamma, ent_coef)
         #print("Eve: Critic Loss: " + str(eve_closs.item()) + " Actor Loss: " + str(eve_aloss.item()))
@@ -307,6 +326,9 @@ def train(n_episodes, gamma, ent_coef, actor_lr, critic_lr):
         agent0_rewards.append(np.array(agent_0_rewards).sum())
         agent1_rewards.append(np.array(agent_1_rewards).sum())
         #print(np.array(agent_0_ents).sum())
+        agent0_entropy.append(np.array(agent_0_ents).sum())
+        agent1_entropy.append(np.array(agent_1_ents).sum())
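Design note on the sums just added: summing per-step entropies makes the curve scale with episode length. Here max_cycles=50 fixes the length, so the sum only rescales the y-axis, but with variable-length episodes a per-step mean would be the length-independent alternative, e.g.:

agent0_entropy.append(np.array(agent_0_ents).mean())  # per-step average instead of sum
agent1_entropy.append(np.array(agent_1_ents).mean())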
@@ -321,7 +343,7 @@ def train(n_episodes, gamma, ent_coef, actor_lr, critic_lr):
 #environment hyperparams
 n_episodes = 1000
-train(10000, 0.999, 0, 0.0001, 0.0001)
+train(10000, 0.999, 0.01, 0.0001, 0.0005)
 best = 1
 for gamma in np.arange(0.999, 0.99, -0.1):
     for ent_coef in np.arange(0, 0.1, 0.01):
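Caution on the sweep above: np.arange(0.999, 0.99, -0.1) steps past the stop immediately, so the gamma loop yields only the single value 0.999. A smaller step is needed to actually sweep the interval:

import numpy as np

print(np.arange(0.999, 0.99, -0.1))    # [0.999]  (one element, outer loop runs once)
print(np.arange(0.999, 0.99, -0.001))  # 0.999, 0.998, ... down toward 0.991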