From 35e90ad016a547035d299dafe3cda93e866f89fc Mon Sep 17 00:00:00 2001
From: Samuel Walker
Date: Sun, 31 Aug 2025 23:35:06 -0600
Subject: [PATCH] added entropy graph

---
 main.py | 64 ++++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 43 insertions(+), 21 deletions(-)

diff --git a/main.py b/main.py
index 7439fc6..ff5ea2f 100644
--- a/main.py
+++ b/main.py
@@ -163,13 +163,12 @@ class A2C(nn.Module):
         self.actor.eval()
 
 
-fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(15,5))
+fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(15,5))
 fig.suptitle(
     f"training plots for the Simple Reference environment"
 )
 
 
 def drawPlots():
-    rolling_length = 20
     agent0_average = []
     agent1_average = []
@@ -177,7 +176,9 @@ def drawPlots():
     agent0_average_aloss = []
     agent1_average_closs = []
     agent1_average_aloss = []
-    window = 20
+    agent0_average_ent = []
+    agent1_average_ent = []
+    window = 100
     for ind in range(len(agent0_rewards) - window + 1):
         agent0_average.append(np.mean(agent0_rewards[ind:ind+window]))
     for ind in range(len(agent1_rewards) - window + 1):
         agent1_average.append(np.mean(agent1_rewards[ind:ind+window]))
@@ -190,23 +191,35 @@ def drawPlots():
         agent1_average_closs.append(np.mean(agent1_critic_loss[ind:ind+window]))
     for ind in range(len(agent1_actor_loss) - window + 1):
         agent1_average_aloss.append(np.mean(agent1_actor_loss[ind:ind+window]))
-    axs[0].cla()
-    axs[0].plot(agent0_average, label="Agent 0")
-    axs[0].plot(agent1_average, label="Agent 1")
-    axs[0].legend()
-    axs[0].set_title("Rewards over Tme")
+    for ind in range(len(agent0_entropy) - window + 1):
+        agent0_average_ent.append(np.mean(agent0_entropy[ind:ind+window]))
+    for ind in range(len(agent1_entropy) - window + 1):
+        agent1_average_ent.append(np.mean(agent1_entropy[ind:ind+window]))
 
-    axs[1].cla()
-    axs[1].plot(agent0_average_closs, label="Agent 0")
-    axs[1].plot(agent1_average_closs, label="Agent 1")
-    axs[1].legend()
-    axs[1].set_title("Critic Loss over Tme")
+    axs[0][0].cla()
+    axs[0][0].plot(agent0_average, label="Agent 0")
+    axs[0][0].plot(agent1_average, label="Agent 1")
+    axs[0][0].legend()
+    axs[0][0].set_title("Rewards over Time")
 
-    axs[2].cla()
-    axs[2].plot(agent0_average_aloss, label="Agent 0")
-    axs[2].plot(agent1_average_aloss, label="Agent 1")
-    axs[2].legend()
-    axs[2].set_title("Actor Loss over Tme")
+    axs[1][0].cla()
+    axs[1][0].plot(agent0_average_closs, label="Agent 0")
+    axs[1][0].plot(agent1_average_closs, label="Agent 1")
+    axs[1][0].legend()
+    axs[1][0].set_title("Critic Loss over Time")
+
+    axs[1][1].cla()
+    axs[1][1].plot(agent0_average_aloss, label="Agent 0")
+    axs[1][1].plot(agent1_average_aloss, label="Agent 1")
+    axs[1][1].legend()
+    axs[1][1].set_title("Actor Loss over Time")
+
+    axs[0][1].cla()
+    axs[0][1].ticklabel_format(style='plain')
+    axs[0][1].plot(agent0_average_ent, label="Agent 0")
+    axs[0][1].plot(agent1_average_ent, label="Agent 1")
+    axs[0][1].legend()
+    axs[0][1].set_title("Actor Entropy over Time")
 
 agent0_critic_loss = []
 agent0_actor_loss = []
@@ -214,6 +227,8 @@ agent1_critic_loss = []
 agent1_actor_loss = []
 agent0_rewards = []
 agent1_rewards = []
+agent0_entropy = []
+agent1_entropy = []
 
 def train(n_episodes, gamma, ent_coef, actor_lr, critic_lr):
     global agent0_critic_loss
@@ -222,12 +237,16 @@ def train(n_episodes, gamma, ent_coef, actor_lr, critic_lr):
     global agent1_actor_loss
     global agent0_rewards
     global agent1_rewards
+    global agent0_entropy
+    global agent1_entropy
     agent0_critic_loss = []
     agent0_actor_loss = []
     agent1_critic_loss = []
     agent1_actor_loss = []
     agent0_rewards = []
     agent1_rewards = []
+    agent0_entropy = []
+    agent1_entropy = []
     env = simple_reference_v3.parallel_env(max_cycles = 50, render_mode="rgb_array")
     #obs_space
     #action_space
@@ -282,13 +301,13 @@ def train(n_episodes, gamma, ent_coef, actor_lr, critic_lr):
             agent_0_rewards.append(rewards["agent_0"])
             agent_0_probs.append(agent_0_log_probs)
             agent_0_pred.append(agent_0_state_val)
-            agent_0_ents.append(agent_0_ent)
+            agent_0_ents.append(agent_0_ent.item())
            agent_0_mask.append( 1 if env.agents else 0)
 
             agent_1_rewards.append(rewards["agent_1"])
             agent_1_probs.append(agent_1_log_probs)
             agent_1_pred.append(agent_1_state_val)
-            agent_1_ents.append(agent_1_ent)
+            agent_1_ents.append(agent_1_ent.item())
            agent_1_mask.append( 1 if env.agents else 0)
             #eve_closs, eve_aloss = eve.get_losses([rewards["eve_0"]], eve_log_probs, eve_state_val, eve_ent, [1], gamma, ent_coef)
             #print("Eve: Critic Loss: " + str(eve_closs.item()) + " Actor Loss: " + str(eve_aloss.item()))
@@ -307,6 +326,9 @@ def train(n_episodes, gamma, ent_coef, actor_lr, critic_lr):
 
         agent0_rewards.append(np.array(agent_0_rewards).sum())
         agent1_rewards.append(np.array(agent_1_rewards).sum())
+        #print(np.array(agent_0_ents).sum())
+        agent0_entropy.append(np.array(agent_0_ents).sum())
+        agent1_entropy.append(np.array(agent_1_ents).sum())
 
 
 
@@ -321,7 +343,7 @@ def train(n_episodes, gamma, ent_coef, actor_lr, critic_lr):
 
 #environment hyperparams
 n_episodes = 1000
-train(10000, 0.999, 0, 0.0001, 0.0001)
+train(10000, 0.999, 0.01, 0.0001, 0.0005)
 best = 1
 for gamma in np.arange(0.999, 0.99, -0.1):
     for ent_coef in np.arange(0, 0.1, 0.01):
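Note on the plotted entropy (not part of the patch): the diff stores agent_0_ent.item() per step and appends the per-episode sum via agent0_entropy.append(np.array(agent_0_ents).sum()), but the A2C code that produces agent_0_ent is outside these hunks. A minimal sketch of where such a per-step policy entropy typically comes from in a PyTorch actor-critic follows; the function name select_action and the use of a Categorical head are assumptions for illustration, not the actual main.py API:

import torch
from torch.distributions import Categorical

def select_action(logits: torch.Tensor):
    # Sample an action from categorical logits and return
    # (action, log_prob, entropy); entropy is a 0-dim tensor,
    # which is why the patch stores it with .item().
    dist = Categorical(logits=logits)
    action = dist.sample()
    return action, dist.log_prob(action), dist.entropy()

# Per-step entropies summed per episode, mirroring
# agent0_entropy.append(np.array(agent_0_ents).sum()) in the patch.
ents = []
for _ in range(5):
    _, _, ent = select_action(torch.randn(5))
    ents.append(ent.item())
episode_entropy = sum(ents)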
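Also for reference (not part of the patch): each rolling average in drawPlots is built with an explicit loop of np.mean over slices. An equivalent, shorter form that keeps the same window semantics (output length len(x) - window + 1) is a single np.convolve call; rolling_mean below is a hypothetical helper, not a function in main.py:

import numpy as np

def rolling_mean(x, window=100):
    # Same values as the append-loops in drawPlots: the mean of each
    # length-`window` slice, giving len(x) - window + 1 points.
    x = np.asarray(x, dtype=float)
    if len(x) < window:
        return np.array([])
    return np.convolve(x, np.ones(window) / window, mode="valid")

# e.g. axs[0][0].plot(rolling_mean(agent0_rewards), label="Agent 0")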