Initial agent/env setup

2025-08-31 11:25:29 -06:00
parent 52c8c21b40
commit 366bc88355
1 changed files with 60 additions and 1 deletions
--- a/main.py
+++ b/main.py
@ -1 +1,60 @@
-from mpe2 import simple_crypto_v3
+import os
+
+import numpy as np
+import torch
+import torch.nn as nn
+from torch import optim
+
+import matplotlib.pyplot as plt
+from tqdm import tqdm
+
+import gymnasium as gym
+from pettingzoo.mpe import simple_crypto_v3
+
+class A2C():  # Agent skeleton (presumably Advantage Actor-Critic, going by the name; TODO confirm).
+    def __init__(self):  # TODO: stub; takes no arguments and stores no state yet.
+        pass
+
+    def forward(self):  # TODO: stub; currently returns None.
+        pass
+
+    def select_action(self):  # TODO: stub; currently returns None.
+        pass
+
+    def get_losses(self):  # TODO: stub; currently returns None.
+        pass
+
+    def update_params(self):  # TODO: stub; currently returns None.
+        pass
+
+# Environment hyperparameters
+n_episodes = 10  # NOTE(review): only referenced by the commented-out stats wrapper; the rollout below resets once.
+
+# Agent hyperparameters (none of these are read yet in the code shown here)
+ent_coef = 0.01 # coefficient for entropy bonus
+actor_lr = 0.001  # presumably the actor's learning rate; TODO confirm once A2C is implemented
+critic_lr = 0.005  # presumably the critic's learning rate; TODO confirm once A2C is implemented
+
+# Environment setup: parallel-API simple_crypto_v3 environment created with render_mode="human"
+env = simple_crypto_v3.parallel_env(render_mode="human")
+
+# TODO: obs_space (placeholder; nothing is defined for it yet)
+# TODO: action_space (placeholder; nothing is defined for it yet)
+
+device = torch.device("cpu")  # not read by any live statement in the code shown here
+
+# Init the agent (disabled: A2C.__init__ takes no arguments yet; obs_shape/action_shape/n_envs are not defined here)
+#agent = A2C(obs_shape, action_shape, device, critic_lr, actor_lr, n_envs)
+    
+# Wrapper to record statistics (disabled)
+#env_wrapper_stats = gym.wrappers.vector.RecordEpisodeStatistics(
+#    env, buffer_length=n_episodes
+#)
+observations, infos = env.reset()
+done = False  # NOTE(review): never read below; the loop is driven by env.agents instead
+while env.agents:  # keep stepping for as long as env.agents is non-empty
+    # Placeholder policy: sample a random action for every agent currently listed in env.agents.
+    actions = {agent: env.action_space(agent).sample() for agent in env.agents}
+    observations, rewards, terminations, truncations, infos = env.step(actions)
+
+env.close()