Initial agent/env setup
main.py (+60 -1)
@@ -1 +1,60 @@
from mpe2 import simple_crypto_v3  # MPE environments now ship in mpe2, the successor to pettingzoo.mpe
import os

import numpy as np
import torch
import torch.nn as nn
from torch import optim

import matplotlib.pyplot as plt
from tqdm import tqdm

import gymnasium as gym

class A2C:
    """Advantage actor-critic agent; methods are stubs for now."""

    def __init__(self):
        pass

    def forward(self):
        pass

    def select_action(self):
        pass

    def get_losses(self):
        pass

    def update_params(self):
        pass

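# --- sketch, not part of this commit ----------------------------------------
# One common way the stubs above get filled in: a shared torso with separate
# actor and critic heads. The class name and hidden_size are illustrative only.
class A2CNetSketch(nn.Module):
    def __init__(self, obs_dim, n_actions, hidden_size=64):
        super().__init__()
        self.shared = nn.Sequential(nn.Linear(obs_dim, hidden_size), nn.ReLU())
        self.actor = nn.Linear(hidden_size, n_actions)  # policy logits
        self.critic = nn.Linear(hidden_size, 1)         # state-value estimate

    def forward(self, x):
        h = self.shared(x)
        return self.actor(h), self.critic(h)
# -----------------------------------------------------------------------------
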
# environment hyperparams
n_episodes = 10

# agent hyperparams
ent_coef = 0.01  # coefficient for the entropy bonus
actor_lr = 0.001
critic_lr = 0.005

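# --- sketch, not part of this commit ----------------------------------------
# How ent_coef typically enters get_losses: the actor loss is the advantage-
# weighted negative log-probability minus an entropy bonus, and the critic loss
# is the squared TD error. The tensor arguments are assumed, for illustration.
def a2c_losses_sketch(log_probs, entropy, advantages):
    actor_loss = -(advantages.detach() * log_probs).mean() - ent_coef * entropy.mean()
    critic_loss = advantages.pow(2).mean()
    return actor_loss, critic_loss
# -----------------------------------------------------------------------------
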
# environment setup
env = simple_crypto_v3.parallel_env(render_mode="human")

# obs_space
# action_space
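# --- sketch, not part of this commit ----------------------------------------
# The placeholders above can be resolved with PettingZoo's per-agent space
# accessors. Note that in simple_crypto_v3 the agents' observation sizes
# differ, so a real setup would query each agent's spaces; `first` is
# illustrative only.
first = env.possible_agents[0]
obs_shape = env.observation_space(first).shape[0]
action_shape = env.action_space(first).n
# -----------------------------------------------------------------------------
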
device = torch.device("cpu")

# init the agent
# agent = A2C(obs_shape, action_shape, device, critic_lr, actor_lr, n_envs)

# wrapper to record statistics
# NOTE: this gymnasium vector wrapper expects a gymnasium VectorEnv, so it
# would need an adapter before it can wrap a PettingZoo parallel env.
# env_wrapper_stats = gym.wrappers.vector.RecordEpisodeStatistics(
#     env, buffer_length=n_episodes
# )

observations, infos = env.reset()
done = False

while env.agents:
    # placeholder policy: sample a random action for every live agent
    actions = {agent: env.action_space(agent).sample() for agent in env.agents}
    observations, rewards, terminations, truncations, infos = env.step(actions)

env.close()
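# --- sketch, not part of this commit ----------------------------------------
# How the random rollout above generalizes to the planned n_episodes loop;
# `agent_step` stands in for the future A2C.select_action.
def run_episodes_sketch(env, n_episodes, agent_step):
    for _ in tqdm(range(n_episodes)):
        observations, infos = env.reset()
        while env.agents:
            actions = {a: agent_step(a, observations[a]) for a in env.agents}
            observations, rewards, terminations, truncations, infos = env.step(actions)
# -----------------------------------------------------------------------------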