# simple_crypto/main.py

import os

import numpy as np
import torch
import torch.nn as nn
from torch import optim

import matplotlib.pyplot as plt
from tqdm import tqdm

import gymnasium as gym
from pettingzoo.mpe import simple_crypto_v3

class A2C:
    """Skeleton for the learning agent; every method is still a stub.

    Presumably an advantage actor-critic agent, judging by the class name
    (TODO confirm once implemented). At present the constructor accepts no
    arguments and stores no state, and each method takes only ``self`` and
    returns ``None``.
    """

    def __init__(self):
        """Create an empty agent; no attributes are set yet."""

    def forward(self):
        """Placeholder: not implemented, returns ``None``."""

    def select_action(self):
        """Placeholder: not implemented, returns ``None``."""

    def get_losses(self):
        """Placeholder: not implemented, returns ``None``."""

    def update_params(self):
        """Placeholder: not implemented, returns ``None``."""

# environment hyperparams
n_episodes = 10  # NOTE: not read by the rollout below, which runs one episode

# agent hyperparams
ent_coef = 0.01  # coefficient for entropy bonus
actor_lr = 0.001
critic_lr = 0.005

# environment setup
env = simple_crypto_v3.parallel_env(render_mode="human")

# TODO: obs_space
# TODO: action_space

device = torch.device("cpu")

# init the agent
# agent = A2C(obs_shape, action_shape, device, critic_lr, actor_lr, n_envs)

# wrapper to record statistics
# env_wrapper_stats = gym.wrappers.vector.RecordEpisodeStatistics(
#     env, buffer_length=n_episodes
# )

# Roll out a single episode with uniformly sampled actions. The try/finally
# guarantees the environment (and its "human" render window) is closed even
# if reset/step raises part-way through the episode.
try:
    observations, infos = env.reset()

    # Keep stepping while the environment still reports live agents;
    # `env.agents` being empty is what ends the episode here.
    while env.agents:
        actions = {
            agent: env.action_space(agent).sample() for agent in env.agents
        }
        observations, rewards, terminations, truncations, infos = env.step(
            actions
        )
finally:
    env.close()