Initial agent/env setup
main.py (+60 -1)
@@ -1 +1,60 @@
from mpe2 import simple_crypto_v3  # MPE environments now ship in mpe2, the successor to pettingzoo.mpe
import os

import numpy as np
import torch
import torch.nn as nn
from torch import optim

import matplotlib.pyplot as plt
from tqdm import tqdm

import gymnasium as gym

class A2C:
    """Advantage actor-critic agent; methods are stubs for now."""

    def __init__(self):
        pass

    def forward(self):
        pass

    def select_action(self):
        pass

    def get_losses(self):
        pass

    def update_params(self):
        pass

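# --- sketch, not part of this commit ----------------------------------------
# One common way the stubs above get filled in: a shared torso with separate
# actor and critic heads. The class name and hidden_size are illustrative only.
class A2CNetSketch(nn.Module):
    def __init__(self, obs_dim, n_actions, hidden_size=64):
        super().__init__()
        self.shared = nn.Sequential(nn.Linear(obs_dim, hidden_size), nn.ReLU())
        self.actor = nn.Linear(hidden_size, n_actions)  # policy logits
        self.critic = nn.Linear(hidden_size, 1)         # state-value estimate

    def forward(self, x):
        h = self.shared(x)
        return self.actor(h), self.critic(h)
# -----------------------------------------------------------------------------
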
# environment hyperparams
n_episodes = 10

# agent hyperparams
ent_coef = 0.01  # coefficient for the entropy bonus
actor_lr = 0.001
critic_lr = 0.005

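# --- sketch, not part of this commit ----------------------------------------
# How ent_coef typically enters get_losses: the actor loss is the advantage-
# weighted negative log-probability minus an entropy bonus, and the critic loss
# is the squared TD error. The tensor arguments are assumed, for illustration.
def a2c_losses_sketch(log_probs, entropy, advantages):
    actor_loss = -(advantages.detach() * log_probs).mean() - ent_coef * entropy.mean()
    critic_loss = advantages.pow(2).mean()
    return actor_loss, critic_loss
# -----------------------------------------------------------------------------
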
# environment setup
env = simple_crypto_v3.parallel_env(render_mode="human")

# obs_space
# action_space
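# --- sketch, not part of this commit ----------------------------------------
# The placeholders above can be resolved with PettingZoo's per-agent space
# accessors. Note that in simple_crypto_v3 the agents' observation sizes
# differ, so a real setup would query each agent's spaces; `first` is
# illustrative only.
first = env.possible_agents[0]
obs_shape = env.observation_space(first).shape[0]
action_shape = env.action_space(first).n
# -----------------------------------------------------------------------------
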
device = torch.device("cpu")

# init the agent
# agent = A2C(obs_shape, action_shape, device, critic_lr, actor_lr, n_envs)

# wrapper to record statistics
# NOTE: this gymnasium vector wrapper expects a gymnasium VectorEnv, so it
# would need an adapter before it can wrap a PettingZoo parallel env.
# env_wrapper_stats = gym.wrappers.vector.RecordEpisodeStatistics(
#     env, buffer_length=n_episodes
# )

observations, infos = env.reset()
done = False

while env.agents:
    # placeholder policy: sample a random action for every live agent
    actions = {agent: env.action_space(agent).sample() for agent in env.agents}
    observations, rewards, terminations, truncations, infos = env.step(actions)

env.close()
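# --- sketch, not part of this commit ----------------------------------------
# How the random rollout above generalizes to the planned n_episodes loop;
# `agent_step` stands in for the future A2C.select_action.
def run_episodes_sketch(env, n_episodes, agent_step):
    for _ in tqdm(range(n_episodes)):
        observations, infos = env.reset()
        while env.agents:
            actions = {a: agent_step(a, observations[a]) for a in env.agents}
            observations, rewards, terminations, truncations, infos = env.step(actions)
# -----------------------------------------------------------------------------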