# Source code for pantheonrl.envs.rpsgym.rps

"""
Definition of the Rock-paper-scissors environment.
"""
import gymnasium as gym
import numpy as np

from pantheonrl.common.agents import Agent
from pantheonrl.common.multiagentenv import SimultaneousEnv

# Human-readable labels for the three discrete actions, indexed by action id.
ACTION_NAMES = ["ROCK", "PAPER", "SCISSORS"]
# Each player chooses one of three actions: 0=ROCK, 1=PAPER, 2=SCISSORS.
ACTION_SPACE = gym.spaces.Discrete(3)
# The game is stateless, so the observation space holds a single value.
OBS_SPACE = gym.spaces.Discrete(1)

# The only observation ever emitted (there is no state to observe).
NULL_OBS = 0


class RPSWeightedAgent(Agent):
    """
    Random RPS agent that samples actions in proportion to fixed weights.

    :param r: Relative weight of playing ROCK.
    :param p: Relative weight of playing PAPER.
    :param s: Relative weight of playing SCISSORS.
    :param np_random: Random source; accepts the legacy ``np.random``
        module, a ``numpy.random.RandomState``, or a modern
        ``numpy.random.Generator``.
    """

    def __init__(self, r=1, p=1, s=1, np_random=np.random):
        weight = r + p + s
        if weight == 0:
            # Degenerate weights: fall back to the uniform distribution.
            self.c0 = 1.0 / 3
            self.c1 = 2.0 / 3
        else:
            # Cumulative probability thresholds: a uniform roll in [0, c0)
            # selects ROCK, [c0, c1) selects PAPER, [c1, 1) selects SCISSORS.
            self.c0 = r / weight
            self.c1 = (r + p) / weight
        self.np_random = np_random

    def get_action(self, obs):
        """Sample an action (0=ROCK, 1=PAPER, 2=SCISSORS); *obs* is ignored."""
        # Use .random() rather than the legacy-only .rand() so that
        # numpy.random.Generator instances are also supported; the
        # np.random module and RandomState both provide .random() too.
        roll = self.np_random.random()
        return 0 if roll < self.c0 else 1 if roll < self.c1 else 2

    def update(self, reward, done):
        """No-op: this agent's policy is fixed and does not learn."""
class RPSEnv(SimultaneousEnv):
    """
    Definition of the RPS environment.

    The observation is always 0, and the valid actions are 0, 1, and 2.
    """

    def __init__(self):
        super().__init__([OBS_SPACE] * 2, [ACTION_SPACE] * 2)
        self.history = []

    def multi_step(self, ego_action, alt_action):
        """Resolve one round; return joint obs, zero-sum rewards, done, info."""
        # (ego - alt) mod 3 is 0 on a draw, 1 on an ego win, 2 on an ego loss;
        # the tuple lookup maps those outcomes to rewards 0, +1, -1.
        diff = (ego_action - alt_action) % 3
        ego_reward = (0, 1, -1)[diff]
        # A single round is the whole episode, so done is always True.
        return (NULL_OBS, NULL_OBS), (ego_reward, -ego_reward), True, {}

    def multi_reset(self):
        """Begin a new one-shot game; both players observe the null state."""
        return NULL_OBS, NULL_OBS