"""
Definition of the Rock-paper-scissors environment.
"""
import gymnasium as gym
import numpy as np
from pantheonrl.common.agents import Agent
from pantheonrl.common.multiagentenv import SimultaneousEnv
ACTION_NAMES = ["ROCK", "PAPER", "SCISSORS"]
ACTION_SPACE = gym.spaces.Discrete(3)
OBS_SPACE = gym.spaces.Discrete(1)
NULL_OBS = 0


class RPSWeightedAgent(Agent):
    """
    Random RPS agent that samples each action in proportion to its weight.
    """

    def __init__(self, r=1, p=1, s=1, np_random=np.random):
        weight = r + p + s
        if weight == 0:
            # Degenerate weights: fall back to a uniform distribution.
            self.c0 = 1.0 / 3
            self.c1 = 2.0 / 3
        else:
            # Cumulative thresholds: P(ROCK) = r/weight, P(PAPER) = p/weight.
            self.c0 = r / weight
            self.c1 = (r + p) / weight
        self.np_random = np_random

    def get_action(self, obs):
        # Sample an action using the cumulative thresholds built in __init__.
        roll = self.np_random.rand()
        return 0 if roll < self.c0 else 1 if roll < self.c1 else 2

    def update(self, reward, done):
        # Stateless agent: there is nothing to learn from the reward signal.
        pass
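
# Worked example of the cumulative-threshold sampling above (not part of the
# original module): with r=2, p=1, s=1 the thresholds are c0 = 0.5 and
# c1 = 0.75, so a uniform roll in [0, 1) yields ROCK about 50% of the time,
# PAPER about 25%, and SCISSORS about 25%.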


class RPSEnv(SimultaneousEnv):
    """
    Definition of the RPS environment.
    The observation is always 0, and the valid actions are 0, 1, and 2.
    """

    def __init__(self):
        super().__init__([OBS_SPACE] * 2, [ACTION_SPACE] * 2)
        self.history = []

    def multi_step(self, ego_action, alt_action):
        # 0 = tie, 1 = ego wins, 2 = ego loses (remapped to -1 below).
        outcome = (ego_action - alt_action + 3) % 3
        outcome = -1 if outcome == 2 else outcome
        # The game ends after one simultaneous move; rewards are zero-sum.
        return (NULL_OBS, NULL_OBS), (outcome, -outcome), True, {}

    def multi_reset(self):
        return (NULL_OBS, NULL_OBS)
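

if __name__ == "__main__":
    # Illustrative sketch (not part of the original module): plays a few
    # hands between two weighted agents to show how the pieces fit together.
    # It relies only on the classes defined above and assumes the PantheonRL
    # base classes construct cleanly with the arguments used here.
    env = RPSEnv()
    ego = RPSWeightedAgent(r=2, p=1, s=1)  # favors ROCK
    alt = RPSWeightedAgent()               # uniform over all three actions

    for _ in range(5):
        ego_obs, alt_obs = env.multi_reset()
        ego_action = ego.get_action(ego_obs)
        alt_action = alt.get_action(alt_obs)
        _, (ego_reward, alt_reward), done, _ = env.multi_step(ego_action, alt_action)
        print(f"{ACTION_NAMES[ego_action]} vs {ACTION_NAMES[alt_action]}: "
              f"rewards = ({ego_reward}, {alt_reward}), done = {done}")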