"""A demo for the regret-matching algorithm (Hart and Mas-Colell 2000) for various
N-player normal form games.
For 2-player zero-sum game, regret matching algorithm's average strategy provably converges to Nash.
However, it seems to work for more than 2-player games as well.
Usage:
Run::
$ python3 imperfecto/demos/regret_matching_demo.py --help
to print the available options.
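
For example, to train regret-matching players on rock-paper-scissors for 10,000 iterations::

    $ python3 imperfecto/demos/regret_matching_demo.py --game RockPaperScissorGame \
        --train_regret_matching --n_iters 10000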
"""
import logging
from pprint import pprint
from typing import Type
import click
import numpy as np
from imperfecto.algos.regret_matching import RegretMatchingPlayer
from imperfecto.games.bar_crowding import BarCrowdingGame
from imperfecto.games.game import ExtensiveFormGame
from imperfecto.games.prisoner_dilemma import PrisonerDilemmaGame
from imperfecto.games.rock_paper_scissor import (
AsymmetricRockPaperScissorGame,
RockPaperScissorGame,
)
from imperfecto.misc.evaluate import evaluate_strategies
from imperfecto.misc.trainer import NormalFormTrainer
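

# The RegretMatchingPlayer imported above encapsulates the actual regret-matching update
# used in this demo. As a rough, illustrative sketch of the core idea (and not necessarily
# how the library implements it): a regret-matching player accumulates a regret value for
# each action and plays a mixed strategy proportional to the positive part of those
# regrets, falling back to the uniform strategy when no action has positive regret.
def regret_matching_strategy_sketch(cumulative_regrets: np.ndarray) -> np.ndarray:
    """Illustrative (sketch-only) regret-matching strategy: normalize the positive regrets.

    Args:
        cumulative_regrets: The cumulative regrets, a numpy array of shape (n_actions,).

    Returns:
        A probability distribution over actions, a numpy array of shape (n_actions,).
    """
    positive_regrets = np.maximum(cumulative_regrets, 0.0)
    total = positive_regrets.sum()
    if total > 0:
        return positive_regrets / total
    # no action has positive regret yet: play uniformly at random
    return np.ones_like(cumulative_regrets, dtype=float) / len(cumulative_regrets)
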
def generate_random_prob_dist(n_actions: int) -> np.ndarray:
"""Generate a random probability distribution for a game.
Args:
n_actions: The number of actions in the game.
Returns:
A numpy array of shape (n_actions,).
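
    Example (the drawn values are random, but the result is always a valid distribution)::

        >>> p = generate_random_prob_dist(3)
        >>> p.shape
        (3,)
        >>> bool(np.isclose(p.sum(), 1.0))
        True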
"""
return np.random.dirichlet(np.ones(n_actions), size=1)[0]
def verify_nash_strategy(Game: Type[ExtensiveFormGame], nash_strategy: np.ndarray,
n_iters: int = 10000, n_random_strategies: int = 5) -> None:
"""
Verifies (roughly) that the given strategy is a Nash equilibrium. The idea of
Nash strategy is only pplicable for 2-player (normal form).
zero-sum games. We verify Nash strategy by pitting the strategy against random opponent's strategy.
The Nash strategy should be unexploitable (i.e., having the payoff >= 0).
    Args:
        Game: The game to verify the strategy for.
        nash_strategy: The strategy to verify.
        n_iters: The number of iterations to run the game for.
        n_random_strategies: The number of random opponent strategies to test against.
    """
print(f"In {Game.__name__}, the nash strategy {nash_strategy} is unexploitable by "
"any other strategy.")
print("That means that it will always have 0 payoff against all strategy[p, q, 1-p-q]. Do the math with the "
"normal form game matrix to convince yourself that this is true.")
print("In this example, we will fix P0 strategy to be nash strategy, and vary P1 strategy")
print("\n")
P0_uniform_strategy = {"P0": nash_strategy}
strategies = [generate_random_prob_dist(
len(Game.actions))for _ in range(n_random_strategies)]
print("P1 strategy \t \t payoff")
print("-" * 40)
with np.printoptions(suppress=True, precision=2):
for strat in strategies:
P1_strategy = {"P1": strat}
            avg_payoffs = evaluate_strategies(
                Game, [P0_nash_strategy, P1_strategy], n_iters=n_iters)
print(f"{np.array2string(strat):20} \t {avg_payoffs}")
print()
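

# For intuition behind the empirical check above: in a 2-player normal-form game with P0
# payoff matrix A, the expected payoff of P0 playing mixed strategy x against P1 playing y
# is x^T A y. This helper is an illustrative sketch added for the demo (it is not used by
# the trainer), and the payoff matrices of the games in this repo may differ from the
# textbook rock-paper-scissors matrix used in the example.
def expected_payoff_sketch(payoff_matrix: np.ndarray, p0_strategy: np.ndarray,
                           p1_strategy: np.ndarray) -> float:
    """Compute P0's expected payoff x^T A y in a 2-player normal-form game.

    Example (textbook rock-paper-scissors, where the uniform strategy is the Nash
    equilibrium and earns exactly 0 against any opponent strategy)::

        >>> A = np.array([[0, -1, 1], [1, 0, -1], [-1, 1, 0]])
        >>> expected_payoff_sketch(A, np.ones(3) / 3, generate_random_prob_dist(3))
        0.0
    """
    return float(p0_strategy @ payoff_matrix @ p1_strategy)
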
def to_train_regret_matching(Game: Type[ExtensiveFormGame], n_iters: int = 10000) -> None:
"""Train all players simultaneously by the regret-matching algorithm and print the average
strategies and payoffs.
Args:
Game: The game to train the players for.
n_iters: The number of iterations to run the game for.
"""
players = [RegretMatchingPlayer(name=f"RM{i}", n_actions=len(Game.actions))
for i in range(Game.n_players)]
trainer = NormalFormTrainer(Game, players, n_iters=n_iters)
avg_payoffs = trainer.train()
with np.printoptions(suppress=True, precision=2):
print(
f'Training regret-matching players for game {Game.__name__} after {n_iters} iters:')
print('average strategies:')
pprint(trainer.avg_strategies)
        print(f'average payoffs: {avg_payoffs}')
print()
filenames = {
'strategy_file': 'strategy.json',
'avg_strategy_file': 'avg_strategy.json',
'payoff_history_file': 'payoff_history.json',
}
trainer.store_data(filenames)
def to_train_delay_regret_matching(Game: Type[ExtensiveFormGame], n_iters: int = 10000,
                                   freeze_duration: int = 10) -> None:
"""Train all players by the regret-matching algorithm and print the average strategies and payoffs.
We alternatively freeze one player's strategy and train the other player(s). This is a process of
co-evolution.
Args:
Game: The game to train the players for.
n_iters: The number of iterations to run the game for.
freeze_duration: The number of iterations to freeze the strategy of the player that is not being trained.
"""
assert 0 < freeze_duration < n_iters
    # number of rounds in which one player trains while the others are frozen
    freeze_interval = n_iters // freeze_duration
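    # With the defaults (n_iters=10000, freeze_duration=10) this gives 1000 rounds; in each
    # round, every player in turn is trained while the remaining players are frozen.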
players = [RegretMatchingPlayer(name=f"RM{i}", n_actions=len(Game.actions))
for i in range(Game.n_players)]
trainer = NormalFormTrainer(Game, players, n_iters=freeze_duration)
for _ in range(freeze_interval):
for i in range(Game.n_players):
# train player i freezing the rest
            freeze_list = [player for player_id, player in enumerate(players)
                           if player_id != i]
trainer.train(freeze_ls=freeze_list)
with np.printoptions(suppress=True, precision=2):
print(
f'Training delay regret-matching players for game {Game.__name__} after {n_iters} iters:')
print('average strategies:')
pprint(trainer.avg_strategies)
        print(f'average payoffs: {trainer.avg_payoffs}')
print()
filenames = {
'strategy_file': 'strategy.json',
'avg_strategy_file': 'avg_strategy.json',
'payoff_history_file': 'payoff_history.json',
}
trainer.store_data(filenames)
@click.command()
@click.option("--game", type=click.Choice(["RockPaperScissorGame",
"AsymmetricRockPaperScissorGame", "BarCrowdingGame", "PrisonerDilemmaGame"]),
default="RockPaperScissorGame", help="The game to demo.")
@click.option("--n_iters", type=int, default=10000, help="The number of iterations to run the game for.")
@click.option("--train_regret_matching", is_flag=True, default=False, help="Train regret matching players.")
@click.option("--train_delay_regret_matching", is_flag=True, default=False,
help="Train delay regret matching players.")
@click.option("--verbose_level", type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]),
default="INFO", help="The verbosity level of the game.")
@click.option("--seed", type=int, default=0, help="The random seed to use for the game.")
def main(game: str, n_iters: int = 10000, train_regret_matching: bool = False,
train_delay_regret_matching: bool = False,
verbose_level: str = "INFO", seed: int = 0) -> None:
"""Demo for N-player normal-form games using the regret-matching algorithm.
Available games:
----------------
RockPaperScissorGame, AsymmetricRockPaperScissorGame, BarCrowdingGame, PrisonerDilemmaGame
Available options:
------------------
--game: The game to demo.
--n_iters: The number of iterations to run the game for.
--train_regret_matching: Whether to train regret matching players.
--train_delay_regret_matching: Whether to train delay regret matching players.
We will also show the Nash equilibrium for 2-player zero-sum games so the user can verify that
the regret matching players' strategies indeed converge to Nash.
"""
logging.basicConfig(level=getattr(
logging, verbose_level), format="%(message)s")
np.random.seed(seed)
Game_dict = {
"RockPaperScissorGame": RockPaperScissorGame,
"AsymmetricRockPaperScissorGame": AsymmetricRockPaperScissorGame,
"BarCrowdingGame": BarCrowdingGame,
"PrisonerDilemmaGame": PrisonerDilemmaGame,
}
nash_strategy_dict = {
"RockPaperScissorGame": np.array([0.5, 0.5, 0.0]),
"AsymmetricRockPaperScissorGame": np.array([0.5, 0.5, 0.0]),
}
Game = Game_dict[game]
if game in nash_strategy_dict:
nash_strategy = nash_strategy_dict[game]
verify_nash_strategy(Game, nash_strategy, n_iters=n_iters)
if train_regret_matching:
to_train_regret_matching(Game, n_iters=n_iters)
if train_delay_regret_matching:
to_train_delay_regret_matching(Game, n_iters=n_iters)
if __name__ == "__main__":
main()