Source code for imperfecto.misc.trainer

"""A class to train players in an extensive form game.

The players are trained over a number of games by calling each player's ``update_strategy`` method
after each game. The average payoffs and the average strategies during training are recorded.
"""
import logging
from typing import Sequence, Type

import numpy as np

import enlighten
from imperfecto.algos.player import Player
from imperfecto.games.game import ExtensiveFormGame
import pandas as pd


[docs]class NormalFormTrainer: """A class to train players in an extensive form game. Args: Game: The game class to train players in. players: The players to train. n_iters: The number of games to train for. display_status_bar: Whether to display a status bar during training. Attributes: game (ExtensiveFormGame): The game to train players in. n_iter (int): The number of games to train for. ep_strategies (dict): The strategies of each player in each game. ep_payoffs (np.ndarray): The payoffs of each player in each game over the course of this trainer instance. display_status_bar (bool): Whether to display a status bar during training. manager (enlighten.Manager): The enlighten manager to display the status bar. pbar (enlighten.Counter): The enlighten counter to display the status bar. """ def __init__(self, Game: Type[ExtensiveFormGame], players: Sequence[Player], n_iters: int = 100, display_status_bar: bool = True): self.game = Game(players) self.n_iters = n_iters self.ep_strategies = {player: [] for player in self.game.players} self.ep_payoffs = [] self.ep_histories = [] self.display_status_bar = display_status_bar if self.display_status_bar: self.manager = enlighten.get_manager() self.pbar = self.manager.counter( total=self.n_iters, desc=f'RM/{self.game.__class__.__name__}:', unit='ticks')
[docs] def train(self, freeze_ls: Sequence[Player] = []) -> np.ndarray: """Train the players for `n_iter` games using each player's `update_strategy` function. Note: Players in the ``freeze_ls`` list will not be trained. Args: freeze_ls: The players to freeze during training. Returns: The average payoffs of each player during this `train` call. """ num_spaces = 8 * self.game.n_players logging.debug( f"iter | history {' '* num_spaces} | payoffs") for i in range(self.n_iters): history, payoffs = self.game.play() self.ep_payoffs.append(payoffs) self.ep_histories.append(history) for player_id, player in enumerate(self.game.players): self.ep_strategies[player].append( player.strategy) if player not in freeze_ls: player.update_strategy(history, player_id) if self.display_status_bar: self.pbar.update() logging.debug( f"{i:4} {self.game.history_to_str(history):{int(1.5 * num_spaces)}} {np.array2string(np.array(payoffs)):2}") return np.mean(self.ep_payoffs[-self.n_iters:], axis=0)
@property def avg_payoffs(self) -> np.ndarray: """Get the average payoffs of each player over the course of this trainer instance. Returns: The average payoffs of each player. """ return np.mean(self.ep_payoffs, axis=0) @property def avg_strategies(self) -> dict: """Get the average strategies of each player. Returns: The average strategies of each player. """ return {player: np.mean(strategies, axis=0) for player, strategies in self.ep_strategies.items()}
[docs] def moving_avg(self, arr: np.ndarray) -> np.ndarray: """Compute the moving average of an array. Args: arr: The array to compute the moving average of. Returns: The moving average of the array. """ avg = np.cumsum(arr, axis=0, dtype=float) discount = np.repeat( np.arange(1, arr.shape[0] + 1), arr.shape[1], axis=0).reshape(arr.shape) return avg / discount
[docs] def make_df(self, strategies: np.ndarray, player_name: str) -> pd.DataFrame: """Make a dataframe from a strategy array. Args: strategies: The strategies to make a dataframe from. player_name: The name of the player. """ actions = list(map(str, self.game.actions)) # type: ignore df = pd.DataFrame(strategies, columns=actions) df["player"] = player_name df["iter"] = df.index return df
[docs] def store_strategies(self, filenames: dict) -> None: """Store the episodic strategies and average strategies of each player in json files. Args: filenames: The names of the json files to store the strategies and average strategies in. Must have key 'strategy_file' and 'avg_strategy_file' and string values corresponding to the file locations. """ dfs = [self.make_df(np.array(strategies), player.name) for player, strategies in self.ep_strategies.items()] avg_dfs = [self.make_df(self.moving_avg(np.array(strategies)), player.name) for player, strategies in self.ep_strategies.items()] df = pd.concat(dfs, ignore_index=True) avg_df = pd.concat(avg_dfs, ignore_index=True) # write json to file df.to_json(filenames['strategy_file'], orient='records', indent=2) avg_df.to_json(filenames['avg_strategy_file'], orient='records', indent=2)
[docs] def store_histories_payoffs(self, filenames: dict) -> None: """Store the episodic histories and payoffs of each player in json files. Args: filenames: The names of the json files to store the histories and payoffs in. Must have key 'history_payoffs_file'. """ df = pd.DataFrame() df["history"] = list(map(str, self.ep_histories)) df["payoffs"] = self.ep_payoffs df.to_json(filenames['history_payoffs_file'], orient='records', indent=2)
[docs] def store_data(self, filenames: dict) -> None: """Record data about the training process. Args: filenames: The names of the json files to store data in. """ self.store_strategies(filenames) self.store_histories_payoffs(filenames)