diff --git a/chesspp/mcts/baysian_mcts.py b/chesspp/mcts/baysian_mcts.py
index 3de12f7..0738d60 100644
--- a/chesspp/mcts/baysian_mcts.py
+++ b/chesspp/mcts/baysian_mcts.py
@@ -1,139 +1,10 @@
-import math
-
+import chess
 import torch.distributions as dist
-from chesspp.mcts.i_mcts import *
+
 from chesspp.i_strategy import IStrategy
-from chesspp.util_gaussian import gaussian_ucb1, max_gaussian, min_gaussian
-
-
-class BayesianMctsNode(IMctsNode):
-    def __init__(self, board: chess.Board, strategy: IStrategy, color: chess.Color, parent: Self | None,
-                 move: chess.Move | None,
-                 random_state: random.Random, inherit_result: int | None = None, depth: int = 0, visits: int = 0):
-        super().__init__(board, strategy, parent, move, random_state)
-        self.color = color  # Color of the player whose turn it is
-        self.visits = visits
-        self.result = inherit_result if inherit_result is not None else 0
-        self._set_mu_sigma()
-        self.depth = depth
-
-    def _create_child(self, move: chess.Move) -> IMctsNode:
-        copied_board = self.board.copy()
-        copied_board.push(move)
-        return BayesianMctsNode(copied_board, self.strategy, not self.color, self, move, self.random_state, self.result,
-                                self.depth + 1)
-
-    def _set_mu_sigma(self) -> None:
-        self.mu = self.result
-        self.sigma = 1
-
-    def _is_new_ucb1_better(self, current, new) -> bool:
-        if self.color == chess.WHITE:
-            # maximize ucb1
-            return new > current
-        else:
-            # minimize ubc1
-            return new < current
-
-    def _select_best_child(self) -> IMctsNode:
-        """
-        Returns the child with the *best* ucb1 score.
-        It chooses the child with maximum ucb1 for WHITE, and with minimum ucb1 for BLACK.
-        """
-
-        if self.board.is_game_over():
-            return self
-
-        best_child = self.random_state.choice(self.children)
-        best_ucb1 = gaussian_ucb1(best_child.mu, best_child.sigma, self.visits)
-        for child in self.children:
-            # if child has no visits, prioritize this child.
-            if child.visits == 0:
-                best_child = child
-                break
-
-            # save child if it has a *better* score, than our previous best child.
-            ucb1 = gaussian_ucb1(child.mu, child.sigma, self.visits)
-            if self._is_new_ucb1_better(best_ucb1, ucb1):
-                best_ucb1 = ucb1
-                best_child = child
-
-        return best_child
-
-    def update_depth(self, depth: int) -> None:
-        self.depth = depth
-        for c in self.children:
-            c.update_depth(depth + 1)
-
-    def select(self) -> IMctsNode:
-        if len(self.children) == 0 or self.board.is_game_over():
-            return self
-
-        return self._select_best_child().select()
-
-    def expand(self) -> IMctsNode:
-        if self.visits == 0:
-            return self
-
-        for move in self.legal_moves:
-            self.children.append(self._create_child(move))
-
-        return self._select_best_child()
-
-    def rollout(self, rollout_depth: int = 4) -> int:
-        copied_board = self.board.copy()
-        steps = self.depth
-        for i in range(rollout_depth):
-            if copied_board.is_game_over():
-                break
-
-            m = self.strategy.pick_next_move(copied_board)
-            if m is None:
-                break
-
-            copied_board.push(m)
-            steps += 1
-
-        steps = max(1, steps)
-        score = int(self.strategy.analyze_board(copied_board) / (math.log2(steps) + 1))
-        self.result = score
-        return score
-
-    def _combine_gaussians(self, mu1: float, sigma1: float, mu2: float, sigma2: float) -> tuple[float, float]:
-        if self.color == chess.WHITE:
-            return max_gaussian(mu1, sigma1, mu2, sigma2)
-        else:
-            return min_gaussian(mu1, sigma1, mu2, sigma2)
-
-    def backpropagate(self, score: int | None = None) -> None:
-        self.visits += 1
-
-        if score is not None:
-            self.result = score
-
-        if len(self.children) == 0:
-            # leaf node
-            self._set_mu_sigma()
-        else:
-            # interior node
-            shuffled_children = self.random_state.sample(self.children, len(self.children))
-            mu = shuffled_children[0].mu
-            sigma = shuffled_children[0].sigma
-            for c in shuffled_children[1:]:
-                mu, sigma = self._combine_gaussians(mu, sigma, c.mu, c.sigma)
-
-            # if max_sigma == 0:
-            #     max_sigma = 0.001
-            self.mu = mu
-            self.sigma = sigma
-
-        if self.parent:
-            self.parent.backpropagate()
-
-    def print(self, indent=0):
-        print("\t" * indent + f"move={self.move}, visits={self.visits}, mu={self.mu}, sigma={self.sigma}")
-        for c in self.children:
-            c.print(indent + 1)
+from chesspp.mcts.baysian_mcts_node import BayesianMctsNode
+from chesspp.mcts.i_mcts import IMcts
+from chesspp.mcts.i_mcts_node import IMctsNode
 
 
 class BayesianMcts(IMcts):
@@ -172,7 +43,7 @@ class BayesianMcts(IMcts):
     def get_children(self) -> list[IMctsNode]:
         return self.root.children
 
-    def get_moves(self) -> Dict[chess.Move, dist.Normal]:
+    def get_moves(self) -> dict[chess.Move, dist.Normal]:
         res = {}
         for c in self.root.children:
             res[c.move] = dist.Normal(c.mu, c.sigma)
diff --git a/chesspp/mcts/baysian_mcts_node.py b/chesspp/mcts/baysian_mcts_node.py
new file mode 100644
index 0000000..a94a38a
--- /dev/null
+++ b/chesspp/mcts/baysian_mcts_node.py
@@ -0,0 +1,139 @@
+import math
+import random
+from typing import Self
+
+import chess
+
+from chesspp.i_strategy import IStrategy
+from chesspp.mcts.i_mcts_node import IMctsNode
+from chesspp.util_gaussian import gaussian_ucb1, max_gaussian, min_gaussian
+
+
+class BayesianMctsNode(IMctsNode):
+    def __init__(self, board: chess.Board, strategy: IStrategy, color: chess.Color, parent: Self | None,
+                 move: chess.Move | None,
+                 random_state: random.Random, inherit_result: int | None = None, depth: int = 0, visits: int = 0):
+        super().__init__(board, strategy, parent, move, random_state)
+        self.color = color  # Color of the player whose turn it is
+        self.visits = visits
+        self.result = inherit_result if inherit_result is not None else 0
+        self._set_mu_sigma()
+        self.depth = depth
+
+    def _create_child(self, move: chess.Move) -> IMctsNode:
+        copied_board = self.board.copy()
+        copied_board.push(move)
+        return BayesianMctsNode(copied_board, self.strategy, not self.color, self, move, self.random_state, self.result,
+                                self.depth + 1)
+
+    def _set_mu_sigma(self) -> None:
+        self.mu = self.result
+        self.sigma = 1
+
+    def _is_new_ucb1_better(self, current, new) -> bool:
+        if self.color == chess.WHITE:
+            # maximize ucb1
+            return new > current
+        else:
+            # minimize ucb1
+            return new < current
+
+    def _select_best_child(self) -> IMctsNode:
+        """
+        Returns the child with the *best* ucb1 score.
+        It chooses the child with maximum ucb1 for WHITE, and with minimum ucb1 for BLACK.
+        """
+
+        if self.board.is_game_over():
+            return self
+
+        best_child = self.random_state.choice(self.children)
+        best_ucb1 = gaussian_ucb1(best_child.mu, best_child.sigma, self.visits)
+        for child in self.children:
+            # if child has no visits, prioritize this child.
+            if child.visits == 0:
+                best_child = child
+                break
+
+            # save child if it has a *better* score, than our previous best child.
+            ucb1 = gaussian_ucb1(child.mu, child.sigma, self.visits)
+            if self._is_new_ucb1_better(best_ucb1, ucb1):
+                best_ucb1 = ucb1
+                best_child = child
+
+        return best_child
+
+    def update_depth(self, depth: int) -> None:
+        self.depth = depth
+        for c in self.children:
+            c.update_depth(depth + 1)
+
+    def select(self) -> IMctsNode:
+        if len(self.children) == 0 or self.board.is_game_over():
+            return self
+
+        return self._select_best_child().select()
+
+    def expand(self) -> IMctsNode:
+        if self.visits == 0:
+            return self
+
+        for move in self.legal_moves:
+            self.children.append(self._create_child(move))
+
+        return self._select_best_child()
+
+    def rollout(self, rollout_depth: int = 4) -> int:
+        copied_board = self.board.copy()
+        steps = self.depth
+        for _ in range(rollout_depth):
+            if copied_board.is_game_over():
+                break
+
+            m = self.strategy.pick_next_move(copied_board)
+            if m is None:
+                break
+
+            copied_board.push(m)
+            steps += 1
+
+        steps = max(1, steps)
+        score = int(self.strategy.analyze_board(copied_board) / (math.log2(steps) + 1))
+        self.result = score
+        return score
+
+    def _combine_gaussians(self, mu1: float, sigma1: float, mu2: float, sigma2: float) -> tuple[float, float]:
+        if self.color == chess.WHITE:
+            return max_gaussian(mu1, sigma1, mu2, sigma2)
+        else:
+            return min_gaussian(mu1, sigma1, mu2, sigma2)
+
+    def backpropagate(self, score: int | None = None) -> None:
+        self.visits += 1
+
+        if score is not None:
+            self.result = score
+
+        if len(self.children) == 0:
+            # leaf node
+            self._set_mu_sigma()
+        else:
+            # interior node
+            shuffled_children = self.random_state.sample(self.children, len(self.children))
+            mu = shuffled_children[0].mu
+            sigma = shuffled_children[0].sigma
+            for c in shuffled_children[1:]:
+                mu, sigma = self._combine_gaussians(mu, sigma, c.mu, c.sigma)
+
+            # if max_sigma == 0:
+            #     max_sigma = 0.001
+            self.mu = mu
+            self.sigma = sigma
+
+        if self.parent:
+            self.parent.backpropagate()
+
+    def print(self, indent=0):
+        print("\t" * indent + f"move={self.move}, visits={self.visits}, mu={self.mu}, sigma={self.sigma}")
+        for c in self.children:
+            c.print(indent + 1)
diff --git a/chesspp/mcts/i_mcts.py b/chesspp/mcts/i_mcts.py
index a3a3856..1231495 100644
--- a/chesspp/mcts/i_mcts.py
+++ b/chesspp/mcts/i_mcts.py
@@ -1,62 +1,11 @@
-import chess
 import random
 from abc import ABC, abstractmethod
-from typing import Dict, Self
+from typing import Dict
+
+import chess
+
 from chesspp.i_strategy import IStrategy
-
-
-class IMctsNode(ABC):
-    def __init__(self, board: chess.Board, strategy: IStrategy, parent: Self | None, move: chess.Move | None,
-                 random_state: random.Random):
-        self.board = board
-        self.strategy = strategy
-        self.parent = parent
-        self.children = []
-        self.move = move
-        self.legal_moves = list(board.legal_moves)
-        self.random_state = random_state
-        self.depth = 0
-
-    @abstractmethod
-    def select(self) -> Self:
-        """
-        Selects the next node leaf node in the tree
-        :return:
-        """
-        pass
-
-    @abstractmethod
-    def expand(self) -> Self:
-        """
-        Expands this node creating X child leaf nodes, i.e., choose an action and apply it to the board
-        :return:
-        """
-        pass
-
-    @abstractmethod
-    def rollout(self, rollout_depth: int = 20) -> int:
-        """
-        Rolls out the node by simulating a game for a given depth.
-        Sometimes this step is called 'simulation' or 'playout'.
-        :return: the score of the rolled out game
-        """
-        pass
-
-    @abstractmethod
-    def backpropagate(self, score: float) -> None:
-        """
-        Backpropagates the results of the rollout
-        :param score:
-        :return:
-        """
-        pass
-
-    def update_depth(self, depth: int) -> None:
-        """
-        Recursively updates the depth the current node and all it's children
-        :param depth: new depth for current node
-        :return:
-        """
+from chesspp.mcts.i_mcts_node import IMctsNode
 
 
 class IMcts(ABC):
diff --git a/chesspp/mcts/i_mcts_node.py b/chesspp/mcts/i_mcts_node.py
new file mode 100644
index 0000000..ffb1f05
--- /dev/null
+++ b/chesspp/mcts/i_mcts_node.py
@@ -0,0 +1,61 @@
+import random
+from abc import ABC, abstractmethod
+from typing import Self
+
+import chess
+
+from chesspp.i_strategy import IStrategy
+
+
+class IMctsNode(ABC):
+    def __init__(self, board: chess.Board, strategy: IStrategy, parent: Self | None, move: chess.Move | None,
+                 random_state: random.Random):
+        self.board = board
+        self.strategy = strategy
+        self.parent = parent
+        self.children = []
+        self.move = move
+        self.legal_moves = list(board.legal_moves)
+        self.random_state = random_state
+        self.depth = 0
+
+    @abstractmethod
+    def select(self) -> Self:
+        """
+        Selects the next leaf node in the tree
+        :return:
+        """
+        pass
+
+    @abstractmethod
+    def expand(self) -> Self:
+        """
+        Expands this node creating X child leaf nodes, i.e., choose an action and apply it to the board
+        :return:
+        """
+        pass
+
+    @abstractmethod
+    def rollout(self, rollout_depth: int = 20) -> int:
+        """
+        Rolls out the node by simulating a game for a given depth.
+        Sometimes this step is called 'simulation' or 'playout'.
+        :return: the score of the rolled out game
+        """
+        pass
+
+    @abstractmethod
+    def backpropagate(self, score: float | None = None) -> None:
+        """
+        Backpropagates the results of the rollout
+        :param score:
+        :return:
+        """
+        pass
+
+    def update_depth(self, depth: int) -> None:
+        """
+        Recursively updates the depth of the current node and all its children
+        :param depth: new depth for current node
+        :return:
+        """