From d43899ecdae316a7e14089c2e259b19ff610f680 Mon Sep 17 00:00:00 2001 From: luk3k Date: Mon, 29 Jan 2024 17:47:00 +0100 Subject: [PATCH] Implemented strategy evaluation for moves and improved scoring for BayesMcts --- chesspp/baysian_mcts.py | 19 +++++++++++++------ chesspp/classic_mcts.py | 16 +++++++--------- chesspp/engine.py | 22 +++++++++++++++------- chesspp/eval.py | 15 ++++++++++----- chesspp/i_mcts.py | 1 + chesspp/i_strategy.py | 4 ++++ chesspp/random_strategy.py | 4 ++++ chesspp/stockfish_strategy.py | 13 ++++++++++--- chesspp/web.py | 5 ++--- main.py | 4 ++-- web.py | 1 - 11 files changed, 68 insertions(+), 36 deletions(-) diff --git a/chesspp/baysian_mcts.py b/chesspp/baysian_mcts.py index e2edbbd..c58a699 100644 --- a/chesspp/baysian_mcts.py +++ b/chesspp/baysian_mcts.py @@ -1,7 +1,9 @@ +import math + +import torch.distributions as dist from chesspp.i_mcts import * from chesspp.i_strategy import IStrategy from chesspp.util_gaussian import gaussian_ucb1, max_gaussian, min_gaussian -from chesspp.eval import score_manual class BayesianMctsNode(IMctsNode): @@ -61,8 +63,9 @@ class BayesianMctsNode(IMctsNode): def select(self) -> IMctsNode: if len(self.children) == 0: return self - else: + elif not self.board.is_game_over(): return self._select_best_child().select() + return self def expand(self) -> IMctsNode: if self.visits == 0: @@ -87,7 +90,8 @@ class BayesianMctsNode(IMctsNode): copied_board.push(m) steps += 1 - score = score_manual(copied_board) // steps + steps = max(1, steps) + score = int(self.strategy.analyze_board(copied_board) / (math.log2(steps) + 1)) self.result = score return score @@ -138,7 +142,9 @@ class BayesianMcts(IMcts): def sample(self, runs: int = 1000) -> None: for i in range(runs): - # print(f"sample {i}") + if self.board.is_game_over(): + break + leaf_node = self.root.select().expand() _ = leaf_node.rollout() leaf_node.backpropagate() @@ -151,6 +157,7 @@ class BayesianMcts(IMcts): for child in self.get_children(): if child.move == move: self.root = child + child.depth = 0 self.root.parent = None return @@ -160,10 +167,10 @@ class BayesianMcts(IMcts): def get_children(self) -> list[IMctsNode]: return self.root.children - def get_moves(self) -> Dict[chess.Move, int]: + def get_moves(self) -> Dict[chess.Move, dist.Normal]: res = {} for c in self.root.children: - res[c.move] = c.mu + res[c.move] = dist.Normal(c.mu, c.sigma) return res def print(self): diff --git a/chesspp/classic_mcts.py b/chesspp/classic_mcts.py index 6335b34..068fc3c 100644 --- a/chesspp/classic_mcts.py +++ b/chesspp/classic_mcts.py @@ -1,19 +1,17 @@ import chess import random import numpy as np - - -from chesspp import eval -from chesspp import util +from chesspp.i_strategy import IStrategy class ClassicMcts: - def __init__(self, board: chess.Board, color: chess.Color, parent=None, move: chess.Move | None = None, + def __init__(self, board: chess.Board, color: chess.Color, strategy: IStrategy, parent=None, move: chess.Move | None = None, random_state: int | None = None): self.random = random.Random(random_state) self.board = board self.color = color + self.strategy = strategy self.parent = parent self.move = move self.children = [] @@ -31,11 +29,11 @@ class ClassicMcts: self.untried_actions.remove(move) next_board = self.board.copy() next_board.push(move) - child_node = ClassicMcts(next_board, color=self.color, parent=self, move=move) + child_node = ClassicMcts(next_board, color=self.color, strategy=self.strategy, parent=self, move=move) self.children.append(child_node) return child_node - def _rollout(self, rollout_depth: int = 3) -> int: + def _rollout(self, rollout_depth: int = 4) -> int: """ Rolls out the node by simulating a game for a given depth. Sometimes this step is called 'simulation' or 'playout'. @@ -47,11 +45,11 @@ class ClassicMcts: if copied_board.is_game_over(): break - m = util.pick_move(copied_board) + m = self.strategy.pick_next_move(copied_board) copied_board.push(m) steps += 1 - return eval.score_manual(copied_board) // steps + return self.strategy.analyze_board(copied_board) // steps def _backpropagate(self, score: float) -> None: """ diff --git a/chesspp/engine.py b/chesspp/engine.py index 8205435..582adab 100644 --- a/chesspp/engine.py +++ b/chesspp/engine.py @@ -2,12 +2,14 @@ import random import time from abc import ABC, abstractmethod +from torch import distributions as dist import chess import chess.engine from chesspp.baysian_mcts import BayesianMcts from chesspp.classic_mcts import ClassicMcts from chesspp.i_strategy import IStrategy +from typing import Dict class Limit: @@ -95,13 +97,20 @@ class BayesMctsEngine(Engine): if len(board.move_stack) != 0: # apply previous move to mcts --> reuse previous simulation results self.mcts.apply_move(board.peek()) limit.run(lambda: self.mcts.sample(1)) - # limit.run(lambda: mcts_root.build_tree()) - best_move = max(self.mcts.get_moves().items(), key=lambda x: x[1])[0] if board.turn == chess.WHITE else ( - min(self.mcts.get_moves().items(), key=lambda x: x[1])[0]) - print(best_move) + best_move = self.get_best_move(self.mcts.get_moves(), board.turn) self.mcts.apply_move(best_move) return chess.engine.PlayResult(move=best_move, ponder=None) + @staticmethod + def get_best_move(possible_moves: Dict[chess.Move, dist.Normal], color: chess.Color) -> chess.Move: + moves = {} + for m, d in possible_moves.items(): + moves[m] = d.sample() + + return max(moves.items(), key=lambda x: x[1])[0] if color == chess.WHITE else ( + min(moves.items(), key=lambda x: x[1])[0]) + + class ClassicMctsEngine(Engine): def __init__(self, board: chess.Board, color: chess.Color, strategy: IStrategy): @@ -112,9 +121,8 @@ class ClassicMctsEngine(Engine): return "ClassicMctsEngine" def play(self, board: chess.Board, limit: Limit) -> chess.engine.PlayResult: - mcts_root = ClassicMcts(board, self.color) - mcts_root.build_tree() - # limit.run(lambda: mcts_root.build_tree()) + mcts_root = ClassicMcts(board, self.color, self.strategy) + limit.run(lambda: mcts_root.build_tree(1)) best_move = max(mcts_root.children, key=lambda x: x.score).move if board.turn == chess.WHITE else ( min(mcts_root.children, key=lambda x: x.score).move) return chess.engine.PlayResult(move=best_move, ponder=None) diff --git a/chesspp/eval.py b/chesspp/eval.py index 9470f88..375b1f5 100644 --- a/chesspp/eval.py +++ b/chesspp/eval.py @@ -173,16 +173,21 @@ def score_manual(board: chess.Board) -> int: return score -def score_stockfish(board: chess.Board) -> chess.engine.PovScore: +def score_stockfish(board: chess.Board, stockfish: chess.engine.SimpleEngine | None = None) -> int: """ Calculate the score of the given board using stockfish :param board: :return: """ - engine = chess.engine.SimpleEngine.popen_uci("/home/luke/projects/pp-project/chess-engine-pp/stockfish/stockfish-ubuntu-x86-64-avx2") - info = engine.analyse(board, chess.engine.Limit(depth=0)) - engine.quit() - return info["score"] + if stockfish is None: + engine = chess.engine.SimpleEngine.popen_uci( + "/home/luke/projects/pp-project/chess-engine-pp/stockfish/stockfish-ubuntu-x86-64-avx2") + info = engine.analyse(board, chess.engine.Limit(depth=0)) + engine.quit() + return info['score'].white().score(mate_score=100_000) + else: + info = stockfish.analyse(board, chess.engine.Limit(depth=0)) + return info['score'].white().score(mate_score=100_000) def score_lc0(board: chess.Board) -> chess.engine.PovScore: diff --git a/chesspp/i_mcts.py b/chesspp/i_mcts.py index 2088860..1df8085 100644 --- a/chesspp/i_mcts.py +++ b/chesspp/i_mcts.py @@ -15,6 +15,7 @@ class IMctsNode(ABC): self.move = move self.legal_moves = list(board.legal_moves) self.random_state = random_state + self.depth = 0 @abstractmethod def select(self) -> Self: diff --git a/chesspp/i_strategy.py b/chesspp/i_strategy.py index cd7229c..d985b79 100644 --- a/chesspp/i_strategy.py +++ b/chesspp/i_strategy.py @@ -9,3 +9,7 @@ class IStrategy(ABC): @abstractmethod def pick_next_move(self, board: chess.Board) -> chess.Move: pass + + @abstractmethod + def analyze_board(self, board: chess.Board) -> int: + pass diff --git a/chesspp/random_strategy.py b/chesspp/random_strategy.py index 09a0988..74193d7 100644 --- a/chesspp/random_strategy.py +++ b/chesspp/random_strategy.py @@ -1,6 +1,7 @@ import chess import random from chesspp.i_strategy import IStrategy +from chesspp.eval import score_manual class RandomStrategy(IStrategy): @@ -11,3 +12,6 @@ class RandomStrategy(IStrategy): if len(list(board.legal_moves)) == 0: return None return self.random_state.choice(list(board.legal_moves)) + + def analyze_board(self, board: chess.Board) -> int: + return score_manual(board) diff --git a/chesspp/stockfish_strategy.py b/chesspp/stockfish_strategy.py index 82ddaf3..4485987 100644 --- a/chesspp/stockfish_strategy.py +++ b/chesspp/stockfish_strategy.py @@ -1,15 +1,21 @@ import os import chess from chesspp.i_strategy import IStrategy +from chesspp.eval import score_stockfish import chess.engine _DIR = os.path.abspath(os.path.dirname(__file__)) + class StockFishStrategy(IStrategy): def __init__(self): self._stockfish = None + def __del__(self): + if self._stockfish is not None: + self._stockfish.quit() + @property def stockfish(self) -> chess.engine.SimpleEngine: if self._stockfish is None: @@ -22,6 +28,7 @@ class StockFishStrategy(IStrategy): self._stockfish = stockfish def pick_next_move(self, board: chess.Board) -> chess.Move | None: - move = self.stockfish.play(board, chess.engine.Limit(depth=4)).move - print("stockfish picked:", move) - return move + return self.stockfish.play(board, chess.engine.Limit(depth=4)).move + + def analyze_board(self, board: chess.Board) -> int: + return score_stockfish(board, self.stockfish) diff --git a/chesspp/web.py b/chesspp/web.py index a1a5608..9a51745 100644 --- a/chesspp/web.py +++ b/chesspp/web.py @@ -1,6 +1,5 @@ import os import asyncio -import random import aiohttp from aiohttp import web @@ -8,7 +7,6 @@ from aiohttp import web import chess from chesspp import engine from chesspp.stockfish_strategy import StockFishStrategy -from chesspp.random_strategy import RandomStrategy _DIR = os.path.abspath(os.path.dirname(__file__)) _DATA_DIR = os.path.abspath(os.path.join(_DIR, "static_data")) @@ -103,6 +101,7 @@ class WebInterface: ]) web.run_app(app) + def run_sample(): - limit = engine.Limit(time=0.5) + limit = engine.Limit(time=1) WebInterface(engine.BayesMctsEngine, engine.ClassicMctsEngine, limit).run_app() diff --git a/main.py b/main.py index b93bf13..c0d14d9 100644 --- a/main.py +++ b/main.py @@ -83,8 +83,8 @@ def test_evaluation(): a = engine.BayesMctsEngine b = engine.ClassicMctsEngine limit = engine.Limit(time=0.5) - evaluator = simulation.Evaluation(a, StockFishStrategy(), b, RandomStrategy(random.Random()), limit) - results = evaluator.run(2) + evaluator = simulation.Evaluation(a, StockFishStrategy(), b, StockFishStrategy(), limit) + results = evaluator.run(24) a_results = len(list(filter(lambda x: x.winner == simulation.Winner.Engine_A, results))) / len(results) * 100 b_results = len(list(filter(lambda x: x.winner == simulation.Winner.Engine_B, results))) / len(results) * 100 draws = len(list(filter(lambda x: x.winner == simulation.Winner.Draw, results))) / len(results) * 100 diff --git a/web.py b/web.py index 5254a06..f2edfd9 100644 --- a/web.py +++ b/web.py @@ -1,4 +1,3 @@ -import chesspp from chesspp import engine from chesspp import web