Implemented strategy evaluation for moves and improved scoring for BayesMcts

2024-01-29 17:47:00 +01:00
parent c5536e08de
commit d43899ecda
11 changed files with 68 additions and 36 deletions
--- a/chesspp/baysian_mcts.py
+++ b/chesspp/baysian_mcts.py
@@ -1,7 +1,9 @@
+import math
+
+import torch.distributions as dist
 from chesspp.i_mcts import *
 from chesspp.i_strategy import IStrategy
 from chesspp.util_gaussian import gaussian_ucb1, max_gaussian, min_gaussian
-from chesspp.eval import score_manual


 class BayesianMctsNode(IMctsNode):
@@ -61,8 +63,9 @@ class BayesianMctsNode(IMctsNode):
    def select(self) -> IMctsNode:
        if len(self.children) == 0:
            return self
-        else:
+        elif not self.board.is_game_over():
            return self._select_best_child().select()
+        return self

    def expand(self) -> IMctsNode:
        if self.visits == 0:
@@ -87,7 +90,8 @@ class BayesianMctsNode(IMctsNode):
            copied_board.push(m)
            steps += 1

-        score = score_manual(copied_board) // steps
+        steps = max(1, steps)
+        score = int(self.strategy.analyze_board(copied_board) / (math.log2(steps) + 1))
        self.result = score
        return score

@@ -138,7 +142,9 @@ class BayesianMcts(IMcts):

    def sample(self, runs: int = 1000) -> None:
        for i in range(runs):
-            # print(f"sample {i}")
+            if self.board.is_game_over():
+                break
+
            leaf_node = self.root.select().expand()
            _ = leaf_node.rollout()
            leaf_node.backpropagate()
@@ -151,6 +157,7 @@ class BayesianMcts(IMcts):
        for child in self.get_children():
            if child.move == move:
                self.root = child
+                child.depth = 0
                self.root.parent = None
                return

@@ -160,10 +167,10 @@ class BayesianMcts(IMcts):
    def get_children(self) -> list[IMctsNode]:
        return self.root.children

-    def get_moves(self) -> Dict[chess.Move, int]:
+    def get_moves(self) -> Dict[chess.Move, dist.Normal]:
        res = {}
        for c in self.root.children:
-            res[c.move] = c.mu
+            res[c.move] = dist.Normal(c.mu, c.sigma)
        return res

    def print(self):
--- a/chesspp/classic_mcts.py
+++ b/chesspp/classic_mcts.py
@@ -1,19 +1,17 @@
 import chess
 import random
 import numpy as np
-
-
-from chesspp import eval
-from chesspp import util
+from chesspp.i_strategy import IStrategy


 class ClassicMcts:

-    def __init__(self, board: chess.Board, color: chess.Color, parent=None, move: chess.Move | None = None,
+    def __init__(self, board: chess.Board, color: chess.Color, strategy: IStrategy, parent=None, move: chess.Move | None = None,
                 random_state: int | None = None):
        self.random = random.Random(random_state)
        self.board = board
        self.color = color
+        self.strategy = strategy
        self.parent = parent
        self.move = move
        self.children = []
@@ -31,11 +29,11 @@ class ClassicMcts:
        self.untried_actions.remove(move)
        next_board = self.board.copy()
        next_board.push(move)
-        child_node = ClassicMcts(next_board, color=self.color, parent=self, move=move)
+        child_node = ClassicMcts(next_board, color=self.color, strategy=self.strategy, parent=self, move=move)
        self.children.append(child_node)
        return child_node

-    def _rollout(self, rollout_depth: int = 3) -> int:
+    def _rollout(self, rollout_depth: int = 4) -> int:
        """
        Rolls out the node by simulating a game for a given depth.
        Sometimes this step is called 'simulation' or 'playout'.
@@ -47,11 +45,11 @@ class ClassicMcts:
            if copied_board.is_game_over():
                break

-            m = util.pick_move(copied_board)
+            m = self.strategy.pick_next_move(copied_board)
            copied_board.push(m)
            steps += 1

-        return eval.score_manual(copied_board) // steps
+        return self.strategy.analyze_board(copied_board) // steps

    def _backpropagate(self, score: float) -> None:
        """
--- a/chesspp/engine.py
+++ b/chesspp/engine.py
@@ -2,12 +2,14 @@ import random
 import time
 from abc import ABC, abstractmethod

+from torch import distributions as dist
 import chess
 import chess.engine

 from chesspp.baysian_mcts import BayesianMcts
 from chesspp.classic_mcts import ClassicMcts
 from chesspp.i_strategy import IStrategy
+from typing import Dict


 class Limit:
@@ -95,13 +97,20 @@ class BayesMctsEngine(Engine):
        if len(board.move_stack) != 0:  # apply previous move to mcts --> reuse previous simulation results
            self.mcts.apply_move(board.peek())
        limit.run(lambda: self.mcts.sample(1))
-        # limit.run(lambda: mcts_root.build_tree())
-        best_move = max(self.mcts.get_moves().items(), key=lambda x: x[1])[0] if board.turn == chess.WHITE else (
-            min(self.mcts.get_moves().items(), key=lambda x: x[1])[0])
-        print(best_move)
+        best_move = self.get_best_move(self.mcts.get_moves(), board.turn)
        self.mcts.apply_move(best_move)
        return chess.engine.PlayResult(move=best_move, ponder=None)

+    @staticmethod
+    def get_best_move(possible_moves: Dict[chess.Move, dist.Normal], color: chess.Color) -> chess.Move:
+        moves = {}
+        for m, d in possible_moves.items():
+            moves[m] = d.sample()
+
+        return max(moves.items(), key=lambda x: x[1])[0] if color == chess.WHITE else (
+            min(moves.items(), key=lambda x: x[1])[0])
+
+

 class ClassicMctsEngine(Engine):
    def __init__(self, board: chess.Board, color: chess.Color, strategy: IStrategy):
@@ -112,9 +121,8 @@ class ClassicMctsEngine(Engine):
        return "ClassicMctsEngine"

    def play(self, board: chess.Board, limit: Limit) -> chess.engine.PlayResult:
-        mcts_root = ClassicMcts(board, self.color)
-        mcts_root.build_tree()
-        # limit.run(lambda: mcts_root.build_tree())
+        mcts_root = ClassicMcts(board, self.color, self.strategy)
+        limit.run(lambda: mcts_root.build_tree(1))
        best_move = max(mcts_root.children, key=lambda x: x.score).move if board.turn == chess.WHITE else (
            min(mcts_root.children, key=lambda x: x.score).move)
        return chess.engine.PlayResult(move=best_move, ponder=None)
--- a/chesspp/eval.py
+++ b/chesspp/eval.py
@@ -173,16 +173,21 @@ def score_manual(board: chess.Board) -> int:
    return score


-def score_stockfish(board: chess.Board) -> chess.engine.PovScore:
+def score_stockfish(board: chess.Board, stockfish: chess.engine.SimpleEngine | None = None) -> int:
    """
    Calculate the score of the given board using stockfish
    :param board:
    :return:
    """
-    engine = chess.engine.SimpleEngine.popen_uci("/home/luke/projects/pp-project/chess-engine-pp/stockfish/stockfish-ubuntu-x86-64-avx2")
+    if stockfish is None:
+        engine = chess.engine.SimpleEngine.popen_uci(
+            "/home/luke/projects/pp-project/chess-engine-pp/stockfish/stockfish-ubuntu-x86-64-avx2")
        info = engine.analyse(board, chess.engine.Limit(depth=0))
        engine.quit()
-    return info["score"]
+        return info['score'].white().score(mate_score=100_000)
+    else:
+        info = stockfish.analyse(board, chess.engine.Limit(depth=0))
+        return info['score'].white().score(mate_score=100_000)


 def score_lc0(board: chess.Board) -> chess.engine.PovScore:
--- a/chesspp/i_mcts.py
+++ b/chesspp/i_mcts.py
@@ -15,6 +15,7 @@ class IMctsNode(ABC):
        self.move = move
        self.legal_moves = list(board.legal_moves)
        self.random_state = random_state
+        self.depth = 0

    @abstractmethod
    def select(self) -> Self:
--- a/chesspp/i_strategy.py
+++ b/chesspp/i_strategy.py
@@ -9,3 +9,7 @@ class IStrategy(ABC):
    @abstractmethod
    def pick_next_move(self, board: chess.Board) -> chess.Move:
        pass
+
+    @abstractmethod
+    def analyze_board(self, board: chess.Board) -> int:
+        pass
--- a/chesspp/random_strategy.py
+++ b/chesspp/random_strategy.py
@@ -1,6 +1,7 @@
 import chess
 import random
 from chesspp.i_strategy import IStrategy
+from chesspp.eval import score_manual


 class RandomStrategy(IStrategy):
@@ -11,3 +12,6 @@ class RandomStrategy(IStrategy):
        if len(list(board.legal_moves)) == 0:
            return None
        return self.random_state.choice(list(board.legal_moves))
+
+    def analyze_board(self, board: chess.Board) -> int:
+        return score_manual(board)
--- a/chesspp/stockfish_strategy.py
+++ b/chesspp/stockfish_strategy.py
@@ -1,15 +1,21 @@
 import os
 import chess
 from chesspp.i_strategy import IStrategy
+from chesspp.eval import score_stockfish
 import chess.engine

 _DIR = os.path.abspath(os.path.dirname(__file__))

+
 class StockFishStrategy(IStrategy):

    def __init__(self):
        self._stockfish = None

+    def __del__(self):
+        if self._stockfish is not None:
+            self._stockfish.quit()
+
    @property
    def stockfish(self) -> chess.engine.SimpleEngine:
        if self._stockfish is None:
@@ -22,6 +28,7 @@ class StockFishStrategy(IStrategy):
        self._stockfish = stockfish

    def pick_next_move(self, board: chess.Board) -> chess.Move | None:
-        move = self.stockfish.play(board, chess.engine.Limit(depth=4)).move
-        print("stockfish picked:", move)
-        return move
+        return self.stockfish.play(board, chess.engine.Limit(depth=4)).move
+
+    def analyze_board(self, board: chess.Board) -> int:
+        return score_stockfish(board, self.stockfish)
--- a/chesspp/web.py
+++ b/chesspp/web.py
@@ -1,6 +1,5 @@
 import os
 import asyncio
-import random

 import aiohttp
 from aiohttp import web
@@ -8,7 +7,6 @@ from aiohttp import web
 import chess
 from chesspp import engine
 from chesspp.stockfish_strategy import StockFishStrategy
-from chesspp.random_strategy import RandomStrategy

 _DIR = os.path.abspath(os.path.dirname(__file__))
 _DATA_DIR = os.path.abspath(os.path.join(_DIR, "static_data"))
@@ -103,6 +101,7 @@ class WebInterface:
        ])
        web.run_app(app)

+
 def run_sample():
-    limit = engine.Limit(time=0.5)
+    limit = engine.Limit(time=1)
    WebInterface(engine.BayesMctsEngine, engine.ClassicMctsEngine, limit).run_app()
--- a/main.py
+++ b/main.py
@@ -83,8 +83,8 @@ def test_evaluation():
    a = engine.BayesMctsEngine
    b = engine.ClassicMctsEngine
    limit = engine.Limit(time=0.5)
-    evaluator = simulation.Evaluation(a, StockFishStrategy(), b, RandomStrategy(random.Random()), limit)
-    results = evaluator.run(2)
+    evaluator = simulation.Evaluation(a, StockFishStrategy(), b, StockFishStrategy(), limit)
+    results = evaluator.run(24)
    a_results = len(list(filter(lambda x: x.winner == simulation.Winner.Engine_A, results))) / len(results) * 100
    b_results = len(list(filter(lambda x: x.winner == simulation.Winner.Engine_B, results))) / len(results) * 100
    draws = len(list(filter(lambda x: x.winner == simulation.Winner.Draw, results))) / len(results) * 100
--- a/web.py
+++ b/web.py
@@ -1,4 +1,3 @@
-import chesspp
 from chesspp import engine
 from chesspp import web