From d43899ecdae316a7e14089c2e259b19ff610f680 Mon Sep 17 00:00:00 2001
From: luk3k <luke.kuess@gmail.com>
Date: Mon, 29 Jan 2024 17:47:00 +0100
Subject: [PATCH] Implemented strategy evaluation for moves and improved
 scoring for BayesMcts

---
 chesspp/baysian_mcts.py       | 19 +++++++++++++------
 chesspp/classic_mcts.py       | 16 +++++++---------
 chesspp/engine.py             | 22 +++++++++++++++-------
 chesspp/eval.py               | 15 ++++++++++-----
 chesspp/i_mcts.py             |  1 +
 chesspp/i_strategy.py         |  4 ++++
 chesspp/random_strategy.py    |  4 ++++
 chesspp/stockfish_strategy.py | 13 ++++++++++---
 chesspp/web.py                |  5 ++---
 main.py                       |  4 ++--
 web.py                        |  1 -
 11 files changed, 68 insertions(+), 36 deletions(-)

diff --git a/chesspp/baysian_mcts.py b/chesspp/baysian_mcts.py
index e2edbbd..c58a699 100644
--- a/chesspp/baysian_mcts.py
+++ b/chesspp/baysian_mcts.py
@@ -1,7 +1,9 @@
+import math
+
+import torch.distributions as dist
 from chesspp.i_mcts import *
 from chesspp.i_strategy import IStrategy
 from chesspp.util_gaussian import gaussian_ucb1, max_gaussian, min_gaussian
-from chesspp.eval import score_manual
 
 
 class BayesianMctsNode(IMctsNode):
@@ -61,8 +63,9 @@ class BayesianMctsNode(IMctsNode):
     def select(self) -> IMctsNode:
         if len(self.children) == 0:
             return self
-        else:
+        elif not self.board.is_game_over():  # only descend while this node's position is still in play
             return self._select_best_child().select()
+        return self  # game over at this node: it is the selection result even though it has children
 
     def expand(self) -> IMctsNode:
         if self.visits == 0:
@@ -87,7 +90,8 @@ class BayesianMctsNode(IMctsNode):
             copied_board.push(m)
             steps += 1
 
-        score = score_manual(copied_board) // steps
+        steps = max(1, steps)
+        score = int(self.strategy.analyze_board(copied_board) / (math.log2(steps) + 1))
         self.result = score
         return score
 
@@ -138,7 +142,9 @@ class BayesianMcts(IMcts):
 
     def sample(self, runs: int = 1000) -> None:
         for i in range(runs):
-            # print(f"sample {i}")
+            if self.board.is_game_over():
+                break
+
             leaf_node = self.root.select().expand()
             _ = leaf_node.rollout()
             leaf_node.backpropagate()
@@ -151,6 +157,7 @@ class BayesianMcts(IMcts):
         for child in self.get_children():
             if child.move == move:
                 self.root = child
+                child.depth = 0
                 self.root.parent = None
                 return
 
@@ -160,10 +167,10 @@ class BayesianMcts(IMcts):
     def get_children(self) -> list[IMctsNode]:
         return self.root.children
 
-    def get_moves(self) -> Dict[chess.Move, int]:
+    def get_moves(self) -> Dict[chess.Move, dist.Normal]:  # maps each root child's move to Normal(mu, sigma)
         res = {}
         for c in self.root.children:
-            res[c.move] = c.mu
+            res[c.move] = dist.Normal(c.mu, c.sigma)  # expose the spread as well as the mean
         return res
 
     def print(self):
diff --git a/chesspp/classic_mcts.py b/chesspp/classic_mcts.py
index 6335b34..068fc3c 100644
--- a/chesspp/classic_mcts.py
+++ b/chesspp/classic_mcts.py
@@ -1,19 +1,17 @@
 import chess
 import random
 import numpy as np
-
-
-from chesspp import eval
-from chesspp import util
+from chesspp.i_strategy import IStrategy
 
 
 class ClassicMcts:
 
-    def __init__(self, board: chess.Board, color: chess.Color, parent=None, move: chess.Move | None = None,
+    def __init__(self, board: chess.Board, color: chess.Color, strategy: IStrategy, parent=None, move: chess.Move | None = None,
                  random_state: int | None = None):
         self.random = random.Random(random_state)
         self.board = board
         self.color = color
+        self.strategy = strategy
         self.parent = parent
         self.move = move
         self.children = []
@@ -31,11 +29,11 @@ class ClassicMcts:
         self.untried_actions.remove(move)
         next_board = self.board.copy()
         next_board.push(move)
-        child_node = ClassicMcts(next_board, color=self.color, parent=self, move=move)
+        child_node = ClassicMcts(next_board, color=self.color, strategy=self.strategy, parent=self, move=move)
         self.children.append(child_node)
         return child_node
 
-    def _rollout(self, rollout_depth: int = 3) -> int:
+    def _rollout(self, rollout_depth: int = 4) -> int:
         """
         Rolls out the node by simulating a game for a given depth.
         Sometimes this step is called 'simulation' or 'playout'.
@@ -47,11 +45,11 @@ class ClassicMcts:
             if copied_board.is_game_over():
                 break
 
-            m = util.pick_move(copied_board)
+            m = self.strategy.pick_next_move(copied_board)
             copied_board.push(m)
             steps += 1
 
-        return eval.score_manual(copied_board) // steps
+        return self.strategy.analyze_board(copied_board) // steps
 
     def _backpropagate(self, score: float) -> None:
         """
diff --git a/chesspp/engine.py b/chesspp/engine.py
index 8205435..582adab 100644
--- a/chesspp/engine.py
+++ b/chesspp/engine.py
@@ -2,12 +2,14 @@ import random
 import time
 from abc import ABC, abstractmethod
 
+from torch import distributions as dist
 import chess
 import chess.engine
 
 from chesspp.baysian_mcts import BayesianMcts
 from chesspp.classic_mcts import ClassicMcts
 from chesspp.i_strategy import IStrategy
+from typing import Dict
 
 
 class Limit:
@@ -95,13 +97,20 @@ class BayesMctsEngine(Engine):
         if len(board.move_stack) != 0:  # apply previous move to mcts --> reuse previous simulation results
             self.mcts.apply_move(board.peek())
         limit.run(lambda: self.mcts.sample(1))
-        # limit.run(lambda: mcts_root.build_tree())
-        best_move = max(self.mcts.get_moves().items(), key=lambda x: x[1])[0] if board.turn == chess.WHITE else (
-            min(self.mcts.get_moves().items(), key=lambda x: x[1])[0])
-        print(best_move)
+        best_move = self.get_best_move(self.mcts.get_moves(), board.turn)
         self.mcts.apply_move(best_move)
         return chess.engine.PlayResult(move=best_move, ponder=None)
 
+    @staticmethod
+    def get_best_move(possible_moves: Dict[chess.Move, dist.Normal], color: chess.Color) -> chess.Move:
+        """Draw one sample per move's Normal; return the move with the highest draw for White, else the lowest."""
+        if not possible_moves:
+            raise ValueError("get_best_move() needs at least one candidate move")
+        # Sampling (rather than comparing means) lets an uncertain move occasionally win the pick.
+        sampled = {move: normal.sample() for move, normal in possible_moves.items()}
+        choose = max if color == chess.WHITE else min
+        return choose(sampled, key=sampled.get)
+
 
 class ClassicMctsEngine(Engine):
     def __init__(self, board: chess.Board, color: chess.Color, strategy: IStrategy):
@@ -112,9 +121,8 @@ class ClassicMctsEngine(Engine):
         return "ClassicMctsEngine"
 
     def play(self, board: chess.Board, limit: Limit) -> chess.engine.PlayResult:
-        mcts_root = ClassicMcts(board, self.color)
-        mcts_root.build_tree()
-        # limit.run(lambda: mcts_root.build_tree())
+        mcts_root = ClassicMcts(board, self.color, self.strategy)  # new tree per call, using this engine's strategy
+        limit.run(lambda: mcts_root.build_tree(1))  # NOTE(review): Limit.run not shown; presumably repeats until the limit is hit
         best_move = max(mcts_root.children, key=lambda x: x.score).move if board.turn == chess.WHITE else (
             min(mcts_root.children, key=lambda x: x.score).move)
         return chess.engine.PlayResult(move=best_move, ponder=None)
diff --git a/chesspp/eval.py b/chesspp/eval.py
index 9470f88..375b1f5 100644
--- a/chesspp/eval.py
+++ b/chesspp/eval.py
@@ -173,16 +173,21 @@ def score_manual(board: chess.Board) -> int:
     return score
 
 
-def score_stockfish(board: chess.Board) -> chess.engine.PovScore:
+def score_stockfish(board: chess.Board, stockfish: chess.engine.SimpleEngine | None = None) -> int:
     """
     Calculate the score of the given board using stockfish
     :param board:
     :return:
     """
-    engine = chess.engine.SimpleEngine.popen_uci("/home/luke/projects/pp-project/chess-engine-pp/stockfish/stockfish-ubuntu-x86-64-avx2")
-    info = engine.analyse(board, chess.engine.Limit(depth=0))
-    engine.quit()
-    return info["score"]
+    # Analyse with the caller's engine when one is passed; otherwise start a temporary one.
+    engine = stockfish if stockfish is not None else chess.engine.SimpleEngine.popen_uci(
+        "/home/luke/projects/pp-project/chess-engine-pp/stockfish/stockfish-ubuntu-x86-64-avx2")
+    try:
+        info = engine.analyse(board, chess.engine.Limit(depth=0))
+        return info['score'].white().score(mate_score=100_000)  # White's point of view; mate mapped to a finite int
+    finally:
+        if stockfish is None:  # the engine was started here, so it is shut down here even if analyse raised
+            engine.quit()
 
 
 def score_lc0(board: chess.Board) -> chess.engine.PovScore:
diff --git a/chesspp/i_mcts.py b/chesspp/i_mcts.py
index 2088860..1df8085 100644
--- a/chesspp/i_mcts.py
+++ b/chesspp/i_mcts.py
@@ -15,6 +15,7 @@ class IMctsNode(ABC):
         self.move = move
         self.legal_moves = list(board.legal_moves)
         self.random_state = random_state
+        self.depth = 0
 
     @abstractmethod
     def select(self) -> Self:
diff --git a/chesspp/i_strategy.py b/chesspp/i_strategy.py
index cd7229c..d985b79 100644
--- a/chesspp/i_strategy.py
+++ b/chesspp/i_strategy.py
@@ -9,3 +9,7 @@ class IStrategy(ABC):
     @abstractmethod
     def pick_next_move(self, board: chess.Board) -> chess.Move:
         pass
+
+    @abstractmethod
+    def analyze_board(self, board: chess.Board) -> int:  # evaluation hook every concrete strategy must implement
+        pass
diff --git a/chesspp/random_strategy.py b/chesspp/random_strategy.py
index 09a0988..74193d7 100644
--- a/chesspp/random_strategy.py
+++ b/chesspp/random_strategy.py
@@ -1,6 +1,7 @@
 import chess
 import random
 from chesspp.i_strategy import IStrategy
+from chesspp.eval import score_manual
 
 
 class RandomStrategy(IStrategy):
@@ -11,3 +12,6 @@ class RandomStrategy(IStrategy):
         if len(list(board.legal_moves)) == 0:
             return None
         return self.random_state.choice(list(board.legal_moves))
+
+    def analyze_board(self, board: chess.Board) -> int:
+        return score_manual(board)  # delegates to chesspp.eval.score_manual; self.random_state is not consulted
diff --git a/chesspp/stockfish_strategy.py b/chesspp/stockfish_strategy.py
index 82ddaf3..4485987 100644
--- a/chesspp/stockfish_strategy.py
+++ b/chesspp/stockfish_strategy.py
@@ -1,15 +1,21 @@
 import os
 import chess
 from chesspp.i_strategy import IStrategy
+from chesspp.eval import score_stockfish
 import chess.engine
 
 _DIR = os.path.abspath(os.path.dirname(__file__))
 
+
 class StockFishStrategy(IStrategy):
 
     def __init__(self):
         self._stockfish = None
 
+    def __del__(self):  # finalizer: ask the attached engine, if any, to quit when this strategy is collected
+        if self._stockfish is not None:  # None means no engine was ever attached
+            self._stockfish.quit()
+
     @property
     def stockfish(self) -> chess.engine.SimpleEngine:
         if self._stockfish is None:
@@ -22,6 +28,7 @@ class StockFishStrategy(IStrategy):
         self._stockfish = stockfish
 
     def pick_next_move(self, board: chess.Board) -> chess.Move | None:
-        move = self.stockfish.play(board, chess.engine.Limit(depth=4)).move
-        print("stockfish picked:", move)
-        return move
+        return self.stockfish.play(board, chess.engine.Limit(depth=4)).move  # depth-4 search; the per-move print is gone
+
+    def analyze_board(self, board: chess.Board) -> int:
+        return score_stockfish(board, self.stockfish)  # hand over this strategy's engine so none is spawned per call
diff --git a/chesspp/web.py b/chesspp/web.py
index a1a5608..9a51745 100644
--- a/chesspp/web.py
+++ b/chesspp/web.py
@@ -1,6 +1,5 @@
 import os
 import asyncio
-import random
 
 import aiohttp
 from aiohttp import web
@@ -8,7 +7,6 @@ from aiohttp import web
 import chess
 from chesspp import engine
 from chesspp.stockfish_strategy import StockFishStrategy
-from chesspp.random_strategy import RandomStrategy
 
 _DIR = os.path.abspath(os.path.dirname(__file__))
 _DATA_DIR = os.path.abspath(os.path.join(_DIR, "static_data"))
@@ -103,6 +101,7 @@ class WebInterface:
         ])
         web.run_app(app)
 
+
 def run_sample():
-    limit = engine.Limit(time=0.5)
+    limit = engine.Limit(time=1)  # NOTE(review): unit is defined by engine.Limit (not shown); presumably seconds
     WebInterface(engine.BayesMctsEngine, engine.ClassicMctsEngine, limit).run_app()
diff --git a/main.py b/main.py
index b93bf13..c0d14d9 100644
--- a/main.py
+++ b/main.py
@@ -83,8 +83,8 @@ def test_evaluation():
     a = engine.BayesMctsEngine
     b = engine.ClassicMctsEngine
     limit = engine.Limit(time=0.5)
-    evaluator = simulation.Evaluation(a, StockFishStrategy(), b, RandomStrategy(random.Random()), limit)
-    results = evaluator.run(2)
+    evaluator = simulation.Evaluation(a, StockFishStrategy(), b, StockFishStrategy(), limit)
+    results = evaluator.run(24)
     a_results = len(list(filter(lambda x: x.winner == simulation.Winner.Engine_A, results))) / len(results) * 100
     b_results = len(list(filter(lambda x: x.winner == simulation.Winner.Engine_B, results))) / len(results) * 100
     draws = len(list(filter(lambda x: x.winner == simulation.Winner.Draw, results))) / len(results) * 100
diff --git a/web.py b/web.py
index 5254a06..f2edfd9 100644
--- a/web.py
+++ b/web.py
@@ -1,4 +1,3 @@
-import chesspp
 from chesspp import engine
 from chesspp import web