Implemented strategy evaluation for moves and improved scoring for BayesMcts

This commit is contained in:
2024-01-29 17:47:00 +01:00
parent c5536e08de
commit d43899ecda
11 changed files with 68 additions and 36 deletions

View File

@@ -1,7 +1,9 @@
import math
import torch.distributions as dist
from chesspp.i_mcts import * from chesspp.i_mcts import *
from chesspp.i_strategy import IStrategy from chesspp.i_strategy import IStrategy
from chesspp.util_gaussian import gaussian_ucb1, max_gaussian, min_gaussian from chesspp.util_gaussian import gaussian_ucb1, max_gaussian, min_gaussian
from chesspp.eval import score_manual
class BayesianMctsNode(IMctsNode): class BayesianMctsNode(IMctsNode):
@@ -61,8 +63,9 @@ class BayesianMctsNode(IMctsNode):
def select(self) -> IMctsNode: def select(self) -> IMctsNode:
if len(self.children) == 0: if len(self.children) == 0:
return self return self
else: elif not self.board.is_game_over():
return self._select_best_child().select() return self._select_best_child().select()
return self
def expand(self) -> IMctsNode: def expand(self) -> IMctsNode:
if self.visits == 0: if self.visits == 0:
@@ -87,7 +90,8 @@ class BayesianMctsNode(IMctsNode):
copied_board.push(m) copied_board.push(m)
steps += 1 steps += 1
score = score_manual(copied_board) // steps steps = max(1, steps)
score = int(self.strategy.analyze_board(copied_board) / (math.log2(steps) + 1))
self.result = score self.result = score
return score return score
@@ -138,7 +142,9 @@ class BayesianMcts(IMcts):
def sample(self, runs: int = 1000) -> None: def sample(self, runs: int = 1000) -> None:
for i in range(runs): for i in range(runs):
# print(f"sample {i}") if self.board.is_game_over():
break
leaf_node = self.root.select().expand() leaf_node = self.root.select().expand()
_ = leaf_node.rollout() _ = leaf_node.rollout()
leaf_node.backpropagate() leaf_node.backpropagate()
@@ -151,6 +157,7 @@ class BayesianMcts(IMcts):
for child in self.get_children(): for child in self.get_children():
if child.move == move: if child.move == move:
self.root = child self.root = child
child.depth = 0
self.root.parent = None self.root.parent = None
return return
@@ -160,10 +167,10 @@ class BayesianMcts(IMcts):
def get_children(self) -> list[IMctsNode]: def get_children(self) -> list[IMctsNode]:
return self.root.children return self.root.children
def get_moves(self) -> Dict[chess.Move, int]: def get_moves(self) -> Dict[chess.Move, dist.Normal]:
res = {} res = {}
for c in self.root.children: for c in self.root.children:
res[c.move] = c.mu res[c.move] = dist.Normal(c.mu, c.sigma)
return res return res
def print(self): def print(self):

View File

@@ -1,19 +1,17 @@
import chess import chess
import random import random
import numpy as np import numpy as np
from chesspp.i_strategy import IStrategy
from chesspp import eval
from chesspp import util
class ClassicMcts: class ClassicMcts:
def __init__(self, board: chess.Board, color: chess.Color, parent=None, move: chess.Move | None = None, def __init__(self, board: chess.Board, color: chess.Color, strategy: IStrategy, parent=None, move: chess.Move | None = None,
random_state: int | None = None): random_state: int | None = None):
self.random = random.Random(random_state) self.random = random.Random(random_state)
self.board = board self.board = board
self.color = color self.color = color
self.strategy = strategy
self.parent = parent self.parent = parent
self.move = move self.move = move
self.children = [] self.children = []
@@ -31,11 +29,11 @@ class ClassicMcts:
self.untried_actions.remove(move) self.untried_actions.remove(move)
next_board = self.board.copy() next_board = self.board.copy()
next_board.push(move) next_board.push(move)
child_node = ClassicMcts(next_board, color=self.color, parent=self, move=move) child_node = ClassicMcts(next_board, color=self.color, strategy=self.strategy, parent=self, move=move)
self.children.append(child_node) self.children.append(child_node)
return child_node return child_node
def _rollout(self, rollout_depth: int = 3) -> int: def _rollout(self, rollout_depth: int = 4) -> int:
""" """
Rolls out the node by simulating a game for a given depth. Rolls out the node by simulating a game for a given depth.
Sometimes this step is called 'simulation' or 'playout'. Sometimes this step is called 'simulation' or 'playout'.
@@ -47,11 +45,11 @@ class ClassicMcts:
if copied_board.is_game_over(): if copied_board.is_game_over():
break break
m = util.pick_move(copied_board) m = self.strategy.pick_next_move(copied_board)
copied_board.push(m) copied_board.push(m)
steps += 1 steps += 1
return eval.score_manual(copied_board) // steps return self.strategy.analyze_board(copied_board) // steps
def _backpropagate(self, score: float) -> None: def _backpropagate(self, score: float) -> None:
""" """

View File

@@ -2,12 +2,14 @@ import random
import time import time
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from torch import distributions as dist
import chess import chess
import chess.engine import chess.engine
from chesspp.baysian_mcts import BayesianMcts from chesspp.baysian_mcts import BayesianMcts
from chesspp.classic_mcts import ClassicMcts from chesspp.classic_mcts import ClassicMcts
from chesspp.i_strategy import IStrategy from chesspp.i_strategy import IStrategy
from typing import Dict
class Limit: class Limit:
@@ -95,13 +97,20 @@ class BayesMctsEngine(Engine):
if len(board.move_stack) != 0: # apply previous move to mcts --> reuse previous simulation results if len(board.move_stack) != 0: # apply previous move to mcts --> reuse previous simulation results
self.mcts.apply_move(board.peek()) self.mcts.apply_move(board.peek())
limit.run(lambda: self.mcts.sample(1)) limit.run(lambda: self.mcts.sample(1))
# limit.run(lambda: mcts_root.build_tree()) best_move = self.get_best_move(self.mcts.get_moves(), board.turn)
best_move = max(self.mcts.get_moves().items(), key=lambda x: x[1])[0] if board.turn == chess.WHITE else (
min(self.mcts.get_moves().items(), key=lambda x: x[1])[0])
print(best_move)
self.mcts.apply_move(best_move) self.mcts.apply_move(best_move)
return chess.engine.PlayResult(move=best_move, ponder=None) return chess.engine.PlayResult(move=best_move, ponder=None)
@staticmethod
def get_best_move(possible_moves: Dict[chess.Move, dist.Normal], color: chess.Color) -> chess.Move:
moves = {}
for m, d in possible_moves.items():
moves[m] = d.sample()
return max(moves.items(), key=lambda x: x[1])[0] if color == chess.WHITE else (
min(moves.items(), key=lambda x: x[1])[0])
class ClassicMctsEngine(Engine): class ClassicMctsEngine(Engine):
def __init__(self, board: chess.Board, color: chess.Color, strategy: IStrategy): def __init__(self, board: chess.Board, color: chess.Color, strategy: IStrategy):
@@ -112,9 +121,8 @@ class ClassicMctsEngine(Engine):
return "ClassicMctsEngine" return "ClassicMctsEngine"
def play(self, board: chess.Board, limit: Limit) -> chess.engine.PlayResult: def play(self, board: chess.Board, limit: Limit) -> chess.engine.PlayResult:
mcts_root = ClassicMcts(board, self.color) mcts_root = ClassicMcts(board, self.color, self.strategy)
mcts_root.build_tree() limit.run(lambda: mcts_root.build_tree(1))
# limit.run(lambda: mcts_root.build_tree())
best_move = max(mcts_root.children, key=lambda x: x.score).move if board.turn == chess.WHITE else ( best_move = max(mcts_root.children, key=lambda x: x.score).move if board.turn == chess.WHITE else (
min(mcts_root.children, key=lambda x: x.score).move) min(mcts_root.children, key=lambda x: x.score).move)
return chess.engine.PlayResult(move=best_move, ponder=None) return chess.engine.PlayResult(move=best_move, ponder=None)

View File

@@ -173,16 +173,21 @@ def score_manual(board: chess.Board) -> int:
return score return score
def score_stockfish(board: chess.Board) -> chess.engine.PovScore: def score_stockfish(board: chess.Board, stockfish: chess.engine.SimpleEngine | None = None) -> int:
""" """
Calculate the score of the given board using stockfish Calculate the score of the given board using stockfish
:param board: :param board:
:return: :return:
""" """
engine = chess.engine.SimpleEngine.popen_uci("/home/luke/projects/pp-project/chess-engine-pp/stockfish/stockfish-ubuntu-x86-64-avx2") if stockfish is None:
engine = chess.engine.SimpleEngine.popen_uci(
"/home/luke/projects/pp-project/chess-engine-pp/stockfish/stockfish-ubuntu-x86-64-avx2")
info = engine.analyse(board, chess.engine.Limit(depth=0)) info = engine.analyse(board, chess.engine.Limit(depth=0))
engine.quit() engine.quit()
return info["score"] return info['score'].white().score(mate_score=100_000)
else:
info = stockfish.analyse(board, chess.engine.Limit(depth=0))
return info['score'].white().score(mate_score=100_000)
def score_lc0(board: chess.Board) -> chess.engine.PovScore: def score_lc0(board: chess.Board) -> chess.engine.PovScore:

View File

@@ -15,6 +15,7 @@ class IMctsNode(ABC):
self.move = move self.move = move
self.legal_moves = list(board.legal_moves) self.legal_moves = list(board.legal_moves)
self.random_state = random_state self.random_state = random_state
self.depth = 0
@abstractmethod @abstractmethod
def select(self) -> Self: def select(self) -> Self:

View File

@@ -9,3 +9,7 @@ class IStrategy(ABC):
@abstractmethod @abstractmethod
def pick_next_move(self, board: chess.Board) -> chess.Move: def pick_next_move(self, board: chess.Board) -> chess.Move:
pass pass
@abstractmethod
def analyze_board(self, board: chess.Board) -> int:
pass

View File

@@ -1,6 +1,7 @@
import chess import chess
import random import random
from chesspp.i_strategy import IStrategy from chesspp.i_strategy import IStrategy
from chesspp.eval import score_manual
class RandomStrategy(IStrategy): class RandomStrategy(IStrategy):
@@ -11,3 +12,6 @@ class RandomStrategy(IStrategy):
if len(list(board.legal_moves)) == 0: if len(list(board.legal_moves)) == 0:
return None return None
return self.random_state.choice(list(board.legal_moves)) return self.random_state.choice(list(board.legal_moves))
def analyze_board(self, board: chess.Board) -> int:
return score_manual(board)

View File

@@ -1,15 +1,21 @@
import os import os
import chess import chess
from chesspp.i_strategy import IStrategy from chesspp.i_strategy import IStrategy
from chesspp.eval import score_stockfish
import chess.engine import chess.engine
_DIR = os.path.abspath(os.path.dirname(__file__)) _DIR = os.path.abspath(os.path.dirname(__file__))
class StockFishStrategy(IStrategy): class StockFishStrategy(IStrategy):
def __init__(self): def __init__(self):
self._stockfish = None self._stockfish = None
def __del__(self):
if self._stockfish is not None:
self._stockfish.quit()
@property @property
def stockfish(self) -> chess.engine.SimpleEngine: def stockfish(self) -> chess.engine.SimpleEngine:
if self._stockfish is None: if self._stockfish is None:
@@ -22,6 +28,7 @@ class StockFishStrategy(IStrategy):
self._stockfish = stockfish self._stockfish = stockfish
def pick_next_move(self, board: chess.Board) -> chess.Move | None: def pick_next_move(self, board: chess.Board) -> chess.Move | None:
move = self.stockfish.play(board, chess.engine.Limit(depth=4)).move return self.stockfish.play(board, chess.engine.Limit(depth=4)).move
print("stockfish picked:", move)
return move def analyze_board(self, board: chess.Board) -> int:
return score_stockfish(board, self.stockfish)

View File

@@ -1,6 +1,5 @@
import os import os
import asyncio import asyncio
import random
import aiohttp import aiohttp
from aiohttp import web from aiohttp import web
@@ -8,7 +7,6 @@ from aiohttp import web
import chess import chess
from chesspp import engine from chesspp import engine
from chesspp.stockfish_strategy import StockFishStrategy from chesspp.stockfish_strategy import StockFishStrategy
from chesspp.random_strategy import RandomStrategy
_DIR = os.path.abspath(os.path.dirname(__file__)) _DIR = os.path.abspath(os.path.dirname(__file__))
_DATA_DIR = os.path.abspath(os.path.join(_DIR, "static_data")) _DATA_DIR = os.path.abspath(os.path.join(_DIR, "static_data"))
@@ -103,6 +101,7 @@ class WebInterface:
]) ])
web.run_app(app) web.run_app(app)
def run_sample(): def run_sample():
limit = engine.Limit(time=0.5) limit = engine.Limit(time=1)
WebInterface(engine.BayesMctsEngine, engine.ClassicMctsEngine, limit).run_app() WebInterface(engine.BayesMctsEngine, engine.ClassicMctsEngine, limit).run_app()

View File

@@ -83,8 +83,8 @@ def test_evaluation():
a = engine.BayesMctsEngine a = engine.BayesMctsEngine
b = engine.ClassicMctsEngine b = engine.ClassicMctsEngine
limit = engine.Limit(time=0.5) limit = engine.Limit(time=0.5)
evaluator = simulation.Evaluation(a, StockFishStrategy(), b, RandomStrategy(random.Random()), limit) evaluator = simulation.Evaluation(a, StockFishStrategy(), b, StockFishStrategy(), limit)
results = evaluator.run(2) results = evaluator.run(24)
a_results = len(list(filter(lambda x: x.winner == simulation.Winner.Engine_A, results))) / len(results) * 100 a_results = len(list(filter(lambda x: x.winner == simulation.Winner.Engine_A, results))) / len(results) * 100
b_results = len(list(filter(lambda x: x.winner == simulation.Winner.Engine_B, results))) / len(results) * 100 b_results = len(list(filter(lambda x: x.winner == simulation.Winner.Engine_B, results))) / len(results) * 100
draws = len(list(filter(lambda x: x.winner == simulation.Winner.Draw, results))) / len(results) * 100 draws = len(list(filter(lambda x: x.winner == simulation.Winner.Draw, results))) / len(results) * 100

1
web.py
View File

@@ -1,4 +1,3 @@
import chesspp
from chesspp import engine from chesspp import engine
from chesspp import web from chesspp import web