Implemented strategy evaluation for moves and improved scoring for BayesMcts

This commit is contained in:
2024-01-29 17:47:00 +01:00
parent c5536e08de
commit d43899ecda
11 changed files with 68 additions and 36 deletions

View File

@@ -1,7 +1,9 @@
import math
import torch.distributions as dist
from chesspp.i_mcts import *
from chesspp.i_strategy import IStrategy
from chesspp.util_gaussian import gaussian_ucb1, max_gaussian, min_gaussian
from chesspp.eval import score_manual
class BayesianMctsNode(IMctsNode):
@@ -61,8 +63,9 @@ class BayesianMctsNode(IMctsNode):
def select(self) -> IMctsNode:
    """
    Walk down the tree and return the node the next expansion should start from.

    :return: this node when it has no children or its board reports the game
        as over, otherwise whatever the best child's own ``select`` returns
    """
    # A childless node is a leaf; a finished game cannot be searched any deeper.
    if not self.children or self.board.is_game_over():
        return self
    return self._select_best_child().select()
def expand(self) -> IMctsNode:
if self.visits == 0:
@@ -87,7 +90,8 @@ class BayesianMctsNode(IMctsNode):
copied_board.push(m)
steps += 1
score = score_manual(copied_board) // steps
steps = max(1, steps)
score = int(self.strategy.analyze_board(copied_board) / (math.log2(steps) + 1))
self.result = score
return score
@@ -138,7 +142,9 @@ class BayesianMcts(IMcts):
def sample(self, runs: int = 1000) -> None:
for i in range(runs):
# print(f"sample {i}")
if self.board.is_game_over():
break
leaf_node = self.root.select().expand()
_ = leaf_node.rollout()
leaf_node.backpropagate()
@@ -151,6 +157,7 @@ class BayesianMcts(IMcts):
for child in self.get_children():
if child.move == move:
self.root = child
child.depth = 0
self.root.parent = None
return
@@ -160,10 +167,10 @@ class BayesianMcts(IMcts):
def get_children(self) -> list[IMctsNode]:
    """
    Expose the nodes directly below the current root.

    :return: the root's ``children`` object itself, not a copy
    """
    root = self.root
    return root.children
def get_moves(self) -> Dict[chess.Move, dist.Normal]:
    """
    Describe every move available at the root as a normal distribution.

    :return: mapping from each root child's move to ``Normal(mu, sigma)`` built
        from that child's ``mu`` and ``sigma`` attributes
    """
    return {
        child.move: dist.Normal(child.mu, child.sigma)
        for child in self.root.children
    }
def print(self):

View File

@@ -1,19 +1,17 @@
import chess
import random
import numpy as np
from chesspp import eval
from chesspp import util
from chesspp.i_strategy import IStrategy
class ClassicMcts:
def __init__(self, board: chess.Board, color: chess.Color, parent=None, move: chess.Move | None = None,
def __init__(self, board: chess.Board, color: chess.Color, strategy: IStrategy, parent=None, move: chess.Move | None = None,
random_state: int | None = None):
self.random = random.Random(random_state)
self.board = board
self.color = color
self.strategy = strategy
self.parent = parent
self.move = move
self.children = []
@@ -31,11 +29,11 @@ class ClassicMcts:
self.untried_actions.remove(move)
next_board = self.board.copy()
next_board.push(move)
child_node = ClassicMcts(next_board, color=self.color, parent=self, move=move)
child_node = ClassicMcts(next_board, color=self.color, strategy=self.strategy, parent=self, move=move)
self.children.append(child_node)
return child_node
def _rollout(self, rollout_depth: int = 3) -> int:
def _rollout(self, rollout_depth: int = 4) -> int:
"""
Rolls out the node by simulating a game for a given depth.
Sometimes this step is called 'simulation' or 'playout'.
@@ -47,11 +45,11 @@ class ClassicMcts:
if copied_board.is_game_over():
break
m = util.pick_move(copied_board)
m = self.strategy.pick_next_move(copied_board)
copied_board.push(m)
steps += 1
return eval.score_manual(copied_board) // steps
return self.strategy.analyze_board(copied_board) // steps
def _backpropagate(self, score: float) -> None:
"""

View File

@@ -2,12 +2,14 @@ import random
import time
from abc import ABC, abstractmethod
from torch import distributions as dist
import chess
import chess.engine
from chesspp.baysian_mcts import BayesianMcts
from chesspp.classic_mcts import ClassicMcts
from chesspp.i_strategy import IStrategy
from typing import Dict
class Limit:
@@ -95,13 +97,20 @@ class BayesMctsEngine(Engine):
if len(board.move_stack) != 0: # apply previous move to mcts --> reuse previous simulation results
self.mcts.apply_move(board.peek())
limit.run(lambda: self.mcts.sample(1))
# limit.run(lambda: mcts_root.build_tree())
best_move = max(self.mcts.get_moves().items(), key=lambda x: x[1])[0] if board.turn == chess.WHITE else (
min(self.mcts.get_moves().items(), key=lambda x: x[1])[0])
print(best_move)
best_move = self.get_best_move(self.mcts.get_moves(), board.turn)
self.mcts.apply_move(best_move)
return chess.engine.PlayResult(move=best_move, ponder=None)
@staticmethod
def get_best_move(possible_moves: Dict[chess.Move, dist.Normal], color: chess.Color) -> chess.Move:
    """
    Choose a move by drawing one sample from each candidate's distribution.

    :param possible_moves: candidate moves mapped to their score distributions
    :param color: side to move; white takes the highest draw, any other value the lowest
    :return: the move whose sampled value is the extreme one for ``color``
    """
    drawn = {move: distribution.sample() for move, distribution in possible_moves.items()}
    pick = max if color == chess.WHITE else min
    return pick(drawn.items(), key=lambda entry: entry[1])[0]
class ClassicMctsEngine(Engine):
def __init__(self, board: chess.Board, color: chess.Color, strategy: IStrategy):
@@ -112,9 +121,8 @@ class ClassicMctsEngine(Engine):
return "ClassicMctsEngine"
def play(self, board: chess.Board, limit: Limit) -> chess.engine.PlayResult:
    """
    Build a fresh search tree for ``board`` and answer with its best root move.

    :param board: position to move in
    :param limit: budget object; its ``run`` is handed a callable that performs ``build_tree(1)``
    :return: play result holding the chosen move and no ponder move
    """
    mcts_root = ClassicMcts(board, self.color, self.strategy)
    limit.run(lambda: mcts_root.build_tree(1))
    # White wants the highest-scoring child, any other side the lowest.
    pick = max if board.turn == chess.WHITE else min
    best_move = pick(mcts_root.children, key=lambda node: node.score).move
    return chess.engine.PlayResult(move=best_move, ponder=None)

View File

@@ -173,16 +173,21 @@ def score_manual(board: chess.Board) -> int:
return score
def score_stockfish(board: chess.Board, stockfish: chess.engine.SimpleEngine | None = None) -> int:
    """
    Calculate the score of the given board using stockfish

    :param board: position to evaluate
    :param stockfish: running engine to reuse; when omitted a temporary engine is
        started from the hard-coded binary path and shut down again afterwards
    :return: score from white's point of view, with mate scores mapped via ``mate_score=100_000``
    """
    if stockfish is not None:
        info = stockfish.analyse(board, chess.engine.Limit(depth=0))
        return info['score'].white().score(mate_score=100_000)

    engine = chess.engine.SimpleEngine.popen_uci(
        "/home/luke/projects/pp-project/chess-engine-pp/stockfish/stockfish-ubuntu-x86-64-avx2")
    try:
        info = engine.analyse(board, chess.engine.Limit(depth=0))
    finally:
        # Always stop the temporary engine, even when the analysis raises.
        engine.quit()
    return info['score'].white().score(mate_score=100_000)
def score_lc0(board: chess.Board) -> chess.engine.PovScore:

View File

@@ -15,6 +15,7 @@ class IMctsNode(ABC):
self.move = move
self.legal_moves = list(board.legal_moves)
self.random_state = random_state
self.depth = 0
@abstractmethod
def select(self) -> Self:

View File

@@ -9,3 +9,7 @@ class IStrategy(ABC):
@abstractmethod
def pick_next_move(self, board: chess.Board) -> chess.Move:
    """
    Select the move to play on ``board``; declared abstract, with no behaviour here.

    :param board: position to choose a move for
    :return: the chosen move (this placeholder body itself returns ``None``)
    """
@abstractmethod
def analyze_board(self, board: chess.Board) -> int:
    """
    Score the position on ``board``; declared abstract, with no behaviour here.

    :param board: position to evaluate
    :return: integer evaluation of the position (this placeholder body itself returns ``None``)
    """

View File

@@ -1,6 +1,7 @@
import chess
import random
from chesspp.i_strategy import IStrategy
from chesspp.eval import score_manual
class RandomStrategy(IStrategy):
@@ -11,3 +12,6 @@ class RandomStrategy(IStrategy):
if len(list(board.legal_moves)) == 0:
return None
return self.random_state.choice(list(board.legal_moves))
def analyze_board(self, board: chess.Board) -> int:
    """
    Evaluate ``board`` by delegating to ``score_manual``.

    :param board: position to evaluate
    :return: whatever ``score_manual`` reports for the position
    """
    evaluation = score_manual(board)
    return evaluation

View File

@@ -1,15 +1,21 @@
import os
import chess
from chesspp.i_strategy import IStrategy
from chesspp.eval import score_stockfish
import chess.engine
_DIR = os.path.abspath(os.path.dirname(__file__))
class StockFishStrategy(IStrategy):
def __init__(self):
    """Create the strategy with no engine handle stored yet."""
    # Starts out unset; ``__del__`` and the ``stockfish`` property both test for None.
    self._stockfish = None
def __del__(self):
    """Ask the stored engine to quit when this strategy is collected, if one is set."""
    engine = self._stockfish
    if engine is None:
        # No engine handle was ever stored, so there is nothing to shut down.
        return
    engine.quit()
@property
def stockfish(self) -> chess.engine.SimpleEngine:
if self._stockfish is None:
@@ -22,6 +28,7 @@ class StockFishStrategy(IStrategy):
self._stockfish = stockfish
def pick_next_move(self, board: chess.Board) -> chess.Move | None:
    """
    Ask stockfish for its move on ``board`` using a depth-4 search limit.

    :param board: position to choose a move for
    :return: the ``move`` field of the engine's play result
    """
    return self.stockfish.play(board, chess.engine.Limit(depth=4)).move
def analyze_board(self, board: chess.Board) -> int:
    """
    Evaluate ``board`` with this strategy's stockfish engine.

    :param board: position to evaluate
    :return: the value ``score_stockfish`` produces for the position
    """
    engine = self.stockfish
    return score_stockfish(board, engine)

View File

@@ -1,6 +1,5 @@
import os
import asyncio
import random
import aiohttp
from aiohttp import web
@@ -8,7 +7,6 @@ from aiohttp import web
import chess
from chesspp import engine
from chesspp.stockfish_strategy import StockFishStrategy
from chesspp.random_strategy import RandomStrategy
_DIR = os.path.abspath(os.path.dirname(__file__))
_DATA_DIR = os.path.abspath(os.path.join(_DIR, "static_data"))
@@ -103,6 +101,7 @@ class WebInterface:
])
web.run_app(app)
def run_sample():
    """Start the web interface with the Bayesian and classic MCTS engines and a ``time=1`` limit."""
    limit = engine.Limit(time=1)
    WebInterface(engine.BayesMctsEngine, engine.ClassicMctsEngine, limit).run_app()