Implemented strategy evaluation for moves and improved scoring for BayesMcts
This commit is contained in:
@@ -1,7 +1,9 @@
|
||||
import math
|
||||
|
||||
import torch.distributions as dist
|
||||
from chesspp.i_mcts import *
|
||||
from chesspp.i_strategy import IStrategy
|
||||
from chesspp.util_gaussian import gaussian_ucb1, max_gaussian, min_gaussian
|
||||
from chesspp.eval import score_manual
|
||||
|
||||
|
||||
class BayesianMctsNode(IMctsNode):
|
||||
@@ -61,8 +63,9 @@ class BayesianMctsNode(IMctsNode):
|
||||
def select(self) -> IMctsNode:
    """
    Descend the tree and return the node at which the descent stops.

    The descent stops at this node when it has no children, or when its
    board reports that the game is over. Otherwise it continues through the
    node returned by ``_select_best_child``.

    :return: the node reached by the descent
    """
    # Children are checked first so the board is only queried on inner nodes.
    if len(self.children) == 0 or self.board.is_game_over():
        return self
    return self._select_best_child().select()
|
||||
|
||||
def expand(self) -> IMctsNode:
|
||||
if self.visits == 0:
|
||||
@@ -87,7 +90,8 @@ class BayesianMctsNode(IMctsNode):
|
||||
copied_board.push(m)
|
||||
steps += 1
|
||||
|
||||
score = score_manual(copied_board) // steps
|
||||
steps = max(1, steps)
|
||||
score = int(self.strategy.analyze_board(copied_board) / (math.log2(steps) + 1))
|
||||
self.result = score
|
||||
return score
|
||||
|
||||
@@ -138,7 +142,9 @@ class BayesianMcts(IMcts):
|
||||
|
||||
def sample(self, runs: int = 1000) -> None:
|
||||
for i in range(runs):
|
||||
# print(f"sample {i}")
|
||||
if self.board.is_game_over():
|
||||
break
|
||||
|
||||
leaf_node = self.root.select().expand()
|
||||
_ = leaf_node.rollout()
|
||||
leaf_node.backpropagate()
|
||||
@@ -151,6 +157,7 @@ class BayesianMcts(IMcts):
|
||||
for child in self.get_children():
|
||||
if child.move == move:
|
||||
self.root = child
|
||||
child.depth = 0
|
||||
self.root.parent = None
|
||||
return
|
||||
|
||||
@@ -160,10 +167,10 @@ class BayesianMcts(IMcts):
|
||||
def get_children(self) -> list[IMctsNode]:
|
||||
return self.root.children
|
||||
|
||||
def get_moves(self) -> Dict[chess.Move, dist.Normal]:
    """
    Map every move available at the root to a normal distribution.

    :return: dictionary keyed by the move of each root child; each value is
        ``dist.Normal(child.mu, child.sigma)`` built from that child
    """
    return {
        child.move: dist.Normal(child.mu, child.sigma)
        for child in self.root.children
    }
|
||||
|
||||
def print(self):
|
||||
|
||||
@@ -1,19 +1,17 @@
|
||||
import chess
|
||||
import random
|
||||
import numpy as np
|
||||
|
||||
|
||||
from chesspp import eval
|
||||
from chesspp import util
|
||||
from chesspp.i_strategy import IStrategy
|
||||
|
||||
|
||||
class ClassicMcts:
|
||||
|
||||
def __init__(self, board: chess.Board, color: chess.Color, parent=None, move: chess.Move | None = None,
|
||||
def __init__(self, board: chess.Board, color: chess.Color, strategy: IStrategy, parent=None, move: chess.Move | None = None,
|
||||
random_state: int | None = None):
|
||||
self.random = random.Random(random_state)
|
||||
self.board = board
|
||||
self.color = color
|
||||
self.strategy = strategy
|
||||
self.parent = parent
|
||||
self.move = move
|
||||
self.children = []
|
||||
@@ -31,11 +29,11 @@ class ClassicMcts:
|
||||
self.untried_actions.remove(move)
|
||||
next_board = self.board.copy()
|
||||
next_board.push(move)
|
||||
child_node = ClassicMcts(next_board, color=self.color, parent=self, move=move)
|
||||
child_node = ClassicMcts(next_board, color=self.color, strategy=self.strategy, parent=self, move=move)
|
||||
self.children.append(child_node)
|
||||
return child_node
|
||||
|
||||
def _rollout(self, rollout_depth: int = 3) -> int:
|
||||
def _rollout(self, rollout_depth: int = 4) -> int:
|
||||
"""
|
||||
Rolls out the node by simulating a game for a given depth.
|
||||
Sometimes this step is called 'simulation' or 'playout'.
|
||||
@@ -47,11 +45,11 @@ class ClassicMcts:
|
||||
if copied_board.is_game_over():
|
||||
break
|
||||
|
||||
m = util.pick_move(copied_board)
|
||||
m = self.strategy.pick_next_move(copied_board)
|
||||
copied_board.push(m)
|
||||
steps += 1
|
||||
|
||||
return eval.score_manual(copied_board) // steps
|
||||
return self.strategy.analyze_board(copied_board) // steps
|
||||
|
||||
def _backpropagate(self, score: float) -> None:
|
||||
"""
|
||||
|
||||
@@ -2,12 +2,14 @@ import random
|
||||
import time
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
from torch import distributions as dist
|
||||
import chess
|
||||
import chess.engine
|
||||
|
||||
from chesspp.baysian_mcts import BayesianMcts
|
||||
from chesspp.classic_mcts import ClassicMcts
|
||||
from chesspp.i_strategy import IStrategy
|
||||
from typing import Dict
|
||||
|
||||
|
||||
class Limit:
|
||||
@@ -95,13 +97,20 @@ class BayesMctsEngine(Engine):
|
||||
if len(board.move_stack) != 0: # apply previous move to mcts --> reuse previous simulation results
|
||||
self.mcts.apply_move(board.peek())
|
||||
limit.run(lambda: self.mcts.sample(1))
|
||||
# limit.run(lambda: mcts_root.build_tree())
|
||||
best_move = max(self.mcts.get_moves().items(), key=lambda x: x[1])[0] if board.turn == chess.WHITE else (
|
||||
min(self.mcts.get_moves().items(), key=lambda x: x[1])[0])
|
||||
print(best_move)
|
||||
best_move = self.get_best_move(self.mcts.get_moves(), board.turn)
|
||||
self.mcts.apply_move(best_move)
|
||||
return chess.engine.PlayResult(move=best_move, ponder=None)
|
||||
|
||||
@staticmethod
def get_best_move(possible_moves: Dict[chess.Move, dist.Normal], color: chess.Color) -> chess.Move:
    """
    Pick a move by drawing one sample from each move's distribution.

    :param possible_moves: candidate moves mapped to their distributions
    :param color: side to choose for; ``chess.WHITE`` takes the move with the
        largest sampled value, any other value takes the smallest
    :return: the move whose sampled value won the comparison
    """
    # One draw per move, taken in the dictionary's iteration order.
    sampled = {move: distribution.sample() for move, distribution in possible_moves.items()}
    pick = max if color == chess.WHITE else min
    return pick(sampled.items(), key=lambda entry: entry[1])[0]
|
||||
|
||||
|
||||
|
||||
class ClassicMctsEngine(Engine):
|
||||
def __init__(self, board: chess.Board, color: chess.Color, strategy: IStrategy):
|
||||
@@ -112,9 +121,8 @@ class ClassicMctsEngine(Engine):
|
||||
return "ClassicMctsEngine"
|
||||
|
||||
def play(self, board: chess.Board, limit: Limit) -> chess.engine.PlayResult:
    """
    Build a fresh classic MCTS tree for the position and return its chosen move.

    :param board: position to search from; its ``turn`` decides whether the
        highest or the lowest child score is taken
    :param limit: its ``run`` method is handed a callable that performs
        ``build_tree(1)`` on the new root
        (NOTE(review): presumably repeated until the limit is used up - confirm
        against ``Limit.run``)
    :return: play result carrying the selected move and no ponder move
    """
    mcts_root = ClassicMcts(board, self.color, self.strategy)
    limit.run(lambda: mcts_root.build_tree(1))

    # White takes the child with the highest score, black the lowest.
    pick = max if board.turn == chess.WHITE else min
    chosen = pick(mcts_root.children, key=lambda node: node.score)
    return chess.engine.PlayResult(move=chosen.move, ponder=None)
|
||||
|
||||
@@ -173,16 +173,21 @@ def score_manual(board: chess.Board) -> int:
|
||||
return score
|
||||
|
||||
|
||||
def score_stockfish(board: chess.Board, stockfish: chess.engine.SimpleEngine | None = None) -> int:
    """
    Calculate the score of the given board using stockfish.

    :param board: position to evaluate
    :param stockfish: an already running engine to reuse; when ``None`` a
        temporary engine process is started for this single call and shut
        down again before returning
    :return: the engine score seen from white's side, with mate scores
        converted through ``mate_score=100_000``
    """
    if stockfish is not None:
        # The caller owns this engine, so it is left running.
        info = stockfish.analyse(board, chess.engine.Limit(depth=0))
        return info['score'].white().score(mate_score=100_000)

    # NOTE(review): machine-specific absolute path; callers on other machines
    # have to pass their own engine through `stockfish`.
    engine = chess.engine.SimpleEngine.popen_uci(
        "/home/luke/projects/pp-project/chess-engine-pp/stockfish/stockfish-ubuntu-x86-64-avx2")
    try:
        info = engine.analyse(board, chess.engine.Limit(depth=0))
    finally:
        # Stop the temporary process even when the analysis raises, so a
        # failed call does not leave an engine process behind.
        engine.quit()
    return info['score'].white().score(mate_score=100_000)
|
||||
|
||||
|
||||
def score_lc0(board: chess.Board) -> chess.engine.PovScore:
|
||||
|
||||
@@ -15,6 +15,7 @@ class IMctsNode(ABC):
|
||||
self.move = move
|
||||
self.legal_moves = list(board.legal_moves)
|
||||
self.random_state = random_state
|
||||
self.depth = 0
|
||||
|
||||
@abstractmethod
|
||||
def select(self) -> Self:
|
||||
|
||||
@@ -9,3 +9,7 @@ class IStrategy(ABC):
|
||||
@abstractmethod
|
||||
def pick_next_move(self, board: chess.Board) -> chess.Move:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def analyze_board(self, board: chess.Board) -> int:
|
||||
pass
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import chess
|
||||
import random
|
||||
from chesspp.i_strategy import IStrategy
|
||||
from chesspp.eval import score_manual
|
||||
|
||||
|
||||
class RandomStrategy(IStrategy):
|
||||
@@ -11,3 +12,6 @@ class RandomStrategy(IStrategy):
|
||||
if len(list(board.legal_moves)) == 0:
|
||||
return None
|
||||
return self.random_state.choice(list(board.legal_moves))
|
||||
|
||||
def analyze_board(self, board: chess.Board) -> int:
|
||||
return score_manual(board)
|
||||
|
||||
@@ -1,15 +1,21 @@
|
||||
import os
|
||||
import chess
|
||||
from chesspp.i_strategy import IStrategy
|
||||
from chesspp.eval import score_stockfish
|
||||
import chess.engine
|
||||
|
||||
_DIR = os.path.abspath(os.path.dirname(__file__))
|
||||
|
||||
|
||||
class StockFishStrategy(IStrategy):
|
||||
|
||||
def __init__(self):
|
||||
self._stockfish = None
|
||||
|
||||
def __del__(self):
|
||||
if self._stockfish is not None:
|
||||
self._stockfish.quit()
|
||||
|
||||
@property
|
||||
def stockfish(self) -> chess.engine.SimpleEngine:
|
||||
if self._stockfish is None:
|
||||
@@ -22,6 +28,7 @@ class StockFishStrategy(IStrategy):
|
||||
self._stockfish = stockfish
|
||||
|
||||
def pick_next_move(self, board: chess.Board) -> chess.Move | None:
|
||||
move = self.stockfish.play(board, chess.engine.Limit(depth=4)).move
|
||||
print("stockfish picked:", move)
|
||||
return move
|
||||
return self.stockfish.play(board, chess.engine.Limit(depth=4)).move
|
||||
|
||||
def analyze_board(self, board: chess.Board) -> int:
|
||||
return score_stockfish(board, self.stockfish)
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import os
|
||||
import asyncio
|
||||
import random
|
||||
|
||||
import aiohttp
|
||||
from aiohttp import web
|
||||
@@ -8,7 +7,6 @@ from aiohttp import web
|
||||
import chess
|
||||
from chesspp import engine
|
||||
from chesspp.stockfish_strategy import StockFishStrategy
|
||||
from chesspp.random_strategy import RandomStrategy
|
||||
|
||||
_DIR = os.path.abspath(os.path.dirname(__file__))
|
||||
_DATA_DIR = os.path.abspath(os.path.join(_DIR, "static_data"))
|
||||
@@ -103,6 +101,7 @@ class WebInterface:
|
||||
])
|
||||
web.run_app(app)
|
||||
|
||||
|
||||
def run_sample():
|
||||
limit = engine.Limit(time=0.5)
|
||||
limit = engine.Limit(time=1)
|
||||
WebInterface(engine.BayesMctsEngine, engine.ClassicMctsEngine, limit).run_app()
|
||||
|
||||
4
main.py
4
main.py
@@ -83,8 +83,8 @@ def test_evaluation():
|
||||
a = engine.BayesMctsEngine
|
||||
b = engine.ClassicMctsEngine
|
||||
limit = engine.Limit(time=0.5)
|
||||
evaluator = simulation.Evaluation(a, StockFishStrategy(), b, RandomStrategy(random.Random()), limit)
|
||||
results = evaluator.run(2)
|
||||
evaluator = simulation.Evaluation(a, StockFishStrategy(), b, StockFishStrategy(), limit)
|
||||
results = evaluator.run(24)
|
||||
a_results = len(list(filter(lambda x: x.winner == simulation.Winner.Engine_A, results))) / len(results) * 100
|
||||
b_results = len(list(filter(lambda x: x.winner == simulation.Winner.Engine_B, results))) / len(results) * 100
|
||||
draws = len(list(filter(lambda x: x.winner == simulation.Winner.Draw, results))) / len(results) * 100
|
||||
|
||||
Reference in New Issue
Block a user