Implemented strategy evaluation for moves and improved scoring for BayesMcts
This commit is contained in:
@@ -1,7 +1,9 @@
|
|||||||
|
import math
|
||||||
|
|
||||||
|
import torch.distributions as dist
|
||||||
from chesspp.i_mcts import *
|
from chesspp.i_mcts import *
|
||||||
from chesspp.i_strategy import IStrategy
|
from chesspp.i_strategy import IStrategy
|
||||||
from chesspp.util_gaussian import gaussian_ucb1, max_gaussian, min_gaussian
|
from chesspp.util_gaussian import gaussian_ucb1, max_gaussian, min_gaussian
|
||||||
from chesspp.eval import score_manual
|
|
||||||
|
|
||||||
|
|
||||||
class BayesianMctsNode(IMctsNode):
|
class BayesianMctsNode(IMctsNode):
|
||||||
@@ -61,8 +63,9 @@ class BayesianMctsNode(IMctsNode):
|
|||||||
def select(self) -> IMctsNode:
|
def select(self) -> IMctsNode:
|
||||||
if len(self.children) == 0:
|
if len(self.children) == 0:
|
||||||
return self
|
return self
|
||||||
else:
|
elif not self.board.is_game_over():
|
||||||
return self._select_best_child().select()
|
return self._select_best_child().select()
|
||||||
|
return self
|
||||||
|
|
||||||
def expand(self) -> IMctsNode:
|
def expand(self) -> IMctsNode:
|
||||||
if self.visits == 0:
|
if self.visits == 0:
|
||||||
@@ -87,7 +90,8 @@ class BayesianMctsNode(IMctsNode):
|
|||||||
copied_board.push(m)
|
copied_board.push(m)
|
||||||
steps += 1
|
steps += 1
|
||||||
|
|
||||||
score = score_manual(copied_board) // steps
|
steps = max(1, steps)
|
||||||
|
score = int(self.strategy.analyze_board(copied_board) / (math.log2(steps) + 1))
|
||||||
self.result = score
|
self.result = score
|
||||||
return score
|
return score
|
||||||
|
|
||||||
@@ -138,7 +142,9 @@ class BayesianMcts(IMcts):
|
|||||||
|
|
||||||
def sample(self, runs: int = 1000) -> None:
|
def sample(self, runs: int = 1000) -> None:
|
||||||
for i in range(runs):
|
for i in range(runs):
|
||||||
# print(f"sample {i}")
|
if self.board.is_game_over():
|
||||||
|
break
|
||||||
|
|
||||||
leaf_node = self.root.select().expand()
|
leaf_node = self.root.select().expand()
|
||||||
_ = leaf_node.rollout()
|
_ = leaf_node.rollout()
|
||||||
leaf_node.backpropagate()
|
leaf_node.backpropagate()
|
||||||
@@ -151,6 +157,7 @@ class BayesianMcts(IMcts):
|
|||||||
for child in self.get_children():
|
for child in self.get_children():
|
||||||
if child.move == move:
|
if child.move == move:
|
||||||
self.root = child
|
self.root = child
|
||||||
|
child.depth = 0
|
||||||
self.root.parent = None
|
self.root.parent = None
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -160,10 +167,10 @@ class BayesianMcts(IMcts):
|
|||||||
def get_children(self) -> list[IMctsNode]:
|
def get_children(self) -> list[IMctsNode]:
|
||||||
return self.root.children
|
return self.root.children
|
||||||
|
|
||||||
def get_moves(self) -> Dict[chess.Move, int]:
|
def get_moves(self) -> Dict[chess.Move, dist.Normal]:
|
||||||
res = {}
|
res = {}
|
||||||
for c in self.root.children:
|
for c in self.root.children:
|
||||||
res[c.move] = c.mu
|
res[c.move] = dist.Normal(c.mu, c.sigma)
|
||||||
return res
|
return res
|
||||||
|
|
||||||
def print(self):
|
def print(self):
|
||||||
|
|||||||
@@ -1,19 +1,17 @@
|
|||||||
import chess
|
import chess
|
||||||
import random
|
import random
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
from chesspp.i_strategy import IStrategy
|
||||||
|
|
||||||
from chesspp import eval
|
|
||||||
from chesspp import util
|
|
||||||
|
|
||||||
|
|
||||||
class ClassicMcts:
|
class ClassicMcts:
|
||||||
|
|
||||||
def __init__(self, board: chess.Board, color: chess.Color, parent=None, move: chess.Move | None = None,
|
def __init__(self, board: chess.Board, color: chess.Color, strategy: IStrategy, parent=None, move: chess.Move | None = None,
|
||||||
random_state: int | None = None):
|
random_state: int | None = None):
|
||||||
self.random = random.Random(random_state)
|
self.random = random.Random(random_state)
|
||||||
self.board = board
|
self.board = board
|
||||||
self.color = color
|
self.color = color
|
||||||
|
self.strategy = strategy
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
self.move = move
|
self.move = move
|
||||||
self.children = []
|
self.children = []
|
||||||
@@ -31,11 +29,11 @@ class ClassicMcts:
|
|||||||
self.untried_actions.remove(move)
|
self.untried_actions.remove(move)
|
||||||
next_board = self.board.copy()
|
next_board = self.board.copy()
|
||||||
next_board.push(move)
|
next_board.push(move)
|
||||||
child_node = ClassicMcts(next_board, color=self.color, parent=self, move=move)
|
child_node = ClassicMcts(next_board, color=self.color, strategy=self.strategy, parent=self, move=move)
|
||||||
self.children.append(child_node)
|
self.children.append(child_node)
|
||||||
return child_node
|
return child_node
|
||||||
|
|
||||||
def _rollout(self, rollout_depth: int = 3) -> int:
|
def _rollout(self, rollout_depth: int = 4) -> int:
|
||||||
"""
|
"""
|
||||||
Rolls out the node by simulating a game for a given depth.
|
Rolls out the node by simulating a game for a given depth.
|
||||||
Sometimes this step is called 'simulation' or 'playout'.
|
Sometimes this step is called 'simulation' or 'playout'.
|
||||||
@@ -47,11 +45,11 @@ class ClassicMcts:
|
|||||||
if copied_board.is_game_over():
|
if copied_board.is_game_over():
|
||||||
break
|
break
|
||||||
|
|
||||||
m = util.pick_move(copied_board)
|
m = self.strategy.pick_next_move(copied_board)
|
||||||
copied_board.push(m)
|
copied_board.push(m)
|
||||||
steps += 1
|
steps += 1
|
||||||
|
|
||||||
return eval.score_manual(copied_board) // steps
|
return self.strategy.analyze_board(copied_board) // steps
|
||||||
|
|
||||||
def _backpropagate(self, score: float) -> None:
|
def _backpropagate(self, score: float) -> None:
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -2,12 +2,14 @@ import random
|
|||||||
import time
|
import time
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
|
|
||||||
|
from torch import distributions as dist
|
||||||
import chess
|
import chess
|
||||||
import chess.engine
|
import chess.engine
|
||||||
|
|
||||||
from chesspp.baysian_mcts import BayesianMcts
|
from chesspp.baysian_mcts import BayesianMcts
|
||||||
from chesspp.classic_mcts import ClassicMcts
|
from chesspp.classic_mcts import ClassicMcts
|
||||||
from chesspp.i_strategy import IStrategy
|
from chesspp.i_strategy import IStrategy
|
||||||
|
from typing import Dict
|
||||||
|
|
||||||
|
|
||||||
class Limit:
|
class Limit:
|
||||||
@@ -95,13 +97,20 @@ class BayesMctsEngine(Engine):
|
|||||||
if len(board.move_stack) != 0: # apply previous move to mcts --> reuse previous simulation results
|
if len(board.move_stack) != 0: # apply previous move to mcts --> reuse previous simulation results
|
||||||
self.mcts.apply_move(board.peek())
|
self.mcts.apply_move(board.peek())
|
||||||
limit.run(lambda: self.mcts.sample(1))
|
limit.run(lambda: self.mcts.sample(1))
|
||||||
# limit.run(lambda: mcts_root.build_tree())
|
best_move = self.get_best_move(self.mcts.get_moves(), board.turn)
|
||||||
best_move = max(self.mcts.get_moves().items(), key=lambda x: x[1])[0] if board.turn == chess.WHITE else (
|
|
||||||
min(self.mcts.get_moves().items(), key=lambda x: x[1])[0])
|
|
||||||
print(best_move)
|
|
||||||
self.mcts.apply_move(best_move)
|
self.mcts.apply_move(best_move)
|
||||||
return chess.engine.PlayResult(move=best_move, ponder=None)
|
return chess.engine.PlayResult(move=best_move, ponder=None)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_best_move(possible_moves: Dict[chess.Move, dist.Normal], color: chess.Color) -> chess.Move:
|
||||||
|
moves = {}
|
||||||
|
for m, d in possible_moves.items():
|
||||||
|
moves[m] = d.sample()
|
||||||
|
|
||||||
|
return max(moves.items(), key=lambda x: x[1])[0] if color == chess.WHITE else (
|
||||||
|
min(moves.items(), key=lambda x: x[1])[0])
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class ClassicMctsEngine(Engine):
|
class ClassicMctsEngine(Engine):
|
||||||
def __init__(self, board: chess.Board, color: chess.Color, strategy: IStrategy):
|
def __init__(self, board: chess.Board, color: chess.Color, strategy: IStrategy):
|
||||||
@@ -112,9 +121,8 @@ class ClassicMctsEngine(Engine):
|
|||||||
return "ClassicMctsEngine"
|
return "ClassicMctsEngine"
|
||||||
|
|
||||||
def play(self, board: chess.Board, limit: Limit) -> chess.engine.PlayResult:
|
def play(self, board: chess.Board, limit: Limit) -> chess.engine.PlayResult:
|
||||||
mcts_root = ClassicMcts(board, self.color)
|
mcts_root = ClassicMcts(board, self.color, self.strategy)
|
||||||
mcts_root.build_tree()
|
limit.run(lambda: mcts_root.build_tree(1))
|
||||||
# limit.run(lambda: mcts_root.build_tree())
|
|
||||||
best_move = max(mcts_root.children, key=lambda x: x.score).move if board.turn == chess.WHITE else (
|
best_move = max(mcts_root.children, key=lambda x: x.score).move if board.turn == chess.WHITE else (
|
||||||
min(mcts_root.children, key=lambda x: x.score).move)
|
min(mcts_root.children, key=lambda x: x.score).move)
|
||||||
return chess.engine.PlayResult(move=best_move, ponder=None)
|
return chess.engine.PlayResult(move=best_move, ponder=None)
|
||||||
|
|||||||
@@ -173,16 +173,21 @@ def score_manual(board: chess.Board) -> int:
|
|||||||
return score
|
return score
|
||||||
|
|
||||||
|
|
||||||
def score_stockfish(board: chess.Board) -> chess.engine.PovScore:
|
def score_stockfish(board: chess.Board, stockfish: chess.engine.SimpleEngine | None = None) -> int:
|
||||||
"""
|
"""
|
||||||
Calculate the score of the given board using stockfish
|
Calculate the score of the given board using stockfish
|
||||||
:param board:
|
:param board:
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
engine = chess.engine.SimpleEngine.popen_uci("/home/luke/projects/pp-project/chess-engine-pp/stockfish/stockfish-ubuntu-x86-64-avx2")
|
if stockfish is None:
|
||||||
info = engine.analyse(board, chess.engine.Limit(depth=0))
|
engine = chess.engine.SimpleEngine.popen_uci(
|
||||||
engine.quit()
|
"/home/luke/projects/pp-project/chess-engine-pp/stockfish/stockfish-ubuntu-x86-64-avx2")
|
||||||
return info["score"]
|
info = engine.analyse(board, chess.engine.Limit(depth=0))
|
||||||
|
engine.quit()
|
||||||
|
return info['score'].white().score(mate_score=100_000)
|
||||||
|
else:
|
||||||
|
info = stockfish.analyse(board, chess.engine.Limit(depth=0))
|
||||||
|
return info['score'].white().score(mate_score=100_000)
|
||||||
|
|
||||||
|
|
||||||
def score_lc0(board: chess.Board) -> chess.engine.PovScore:
|
def score_lc0(board: chess.Board) -> chess.engine.PovScore:
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ class IMctsNode(ABC):
|
|||||||
self.move = move
|
self.move = move
|
||||||
self.legal_moves = list(board.legal_moves)
|
self.legal_moves = list(board.legal_moves)
|
||||||
self.random_state = random_state
|
self.random_state = random_state
|
||||||
|
self.depth = 0
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def select(self) -> Self:
|
def select(self) -> Self:
|
||||||
|
|||||||
@@ -9,3 +9,7 @@ class IStrategy(ABC):
|
|||||||
@abstractmethod
|
@abstractmethod
|
||||||
def pick_next_move(self, board: chess.Board) -> chess.Move:
|
def pick_next_move(self, board: chess.Board) -> chess.Move:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def analyze_board(self, board: chess.Board) -> int:
|
||||||
|
pass
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
import chess
|
import chess
|
||||||
import random
|
import random
|
||||||
from chesspp.i_strategy import IStrategy
|
from chesspp.i_strategy import IStrategy
|
||||||
|
from chesspp.eval import score_manual
|
||||||
|
|
||||||
|
|
||||||
class RandomStrategy(IStrategy):
|
class RandomStrategy(IStrategy):
|
||||||
@@ -11,3 +12,6 @@ class RandomStrategy(IStrategy):
|
|||||||
if len(list(board.legal_moves)) == 0:
|
if len(list(board.legal_moves)) == 0:
|
||||||
return None
|
return None
|
||||||
return self.random_state.choice(list(board.legal_moves))
|
return self.random_state.choice(list(board.legal_moves))
|
||||||
|
|
||||||
|
def analyze_board(self, board: chess.Board) -> int:
|
||||||
|
return score_manual(board)
|
||||||
|
|||||||
@@ -1,15 +1,21 @@
|
|||||||
import os
|
import os
|
||||||
import chess
|
import chess
|
||||||
from chesspp.i_strategy import IStrategy
|
from chesspp.i_strategy import IStrategy
|
||||||
|
from chesspp.eval import score_stockfish
|
||||||
import chess.engine
|
import chess.engine
|
||||||
|
|
||||||
_DIR = os.path.abspath(os.path.dirname(__file__))
|
_DIR = os.path.abspath(os.path.dirname(__file__))
|
||||||
|
|
||||||
|
|
||||||
class StockFishStrategy(IStrategy):
|
class StockFishStrategy(IStrategy):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self._stockfish = None
|
self._stockfish = None
|
||||||
|
|
||||||
|
def __del__(self):
|
||||||
|
if self._stockfish is not None:
|
||||||
|
self._stockfish.quit()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def stockfish(self) -> chess.engine.SimpleEngine:
|
def stockfish(self) -> chess.engine.SimpleEngine:
|
||||||
if self._stockfish is None:
|
if self._stockfish is None:
|
||||||
@@ -22,6 +28,7 @@ class StockFishStrategy(IStrategy):
|
|||||||
self._stockfish = stockfish
|
self._stockfish = stockfish
|
||||||
|
|
||||||
def pick_next_move(self, board: chess.Board) -> chess.Move | None:
|
def pick_next_move(self, board: chess.Board) -> chess.Move | None:
|
||||||
move = self.stockfish.play(board, chess.engine.Limit(depth=4)).move
|
return self.stockfish.play(board, chess.engine.Limit(depth=4)).move
|
||||||
print("stockfish picked:", move)
|
|
||||||
return move
|
def analyze_board(self, board: chess.Board) -> int:
|
||||||
|
return score_stockfish(board, self.stockfish)
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
import os
|
import os
|
||||||
import asyncio
|
import asyncio
|
||||||
import random
|
|
||||||
|
|
||||||
import aiohttp
|
import aiohttp
|
||||||
from aiohttp import web
|
from aiohttp import web
|
||||||
@@ -8,7 +7,6 @@ from aiohttp import web
|
|||||||
import chess
|
import chess
|
||||||
from chesspp import engine
|
from chesspp import engine
|
||||||
from chesspp.stockfish_strategy import StockFishStrategy
|
from chesspp.stockfish_strategy import StockFishStrategy
|
||||||
from chesspp.random_strategy import RandomStrategy
|
|
||||||
|
|
||||||
_DIR = os.path.abspath(os.path.dirname(__file__))
|
_DIR = os.path.abspath(os.path.dirname(__file__))
|
||||||
_DATA_DIR = os.path.abspath(os.path.join(_DIR, "static_data"))
|
_DATA_DIR = os.path.abspath(os.path.join(_DIR, "static_data"))
|
||||||
@@ -103,6 +101,7 @@ class WebInterface:
|
|||||||
])
|
])
|
||||||
web.run_app(app)
|
web.run_app(app)
|
||||||
|
|
||||||
|
|
||||||
def run_sample():
|
def run_sample():
|
||||||
limit = engine.Limit(time=0.5)
|
limit = engine.Limit(time=1)
|
||||||
WebInterface(engine.BayesMctsEngine, engine.ClassicMctsEngine, limit).run_app()
|
WebInterface(engine.BayesMctsEngine, engine.ClassicMctsEngine, limit).run_app()
|
||||||
|
|||||||
4
main.py
4
main.py
@@ -83,8 +83,8 @@ def test_evaluation():
|
|||||||
a = engine.BayesMctsEngine
|
a = engine.BayesMctsEngine
|
||||||
b = engine.ClassicMctsEngine
|
b = engine.ClassicMctsEngine
|
||||||
limit = engine.Limit(time=0.5)
|
limit = engine.Limit(time=0.5)
|
||||||
evaluator = simulation.Evaluation(a, StockFishStrategy(), b, RandomStrategy(random.Random()), limit)
|
evaluator = simulation.Evaluation(a, StockFishStrategy(), b, StockFishStrategy(), limit)
|
||||||
results = evaluator.run(2)
|
results = evaluator.run(24)
|
||||||
a_results = len(list(filter(lambda x: x.winner == simulation.Winner.Engine_A, results))) / len(results) * 100
|
a_results = len(list(filter(lambda x: x.winner == simulation.Winner.Engine_A, results))) / len(results) * 100
|
||||||
b_results = len(list(filter(lambda x: x.winner == simulation.Winner.Engine_B, results))) / len(results) * 100
|
b_results = len(list(filter(lambda x: x.winner == simulation.Winner.Engine_B, results))) / len(results) * 100
|
||||||
draws = len(list(filter(lambda x: x.winner == simulation.Winner.Draw, results))) / len(results) * 100
|
draws = len(list(filter(lambda x: x.winner == simulation.Winner.Draw, results))) / len(results) * 100
|
||||||
|
|||||||
Reference in New Issue
Block a user