From f53e149af0f4304cda47097fc1f1dd41cbe6d343 Mon Sep 17 00:00:00 2001 From: nocturn9x Date: Thu, 16 May 2024 14:14:25 +0200 Subject: [PATCH] Update evaluation and tuner --- Chess/README.md | 3 + Chess/nimfish/nimfishpkg/eval.nim | 102 ++++++++++++++++++---------- Chess/nimfish/nimfishpkg/search.nim | 16 ++--- Chess/nimfish/nimfishpkg/tune.py | 2 +- Chess/nimfish/nimfishpkg/uci.nim | 2 +- 5 files changed, 78 insertions(+), 47 deletions(-) diff --git a/Chess/README.md b/Chess/README.md index 88e86da..db2c6a3 100644 --- a/Chess/README.md +++ b/Chess/README.md @@ -47,12 +47,14 @@ Estimated elo: ~2100 - [X] History heuristic - [X] History gravity - [ ] History malus + - [ ] History aging - [X] Killer heuristic - [X] Null-window search - [ ] Capture history - [ ] Continuation history - [X] Parallel search (lazy SMP) - [X] Pondering +- [ ] Counter moves ### Eval @@ -60,6 +62,7 @@ Estimated elo: ~2100 - [X] Piece-square tables - [X] Tempo - [ ] King safety +- [ ] Pawn shield - [ ] Mobility - [ ] Minor piece outpost - [ ] Bishop pair diff --git a/Chess/nimfish/nimfishpkg/eval.nim b/Chess/nimfish/nimfishpkg/eval.nim index e789dd1..cfd44c9 100644 --- a/Chess/nimfish/nimfishpkg/eval.nim +++ b/Chess/nimfish/nimfishpkg/eval.nim @@ -32,7 +32,7 @@ type # (and maluses), as well as the piece values themselves. We # have one for each game phase (middle and end game) and one # for each piece (one per color) - psqts: array[PieceColor.White..PieceColor.Black, array[PieceKind.Bishop..PieceKind.Rook, array[Square(0)..Square(63), float]]] + psqts: array[PieceColor.White..PieceColor.Black, array[PieceKind.Bishop..PieceKind.Rook, array[Square(0)..Square(63), tuple[mg, eg: float]]]] # Tempo bonus for the side to move tempo: float @@ -199,21 +199,26 @@ const # First entry is the middle game score, second entry is the endgame score. Half stolen # and half eyeballed. Tune these! 
- QUEEN_SEMI_OPEN_FILE_BONUS: array[2, Score] = [-5, 0] - QUEEN_OPEN_FILE_BONUS: array[2, Score] = [10, 0] - ROOK_SEMI_OPEN_FILE_BONUS: array[2, Score] = [40, 0] - ROOK_OPEN_FILE_BONUS: array[2, Score] = [60, 0] - BISHOP_PAIR_BONUS: array[2, Score] = [35, 100] + QUEEN_SEMI_OPEN_FILE_BONUS {.used.}: array[2, Score] = [-5, 0] + QUEEN_OPEN_FILE_BONUS {.used.}: array[2, Score] = [10, 0] + ROOK_SEMI_OPEN_FILE_BONUS {.used.}: array[2, Score] = [40, 0] + ROOK_OPEN_FILE_BONUS {.used.}: array[2, Score] = [60, 0] + BISHOP_PAIR_BONUS {.used.}: array[2, Score] = [35, 100] # Mobility bonuses (TODO: Refactor this into mobility tables. Need a tuner for that) - MIDDLEGAME_MOBILITY_BONUS: array[PieceKind.Bishop..PieceKind.Rook, Score] = [4, 0, 3, 0, 4, 5] - ENDGAME_MOBILITY_BONUS: array[PieceKind.Bishop..PieceKind.Rook, Score] = [6, 0, 5, 0, 8, 8] + MIDDLEGAME_MOBILITY_BONUS {.used.}: array[PieceKind.Bishop..PieceKind.Rook, Score] = [4, 0, 3, 0, 4, 5] + ENDGAME_MOBILITY_BONUS {.used.}: array[PieceKind.Bishop..PieceKind.Rook, Score] = [6, 0, 5, 0, 8, 8] # TODO: Don't use these. Make them tapered. 
Also tune them # DOUBLED_PAWNS_MALUS: array[9, Score] = [0, -5, -10, -20, -30, -30, -30, -30, -30] # ISOLATED_PAWN_MALUS: array[9, Score] = [0, -10, -25, -50, -75, -75, -75, -75, -75] # STRONG_PAWN_BONUS: array[9, Score] = [0, 5, 10, 20, 30, 30, 30, 30, 30] - # PASSED_PAWN_BONUS: array[7, Score] = [0, 120, 80, 50, 30, 15, 15] + MIDDLEGAME_PASSED_PAWN_BONUS {.used.}: array[7, Score] = [0, 120, 80, 50, 30, 15, 15] + ENDGAME_PASSED_PAWN_BONUS {.used.}: array[7, Score] = [0, 250, 150, 50, 30, 15, 15] + MIDDLEGAME_ISOLATED_PAWN_MALUS {.used.}: array[9, Score] = [0, -10, -25, -50, -75, -75, -75, -75, -75] + PASSED_PAWN_BONUS = Score(30) + ENDGAME_ISOLATED_PAWN_MALUS {.used.}: array[9, Score] = [0, -10, -25, -50, -75, -75, -75, -75, -75] #\[0, -40, -55, -90, -125, -125, -125, -125, -125] + var MIDDLEGAME_VALUE_TABLES: array[PieceColor.White..PieceColor.Black, array[PieceKind.Bishop..PieceKind.Rook, array[Square(0)..Square(63), Score]]] @@ -325,7 +330,7 @@ proc evaluate*(position: Position, mode: static EvalMode, features: Features = F nonSideToMove = sideToMove.opposite() middleGamePhase = position.getGamePhase() occupancy = position.getOccupancy() - pawns {.used.} = position.getBitboard(Pawn, White) or position.getBitboard(Pawn, Black) + pawns: array[PieceColor.White..PieceColor.Black, Bitboard] = [position.getBitboard(Pawn, White), position.getBitboard(Pawn, Black)] endGamePhase = 24 - middleGamePhase var middleGameScores: array[PieceColor.White..PieceColor.Black, Score] = [0, 0] @@ -338,8 +343,8 @@ proc evaluate*(position: Position, mode: static EvalMode, features: Features = F middleGameScores[piece.color] += MIDDLEGAME_VALUE_TABLES[piece.color][piece.kind][sq] endGameScores[piece.color] += ENDGAME_VALUE_TABLES[piece.color][piece.kind][sq] else: - features.psqts[piece.color][piece.kind][sq] = 1.0 - + features.psqts[piece.color][piece.kind][sq].mg = middleGamePhase / 24 + features.psqts[piece.color][piece.kind][sq].eg = endGamePhase / 24 # Final score computation let 
middleGameScore = middleGameScores[sideToMove] - middleGameScores[nonSideToMove] @@ -353,6 +358,49 @@ proc evaluate*(position: Position, mode: static EvalMode, features: Features = F else: features.tempo = 1.0 + + # Pawn structure + + when defined(evalPawns): + for color in PieceColor.White..PieceColor.Black: + for pawn in pawns[color]: + if (getPassedPawnMask(color, pawn) and pawns[color.opposite()]) == 0: + middleGameScores[color] += PASSED_PAWN_BONUS + endGameScores[color] += PASSED_PAWN_BONUS * 2 + #[ + # Isolated pawns + for file in 0..7: + if (pawns[color] and getIsolatedPawnMask(file)) == 0: + let onFile = pawns[color] and getFileMask(file) + middleGameScores[color] += MIDDLEGAME_ISOLATED_PAWN_MALUS[onFile.countSquares()] + endGameScores[color] += ENDGAME_ISOLATED_PAWN_MALUS[onFile.countSquares()] + ]# + + when defined(evalFiles): + let rooks: array[PieceColor.White..PieceColor.Black, Bitboard] = [position.getBitboard(Rook, White), position.getBitboard(Rook, Black)] + let queens: array[PieceColor.White..PieceColor.Black, Bitboard] = [position.getBitboard(Queen, White), position.getBitboard(Queen, Black)] + for color in PieceColor.White..PieceColor.Black: + when defined(evalFiles): + # Rooks and queens on (semi-)open files + for file in 0..7: + let fileMask = getFileMask(file) + if (fileMask and (pawns[White] or pawns[Black])).countSquares() == 0: + # Open file (no pawns in the way) + for rook in rooks[color] and fileMask: + middleGameScores[color] += ROOK_OPEN_FILE_BONUS[0] + endGameScores[color] += ROOK_OPEN_FILE_BONUS[1] + for queen in queens[color] and fileMask: + middleGameScores[color] += QUEEN_OPEN_FILE_BONUS[0] + endGameScores[color] += QUEEN_OPEN_FILE_BONUS[1] + + if (fileMask and pawns[color]).countSquares() == 0 and (fileMask and pawns[color.opposite()]).countSquares() != 0: + # Semi-open file (no friendly pawns and at least one enemy pawn in the way) + for rook in rooks[color] and fileMask: + middleGameScores[color] += ROOK_SEMI_OPEN_FILE_BONUS[0] 
+ endGameScores[color] += ROOK_SEMI_OPEN_FILE_BONUS[1] + for queen in queens[color] and fileMask: + middleGameScores[color] += QUEEN_SEMI_OPEN_FILE_BONUS[0] + endGameScores[color] += QUEEN_SEMI_OPEN_FILE_BONUS[1] #[ for color in PieceColor.White..PieceColor.Black: let @@ -364,27 +412,6 @@ proc evaluate*(position: Position, mode: static EvalMode, features: Features = F friendlyKnights {.used.} = position.getBitboard(Bishop, color) friendlyKing {.used.} = position.getBitboard(King, color) - when defined(evalFiles): - # Rooks and queens on (semi-)open files - for file in 0..7: - let fileMask = getFileMask(file) - if (fileMask and pawns).countSquares() == 0: - # Open file (no pawns in the way) - for rook in friendlyRooks and fileMask: - middleGameScores[color] += ROOK_OPEN_FILE_BONUS[0] - endGameScores[color] += ROOK_OPEN_FILE_BONUS[1] - for queen in friendlyQueens and fileMask: - middleGameScores[color] += QUEEN_OPEN_FILE_BONUS[0] - endGameScores[color] += QUEEN_OPEN_FILE_BONUS[1] - if (fileMask and friendlyPawns).countSquares() == 0 and (fileMask and enemyPawns).countSquares() == 1: - # Semi-open file (only one enemy pawn in the way) - for rook in friendlyRooks and fileMask: - middleGameScores[color] += ROOK_SEMI_OPEN_FILE_BONUS[0] - endGameScores[color] += ROOK_SEMI_OPEN_FILE_BONUS[1] - for queen in friendlyQueens and fileMask: - middleGameScores[color] += QUEEN_SEMI_OPEN_FILE_BONUS[0] - endGameScores[color] += QUEEN_SEMI_OPEN_FILE_BONUS[1] - when defined(evalBishops): # Bishop pair @@ -435,9 +462,9 @@ func featureCount*(self: Features): int {.exportpy.} = ## the evaluation # 2 piece square tables for each of the 6 - # pieces for 2 game phases for 2 colors - # plus tempo - return ((64 * 6) * 2) * 2 + 1 + # pieces for 2 game phases plus one extra + # for tempo + return 64 * 6 * 2 * 2 + 1 proc extract*(self: Features, fen: string): Tensor[float] = @@ -453,7 +480,8 @@ proc extract*(self: Features, fen: string): Tensor[float] = var idx = color.int * 
len(self.psqts[color]) * len(self.psqts[color][piece]) idx += piece.int * len(self.psqts[color][piece]) idx += square.int - result[0, idx] = self.psqts[color][piece][square] + result[0, idx] = self.psqts[color][piece][square].eg + result[0, idx + 1] = self.psqts[color][piece][square].mg result[0, ^1] = self.tempo diff --git a/Chess/nimfish/nimfishpkg/search.nim b/Chess/nimfish/nimfishpkg/search.nim index ef37240..4ba8f7a 100644 --- a/Chess/nimfish/nimfishpkg/search.nim +++ b/Chess/nimfish/nimfishpkg/search.nim @@ -650,6 +650,14 @@ proc search(self: var SearchManager, depth, ply: int, alpha, beta: Score, isPV: # This move was too good for us, opponent will not search it break + else: + when defined(historyPenalty): + # History penalty: apply a penalty to moves that don't fail high. + # We only actually apply this penalty in the event of a beta cutoff, + # because it doesn't really make sense to look at moves we know are + # bad if there's other ones down the list that lead to a fail high + # (the earlier we can cause a beta cutoff the better!) + failedLow.add((move, score)) if score > alpha: alpha = score bestMove = move @@ -665,14 +673,6 @@ proc search(self: var SearchManager, depth, ply: int, alpha, beta: Score, isPV: break self.pvMoves[ply][i + 1] = pv self.pvMoves[ply][0] = move - else: - when defined(historyPenalty): - # History penalty: apply a penalty to moves that fail to raise alpha - # because, let's be honest, they kinda suck. We only apply this penalty - # in the event of a beta cutoff, because it doesn't really make sense to - # look at moves we know are bad if there's other ones down the list that - # lead to a fail high (the earlier we can cause a beta cutoff the better!) 
- failedLow.add((move, score)) else: discard # Store the best move in the transposition table so we can find it later diff --git a/Chess/nimfish/nimfishpkg/tune.py b/Chess/nimfish/nimfishpkg/tune.py index 5d7c359..288d384 100644 --- a/Chess/nimfish/nimfishpkg/tune.py +++ b/Chess/nimfish/nimfishpkg/tune.py @@ -103,7 +103,7 @@ def main(batch_size: int, dataset_path: Path, epoch_size: int): BATCH_SIZE = 16384 DATASET_PATH = Path.cwd() / "nimfish" / "nimfishpkg" / "resources" / "lichess-big3-resolved.book" -EPOCH_SIZE = 15 +EPOCH_SIZE = 10 if __name__ == "__main__": diff --git a/Chess/nimfish/nimfishpkg/uci.nim b/Chess/nimfish/nimfishpkg/uci.nim index c05db20..264f815 100644 --- a/Chess/nimfish/nimfishpkg/uci.nim +++ b/Chess/nimfish/nimfishpkg/uci.nim @@ -432,7 +432,7 @@ proc startUCISession* = session.searchState[].stopPondering() else: when not defined(historyPenalty): - # Scale our history coefficients + # History aging for color in PieceColor.White..PieceColor.Black: for source in Square(0)..Square(63): for target in Square(0)..Square(63):