Update evaluation and tuner

2024-05-16 14:14:25 +02:00 · 2024-05-16 14:14:25 +02:00 · f53e149af0
parent 46be23dbb7
commit f53e149af0
5 changed files with 78 additions and 47 deletions
--- a/Chess/README.md
+++ b/Chess/README.md
@ -47,12 +47,14 @@ Estimated elo: ~2100
 - [X] History heuristic
    - [X] History gravity
    - [ ] History malus
    - [ ] History aging
 - [X] Killer heuristic
 - [X] Null-window search
 - [ ] Capture history
 - [ ] Continuation history
 - [X] Parallel search (lazy SMP)
 - [X] Pondering
 - [ ] Counter moves
 ### Eval
@ -60,6 +62,7 @@ Estimated elo: ~2100
 - [X] Piece-square tables
 - [X] Tempo
 - [ ] King safety
 - [ ] Pawn shield
 - [ ] Mobility
 - [ ] Minor piece outpost
 - [ ] Bishop pair
--- a/Chess/nimfish/nimfishpkg/eval.nim
+++ b/Chess/nimfish/nimfishpkg/eval.nim
@ -32,7 +32,7 @@ type
        # (and maluses), as well as the piece values themselves. We
        # have one for each game phase (middle and end game) and one
        # for each piece (one per color)
-        psqts: array[PieceColor.White..PieceColor.Black, array[PieceKind.Bishop..PieceKind.Rook, array[Square(0)..Square(63), float]]]
+        psqts: array[PieceColor.White..PieceColor.Black, array[PieceKind.Bishop..PieceKind.Rook, array[Square(0)..Square(63), tuple[mg, eg: float]]]]
        # Tempo bonus for the side to move
        tempo: float
@ -199,21 +199,26 @@ const
    # First entry is the middle game score, second entry is the endgame score. Half stolen
    # and half eyeballed. Tune these!
-    QUEEN_SEMI_OPEN_FILE_BONUS: array[2, Score] = [-5, 0]
+    QUEEN_SEMI_OPEN_FILE_BONUS {.used.}: array[2, Score] = [-5, 0]
-    QUEEN_OPEN_FILE_BONUS: array[2, Score] = [10, 0]
+    QUEEN_OPEN_FILE_BONUS {.used.}: array[2, Score] = [10, 0]
-    ROOK_SEMI_OPEN_FILE_BONUS: array[2, Score] = [40, 0]
+    ROOK_SEMI_OPEN_FILE_BONUS {.used.}: array[2, Score] = [40, 0]
-    ROOK_OPEN_FILE_BONUS: array[2, Score] = [60, 0]
+    ROOK_OPEN_FILE_BONUS {.used.}: array[2, Score] = [60, 0]
-    BISHOP_PAIR_BONUS: array[2, Score] = [35, 100]
+    BISHOP_PAIR_BONUS {.used.}: array[2, Score] = [35, 100]
    # Mobility bonuses (TODO: Refactor this into mobility tables. Need a tuner for that)
-    MIDDLEGAME_MOBILITY_BONUS: array[PieceKind.Bishop..PieceKind.Rook, Score] = [4, 0, 3, 0, 4, 5]
+    MIDDLEGAME_MOBILITY_BONUS {.used.}: array[PieceKind.Bishop..PieceKind.Rook, Score] = [4, 0, 3, 0, 4, 5]
-    ENDGAME_MOBILITY_BONUS: array[PieceKind.Bishop..PieceKind.Rook, Score] = [6, 0, 5, 0, 8, 8] 
+    ENDGAME_MOBILITY_BONUS {.used.}: array[PieceKind.Bishop..PieceKind.Rook, Score] = [6, 0, 5, 0, 8, 8] 
    # TODO: Don't use these. Make them tapered. Also tune them
    # DOUBLED_PAWNS_MALUS: array[9, Score] = [0, -5, -10, -20, -30, -30, -30, -30, -30]
    # ISOLATED_PAWN_MALUS: array[9, Score] = [0, -10, -25, -50, -75, -75, -75, -75, -75]
    # STRONG_PAWN_BONUS: array[9, Score] = [0, 5, 10, 20, 30, 30, 30, 30, 30]
-    # PASSED_PAWN_BONUS: array[7, Score] = [0, 120, 80, 50, 30, 15, 15]
+    MIDDLEGAME_PASSED_PAWN_BONUS {.used.}: array[7, Score] = [0, 120, 80, 50, 30, 15, 15]
    ENDGAME_PASSED_PAWN_BONUS {.used.}: array[7, Score] = [0, 250, 150, 50, 30, 15, 15]
    MIDDLEGAME_ISOLATED_PAWN_MALUS {.used.}: array[9, Score] = [0, -10, -25, -50, -75, -75, -75, -75, -75]
    PASSED_PAWN_BONUS = Score(30)
    ENDGAME_ISOLATED_PAWN_MALUS {.used.}: array[9, Score] = [0, -10, -25, -50, -75, -75, -75, -75, -75] #\[0, -40, -55, -90, -125, -125, -125, -125, -125]
 var
    MIDDLEGAME_VALUE_TABLES: array[PieceColor.White..PieceColor.Black, array[PieceKind.Bishop..PieceKind.Rook, array[Square(0)..Square(63), Score]]]
@ -325,7 +330,7 @@ proc evaluate*(position: Position, mode: static EvalMode, features: Features = F
        nonSideToMove = sideToMove.opposite()
        middleGamePhase = position.getGamePhase()
        occupancy = position.getOccupancy()
-        pawns {.used.} = position.getBitboard(Pawn, White) or position.getBitboard(Pawn, Black)
+        pawns: array[PieceColor.White..PieceColor.Black, Bitboard] = [position.getBitboard(Pawn, White), position.getBitboard(Pawn, Black)]
        endGamePhase = 24 - middleGamePhase
    var
        middleGameScores: array[PieceColor.White..PieceColor.Black, Score] = [0, 0]
@ -338,8 +343,8 @@ proc evaluate*(position: Position, mode: static EvalMode, features: Features = F
            middleGameScores[piece.color] += MIDDLEGAME_VALUE_TABLES[piece.color][piece.kind][sq]
            endGameScores[piece.color] += ENDGAME_VALUE_TABLES[piece.color][piece.kind][sq]
        else:
-            features.psqts[piece.color][piece.kind][sq] = 1.0
+            features.psqts[piece.color][piece.kind][sq].mg = middleGamePhase / 24
-
+            features.psqts[piece.color][piece.kind][sq].eg = endGamePhase / 24  # endgame share of the phase; the .mg share is set just above
    # Final score computation
    let 
        middleGameScore = middleGameScores[sideToMove] - middleGameScores[nonSideToMove]
@ -353,6 +358,49 @@ proc evaluate*(position: Position, mode: static EvalMode, features: Features = F
    else:
        features.tempo = 1.0
    # Pawn structure
    when defined(evalPawns):        
        for color in PieceColor.White..PieceColor.Black:
            for pawn in pawns[color]:
                if (getPassedPawnMask(color, pawn) and pawns[color.opposite()]) == 0:
                    middleGameScores[color] += PASSED_PAWN_BONUS
                    endGameScores[color] += PASSED_PAWN_BONUS * 2
            #[
            # Isolated pawns
            for file in 0..7:
                if (pawns[color] and getIsolatedPawnMask(file)) == 0:
                    let onFile = pawns[color] and getFileMask(file)
                    middleGameScores[color] += MIDDLEGAME_ISOLATED_PAWN_MALUS[onFile.countSquares()]
                    endGameScores[color] += ENDGAME_ISOLATED_PAWN_MALUS[onFile.countSquares()]
            ]#
    when defined(evalFiles):
        let rooks: array[PieceColor.White..PieceColor.Black, Bitboard] = [position.getBitboard(Rook, White), position.getBitboard(Rook, Black)]
        let queens: array[PieceColor.White..PieceColor.Black, Bitboard] = [position.getBitboard(Queen, White), position.getBitboard(Queen, Black)]
        for color in PieceColor.White..PieceColor.Black:
            when defined(evalFiles):
                # Rooks and queens on (semi-)open files
                for file in 0..7:
                    let fileMask = getFileMask(file)
                    if (fileMask and (pawns[White] or pawns[Black])).countSquares() == 0:
                        # Open file (no pawns in the way)
                        for rook in rooks[color] and fileMask:
                            middleGameScores[color] += ROOK_OPEN_FILE_BONUS[0]
                            endGameScores[color] += ROOK_OPEN_FILE_BONUS[1]
                        for queen in queens[color] and fileMask:
                            middleGameScores[color] += QUEEN_OPEN_FILE_BONUS[0]
                            endGameScores[color] += QUEEN_OPEN_FILE_BONUS[1]
                    if (fileMask and pawns[color]).countSquares() == 0 and (fileMask and pawns[color.opposite()]).countSquares() != 0:
                        # Semi-open file (no friendly pawns and at least one enemy pawn in the way)
                        for rook in rooks[color] and fileMask:
                            middleGameScores[color] += ROOK_SEMI_OPEN_FILE_BONUS[0]
                            endGameScores[color] += ROOK_SEMI_OPEN_FILE_BONUS[1]
                        for queen in queens[color] and fileMask:
                            middleGameScores[color] += QUEEN_SEMI_OPEN_FILE_BONUS[0]
                            endGameScores[color] += QUEEN_SEMI_OPEN_FILE_BONUS[1]
    #[
    for color in PieceColor.White..PieceColor.Black:
        let
@ -364,27 +412,6 @@ proc evaluate*(position: Position, mode: static EvalMode, features: Features = F
            friendlyKnights {.used.} = position.getBitboard(Bishop, color)
            friendlyKing {.used.} = position.getBitboard(King, color)
        when defined(evalFiles):
            # Rooks and queens on (semi-)open files
            for file in 0..7:
                let fileMask = getFileMask(file)
                if (fileMask and pawns).countSquares() == 0:
                    # Open file (no pawns in the way)
                    for rook in friendlyRooks and fileMask:
                        middleGameScores[color] += ROOK_OPEN_FILE_BONUS[0]
                        endGameScores[color] += ROOK_OPEN_FILE_BONUS[1]
                    for queen in friendlyQueens and fileMask:
                        middleGameScores[color] += QUEEN_OPEN_FILE_BONUS[0]
                        endGameScores[color] += QUEEN_OPEN_FILE_BONUS[1]
                if (fileMask and friendlyPawns).countSquares() == 0 and (fileMask and enemyPawns).countSquares() == 1:
                    # Semi-open file (only one enemy pawn in the way)
                    for rook in friendlyRooks and fileMask:
                        middleGameScores[color] += ROOK_SEMI_OPEN_FILE_BONUS[0]
                        endGameScores[color] += ROOK_SEMI_OPEN_FILE_BONUS[1]
                    for queen in friendlyQueens and fileMask:
                        middleGameScores[color] += QUEEN_SEMI_OPEN_FILE_BONUS[0]
                        endGameScores[color] += QUEEN_SEMI_OPEN_FILE_BONUS[1]
        when defined(evalBishops):
            # Bishop pair
@ -435,9 +462,9 @@ func featureCount*(self: Features): int {.exportpy.} =
    ## the evaluation
    # 2 piece square tables for each of the 6
-    # pieces for 2 game phases for 2 colors
+    # pieces (one per color) for 2 game phases, plus one extra
-    # plus tempo
+    # for tempo
-    return ((64 * 6) * 2) * 2 + 1
+    return 64 * 6 * 2 * 2 + 1
 proc extract*(self: Features, fen: string): Tensor[float] =
@ -453,7 +480,8 @@ proc extract*(self: Features, fen: string): Tensor[float] =
                var idx = color.int * len(self.psqts[color]) * len(self.psqts[color][piece])
                idx += piece.int * len(self.psqts[color][piece])
                idx += square.int
-                result[0, idx] = self.psqts[color][piece][square]
+                result[0, 2 * idx] = self.psqts[color][piece][square].eg  # two columns per square so eg/mg of neighbouring squares never collide
                 result[0, 2 * idx + 1] = self.psqts[color][piece][square].mg  # NOTE(review): assumes result has featureCount() columns - confirm
    result[0, ^1] = self.tempo
--- a/Chess/nimfish/nimfishpkg/search.nim
+++ b/Chess/nimfish/nimfishpkg/search.nim
@ -650,6 +650,14 @@ proc search(self: var SearchManager, depth, ply: int, alpha, beta: Score, isPV:
            # This move was too good for us, opponent will not search it
            break
        else:
            when defined(historyPenalty):
                # History penalty: apply a penalty to moves that don't fail high.
                # We only actually apply this penalty in the event of a beta cutoff,
                # because it doesn't really make sense to look at moves we know are
                # bad if there are other ones down the list that lead to a fail high
                # (the earlier we can cause a beta cutoff the better!)
                failedLow.add((move, score))
        if score > alpha:
            alpha = score
            bestMove = move
@ -665,14 +673,6 @@ proc search(self: var SearchManager, depth, ply: int, alpha, beta: Score, isPV:
                        break
                    self.pvMoves[ply][i + 1] = pv
                self.pvMoves[ply][0] = move
        else:
            when defined(historyPenalty):
                # History penalty: apply a penalty to moves that fail to raise alpha
                # because, let's be honest, they kinda suck. We only apply this penalty
                # in the event of a beta cutoff, because it doesn't really make sense to
                # look at moves we know are bad if there's other ones down the list that
                # lead to a fail high (the earlier we can cause a beta cutoff the better!)
                failedLow.add((move, score))
            else:
                discard
    # Store the best move in the transposition table so we can find it later
--- a/Chess/nimfish/nimfishpkg/tune.py
+++ b/Chess/nimfish/nimfishpkg/tune.py
@ -103,7 +103,7 @@ def main(batch_size: int, dataset_path: Path, epoch_size: int):
 BATCH_SIZE = 16384
 DATASET_PATH = Path.cwd() / "nimfish" / "nimfishpkg" / "resources" / "lichess-big3-resolved.book"
-EPOCH_SIZE = 15
+EPOCH_SIZE = 10
 if __name__ == "__main__":
--- a/Chess/nimfish/nimfishpkg/uci.nim
+++ b/Chess/nimfish/nimfishpkg/uci.nim
@ -432,7 +432,7 @@ proc startUCISession* =
                        session.searchState[].stopPondering()
                    else:
                        when not defined(historyPenalty):
-                            # Scale our history coefficients
+                            # History aging
                            for color in PieceColor.White..PieceColor.Black:
                                for source in Square(0)..Square(63):
                                    for target in Square(0)..Square(63):