Update evaluation and tuner

2024-05-16 14:14:25 +02:00 · 2024-05-16 14:14:25 +02:00 · f53e149af0
parent 46be23dbb7
commit f53e149af0
5 changed files with 78 additions and 47 deletions
--- a/Chess/README.md
+++ b/Chess/README.md
@ -47,12 +47,14 @@ Estimated elo: ~2100
 - [X] History heuristic
    - [X] History gravity
    - [ ] History malus
+    - [ ] History aging
 - [X] Killer heuristic
 - [X] Null-window search
 - [ ] Capture history
 - [ ] Continuation history
 - [X] Parallel search (lazy SMP)
 - [X] Pondering
+- [ ] Counter moves


 ### Eval
@ -60,6 +62,7 @@ Estimated elo: ~2100
 - [X] Piece-square tables
 - [X] Tempo
 - [ ] King safety
+- [ ] Pawn shield
 - [ ] Mobility
 - [ ] Minor piece outpost
 - [ ] Bishop pair
--- a/Chess/nimfish/nimfishpkg/eval.nim
+++ b/Chess/nimfish/nimfishpkg/eval.nim
@ -32,7 +32,7 @@ type
        # (and maluses), as well as the piece values themselves. We
        # have one for each game phase (middle and end game) and one
        # for each piece (one per color)
-        psqts: array[PieceColor.White..PieceColor.Black, array[PieceKind.Bishop..PieceKind.Rook, array[Square(0)..Square(63), float]]]
+        psqts: array[PieceColor.White..PieceColor.Black, array[PieceKind.Bishop..PieceKind.Rook, array[Square(0)..Square(63), tuple[mg, eg: float]]]]
        # Tempo bonus for the side to move
        tempo: float

@ -199,21 +199,26 @@ const

    # First entry is the middle game score, second entry is the endgame score. Half stolen
    # and half eyeballed. Tune these!
-    QUEEN_SEMI_OPEN_FILE_BONUS: array[2, Score] = [-5, 0]
-    QUEEN_OPEN_FILE_BONUS: array[2, Score] = [10, 0]
-    ROOK_SEMI_OPEN_FILE_BONUS: array[2, Score] = [40, 0]
-    ROOK_OPEN_FILE_BONUS: array[2, Score] = [60, 0]
-    BISHOP_PAIR_BONUS: array[2, Score] = [35, 100]
+    QUEEN_SEMI_OPEN_FILE_BONUS {.used.}: array[2, Score] = [-5, 0]
+    QUEEN_OPEN_FILE_BONUS {.used.}: array[2, Score] = [10, 0]
+    ROOK_SEMI_OPEN_FILE_BONUS {.used.}: array[2, Score] = [40, 0]
+    ROOK_OPEN_FILE_BONUS {.used.}: array[2, Score] = [60, 0]
+    BISHOP_PAIR_BONUS {.used.}: array[2, Score] = [35, 100]

    # Mobility bonuses (TODO: Refactor this into mobility tables. Need a tuner for that)
-    MIDDLEGAME_MOBILITY_BONUS: array[PieceKind.Bishop..PieceKind.Rook, Score] = [4, 0, 3, 0, 4, 5]
-    ENDGAME_MOBILITY_BONUS: array[PieceKind.Bishop..PieceKind.Rook, Score] = [6, 0, 5, 0, 8, 8] 
+    MIDDLEGAME_MOBILITY_BONUS {.used.}: array[PieceKind.Bishop..PieceKind.Rook, Score] = [4, 0, 3, 0, 4, 5]
+    ENDGAME_MOBILITY_BONUS {.used.}: array[PieceKind.Bishop..PieceKind.Rook, Score] = [6, 0, 5, 0, 8, 8] 

    # TODO: Don't use these. Make them tapered. Also tune them
    # DOUBLED_PAWNS_MALUS: array[9, Score] = [0, -5, -10, -20, -30, -30, -30, -30, -30]
    # ISOLATED_PAWN_MALUS: array[9, Score] = [0, -10, -25, -50, -75, -75, -75, -75, -75]
    # STRONG_PAWN_BONUS: array[9, Score] = [0, 5, 10, 20, 30, 30, 30, 30, 30]
-    # PASSED_PAWN_BONUS: array[7, Score] = [0, 120, 80, 50, 30, 15, 15]
+    MIDDLEGAME_PASSED_PAWN_BONUS {.used.}: array[7, Score] = [0, 120, 80, 50, 30, 15, 15]
+    ENDGAME_PASSED_PAWN_BONUS {.used.}: array[7, Score] = [0, 250, 150, 50, 30, 15, 15]
+    MIDDLEGAME_ISOLATED_PAWN_MALUS {.used.}: array[9, Score] = [0, -10, -25, -50, -75, -75, -75, -75, -75]
+    PASSED_PAWN_BONUS = Score(30)
+    ENDGAME_ISOLATED_PAWN_MALUS {.used.}: array[9, Score] = [0, -10, -25, -50, -75, -75, -75, -75, -75] #\[0, -40, -55, -90, -125, -125, -125, -125, -125]
+

 var
    MIDDLEGAME_VALUE_TABLES: array[PieceColor.White..PieceColor.Black, array[PieceKind.Bishop..PieceKind.Rook, array[Square(0)..Square(63), Score]]]
@ -325,7 +330,7 @@ proc evaluate*(position: Position, mode: static EvalMode, features: Features = F
        nonSideToMove = sideToMove.opposite()
        middleGamePhase = position.getGamePhase()
        occupancy = position.getOccupancy()
-        pawns {.used.} = position.getBitboard(Pawn, White) or position.getBitboard(Pawn, Black)
+        pawns: array[PieceColor.White..PieceColor.Black, Bitboard] = [position.getBitboard(Pawn, White), position.getBitboard(Pawn, Black)]
        endGamePhase = 24 - middleGamePhase
    var
        middleGameScores: array[PieceColor.White..PieceColor.Black, Score] = [0, 0]
@ -338,8 +343,8 @@ proc evaluate*(position: Position, mode: static EvalMode, features: Features = F
            middleGameScores[piece.color] += MIDDLEGAME_VALUE_TABLES[piece.color][piece.kind][sq]
            endGameScores[piece.color] += ENDGAME_VALUE_TABLES[piece.color][piece.kind][sq]
        else:
-            features.psqts[piece.color][piece.kind][sq] = 1.0
-
+            features.psqts[piece.color][piece.kind][sq].mg = middleGamePhase / 24
+            features.psqts[piece.color][piece.kind][sq].mg = endGamePhase / 24
    # Final score computation
    let 
        middleGameScore = middleGameScores[sideToMove] - middleGameScores[nonSideToMove]
@ -353,6 +358,49 @@ proc evaluate*(position: Position, mode: static EvalMode, features: Features = F
    else:
        features.tempo = 1.0

+
+    # Pawn structure
+    
+    when defined(evalPawns):        
+        for color in PieceColor.White..PieceColor.Black:
+            for pawn in pawns[color]:
+                if (getPassedPawnMask(color, pawn) and pawns[color.opposite()]) == 0:
+                    middleGameScores[color] += PASSED_PAWN_BONUS
+                    endGameScores[color] += PASSED_PAWN_BONUS * 2
+            #[
+            # Isolated pawns
+            for file in 0..7:
+                if (pawns[color] and getIsolatedPawnMask(file)) == 0:
+                    let onFile = pawns[color] and getFileMask(file)
+                    middleGameScores[color] += MIDDLEGAME_ISOLATED_PAWN_MALUS[onFile.countSquares()]
+                    endGameScores[color] += ENDGAME_ISOLATED_PAWN_MALUS[onFile.countSquares()]
+            ]#
+    
+    when defined(evalFiles):
+        let rooks: array[PieceColor.White..PieceColor.Black, Bitboard] = [position.getBitboard(Rook, White), position.getBitboard(Rook, Black)]
+        let queens: array[PieceColor.White..PieceColor.Black, Bitboard] = [position.getBitboard(Queen, White), position.getBitboard(Queen, Black)]
+        for color in PieceColor.White..PieceColor.Black:
+            when defined(evalFiles):
+                # Rooks and queens on (semi-)open files
+                for file in 0..7:
+                    let fileMask = getFileMask(file)
+                    if (fileMask and (pawns[White] or pawns[Black])).countSquares() == 0:
+                        # Open file (no pawns in the way)
+                        for rook in rooks[color] and fileMask:
+                            middleGameScores[color] += ROOK_OPEN_FILE_BONUS[0]
+                            endGameScores[color] += ROOK_OPEN_FILE_BONUS[1]
+                        for queen in queens[color] and fileMask:
+                            middleGameScores[color] += QUEEN_OPEN_FILE_BONUS[0]
+                            endGameScores[color] += QUEEN_OPEN_FILE_BONUS[1]
+
+                    if (fileMask and pawns[color]).countSquares() == 0 and (fileMask and pawns[color.opposite()]).countSquares() != 0:
+                        # Semi-open file (no friendly pawns and at least one enemy pawn in the way)
+                        for rook in rooks[color] and fileMask:
+                            middleGameScores[color] += ROOK_SEMI_OPEN_FILE_BONUS[0]
+                            endGameScores[color] += ROOK_SEMI_OPEN_FILE_BONUS[1]
+                        for queen in queens[color] and fileMask:
+                            middleGameScores[color] += QUEEN_SEMI_OPEN_FILE_BONUS[0]
+                            endGameScores[color] += QUEEN_SEMI_OPEN_FILE_BONUS[1]
    #[
    for color in PieceColor.White..PieceColor.Black:
        let
@ -364,27 +412,6 @@ proc evaluate*(position: Position, mode: static EvalMode, features: Features = F
            friendlyKnights {.used.} = position.getBitboard(Bishop, color)
            friendlyKing {.used.} = position.getBitboard(King, color)

-        when defined(evalFiles):
-            # Rooks and queens on (semi-)open files
-            for file in 0..7:
-                let fileMask = getFileMask(file)
-                if (fileMask and pawns).countSquares() == 0:
-                    # Open file (no pawns in the way)
-                    for rook in friendlyRooks and fileMask:
-                        middleGameScores[color] += ROOK_OPEN_FILE_BONUS[0]
-                        endGameScores[color] += ROOK_OPEN_FILE_BONUS[1]
-                    for queen in friendlyQueens and fileMask:
-                        middleGameScores[color] += QUEEN_OPEN_FILE_BONUS[0]
-                        endGameScores[color] += QUEEN_OPEN_FILE_BONUS[1]
-                if (fileMask and friendlyPawns).countSquares() == 0 and (fileMask and enemyPawns).countSquares() == 1:
-                    # Semi-open file (only one enemy pawn in the way)
-                    for rook in friendlyRooks and fileMask:
-                        middleGameScores[color] += ROOK_SEMI_OPEN_FILE_BONUS[0]
-                        endGameScores[color] += ROOK_SEMI_OPEN_FILE_BONUS[1]
-                    for queen in friendlyQueens and fileMask:
-                        middleGameScores[color] += QUEEN_SEMI_OPEN_FILE_BONUS[0]
-                        endGameScores[color] += QUEEN_SEMI_OPEN_FILE_BONUS[1]
-
        when defined(evalBishops):
            # Bishop pair

@ -435,9 +462,9 @@ func featureCount*(self: Features): int {.exportpy.} =
    ## the evaluation
    
    # 2 piece square tables for each of the 6
-    # pieces for 2 game phases for 2 colors
-    # plus tempo
-    return ((64 * 6) * 2) * 2 + 1
+    # pieces for 2 game phases for 2 colors,
+    # plus one extra for tempo
+    return 64 * 6 * 2 * 2 + 1


 proc extract*(self: Features, fen: string): Tensor[float] =
@ -453,7 +480,8 @@ proc extract*(self: Features, fen: string): Tensor[float] =
                var idx = color.int * len(self.psqts[color]) * len(self.psqts[color][piece])
                idx += piece.int * len(self.psqts[color][piece])
                idx += square.int
-                result[0, idx] = self.psqts[color][piece][square]
+                result[0, idx] = self.psqts[color][piece][square].eg
+                result[0, idx + 1] = self.psqts[color][piece][square].mg
    result[0, ^1] = self.tempo


--- a/Chess/nimfish/nimfishpkg/search.nim
+++ b/Chess/nimfish/nimfishpkg/search.nim
@ -650,6 +650,14 @@ proc search(self: var SearchManager, depth, ply: int, alpha, beta: Score, isPV:

            # This move was too good for us, opponent will not search it
            break
+        else:
+            when defined(historyPenalty):
+                # History penalty: apply a penalty to moves that don't fail high.
+                # We only actually apply this penalty in the event of a beta cutoff,
+                # because it doesn't really make sense to look at moves we know are
+                # bad if there are other ones down the list that lead to a fail high
+                # (the earlier we can cause a beta cutoff the better!)
+                failedLow.add((move, score))
        if score > alpha:
            alpha = score
            bestMove = move
@ -665,14 +673,6 @@ proc search(self: var SearchManager, depth, ply: int, alpha, beta: Score, isPV:
                        break
                    self.pvMoves[ply][i + 1] = pv
                self.pvMoves[ply][0] = move
-        else:
-            when defined(historyPenalty):
-                # History penalty: apply a penalty to moves that fail to raise alpha
-                # because, let's be honest, they kinda suck. We only apply this penalty
-                # in the event of a beta cutoff, because it doesn't really make sense to
-                # look at moves we know are bad if there's other ones down the list that
-                # lead to a fail high (the earlier we can cause a beta cutoff the better!)
-                failedLow.add((move, score))
            else:
                discard
    # Store the best move in the transposition table so we can find it later
--- a/Chess/nimfish/nimfishpkg/tune.py
+++ b/Chess/nimfish/nimfishpkg/tune.py
@ -103,7 +103,7 @@ def main(batch_size: int, dataset_path: Path, epoch_size: int):

 BATCH_SIZE = 16384
 DATASET_PATH = Path.cwd() / "nimfish" / "nimfishpkg" / "resources" / "lichess-big3-resolved.book"
-EPOCH_SIZE = 15
+EPOCH_SIZE = 10


 if __name__ == "__main__":
--- a/Chess/nimfish/nimfishpkg/uci.nim
+++ b/Chess/nimfish/nimfishpkg/uci.nim
@ -432,7 +432,7 @@ proc startUCISession* =
                        session.searchState[].stopPondering()
                    else:
                        when not defined(historyPenalty):
-                            # Scale our history coefficients
+                            # History aging
                            for color in PieceColor.White..PieceColor.Black:
                                for source in Square(0)..Square(63):
                                    for target in Square(0)..Square(63):