Implement QSEE pruning (gains 52.9 +/- 25.2). Initial work on history malus. Reset search state after ucinewgame + minor other fixes

Mattia Giambirtone 2024-05-12 01:02:47 +02:00
parent ec309c9d4d
commit db76565aa8
5 changed files with 142 additions and 127 deletions

View File

@ -2,6 +2,6 @@
-o:"bin/nimfish"
-d:danger
--passL:"-flto -lmimalloc"
--passC:"-flto -march=native -mtune=native"
--passC:"-flto -march=native -mtune=native -Ofast"
-d:useMalloc
--mm:atomicArc

View File

@ -15,7 +15,7 @@ bin = @["nimfish"]
requires "nim >= 2.0.4"
requires "jsony >= 1.1.5"
requires "nint128 >= 0.3.3"
requires "threading >= 0.2.0"
task test, "Runs the test suite":
exec "python tests/suite.py -d 6 -b -p -s"

View File

@ -63,10 +63,12 @@ const
# Start pruning after LMP_DEPTH_MULTIPLIER * depth
# moves have been analyzed
LMP_DEPTH_MULTIPLIER {.used.} = 10
LMP_DEPTH_MULTIPLIER {.used.} = 8
# Only prune when depth is <= this
# value
LMP_DEPTH_THRESHOLD {.used.} = 2
LMP_DEPTH_THRESHOLD {.used.} = 1
ASPIRATION_WINDOW_DEPTH_THRESHOLD = 5
NUM_KILLERS* = 2
MAX_DEPTH* = 255
@ -85,7 +87,7 @@ const
KILLERS_OFFSET = 500_000
HISTORY_OFFSET = 400_000
QUIET_OFFSET = 300_000
BAD_SEE_OFFSET = 200_000
BAD_SEE_OFFSET = 100_000
func computeLMRTable: array[MAX_DEPTH, array[218, int]] {.compileTime.} =
@ -187,75 +189,37 @@ proc getEstimatedMoveScore(self: SearchManager, move: Move, ply: int): int =
if move.isTactical():
let seeScore = self.board.position.see(move)
when not defined(SEE2):
# We want to prioritize good captures (see > 0), but if the capture
# is bad then at least we sort it with MVVLVA
if seeScore < 0 and (move.isCapture() or move.isEnPassant()):
# Implementation of MVVLVA: Most Valuable Victim Least Valuable Aggressor.
# We prioritize moves that capture the most valuable pieces, and as a
# second goal we want to use our least valuable pieces to do so (this
# is why we multiply the score of the captured piece by a constant, to give
# it priority)
let capturedScore = MVV_LVA_MULTIPLIER * self.board.position.getPieceScore(move.targetSquare)
result = capturedScore - self.board.position.getPieceScore(move.startSquare)
# If the capture is also a promotion we want to give it an even bigger bonus
if move.isPromotion():
var piece: Piece
case move.getPromotionType():
of PromoteToBishop:
piece = Piece(kind: Bishop, color: sideToMove)
of PromoteToKnight:
piece = Piece(kind: Knight, color: sideToMove)
of PromoteToRook:
piece = Piece(kind: Rook, color: sideToMove)
of PromoteToQueen:
piece = Piece(kind: Queen, color: sideToMove)
else:
discard # Unreachable
result += PROMOTION_MULTIPLIER * self.board.position.getPieceScore(piece, move.targetSquare)
# We want to prioritize good captures (see > 0), but if the capture
# is bad then at least we sort it with MVVLVA
if seeScore < 0 and (move.isCapture() or move.isEnPassant()):
# Implementation of MVVLVA: Most Valuable Victim Least Valuable Aggressor.
# We prioritize moves that capture the most valuable pieces, and as a
# second goal we want to use our least valuable pieces to do so (this
# is why we multiply the score of the captured piece by a constant, to give
# it priority)
let capturedScore = MVV_LVA_MULTIPLIER * self.board.position.getPieceScore(move.targetSquare)
result = capturedScore - self.board.position.getPieceScore(move.startSquare)
# If the capture is also a promotion we want to give it an even bigger bonus
if move.isPromotion():
var piece: Piece
case move.getPromotionType():
of PromoteToBishop:
piece = Piece(kind: Bishop, color: sideToMove)
of PromoteToKnight:
piece = Piece(kind: Knight, color: sideToMove)
of PromoteToRook:
piece = Piece(kind: Rook, color: sideToMove)
of PromoteToQueen:
piece = Piece(kind: Queen, color: sideToMove)
else:
discard # Unreachable
result += PROMOTION_MULTIPLIER * self.board.position.getPieceScore(piece, move.targetSquare)
return result + BAD_SEE_OFFSET
else:
# If the capture is good then we just use the SEE score + the offset
return seeScore + GOOD_SEE_OFFSET
return result + BAD_SEE_OFFSET
else:
# We want to prioritize good captures (see > 0) and then sort
# them with MVVLVA. Of course, good captures will be placed
# before bad captures regardless of what MVVLVA has to say
# about them
if move.isCapture() or move.isEnPassant():
# Implementation of MVVLVA: Most Valuable Victim Least Valuable Aggressor.
# We prioritize moves that capture the most valuable pieces, and as a
# second goal we want to use our least valuable pieces to do so (this
# is why we multiply the score of the captured piece by a constant, to give
# it priority)
let capturedScore = MVV_LVA_MULTIPLIER * self.board.position.getPieceScore(move.targetSquare)
result = capturedScore - self.board.position.getPieceScore(move.startSquare)
# If the capture is also a promotion we want to give it an even bigger bonus
if move.isPromotion():
var piece: Piece
case move.getPromotionType():
of PromoteToBishop:
piece = Piece(kind: Bishop, color: sideToMove)
of PromoteToKnight:
piece = Piece(kind: Knight, color: sideToMove)
of PromoteToRook:
piece = Piece(kind: Rook, color: sideToMove)
of PromoteToQueen:
piece = Piece(kind: Queen, color: sideToMove)
else:
discard # Unreachable
result += PROMOTION_MULTIPLIER * self.board.position.getPieceScore(piece, move.targetSquare)
result += seeScore
# We use >= instead of > because promotions are also scored by their
# SEE here: with a strict comparison, a promotion that loses no material
# (SEE == 0) would get lumped in with the bad captures, behind the quiets
if seeScore >= 0:
return result + GOOD_SEE_OFFSET
else:
return result + BAD_SEE_OFFSET
# If the capture is good then we just use the SEE score + the offset
return seeScore + GOOD_SEE_OFFSET
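
To make the MVVLVA ordering described in the comments above concrete, here is a small self-contained Nim sketch; the piece values and the multiplier are illustrative placeholders, not Nimfish's MVV_LVA_MULTIPLIER or getPieceScore values:

import std/algorithm

const
  MvvLvaMultiplier = 10
  # Rough centipawn values for pawn, knight, bishop, rook, queen
  # (placeholders for this sketch only)
  PieceValue = [100, 320, 330, 500, 900]

type Capture = object
  attacker: int   # index into PieceValue
  victim: int     # index into PieceValue

func mvvLvaScore(c: Capture): int =
  ## Most Valuable Victim, Least Valuable Attacker: weight the victim
  ## heavily so it dominates the ordering, then prefer the cheapest
  ## attacker as a tie-breaker
  MvvLvaMultiplier * PieceValue[c.victim] - PieceValue[c.attacker]

when isMainModule:
  var captures = @[
    Capture(attacker: 4, victim: 0),   # queen takes pawn
    Capture(attacker: 0, victim: 4),   # pawn takes queen
    Capture(attacker: 1, victim: 3)    # knight takes rook
  ]
  # Highest MVVLVA score first: PxQ, then NxR, then QxP
  captures.sort(proc (a, b: Capture): int = cmp(mvvLvaScore(b), mvvLvaScore(a)))
  for c in captures:
    echo c.attacker, " takes ", c.victim, " -> ", mvvLvaScore(c)
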
if move.isQuiet():
# History heuristic bonus
@ -263,8 +227,7 @@ proc getEstimatedMoveScore(self: SearchManager, move: Move, ply: int): int =
if score != 0:
return score + HISTORY_OFFSET
# We use an explicit offset for quiet moves because
# we want to place bad captures (SEE < BAD_CAPTURE_THRESHOLD)
# behind them
# we want to place bad captures behind them
return QUIET_OFFSET
@ -289,7 +252,11 @@ proc log(self: SearchManager, depth: int) =
elapsedMsec = self.elapsedTime().uint64
nps = 1000 * (self.nodeCount div max(elapsedMsec, 1))
var logMsg = &"info depth {depth} seldepth {self.selectiveDepth} time {elapsedMsec} nodes {self.nodeCount} nps {nps}"
logMsg &= &" hashfull {self.transpositionTable[].getFillEstimate()} score cp {self.bestRootScore}"
logMsg &= &" hashfull {self.transpositionTable[].getFillEstimate()}"
if self.bestRootScore >= -(mateScore() + MAX_DEPTH):
logMsg &= &" score mate {((-mateScore() - self.bestRootScore + 1) div 2)}"
else:
logMsg &= &" score cp {self.bestRootScore}"
if self.pvMoves[0][0] != nullMove():
logMsg &= " pv "
for move in self.pvMoves[0]:
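
The new mate reporting above turns an internal "mate in N plies" score into the full-move count that the UCI "score mate" field expects. Here is a standalone sketch of that arithmetic; MateValue stands in for -mateScore() and is illustrative, not Nimfish's actual constant:

const
  MateValue = 32_000   # stand-in for -mateScore(); illustrative only
  MaxDepth = 255

func isMateScore(score: int): bool =
  ## Scores this close to the mate bound encode a forced mate,
  ## with the distance to mate folded into the score itself
  abs(score) >= MateValue - MaxDepth

func uciMateMoves(score: int): int =
  ## Convert "mate in N plies" into full moves, rounding up;
  ## a negative result means the side to move is getting mated
  let plies = MateValue - abs(score)
  let moves = (plies + 1) div 2
  result = if score > 0: moves else: -moves

when isMainModule:
  echo isMateScore(150)                 # false: an ordinary score, reported as "score cp"
  echo uciMateMoves(MateValue - 5)      # mating in 5 plies -> 3
  echo uciMateMoves(-(MateValue - 4))   # mated in 4 plies  -> -2
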
@ -355,20 +322,6 @@ proc qsearch(self: var SearchManager, ply: int, alpha, beta: Score): Score =
## exist
if self.shouldStop() or ply == MAX_DEPTH or self.board.isDrawn():
return Score(0)
when defined(QTT):
# The gains from this are dubious at best, so it's disabled for now
if ply > 0:
let query = self.transpositionTable[].get(self.board.position.zobristKey)
if query.success:
case query.entry.flag:
of Exact:
return query.entry.score
of LowerBound:
if query.entry.score >= beta:
return query.entry.score
of UpperBound:
if query.entry.score <= alpha:
return query.entry.score
let score = self.board.position.evaluate()
if score >= beta:
# Stand-pat evaluation
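
The SEE-based pruning that gives this commit its title is not visible in the hunk above, so the following is only a hedged sketch of the general idea behind QSEE pruning: inside quiescence search, captures whose static exchange evaluation says they lose material are skipped without being searched. The move representation and SEE scores below are simplified stand-ins, not Nimfish's API:

type ScoredCapture = object
  name: string     # human-readable label for this sketch
  seeScore: int    # result of a static exchange evaluation, in centipawns

proc filterByQsee(captures: seq[ScoredCapture]): seq[string] =
  ## Keep only the captures quiescence search would bother looking at:
  ## anything SEE considers a losing exchange is pruned outright
  for capture in captures:
    if capture.seeScore < 0:
      continue   # e.g. taking a defended pawn with the queen
    result.add(capture.name)

when isMainModule:
  let captures = @[
    ScoredCapture(name: "QxP (pawn is defended)", seeScore: -800),
    ScoredCapture(name: "PxN", seeScore: 220),
    ScoredCapture(name: "RxR (even trade)", seeScore: 0)
  ]
  echo filterByQsee(captures)   # @["PxN", "RxR (even trade)"]
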
@ -440,24 +393,27 @@ proc search(self: var SearchManager, depth, ply: int, alpha, beta: Score, isPV:
# Probe the transposition table to see if we can cause an early cutoff
let query = self.transpositionTable[].get(self.board.position.zobristKey, depth.uint8)
if query.success:
var score = query.entry.score
if abs(score) >= -(mateScore() + MAX_DEPTH):
score -= int16(score.int.sgn() * ply)
case query.entry.flag:
of Exact:
return query.entry.score
return score
of LowerBound:
if query.entry.score >= beta:
return query.entry.score
if score >= beta:
return score
of UpperBound:
if query.entry.score <= alpha:
return query.entry.score
if score <= alpha:
return score
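
Both transposition table probes touched by this commit (the one above and the disabled QTT one in qsearch) follow the same bound logic. A compact sketch of that cutoff rule, with a simplified entry type:

type
  TTFlag = enum Exact, LowerBound, UpperBound
  TTEntry = object
    score: int
    flag: TTFlag

func ttCutoff(entry: TTEntry, alpha, beta: int): (bool, int) =
  ## An exact score can always be returned; a lower bound only causes
  ## a cutoff if it already beats beta, an upper bound only if it
  ## cannot even reach alpha
  case entry.flag
  of Exact:
    return (true, entry.score)
  of LowerBound:
    return (entry.score >= beta, entry.score)
  of UpperBound:
    return (entry.score <= alpha, entry.score)

when isMainModule:
  echo ttCutoff(TTEntry(score: 120, flag: LowerBound), alpha = 0, beta = 100)   # (true, 120)
  echo ttCutoff(TTEntry(score: 50, flag: UpperBound), alpha = 0, beta = 100)    # (false, 50)
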
if not isPV and not self.board.inCheck() and depth <= RFP_DEPTH_LIMIT and staticEval - RFP_EVAL_THRESHOLD * depth >= beta:
## Reverse futility pruning: if the side to move has a significant advantage
## in the current position and is not in check, return the position's static
## evaluation to encourage the engine to deal with any potential threats from
## the opponent. Since this optimization technique is not sound, we limit the
## depth at which it can trigger for safety purposes (it is also the reason
## why the "advantage" threshold scales with depth: the deeper we go, the more
## careful we want to be with our estimate for how much of an advantage we may
## or may not have)
# Reverse futility pruning: if the side to move has a significant advantage
# in the current position and is not in check, return the position's static
# evaluation to encourage the engine to deal with any potential threats from
# the opponent. Since this optimization technique is not sound, we limit the
# depth at which it can trigger for safety purposes (it is also the reason
# why the "advantage" threshold scales with depth: the deeper we go, the more
# careful we want to be with our estimate for how much of an advantage we may
# or may not have)
return staticEval
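
As a standalone illustration of the reverse futility pruning condition explained above (the margin and depth limit below are illustrative, not Nimfish's RFP_EVAL_THRESHOLD and RFP_DEPTH_LIMIT):

const
  RfpDepthLimit = 6     # illustrative depth cap
  RfpMargin = 100       # illustrative margin, in centipawns per ply of depth

func canRfpPrune(staticEval, beta, depth: int; isPV, inCheck: bool): bool =
  ## The deeper we still have to search, the bigger the advantage we
  ## demand before trusting the static evaluation enough to cut off
  not isPV and not inCheck and depth <= RfpDepthLimit and
    staticEval - RfpMargin * depth >= beta

when isMainModule:
  echo canRfpPrune(450, 100, 3, isPV = false, inCheck = false)   # true: 450 - 300 >= 100
  echo canRfpPrune(250, 100, 3, isPV = false, inCheck = false)   # false: 250 - 300 < 100
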
if not isPV and depth > NMP_DEPTH_THRESHOLD and self.board.canNullMove() and staticEval >= beta:
# Null move pruning: it is reasonable to assume that
@ -510,8 +466,10 @@ proc search(self: var SearchManager, depth, ply: int, alpha, beta: Score, isPV:
return mateScore() + Score(ply)
# Stalemate
return Score(0)
var bestScore = lowestEval()
var alpha = alpha
var
bestScore = lowestEval()
alpha = alpha
failedLow: seq[tuple[move: Move, score: Score]] = @[]
let
sideToMove = self.board.position.sideToMove
for i, move in moves:
@ -519,24 +477,22 @@ proc search(self: var SearchManager, depth, ply: int, alpha, beta: Score, isPV:
let isNotMated {.used.} = bestScore > mateScore() + MAX_DEPTH
if ply == 0 and self.searchMoves.len() > 0 and move notin self.searchMoves:
continue
self.board.doMove(move)
when defined(FP):
if not isPV and move.isQuiet() and depth <= FP_DEPTH_LIMIT and staticEval + FP_EVAL_MARGIN * depth < alpha and isNotMated and not self.board.inCheck():
if not isPV and move.isQuiet() and depth <= FP_DEPTH_LIMIT and staticEval + FP_EVAL_MARGIN * depth < alpha and isNotMated:
# Futility pruning: If a move cannot meaningfully improve alpha, prune it from the
# tree. Much like RFP, this is an unsound optimization (and a riskier one at that,
# apparently), so our depth limit and evaluation margins are very conservative
# compared to RFP. Also, we need to make sure the best score is not a mate score, or
# we'd risk pruning moves that evade checkmate
self.board.unmakeMove()
break
when defined(LMP):
if not isPV and move.isQuiet() and depth <= LMP_DEPTH_THRESHOLD and i >= LMP_DEPTH_MULTIPLIER * depth and isNotMated and not self.board.inCheck():
if not isPV and not move.isTactical() and depth <= LMP_DEPTH_THRESHOLD and i >= LMP_DEPTH_MULTIPLIER * depth and isNotMated:
# Late move pruning: prune quiets when we've analyzed enough moves. This optimization is also
# unsound and so it's constrained at fairly low depths. We also want to prune less moves the deeper
# unsound and so it's constrained to fairly low depths. We also want to prune less moves the deeper
# we go (hence the multiplier). Since the optimization is unsound, we also want to make sure we don't
# accidentally miss a move that staves off checkmate
self.board.unmakeMove()
break
self.board.doMove(move)
let
extension = self.getSearchExtension(move)
reduction = self.getReduction(move, depth, ply, i, isPV)
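
The futility pruning and late move pruning conditions described in the comments above boil down to two cheap predicates. A sketch with illustrative limits (not Nimfish's tuned FP_DEPTH_LIMIT, FP_EVAL_MARGIN, LMP_DEPTH_THRESHOLD or LMP_DEPTH_MULTIPLIER):

const
  FpDepthLimit = 4          # illustrative
  FpMargin = 150            # illustrative, in centipawns per ply of depth
  LmpDepthThreshold = 1
  LmpMultiplier = 8

func canFutilityPrune(staticEval, alpha, depth: int;
                      isPV, isQuiet, isNotMated: bool): bool =
  ## Skip quiet moves that have no realistic chance of raising alpha,
  ## but never while a mate score is on the table
  not isPV and isQuiet and depth <= FpDepthLimit and
    staticEval + FpMargin * depth < alpha and isNotMated

func canLateMovePrune(moveIndex, depth: int;
                      isPV, isTactical, isNotMated: bool): bool =
  ## Stop searching non-tactical moves once enough of them have been
  ## tried at low depth; the threshold scales with depth so deeper
  ## nodes prune fewer moves
  not isPV and not isTactical and depth <= LmpDepthThreshold and
    moveIndex >= LmpMultiplier * depth and isNotMated

when isMainModule:
  echo canFutilityPrune(-400, 50, 2, isPV = false, isQuiet = true, isNotMated = true)   # true
  echo canLateMovePrune(10, 1, isPV = false, isTactical = false, isNotMated = true)     # true
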
@ -586,10 +542,18 @@ proc search(self: var SearchManager, depth, ply: int, alpha, beta: Score, isPV:
# quadratic bonus wrt. depth is usually the value that is used (though some
# engines, namely Stockfish, use a linear bonus. Maybe we can investigate this)
self.storeHistoryScore(sideToMove, move, score, depth * depth)
when defined(historyPenalty):
if bestMove != nullMove():
# Punish bad quiets
for (badMove, badScore) in failedLow:
self.storeHistoryScore(sideToMove, badMove, badScore, -(depth * depth))
# Killer move heuristic: store quiets that caused a beta cutoff according to the distance from
# root that they occurred at, as they might be good refutations for future moves from the opponent.
# Elo gains: 33.5 +/- 19.3
self.storeKillerMove(ply, move)
# This move was too good for us, opponent will not search it
break
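
The history bonus and the new malus described above both go through storeHistoryScore with a quadratic depth term: positive for the move that caused the cutoff, negative for the quiets that failed low before it. A bare-bones sketch with a plain integer table (the saturation bound is illustrative and may not match Nimfish's storeHistoryScore):

type SimpleHistory = array[2, array[64, array[64, int]]]   # [color][from][to]

const MaxHistory = 16_000   # illustrative saturation bound

proc updateHistory(table: var SimpleHistory;
                   color, fromSq, toSq, depth: int; cutoff: bool) =
  ## Quiets that cause a beta cutoff earn a bonus quadratic in depth;
  ## quiets searched before them that failed low get the same amount
  ## as a penalty (the "malus")
  let delta = if cutoff: depth * depth else: -(depth * depth)
  table[color][fromSq][toSq] = clamp(table[color][fromSq][toSq] + delta,
                                     -MaxHistory, MaxHistory)

when isMainModule:
  var history: SimpleHistory
  updateHistory(history, 0, 12, 28, depth = 6, cutoff = true)    # reward the cutoff quiet
  updateHistory(history, 0, 6, 21, depth = 6, cutoff = false)    # punish an earlier failed-low quiet
  echo history[0][12][28], " ", history[0][6][21]                # 36 -36
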
if score > alpha:
@ -607,9 +571,25 @@ proc search(self: var SearchManager, depth, ply: int, alpha, beta: Score, isPV:
break
self.pvMoves[ply][i + 1] = pv
self.pvMoves[ply][0] = move
else:
when defined(historyPenalty):
# History penalty: apply a penalty to moves that fail to raise alpha
# because, let's be honest, they kinda suck. We only apply this penalty
# in the event of a beta cutoff, because it doesn't really make sense to
# look at moves we know are bad if there are others further down the list
# that lead to a fail high
failedLow.add((move, score))
else:
discard
# Store the best move in the transposition table so we can find it later
let nodeType = if bestScore >= beta: LowerBound elif bestScore <= alpha: UpperBound else: Exact
self.transpositionTable[].store(depth.uint8, bestScore, self.board.position.zobristKey, bestMove, nodeType)
var ttScore = bestScore
# We do this because we want to make sure that when we do a TT cutoff and it's
# a mate score, we pick the shortest possible mate line if we're mating and the
# longest possible one if we're being mated. We revert this when probing the TT
if abs(ttScore) >= -(mateScore() + MAX_DEPTH):
ttScore += Score(ttScore.int.sgn()) * Score(ply)
self.transpositionTable[].store(depth.uint8, ttScore, self.board.position.zobristKey, bestMove, nodeType)
return bestScore
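
The ply adjustment applied when storing the score above, and undone when probing earlier in this hunk, can be summarised in isolation like this (MateValue again stands in for -mateScore() and is illustrative):

import std/math   # for sgn

const
  MateValue = 32_000   # stand-in for -mateScore()
  MaxDepth = 255

func isMateScore(score: int): bool =
  abs(score) >= MateValue - MaxDepth

func scoreToTT(score, ply: int): int =
  ## Before storing, rebase a mate score from "plies from the root"
  ## to "plies from this node", so the entry is reusable at any ply
  result = score
  if isMateScore(score):
    result += sgn(score) * ply

func scoreFromTT(score, ply: int): int =
  ## When probing, rebase it back onto the current distance from the
  ## root, which keeps the engine preferring the shortest mate
  result = score
  if isMateScore(score):
    result -= sgn(score) * ply

when isMainModule:
  let stored = scoreToTT(MateValue - 7, ply = 3)   # mate found 4 plies below a node at ply 3
  echo scoreFromTT(stored, ply = 5)                # reused two plies deeper: MateValue - 9
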
@ -678,9 +658,9 @@ proc findBestMove*(self: var SearchManager, timeRemaining, increment: int64, max
maxDepth = 30
self.searchFlag[].store(true)
# Iterative deepening loop
var score {.used.} = Score(0)
var score = Score(0)
for depth in 1..min(MAX_DEPTH, maxDepth):
if depth < 5 or not defined(aspirationWindow):
if depth < ASPIRATION_WINDOW_DEPTH_THRESHOLD or not defined(aspirationWindow):
score = self.search(depth, 0, lowestEval(), highestEval(), true)
else:
score = self.aspirationWindow(score, depth)
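
aspirationWindow itself is not part of this diff, so the following is only a generic sketch of the technique the new ASPIRATION_WINDOW_DEPTH_THRESHOLD constant gates: search with a narrow window around the previous iteration's score and widen it whenever the result falls outside. The window size, bounds and callback are all illustrative:

const
  InitialWindow = 30       # illustrative half-width, in centipawns
  Infinity = 32_001        # illustrative bound larger than any real score

proc aspirationSearch(prevScore: int,
                      search: proc (alpha, beta: int): int): int =
  ## Re-search around prevScore, doubling the window whenever the
  ## search fails low or high, until the score lands inside it
  var
    delta = InitialWindow
    alpha = max(prevScore - delta, -Infinity)
    beta = min(prevScore + delta, Infinity)
  while true:
    result = search(alpha, beta)
    if result <= alpha:          # fail low: widen the lower bound and retry
      alpha = max(alpha - delta, -Infinity)
    elif result >= beta:         # fail high: widen the upper bound and retry
      beta = min(beta + delta, Infinity)
    else:
      return                     # the score fits the window: keep it
    delta *= 2

when isMainModule:
  # A fake "search" that always returns 120 regardless of the window
  let fakeSearch = proc (alpha, beta: int): int = 120
  echo aspirationSearch(prevScore = 40, search = fakeSearch)   # 120, after widening twice
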

View File

@ -80,6 +80,20 @@ proc newTranspositionTable*(size: uint64): TTable =
result.size = numEntries
func clear*(self: var TTable) =
## Clears the transposition table
## without releasing the memory
## associated with it
self.data.setLen(0)
func destroy*(self: var TTable) =
## Permanently and irreversibly
## destroys the transposition table
self.data = @[]
self.size = 0
func getIndex(self: TTable, key: ZobristKey): uint64 =
## Retrieves the index of the given
## zobrist key in our transposition table

View File

@ -21,6 +21,7 @@ import std/atomics
import board
import movegen
import search
import eval
import transpositions
@ -346,16 +347,13 @@ proc startUCISession* =
session = UCISession(hashTableSize: 64, position: startpos())
# God forbid we try to use atomic ARC like it was intended. Raw pointers
# it is then... sigh
session.transpositionTable = cast[ptr TTable](alloc0(sizeof(TTable)))
session.stopFlag = cast[ptr Atomic[bool]](alloc0(sizeof(Atomic[bool])))
session.searchFlag = cast[ptr Atomic[bool]](alloc0(sizeof(Atomic[bool])))
session.transpositionTable[] = newTranspositionTable(session.hashTableSize * 1024 * 1024)
session.transpositionTable = cast[ptr TTable](alloc0(sizeof(TTable)))
session.historyTable = cast[ptr HistoryTable](alloc0(sizeof(HistoryTable)))
session.killerMoves = cast[ptr KillersTable](alloc0(sizeof(KillersTable)))
# Initialize killer move array
for i in 0..<MAX_DEPTH:
for j in 0..<NUM_KILLERS:
session.killerMoves[i][j] = nullMove()
session.position = startpos()
session.history = @[]
# Fun fact, nim doesn't collect the memory of thread vars. Another stupid fucking design pitfall
# of nim's AWESOME threading model. Someone is getting a pipebomb in their mailbox about this, mark
# my fucking words. (for legal purposes THAT IS A JOKE). See https://github.com/nim-lang/Nim/issues/23165
@ -386,14 +384,31 @@ proc startUCISession* =
of Debug:
session.debug = cmd.on
of NewGame:
session.position = startpos()
session.history = @[]
of Go:
# Scale our history coefficients
if session.transpositionTable[].size() == 0:
if session.debug:
echo &"info string allocating new TT of size {session.hashTableSize} MiB"
session.transpositionTable[] = newTranspositionTable(session.hashTableSize * 1024 * 1024)
else:
if session.debug:
echo &"info string clearing out TT of size {session.hashTableSize} MiB"
session.transpositionTable[].clear()
# (Re-)Initialize history table
# array[PieceColor.White..PieceColor.Black, array[Square(0)..Square(63), array[Square(0)..Square(63), Score]]]
for color in PieceColor.White..PieceColor.Black:
for source in Square(0)..Square(63):
for target in Square(0)..Square(63):
session.historyTable[][color][source][target] = session.historyTable[][color][source][target] div 2
for i in Square(0)..Square(63):
for j in Square(0)..Square(63):
session.historyTable[color][i][j] = Score(0)
# (Re-)Initialize killer move table
for i in 0..<MAX_DEPTH:
for j in 0..<NUM_KILLERS:
session.killerMoves[i][j] = nullMove()
of Go:
when not defined(historyPenalty):
# Scale our history coefficients
for color in PieceColor.White..PieceColor.Black:
for source in Square(0)..Square(63):
for target in Square(0)..Square(63):
session.historyTable[color][source][target] = session.historyTable[][color][source][target] div 2
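
The halving above is a simple form of history aging: between searches the accumulated scores are decayed so stale information loses weight while recent results still count. A tiny standalone version of the same idea:

proc ageHistory(table: var seq[int]) =
  ## Decay every history entry towards zero by halving it
  for entry in table.mitems:
    entry = entry div 2

when isMainModule:
  var history = @[400, -37, 1]
  ageHistory(history)
  echo history   # @[200, -18, 0]
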
if searchThread.running:
joinThread(searchThread)
createThread(searchThread, bestMove, (session, cmd))
@ -412,13 +427,19 @@ proc startUCISession* =
continue
case cmd.name:
of "Hash":
if session.transpositionTable[].size() > 0:
if session.debug:
echo &"info string destroying old TT of size {session.hashTableSize} MiB"
session.transpositionTable[].destroy()
session.hashTableSize = cmd.value.parseBiggestUInt()
if session.debug:
echo &"info string set TT hash table size to {session.hashTableSize} MiB"
session.transpositionTable[] = newTranspositionTable(session.hashTableSize * 1024 * 1024)
else:
discard
of Position:
# Due to the way the whole thing is designed, the
# position is actually set when the command is parsed
# rather than when it is processed here
discard
else:
discard