From db76565aa84e55f26795b64785a3c6b102926fd1 Mon Sep 17 00:00:00 2001 From: Mattia Giambirtone Date: Sun, 12 May 2024 01:02:47 +0200 Subject: [PATCH] Implement QSEE pruning (gains 52.9 +/- 25.2). Initial work on history malus. Reset search state after ucinewgame + minor other fixes --- Chess/nim.cfg | 2 +- Chess/nimfish.nimble | 2 +- Chess/nimfish/nimfishpkg/search.nim | 202 +++++++++----------- Chess/nimfish/nimfishpkg/transpositions.nim | 14 ++ Chess/nimfish/nimfishpkg/uci.nim | 49 +++-- 5 files changed, 142 insertions(+), 127 deletions(-) diff --git a/Chess/nim.cfg b/Chess/nim.cfg index 59fc441..d3feca2 100644 --- a/Chess/nim.cfg +++ b/Chess/nim.cfg @@ -2,6 +2,6 @@ -o:"bin/nimfish" -d:danger --passL:"-flto -lmimalloc" ---passC:"-flto -march=native -mtune=native" +--passC:"-flto -march=native -mtune=native -Ofast" -d:useMalloc --mm:atomicArc diff --git a/Chess/nimfish.nimble b/Chess/nimfish.nimble index 7c12837..c38d045 100644 --- a/Chess/nimfish.nimble +++ b/Chess/nimfish.nimble @@ -15,7 +15,7 @@ bin = @["nimfish"] requires "nim >= 2.0.4" requires "jsony >= 1.1.5" requires "nint128 >= 0.3.3" -requires "threading >= 0.2.0" + task test, "Runs the test suite": exec "python tests/suite.py -d 6 -b -p -s" diff --git a/Chess/nimfish/nimfishpkg/search.nim b/Chess/nimfish/nimfishpkg/search.nim index 7a84765..0c60ca2 100644 --- a/Chess/nimfish/nimfishpkg/search.nim +++ b/Chess/nimfish/nimfishpkg/search.nim @@ -63,10 +63,12 @@ const # Start pruning after LMP_DEPTH_MULTIPLIER * depth # moves have been analyzed - LMP_DEPTH_MULTIPLIER {.used.} = 10 + LMP_DEPTH_MULTIPLIER {.used.} = 8 # Only prune when depth is <= this # value - LMP_DEPTH_THRESHOLD {.used.} = 2 + LMP_DEPTH_THRESHOLD {.used.} = 1 + + ASPIRATION_WINDOW_DEPTH_THRESHOLD = 5 NUM_KILLERS* = 2 MAX_DEPTH* = 255 @@ -85,7 +87,7 @@ const KILLERS_OFFSET = 500_000 HISTORY_OFFSET = 400_000 QUIET_OFFSET = 300_000 - BAD_SEE_OFFSET = 200_000 + BAD_SEE_OFFSET = 100_000 func computeLMRTable: array[MAX_DEPTH, array[218, int]] {.compileTime.} = @@ -187,75 +189,37 @@ proc getEstimatedMoveScore(self: SearchManager, move: Move, ply: int): int = if move.isTactical(): let seeScore = self.board.position.see(move) - when not defined(SEE2): - # We want to prioritize good captures (see > 0), but if the capture - # is bad then at least we sort it with MVVLVA - if seeScore < 0 and (move.isCapture() or move.isEnPassant()): - # Implementation of MVVLVA: Most Valuable Victim Least Valuable Aggressor. 
- # We prioritize moves that capture the most valuable pieces, and as a - # second goal we want to use our least valuable pieces to do so (this - # is why we multiply the score of the captured piece by a constant, to give - # it priority) - let capturedScore = MVV_LVA_MULTIPLIER * self.board.position.getPieceScore(move.targetSquare) - result = capturedScore - self.board.position.getPieceScore(move.startSquare) - - # If the capture is also a promotion we want to give it an even bigger bonus - if move.isPromotion(): - var piece: Piece - case move.getPromotionType(): - of PromoteToBishop: - piece = Piece(kind: Bishop, color: sideToMove) - of PromoteToKnight: - piece = Piece(kind: Knight, color: sideToMove) - of PromoteToRook: - piece = Piece(kind: Rook, color: sideToMove) - of PromoteToQueen: - piece = Piece(kind: Queen, color: sideToMove) - else: - discard # Unreachable - result += PROMOTION_MULTIPLIER * self.board.position.getPieceScore(piece, move.targetSquare) + # We want to prioritize good captures (see > 0), but if the capture + # is bad then at least we sort it with MVVLVA + if seeScore < 0 and (move.isCapture() or move.isEnPassant()): + # Implementation of MVVLVA: Most Valuable Victim Least Valuable Aggressor. + # We prioritize moves that capture the most valuable pieces, and as a + # second goal we want to use our least valuable pieces to do so (this + # is why we multiply the score of the captured piece by a constant, to give + # it priority) + let capturedScore = MVV_LVA_MULTIPLIER * self.board.position.getPieceScore(move.targetSquare) + result = capturedScore - self.board.position.getPieceScore(move.startSquare) + + # If the capture is also a promotion we want to give it an even bigger bonus + if move.isPromotion(): + var piece: Piece + case move.getPromotionType(): + of PromoteToBishop: + piece = Piece(kind: Bishop, color: sideToMove) + of PromoteToKnight: + piece = Piece(kind: Knight, color: sideToMove) + of PromoteToRook: + piece = Piece(kind: Rook, color: sideToMove) + of PromoteToQueen: + piece = Piece(kind: Queen, color: sideToMove) + else: + discard # Unreachable + result += PROMOTION_MULTIPLIER * self.board.position.getPieceScore(piece, move.targetSquare) - return result + BAD_SEE_OFFSET - else: - # If the capture is good then we just use the SEE score + the offset - return seeScore + GOOD_SEE_OFFSET + return result + BAD_SEE_OFFSET else: - # We want to prioritize good captures (see > 0) and then sort - # them with MVVLVA. Of course, good captures will be placed - # before bad captures regardless of what MVVLVA has to say - # about them - if move.isCapture() or move.isEnPassant(): - # Implementation of MVVLVA: Most Valuable Victim Least Valuable Aggressor. 
- # We prioritize moves that capture the most valuable pieces, and as a - # second goal we want to use our least valuable pieces to do so (this - # is why we multiply the score of the captured piece by a constant, to give - # it priority) - let capturedScore = MVV_LVA_MULTIPLIER * self.board.position.getPieceScore(move.targetSquare) - result = capturedScore - self.board.position.getPieceScore(move.startSquare) - - # If the capture is also a promotion we want to give it an even bigger bonus - if move.isPromotion(): - var piece: Piece - case move.getPromotionType(): - of PromoteToBishop: - piece = Piece(kind: Bishop, color: sideToMove) - of PromoteToKnight: - piece = Piece(kind: Knight, color: sideToMove) - of PromoteToRook: - piece = Piece(kind: Rook, color: sideToMove) - of PromoteToQueen: - piece = Piece(kind: Queen, color: sideToMove) - else: - discard # Unreachable - result += PROMOTION_MULTIPLIER * self.board.position.getPieceScore(piece, move.targetSquare) - result += seeScore - # We use >= instead of > because we're evaluating promotions by - # their SEE scores as well, which would move them all to the back - # in cases where a promotion ends up with no material loss - if seeScore >= 0: - return result + GOOD_SEE_OFFSET - else: - return result + BAD_SEE_OFFSET + # If the capture is good then we just use the SEE score + the offset + return seeScore + GOOD_SEE_OFFSET if move.isQuiet(): # History heuristic bonus @@ -263,8 +227,7 @@ proc getEstimatedMoveScore(self: SearchManager, move: Move, ply: int): int = if score != 0: return score + HISTORY_OFFSET # We use an explicit offset for quiet moves because - # we want to place bad captures (SEE < BAD_CAPTURE_THRESHOLD) - # behind them + # we want to place bad captures behind them return QUIET_OFFSET @@ -289,7 +252,11 @@ proc log(self: SearchManager, depth: int) = elapsedMsec = self.elapsedTime().uint64 nps = 1000 * (self.nodeCount div max(elapsedMsec, 1)) var logMsg = &"info depth {depth} seldepth {self.selectiveDepth} time {elapsedMsec} nodes {self.nodeCount} nps {nps}" - logMsg &= &" hashfull {self.transpositionTable[].getFillEstimate()} score cp {self.bestRootScore}" + logMsg &= &" hashfull {self.transpositionTable[].getFillEstimate()}" + if self.bestRootScore >= -(mateScore() + MAX_DEPTH): + logMsg &= &" score mate {((-mateScore() - self.bestRootScore + 1) div 2)}" + else: + logMsg &= &" score cp {self.bestRootScore}" if self.pvMoves[0][0] != nullMove(): logMsg &= " pv " for move in self.pvMoves[0]: @@ -355,20 +322,6 @@ proc qsearch(self: var SearchManager, ply: int, alpha, beta: Score): Score = ## exist if self.shouldStop() or ply == MAX_DEPTH or self.board.isDrawn(): return Score(0) - when defined(QTT): - # The gains from this are dubious at best, so it's disabled for now - if ply > 0: - let query = self.transpositionTable[].get(self.board.position.zobristKey) - if query.success: - case query.entry.flag: - of Exact: - return query.entry.score - of LowerBound: - if query.entry.score >= beta: - return query.entry.score - of UpperBound: - if query.entry.score <= alpha: - return query.entry.score let score = self.board.position.evaluate() if score >= beta: # Stand-pat evaluation @@ -440,24 +393,27 @@ proc search(self: var SearchManager, depth, ply: int, alpha, beta: Score, isPV: # Probe the transposition table to see if we can cause an early cutoff let query = self.transpositionTable[].get(self.board.position.zobristKey, depth.uint8) if query.success: + var score = query.entry.score + if abs(score) >= -(mateScore() + MAX_DEPTH): + score 
-= int16(score.int.sgn() * ply) case query.entry.flag: of Exact: - return query.entry.score + return score of LowerBound: - if query.entry.score >= beta: - return query.entry.score + if score >= beta: + return score of UpperBound: - if query.entry.score <= alpha: - return query.entry.score + if score <= alpha: + return score if not isPV and not self.board.inCheck() and depth <= RFP_DEPTH_LIMIT and staticEval - RFP_EVAL_THRESHOLD * depth >= beta: - ## Reverse futility pruning: if the side to move has a significant advantage - ## in the current position and is not in check, return the position's static - ## evaluation to encourage the engine to deal with any potential threats from - ## the opponent. Since this optimization technique is not sound, we limit the - ## depth at which it can trigger for safety purposes (it is also the reason - ## why the "advantage" threshold scales with depth: the deeper we go, the more - ## careful we want to be with our estimate for how much of an advantage we may - ## or may not have) + # Reverse futility pruning: if the side to move has a significant advantage + # in the current position and is not in check, return the position's static + # evaluation to encourage the engine to deal with any potential threats from + # the opponent. Since this optimization technique is not sound, we limit the + # depth at which it can trigger for safety purposes (it is also the reason + # why the "advantage" threshold scales with depth: the deeper we go, the more + # careful we want to be with our estimate for how much of an advantage we may + # or may not have) return staticEval if not isPV and depth > NMP_DEPTH_THRESHOLD and self.board.canNullMove() and staticEval >= beta: # Null move pruning: it is reasonable to assume that @@ -510,8 +466,10 @@ proc search(self: var SearchManager, depth, ply: int, alpha, beta: Score, isPV: return mateScore() + Score(ply) # Stalemate return Score(0) - var bestScore = lowestEval() - var alpha = alpha + var + bestScore = lowestEval() + alpha = alpha + failedLow: seq[tuple[move: Move, score: Score]] = @[] let sideToMove = self.board.position.sideToMove for i, move in moves: @@ -519,24 +477,22 @@ proc search(self: var SearchManager, depth, ply: int, alpha, beta: Score, isPV: let isNotMated {.used.} = bestScore > mateScore() + MAX_DEPTH if ply == 0 and self.searchMoves.len() > 0 and move notin self.searchMoves: continue - self.board.doMove(move) when defined(FP): - if not isPV and move.isQuiet() and depth <= FP_DEPTH_LIMIT and staticEval + FP_EVAL_MARGIN * depth < alpha and isNotMated and not self.board.inCheck(): + if not isPV and move.isQuiet() and depth <= FP_DEPTH_LIMIT and staticEval + FP_EVAL_MARGIN * depth < alpha and isNotMated: # Futility pruning: If a move cannot meaningfully improve alpha, prune it from the # tree. Much like RFP, this is an unsound optimization (and a riskier one at that, # apparently), so our depth limit and evaluation margins are very conservative # compared to RFP. Also, we need to make sure the best score is not a mate score, or # we'd risk pruning moves that evade checkmate - self.board.unmakeMove() break when defined(LMP): - if not isPV and move.isQuiet() and depth <= LMP_DEPTH_THRESHOLD and i >= LMP_DEPTH_MULTIPLIER * depth and isNotMated and not self.board.inCheck(): + if not isPV and not move.isTactical() and depth <= LMP_DEPTH_THRESHOLD and i >= LMP_DEPTH_MULTIPLIER * depth and isNotMated: # Late move pruning: prune quiets when we've analyzed enough moves. 
This optimization is also - # unsound and so it's constrained at fairly low depths. We also want to prune less moves the deeper + # unsound and so it's constrained to fairly low depths. We also want to prune less moves the deeper # we go (hence the multiplier). Since the optimization is unsound, we also want to make sure we don't # accidentally miss a move that staves off checkmate - self.board.unmakeMove() break + self.board.doMove(move) let extension = self.getSearchExtension(move) reduction = self.getReduction(move, depth, ply, i, isPV) @@ -586,10 +542,18 @@ proc search(self: var SearchManager, depth, ply: int, alpha, beta: Score, isPV: # quadratic bonus wrt. depth is usually the value that is used (though some # engines, namely Stockfish, use a linear bonus. Maybe we can investigate this) self.storeHistoryScore(sideToMove, move, score, depth * depth) + when defined(historyPenalty): + if bestMove != nullMove(): + # Punish bad quiets + for (badMove, badScore) in failedLow: + self.storeHistoryScore(sideToMove, badMove, badScore, -(depth * depth)) + # Killer move heuristic: store quiets that caused a beta cutoff according to the distance from # root that they occurred at, as they might be good refutations for future moves from the opponent. # Elo gains: 33.5 +/- 19.3 self.storeKillerMove(ply, move) + + # This move was too good for us, opponent will not search it break if score > alpha: @@ -607,9 +571,25 @@ proc search(self: var SearchManager, depth, ply: int, alpha, beta: Score, isPV: break self.pvMoves[ply][i + 1] = pv self.pvMoves[ply][0] = move + else: + when defined(historyPenalty): + # History penalty: apply a penalty to moves that fail to raise alpha + # because, let's be honest, they kinda suck. We only apply this penalty + # in the event of a beta cutoff, because it doesn't really make sense to + # look at moves we know are bad if there's other ones down the list that + # lead to a fail high + failedLow.add((move, score)) + else: + discard # Store the best move in the transposition table so we can find it later let nodeType = if bestScore >= beta: LowerBound elif bestScore <= alpha: UpperBound else: Exact - self.transpositionTable[].store(depth.uint8, bestScore, self.board.position.zobristKey, bestMove, nodeType) + var ttScore = bestScore + # We do this because we want to make sure that when we do a TT cutoff and it's + # a mate score, we pick the shortest possible mate line if we're mating and the + # longest possible one if we're being mated. 
We revert this when probing the TT + if abs(ttScore) >= -(mateScore() + MAX_DEPTH): + ttScore += Score(ttScore.int.sgn()) * Score(ply) + self.transpositionTable[].store(depth.uint8, ttScore, self.board.position.zobristKey, bestMove, nodeType) return bestScore @@ -678,9 +658,9 @@ proc findBestMove*(self: var SearchManager, timeRemaining, increment: int64, max maxDepth = 30 self.searchFlag[].store(true) # Iterative deepening loop - var score {.used.} = Score(0) + var score = Score(0) for depth in 1..min(MAX_DEPTH, maxDepth): - if depth < 5 or not defined(aspirationWindow): + if depth < ASPIRATION_WINDOW_DEPTH_THRESHOLD or not defined(aspirationWindow): score = self.search(depth, 0, lowestEval(), highestEval(), true) else: score = self.aspirationWindow(score, depth) diff --git a/Chess/nimfish/nimfishpkg/transpositions.nim b/Chess/nimfish/nimfishpkg/transpositions.nim index 6163786..c7a4cb6 100644 --- a/Chess/nimfish/nimfishpkg/transpositions.nim +++ b/Chess/nimfish/nimfishpkg/transpositions.nim @@ -80,6 +80,20 @@ proc newTranspositionTable*(size: uint64): TTable = result.size = numEntries +func clear*(self: var TTable) = + ## Clears the transposition table + ## without releasing the memory + ## associated with it + self.data.setLen(0) + + +func destroy*(self: var TTable) = + ## Permanently and irreversibly + ## destroys the transposition table + self.data = @[] + self.size = 0 + + func getIndex(self: TTable, key: ZobristKey): uint64 = ## Retrieves the index of the given ## zobrist key in our transposition table diff --git a/Chess/nimfish/nimfishpkg/uci.nim b/Chess/nimfish/nimfishpkg/uci.nim index 11c4759..a68d872 100644 --- a/Chess/nimfish/nimfishpkg/uci.nim +++ b/Chess/nimfish/nimfishpkg/uci.nim @@ -21,6 +21,7 @@ import std/atomics import board import movegen import search +import eval import transpositions @@ -346,16 +347,13 @@ proc startUCISession* = session = UCISession(hashTableSize: 64, position: startpos()) # God forbid we try to use atomic ARC like it was intended. Raw pointers # it is then... sigh - session.transpositionTable = cast[ptr TTable](alloc0(sizeof(TTable))) session.stopFlag = cast[ptr Atomic[bool]](alloc0(sizeof(Atomic[bool]))) session.searchFlag = cast[ptr Atomic[bool]](alloc0(sizeof(Atomic[bool]))) - session.transpositionTable[] = newTranspositionTable(session.hashTableSize * 1024 * 1024) + session.transpositionTable = cast[ptr TTable](alloc0(sizeof(TTable))) session.historyTable = cast[ptr HistoryTable](alloc0(sizeof(HistoryTable))) session.killerMoves = cast[ptr KillersTable](alloc0(sizeof(KillersTable))) - # Initialize killer move array - for i in 0.. 0: + if session.debug: + echo &"info string destroying old TT of size {session.hashTableSize} MiB" + session.transpositionTable[].destroy() session.hashTableSize = cmd.value.parseBiggestUInt() if session.debug: echo &"info string set TT hash table size to {session.hashTableSize} MiB" - session.transpositionTable[] = newTranspositionTable(session.hashTableSize * 1024 * 1024) else: discard of Position: + # Due to the way the whole thing is designed, the + # position is actually set when the command is parsed + # rather than when it is processed here discard else: discard
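
The commit message credits most of the reported gain to QSEE pruning, but the qsearch hunk above mostly shows the removal of the experimental QTT probe, so the pruning itself is easy to miss in the diff. Below is a minimal sketch of what SEE-based pruning in quiescence search generally looks like. The names evaluate, see, doMove and unmakeMove mirror symbols used elsewhere in the patch, while generateCaptures and the overall loop shape are illustrative assumptions rather than the engine's actual code.

# Illustrative sketch of SEE pruning in quiescence search ("QSEE pruning").
# Not the engine's implementation: generateCaptures and the exact call shapes
# are assumptions made for the sake of a self-contained example.
proc qsearchSketch(position: var Position, alpha, beta: Score): Score =
  let standPat = position.evaluate()
  if standPat >= beta:
    # Stand-pat cutoff: the static evaluation already refutes this line
    return standPat
  var alpha = alpha
  if standPat > alpha:
    alpha = standPat
  for move in position.generateCaptures():   # assumed helper: tactical moves only
    # QSEE pruning: captures that lose material according to static exchange
    # evaluation are very unlikely to raise alpha, so we skip searching them
    if position.see(move) < 0:
      continue
    position.doMove(move)
    let score = -qsearchSketch(position, -beta, -alpha)
    position.unmakeMove()
    if score >= beta:
      return score
    if score > alpha:
      alpha = score
  return alpha

The same SEE result also drives move ordering in getEstimatedMoveScore above, where losing captures are pushed behind quiets via BAD_SEE_OFFSET (now 100_000, below QUIET_OFFSET).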
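
The history malus mentioned in the commit message is implemented through the failedLow list guarded by `when defined(historyPenalty)`. A condensed skeleton of that bookkeeping is sketched below; searchMoveSketch is a hypothetical stand-in for the recursive search call, and the real code additionally checks `bestMove != nullMove()` before applying the penalty.

# Condensed sketch of the history bonus/malus bookkeeping added in this patch.
# The surrounding loop is simplified; storeHistoryScore, isQuiet and sideToMove
# follow the names used in search.nim, searchMoveSketch is an assumption.
proc historyMalusSketch(self: var SearchManager, moves: seq[Move],
                        depth: int, alpha, beta: Score) =
  var
    alpha = alpha
    failedLow: seq[tuple[move: Move, score: Score]] = @[]
  let sideToMove = self.board.position.sideToMove
  for move in moves:
    let score = self.searchMoveSketch(move)   # assumed: searches `move`, returns its score
    if score >= beta:
      if move.isQuiet():
        # Bonus for the quiet that caused the cutoff (quadratic in depth)...
        self.storeHistoryScore(sideToMove, move, score, depth * depth)
        # ...and a matching malus for every earlier move that failed low, so
        # moves that keep disappointing sink down the ordering in later searches
        for (badMove, badScore) in failedLow:
          self.storeHistoryScore(sideToMove, badMove, badScore, -(depth * depth))
      break
    if score > alpha:
      alpha = score   # the real search also updates bestMove and the PV here
    else:
      # Remember moves that failed to raise alpha; they are only punished
      # if some later move actually fails high
      failedLow.add((move, score))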
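
The ply adjustment applied to mate scores around the transposition table (added on store, subtracted again on probe) keeps mate distances node-relative, so a reused entry still reports the shortest mate when winning and the longest when being mated. Below is a standalone sketch of the two directions; the concrete values (Score as int16, mateScore() = -30_000, MAX_DEPTH = 255) are illustrative assumptions, not the engine's definitions.

import std/math   # for sgn()

type Score = int16

proc mateScore(): Score = Score(-30_000)   # assumed value, for illustration only

const MAX_DEPTH = 255

proc scoreToTT(score: Score, ply: int): Score =
  ## Make a mate score node-relative before storing it in the TT
  result = score
  if abs(result) >= -(mateScore() + MAX_DEPTH):
    # Shift the mate distance so it is measured from this node rather than
    # from the root; probing reverses the shift below
    result += Score(result.int.sgn() * ply)

proc scoreFromTT(score: Score, ply: int): Score =
  ## Undo the adjustment when probing, restoring a root-relative mate distance
  result = score
  if abs(result) >= -(mateScore() + MAX_DEPTH):
    result -= Score(result.int.sgn() * ply)

scoreToTT corresponds to the expression inlined just before transpositionTable.store in the patch, and scoreFromTT to the one applied right after a successful probe in search.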
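
The new mate reporting in `log` converts that internal encoding into UCI's move-based "score mate N". A small standalone example of the arithmetic, again with an assumed mate value:

import std/strformat

# Worked example of the mate reporting added to `log`. mateValue = -30_000 is
# an illustrative assumption standing in for the engine's mateScore().
const
  mateValue = -30_000
  maxDepth  = 255

let bestRootScore = 29_995                       # -mateValue - 5, i.e. mate in 5 plies
if bestRootScore >= -(mateValue + maxDepth):     # 29_995 >= 29_745: a mating score
  # UCI counts full moves, not plies, hence the (plies + 1) div 2
  echo &"score mate {(-mateValue - bestRootScore + 1) div 2}"   # prints: score mate 3
else:
  echo &"score cp {bestRootScore}"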