Implement QSEE pruning (gains 52.9 +/- 25.2). Initial work on history malus. Reset search state after ucinewgame + minor other fixes

Mattia Giambirtone 2024-05-12 01:02:47 +02:00
parent ec309c9d4d
commit db76565aa8
5 changed files with 142 additions and 127 deletions

View File

@ -2,6 +2,6 @@
-o:"bin/nimfish"
-d:danger
--passL:"-flto -lmimalloc"
--passC:"-flto -march=native -mtune=native"
--passC:"-flto -march=native -mtune=native -Ofast"
-d:useMalloc
--mm:atomicArc

View File

@ -15,7 +15,7 @@ bin = @["nimfish"]
requires "nim >= 2.0.4"
requires "jsony >= 1.1.5"
requires "nint128 >= 0.3.3"
requires "threading >= 0.2.0"
task test, "Runs the test suite":
exec "python tests/suite.py -d 6 -b -p -s"

View File

@ -63,10 +63,12 @@ const
# Start pruning after LMP_DEPTH_MULTIPLIER * depth
# moves have been analyzed
LMP_DEPTH_MULTIPLIER {.used.} = 10
LMP_DEPTH_MULTIPLIER {.used.} = 8
# Only prune when depth is <= this
# value
LMP_DEPTH_THRESHOLD {.used.} = 2
LMP_DEPTH_THRESHOLD {.used.} = 1
ASPIRATION_WINDOW_DEPTH_THRESHOLD = 5
NUM_KILLERS* = 2
MAX_DEPTH* = 255
@ -85,7 +87,7 @@ const
KILLERS_OFFSET = 500_000
HISTORY_OFFSET = 400_000
QUIET_OFFSET = 300_000
BAD_SEE_OFFSET = 200_000
BAD_SEE_OFFSET = 100_000
func computeLMRTable: array[MAX_DEPTH, array[218, int]] {.compileTime.} =
@ -187,75 +189,37 @@ proc getEstimatedMoveScore(self: SearchManager, move: Move, ply: int): int =
if move.isTactical():
let seeScore = self.board.position.see(move)
when not defined(SEE2):
# We want to prioritize good captures (see > 0), but if the capture
# is bad then at least we sort it with MVVLVA
if seeScore < 0 and (move.isCapture() or move.isEnPassant()):
# Implementation of MVVLVA: Most Valuable Victim Least Valuable Aggressor.
# We prioritize moves that capture the most valuable pieces, and as a
# second goal we want to use our least valuable pieces to do so (this
# is why we multiply the score of the captured piece by a constant, to give
# it priority)
let capturedScore = MVV_LVA_MULTIPLIER * self.board.position.getPieceScore(move.targetSquare)
result = capturedScore - self.board.position.getPieceScore(move.startSquare)
# If the capture is also a promotion we want to give it an even bigger bonus
if move.isPromotion():
var piece: Piece
case move.getPromotionType():
of PromoteToBishop:
piece = Piece(kind: Bishop, color: sideToMove)
of PromoteToKnight:
piece = Piece(kind: Knight, color: sideToMove)
of PromoteToRook:
piece = Piece(kind: Rook, color: sideToMove)
of PromoteToQueen:
piece = Piece(kind: Queen, color: sideToMove)
else:
discard # Unreachable
result += PROMOTION_MULTIPLIER * self.board.position.getPieceScore(piece, move.targetSquare)
# We want to prioritize good captures (see > 0), but if the capture
# is bad then at least we sort it with MVVLVA
if seeScore < 0 and (move.isCapture() or move.isEnPassant()):
# Implementation of MVVLVA: Most Valuable Victim Least Valuable Aggressor.
# We prioritize moves that capture the most valuable pieces, and as a
# second goal we want to use our least valuable pieces to do so (this
# is why we multiply the score of the captured piece by a constant, to give
# it priority)
let capturedScore = MVV_LVA_MULTIPLIER * self.board.position.getPieceScore(move.targetSquare)
result = capturedScore - self.board.position.getPieceScore(move.startSquare)
# If the capture is also a promotion we want to give it an even bigger bonus
if move.isPromotion():
var piece: Piece
case move.getPromotionType():
of PromoteToBishop:
piece = Piece(kind: Bishop, color: sideToMove)
of PromoteToKnight:
piece = Piece(kind: Knight, color: sideToMove)
of PromoteToRook:
piece = Piece(kind: Rook, color: sideToMove)
of PromoteToQueen:
piece = Piece(kind: Queen, color: sideToMove)
else:
discard # Unreachable
result += PROMOTION_MULTIPLIER * self.board.position.getPieceScore(piece, move.targetSquare)
return result + BAD_SEE_OFFSET
else:
# If the capture is good then we just use the SEE score + the offset
return seeScore + GOOD_SEE_OFFSET
return result + BAD_SEE_OFFSET
else:
# We want to prioritize good captures (see > 0) and then sort
# them with MVVLVA. Of course, good captures will be placed
# before bad captures regardless of what MVVLVA has to say
# about them
if move.isCapture() or move.isEnPassant():
# Implementation of MVVLVA: Most Valuable Victim Least Valuable Aggressor.
# We prioritize moves that capture the most valuable pieces, and as a
# second goal we want to use our least valuable pieces to do so (this
# is why we multiply the score of the captured piece by a constant, to give
# it priority)
let capturedScore = MVV_LVA_MULTIPLIER * self.board.position.getPieceScore(move.targetSquare)
result = capturedScore - self.board.position.getPieceScore(move.startSquare)
# If the capture is also a promotion we want to give it an even bigger bonus
if move.isPromotion():
var piece: Piece
case move.getPromotionType():
of PromoteToBishop:
piece = Piece(kind: Bishop, color: sideToMove)
of PromoteToKnight:
piece = Piece(kind: Knight, color: sideToMove)
of PromoteToRook:
piece = Piece(kind: Rook, color: sideToMove)
of PromoteToQueen:
piece = Piece(kind: Queen, color: sideToMove)
else:
discard # Unreachable
result += PROMOTION_MULTIPLIER * self.board.position.getPieceScore(piece, move.targetSquare)
result += seeScore
# We use >= instead of > because promotions are also scored by their
# SEE here: with a strict comparison, a promotion that loses no material
# (SEE == 0) would get lumped in with the bad captures, behind the quiets
if seeScore >= 0:
return result + GOOD_SEE_OFFSET
else:
return result + BAD_SEE_OFFSET
# If the capture is good then we just use the SEE score + the offset
return seeScore + GOOD_SEE_OFFSET
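
To make the MVVLVA ordering described in the comments above concrete, here is a small self-contained Nim sketch; the piece values and the multiplier are illustrative placeholders, not Nimfish's MVV_LVA_MULTIPLIER or getPieceScore values:

import std/algorithm

const
  MvvLvaMultiplier = 10
  # Rough centipawn values for pawn, knight, bishop, rook, queen
  # (placeholders for this sketch only)
  PieceValue = [100, 320, 330, 500, 900]

type Capture = object
  attacker: int   # index into PieceValue
  victim: int     # index into PieceValue

func mvvLvaScore(c: Capture): int =
  ## Most Valuable Victim, Least Valuable Attacker: weight the victim
  ## heavily so it dominates the ordering, then prefer the cheapest
  ## attacker as a tie-breaker
  MvvLvaMultiplier * PieceValue[c.victim] - PieceValue[c.attacker]

when isMainModule:
  var captures = @[
    Capture(attacker: 4, victim: 0),   # queen takes pawn
    Capture(attacker: 0, victim: 4),   # pawn takes queen
    Capture(attacker: 1, victim: 3)    # knight takes rook
  ]
  # Highest MVVLVA score first: PxQ, then NxR, then QxP
  captures.sort(proc (a, b: Capture): int = cmp(mvvLvaScore(b), mvvLvaScore(a)))
  for c in captures:
    echo c.attacker, " takes ", c.victim, " -> ", mvvLvaScore(c)
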
if move.isQuiet():
# History heuristic bonus
@ -263,8 +227,7 @@ proc getEstimatedMoveScore(self: SearchManager, move: Move, ply: int): int =
if score != 0:
return score + HISTORY_OFFSET
# We use an explicit offset for quiet moves because
# we want to place bad captures (SEE < BAD_CAPTURE_THRESHOLD)
# behind them
# we want to place bad captures behind them
return QUIET_OFFSET
@ -289,7 +252,11 @@ proc log(self: SearchManager, depth: int) =
elapsedMsec = self.elapsedTime().uint64
nps = 1000 * (self.nodeCount div max(elapsedMsec, 1))
var logMsg = &"info depth {depth} seldepth {self.selectiveDepth} time {elapsedMsec} nodes {self.nodeCount} nps {nps}"
logMsg &= &" hashfull {self.transpositionTable[].getFillEstimate()} score cp {self.bestRootScore}"
logMsg &= &" hashfull {self.transpositionTable[].getFillEstimate()}"
if self.bestRootScore >= -(mateScore() + MAX_DEPTH):
logMsg &= &" score mate {((-mateScore() - self.bestRootScore + 1) div 2)}"
else:
logMsg &= &" score cp {self.bestRootScore}"
if self.pvMoves[0][0] != nullMove():
logMsg &= " pv "
for move in self.pvMoves[0]:
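
The new mate reporting above turns an internal "mate in N plies" score into the full-move count that the UCI "score mate" field expects. Here is a standalone sketch of that arithmetic; MateValue stands in for -mateScore() and is illustrative, not Nimfish's actual constant:

const
  MateValue = 32_000   # stand-in for -mateScore(); illustrative only
  MaxDepth = 255

func isMateScore(score: int): bool =
  ## Scores this close to the mate bound encode a forced mate,
  ## with the distance to mate folded into the score itself
  abs(score) >= MateValue - MaxDepth

func uciMateMoves(score: int): int =
  ## Convert "mate in N plies" into full moves, rounding up;
  ## a negative result means the side to move is getting mated
  let plies = MateValue - abs(score)
  let moves = (plies + 1) div 2
  result = if score > 0: moves else: -moves

when isMainModule:
  echo isMateScore(150)                 # false: an ordinary score, reported as "score cp"
  echo uciMateMoves(MateValue - 5)      # mating in 5 plies -> 3
  echo uciMateMoves(-(MateValue - 4))   # mated in 4 plies  -> -2
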
@ -355,20 +322,6 @@ proc qsearch(self: var SearchManager, ply: int, alpha, beta: Score): Score =
## exist
if self.shouldStop() or ply == MAX_DEPTH or self.board.isDrawn():
return Score(0)
when defined(QTT):
# The gains from this are dubious at best, so it's disabled for now
if ply > 0:
let query = self.transpositionTable[].get(self.board.position.zobristKey)
if query.success:
case query.entry.flag:
of Exact:
return query.entry.score
of LowerBound:
if query.entry.score >= beta:
return query.entry.score
of UpperBound:
if query.entry.score <= alpha:
return query.entry.score
let score = self.board.position.evaluate()
if score >= beta:
# Stand-pat evaluation
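
The SEE-based pruning that gives this commit its title is not visible in the hunk above, so the following is only a hedged sketch of the general idea behind QSEE pruning: inside quiescence search, captures whose static exchange evaluation says they lose material are skipped without being searched. The move representation and SEE scores below are simplified stand-ins, not Nimfish's API:

type ScoredCapture = object
  name: string     # human-readable label for this sketch
  seeScore: int    # result of a static exchange evaluation, in centipawns

proc filterByQsee(captures: seq[ScoredCapture]): seq[string] =
  ## Keep only the captures quiescence search would bother looking at:
  ## anything SEE considers a losing exchange is pruned outright
  for capture in captures:
    if capture.seeScore < 0:
      continue   # e.g. taking a defended pawn with the queen
    result.add(capture.name)

when isMainModule:
  let captures = @[
    ScoredCapture(name: "QxP (pawn is defended)", seeScore: -800),
    ScoredCapture(name: "PxN", seeScore: 220),
    ScoredCapture(name: "RxR (even trade)", seeScore: 0)
  ]
  echo filterByQsee(captures)   # @["PxN", "RxR (even trade)"]
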
@ -440,24 +393,27 @@ proc search(self: var SearchManager, depth, ply: int, alpha, beta: Score, isPV:
# Probe the transposition table to see if we can cause an early cutoff
let query = self.transpositionTable[].get(self.board.position.zobristKey, depth.uint8)
if query.success:
var score = query.entry.score
if abs(score) >= -(mateScore() + MAX_DEPTH):
score -= int16(score.int.sgn() * ply)
case query.entry.flag:
of Exact:
return query.entry.score
return score
of LowerBound:
if query.entry.score >= beta:
return query.entry.score
if score >= beta:
return score
of UpperBound:
if query.entry.score <= alpha:
return query.entry.score
if score <= alpha:
return score
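
Both transposition table probes touched by this commit (the one above and the disabled QTT one in qsearch) follow the same bound logic. A compact sketch of that cutoff rule, with a simplified entry type:

type
  TTFlag = enum Exact, LowerBound, UpperBound
  TTEntry = object
    score: int
    flag: TTFlag

func ttCutoff(entry: TTEntry, alpha, beta: int): (bool, int) =
  ## An exact score can always be returned; a lower bound only causes
  ## a cutoff if it already beats beta, an upper bound only if it
  ## cannot even reach alpha
  case entry.flag
  of Exact:
    return (true, entry.score)
  of LowerBound:
    return (entry.score >= beta, entry.score)
  of UpperBound:
    return (entry.score <= alpha, entry.score)

when isMainModule:
  echo ttCutoff(TTEntry(score: 120, flag: LowerBound), alpha = 0, beta = 100)   # (true, 120)
  echo ttCutoff(TTEntry(score: 50, flag: UpperBound), alpha = 0, beta = 100)    # (false, 50)
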
if not isPV and not self.board.inCheck() and depth <= RFP_DEPTH_LIMIT and staticEval - RFP_EVAL_THRESHOLD * depth >= beta:
## Reverse futility pruning: if the side to move has a significant advantage
## in the current position and is not in check, return the position's static
## evaluation to encourage the engine to deal with any potential threats from
## the opponent. Since this optimization technique is not sound, we limit the
## depth at which it can trigger for safety purposes (it is also the reason
## why the "advantage" threshold scales with depth: the deeper we go, the more
## careful we want to be with our estimate for how much of an advantage we may
## or may not have)
# Reverse futility pruning: if the side to move has a significant advantage
# in the current position and is not in check, return the position's static
# evaluation to encourage the engine to deal with any potential threats from
# the opponent. Since this optimization technique is not sound, we limit the
# depth at which it can trigger for safety purposes (it is also the reason
# why the "advantage" threshold scales with depth: the deeper we go, the more
# careful we want to be with our estimate for how much of an advantage we may
# or may not have)
return staticEval
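
As a standalone illustration of the reverse futility pruning condition explained above (the margin and depth limit below are illustrative, not Nimfish's RFP_EVAL_THRESHOLD and RFP_DEPTH_LIMIT):

const
  RfpDepthLimit = 6     # illustrative depth cap
  RfpMargin = 100       # illustrative margin, in centipawns per ply of depth

func canRfpPrune(staticEval, beta, depth: int; isPV, inCheck: bool): bool =
  ## The deeper we still have to search, the bigger the advantage we
  ## demand before trusting the static evaluation enough to cut off
  not isPV and not inCheck and depth <= RfpDepthLimit and
    staticEval - RfpMargin * depth >= beta

when isMainModule:
  echo canRfpPrune(450, 100, 3, isPV = false, inCheck = false)   # true: 450 - 300 >= 100
  echo canRfpPrune(250, 100, 3, isPV = false, inCheck = false)   # false: 250 - 300 < 100
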
if not isPV and depth > NMP_DEPTH_THRESHOLD and self.board.canNullMove() and staticEval >= beta:
# Null move pruning: it is reasonable to assume that
@ -510,8 +466,10 @@ proc search(self: var SearchManager, depth, ply: int, alpha, beta: Score, isPV:
return mateScore() + Score(ply)
# Stalemate
return Score(0)
var bestScore = lowestEval()
var alpha = alpha
var
bestScore = lowestEval()
alpha = alpha
failedLow: seq[tuple[move: Move, score: Score]] = @[]
let
sideToMove = self.board.position.sideToMove
for i, move in moves:
@ -519,24 +477,22 @@ proc search(self: var SearchManager, depth, ply: int, alpha, beta: Score, isPV:
let isNotMated {.used.} = bestScore > mateScore() + MAX_DEPTH
if ply == 0 and self.searchMoves.len() > 0 and move notin self.searchMoves:
continue
self.board.doMove(move)
when defined(FP):
if not isPV and move.isQuiet() and depth <= FP_DEPTH_LIMIT and staticEval + FP_EVAL_MARGIN * depth < alpha and isNotMated and not self.board.inCheck():
if not isPV and move.isQuiet() and depth <= FP_DEPTH_LIMIT and staticEval + FP_EVAL_MARGIN * depth < alpha and isNotMated:
# Futility pruning: If a move cannot meaningfully improve alpha, prune it from the
# tree. Much like RFP, this is an unsound optimization (and a riskier one at that,
# apparently), so our depth limit and evaluation margins are very conservative
# compared to RFP. Also, we need to make sure the best score is not a mate score, or
# we'd risk pruning moves that evade checkmate
self.board.unmakeMove()
break
when defined(LMP):
if not isPV and move.isQuiet() and depth <= LMP_DEPTH_THRESHOLD and i >= LMP_DEPTH_MULTIPLIER * depth and isNotMated and not self.board.inCheck():
if not isPV and not move.isTactical() and depth <= LMP_DEPTH_THRESHOLD and i >= LMP_DEPTH_MULTIPLIER * depth and isNotMated:
# Late move pruning: prune quiets when we've analyzed enough moves. This optimization is also
# unsound and so it's constrained at fairly low depths. We also want to prune less moves the deeper
# unsound and so it's constrained to fairly low depths. We also want to prune less moves the deeper
# we go (hence the multiplier). Since the optimization is unsound, we also want to make sure we don't
# accidentally miss a move that staves off checkmate
self.board.unmakeMove()
break
self.board.doMove(move)
let
extension = self.getSearchExtension(move)
reduction = self.getReduction(move, depth, ply, i, isPV)
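
The futility pruning and late move pruning conditions described in the comments above boil down to two cheap predicates. A sketch with illustrative limits (not Nimfish's tuned FP_DEPTH_LIMIT, FP_EVAL_MARGIN, LMP_DEPTH_THRESHOLD or LMP_DEPTH_MULTIPLIER):

const
  FpDepthLimit = 4          # illustrative
  FpMargin = 150            # illustrative, in centipawns per ply of depth
  LmpDepthThreshold = 1
  LmpMultiplier = 8

func canFutilityPrune(staticEval, alpha, depth: int;
                      isPV, isQuiet, isNotMated: bool): bool =
  ## Skip quiet moves that have no realistic chance of raising alpha,
  ## but never while a mate score is on the table
  not isPV and isQuiet and depth <= FpDepthLimit and
    staticEval + FpMargin * depth < alpha and isNotMated

func canLateMovePrune(moveIndex, depth: int;
                      isPV, isTactical, isNotMated: bool): bool =
  ## Stop searching non-tactical moves once enough of them have been
  ## tried at low depth; the threshold scales with depth so deeper
  ## nodes prune fewer moves
  not isPV and not isTactical and depth <= LmpDepthThreshold and
    moveIndex >= LmpMultiplier * depth and isNotMated

when isMainModule:
  echo canFutilityPrune(-400, 50, 2, isPV = false, isQuiet = true, isNotMated = true)   # true
  echo canLateMovePrune(10, 1, isPV = false, isTactical = false, isNotMated = true)     # true
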
@ -586,10 +542,18 @@ proc search(self: var SearchManager, depth, ply: int, alpha, beta: Score, isPV:
# quadratic bonus wrt. depth is usually the value that is used (though some
# engines, namely Stockfish, use a linear bonus. Maybe we can investigate this)
self.storeHistoryScore(sideToMove, move, score, depth * depth)
when defined(historyPenalty):
if bestMove != nullMove():
# Punish bad quiets
for (badMove, badScore) in failedLow:
self.storeHistoryScore(sideToMove, badMove, badScore, -(depth * depth))
# Killer move heuristic: store quiets that caused a beta cutoff according to the distance from
# root that they occurred at, as they might be good refutations for future moves from the opponent.
# Elo gains: 33.5 +/- 19.3
self.storeKillerMove(ply, move)
# This move was too good for us, opponent will not search it
break
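
The history bonus and the new malus described above both go through storeHistoryScore with a quadratic depth term: positive for the move that caused the cutoff, negative for the quiets that failed low before it. A bare-bones sketch with a plain integer table (the saturation bound is illustrative and may not match Nimfish's storeHistoryScore):

type SimpleHistory = array[2, array[64, array[64, int]]]   # [color][from][to]

const MaxHistory = 16_000   # illustrative saturation bound

proc updateHistory(table: var SimpleHistory;
                   color, fromSq, toSq, depth: int; cutoff: bool) =
  ## Quiets that cause a beta cutoff earn a bonus quadratic in depth;
  ## quiets searched before them that failed low get the same amount
  ## as a penalty (the "malus")
  let delta = if cutoff: depth * depth else: -(depth * depth)
  table[color][fromSq][toSq] = clamp(table[color][fromSq][toSq] + delta,
                                     -MaxHistory, MaxHistory)

when isMainModule:
  var history: SimpleHistory
  updateHistory(history, 0, 12, 28, depth = 6, cutoff = true)    # reward the cutoff quiet
  updateHistory(history, 0, 6, 21, depth = 6, cutoff = false)    # punish an earlier failed-low quiet
  echo history[0][12][28], " ", history[0][6][21]                # 36 -36
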
if score > alpha:
@ -607,9 +571,25 @@ proc search(self: var SearchManager, depth, ply: int, alpha, beta: Score, isPV:
break
self.pvMoves[ply][i + 1] = pv
self.pvMoves[ply][0] = move
else:
when defined(historyPenalty):
# History penalty: apply a penalty to moves that fail to raise alpha
# because, let's be honest, they kinda suck. We only apply this penalty
# in the event of a beta cutoff, because it doesn't really make sense to
# look at moves we know are bad if there are others further down the list
# that lead to a fail high
failedLow.add((move, score))
else:
discard
# Store the best move in the transposition table so we can find it later
let nodeType = if bestScore >= beta: LowerBound elif bestScore <= alpha: UpperBound else: Exact
self.transpositionTable[].store(depth.uint8, bestScore, self.board.position.zobristKey, bestMove, nodeType)
var ttScore = bestScore
# We do this because we want to make sure that when we do a TT cutoff and it's
# a mate score, we pick the shortest possible mate line if we're mating and the
# longest possible one if we're being mated. We revert this when probing the TT
if abs(ttScore) >= -(mateScore() + MAX_DEPTH):
ttScore += Score(ttScore.int.sgn()) * Score(ply)
self.transpositionTable[].store(depth.uint8, ttScore, self.board.position.zobristKey, bestMove, nodeType)
return bestScore
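
The ply adjustment applied when storing the score above, and undone when probing earlier in this hunk, can be summarised in isolation like this (MateValue again stands in for -mateScore() and is illustrative):

import std/math   # for sgn

const
  MateValue = 32_000   # stand-in for -mateScore()
  MaxDepth = 255

func isMateScore(score: int): bool =
  abs(score) >= MateValue - MaxDepth

func scoreToTT(score, ply: int): int =
  ## Before storing, rebase a mate score from "plies from the root"
  ## to "plies from this node", so the entry is reusable at any ply
  result = score
  if isMateScore(score):
    result += sgn(score) * ply

func scoreFromTT(score, ply: int): int =
  ## When probing, rebase it back onto the current distance from the
  ## root, which keeps the engine preferring the shortest mate
  result = score
  if isMateScore(score):
    result -= sgn(score) * ply

when isMainModule:
  let stored = scoreToTT(MateValue - 7, ply = 3)   # mate found 4 plies below a node at ply 3
  echo scoreFromTT(stored, ply = 5)                # reused two plies deeper: MateValue - 9
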
@ -678,9 +658,9 @@ proc findBestMove*(self: var SearchManager, timeRemaining, increment: int64, max
maxDepth = 30
self.searchFlag[].store(true)
# Iterative deepening loop
var score {.used.} = Score(0)
var score = Score(0)
for depth in 1..min(MAX_DEPTH, maxDepth):
if depth < 5 or not defined(aspirationWindow):
if depth < ASPIRATION_WINDOW_DEPTH_THRESHOLD or not defined(aspirationWindow):
score = self.search(depth, 0, lowestEval(), highestEval(), true)
else:
score = self.aspirationWindow(score, depth)
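
aspirationWindow itself is not part of this diff, so the following is only a generic sketch of the technique the new ASPIRATION_WINDOW_DEPTH_THRESHOLD constant gates: search with a narrow window around the previous iteration's score and widen it whenever the result falls outside. The window size, bounds and callback are all illustrative:

const
  InitialWindow = 30       # illustrative half-width, in centipawns
  Infinity = 32_001        # illustrative bound larger than any real score

proc aspirationSearch(prevScore: int,
                      search: proc (alpha, beta: int): int): int =
  ## Re-search around prevScore, doubling the window whenever the
  ## search fails low or high, until the score lands inside it
  var
    delta = InitialWindow
    alpha = max(prevScore - delta, -Infinity)
    beta = min(prevScore + delta, Infinity)
  while true:
    result = search(alpha, beta)
    if result <= alpha:          # fail low: widen the lower bound and retry
      alpha = max(alpha - delta, -Infinity)
    elif result >= beta:         # fail high: widen the upper bound and retry
      beta = min(beta + delta, Infinity)
    else:
      return                     # the score fits the window: keep it
    delta *= 2

when isMainModule:
  # A fake "search" that always returns 120 regardless of the window
  let fakeSearch = proc (alpha, beta: int): int = 120
  echo aspirationSearch(prevScore = 40, search = fakeSearch)   # 120, after widening twice
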

View File

@ -80,6 +80,20 @@ proc newTranspositionTable*(size: uint64): TTable =
result.size = numEntries
func clear*(self: var TTable) =
## Clears the transposition table
## without releasing the memory
## associated with it
self.data.setLen(0)
func destroy*(self: var TTable) =
## Permanently and irreversibly
## destroys the transposition table
self.data = @[]
self.size = 0
func getIndex(self: TTable, key: ZobristKey): uint64 =
## Retrieves the index of the given
## zobrist key in our transposition table

View File

@ -21,6 +21,7 @@ import std/atomics
import board
import movegen
import search
import eval
import transpositions
@ -346,16 +347,13 @@ proc startUCISession* =
session = UCISession(hashTableSize: 64, position: startpos())
# God forbid we try to use atomic ARC like it was intended. Raw pointers
# it is then... sigh
session.transpositionTable = cast[ptr TTable](alloc0(sizeof(TTable)))
session.stopFlag = cast[ptr Atomic[bool]](alloc0(sizeof(Atomic[bool])))
session.searchFlag = cast[ptr Atomic[bool]](alloc0(sizeof(Atomic[bool])))
session.transpositionTable[] = newTranspositionTable(session.hashTableSize * 1024 * 1024)
session.transpositionTable = cast[ptr TTable](alloc0(sizeof(TTable)))
session.historyTable = cast[ptr HistoryTable](alloc0(sizeof(HistoryTable)))
session.killerMoves = cast[ptr KillersTable](alloc0(sizeof(KillersTable)))
# Initialize killer move array
for i in 0..<MAX_DEPTH:
for j in 0..<NUM_KILLERS:
session.killerMoves[i][j] = nullMove()
session.position = startpos()
session.history = @[]
# Fun fact, nim doesn't collect the memory of thread vars. Another stupid fucking design pitfall
# of nim's AWESOME threading model. Someone is getting a pipebomb in their mailbox about this, mark
# my fucking words. (for legal purposes THAT IS A JOKE). See https://github.com/nim-lang/Nim/issues/23165
@ -386,14 +384,31 @@ proc startUCISession* =
of Debug:
session.debug = cmd.on
of NewGame:
session.position = startpos()
session.history = @[]
of Go:
# Scale our history coefficients
if session.transpositionTable[].size() == 0:
if session.debug:
echo &"info string allocating new TT of size {session.hashTableSize} MiB"
session.transpositionTable[] = newTranspositionTable(session.hashTableSize * 1024 * 1024)
else:
if session.debug:
echo &"info string clearing out TT of size {session.hashTableSize} MiB"
session.transpositionTable[].clear()
# (Re-)Initialize history table
# array[PieceColor.White..PieceColor.Black, array[Square(0)..Square(63), array[Square(0)..Square(63), Score]]]
for color in PieceColor.White..PieceColor.Black:
for source in Square(0)..Square(63):
for target in Square(0)..Square(63):
session.historyTable[][color][source][target] = session.historyTable[][color][source][target] div 2
for i in Square(0)..Square(63):
for j in Square(0)..Square(63):
session.historyTable[color][i][j] = Score(0)
# (Re-)Initialize killer move table
for i in 0..<MAX_DEPTH:
for j in 0..<NUM_KILLERS:
session.killerMoves[i][j] = nullMove()
of Go:
when not defined(historyPenalty):
# Scale our history coefficients
for color in PieceColor.White..PieceColor.Black:
for source in Square(0)..Square(63):
for target in Square(0)..Square(63):
session.historyTable[color][source][target] = session.historyTable[][color][source][target] div 2
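
The halving above is a simple form of history aging: between searches the accumulated scores are decayed so stale information loses weight while recent results still count. A tiny standalone version of the same idea:

proc ageHistory(table: var seq[int]) =
  ## Decay every history entry towards zero by halving it
  for entry in table.mitems:
    entry = entry div 2

when isMainModule:
  var history = @[400, -37, 1]
  ageHistory(history)
  echo history   # @[200, -18, 0]
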
if searchThread.running:
joinThread(searchThread)
createThread(searchThread, bestMove, (session, cmd))
@ -412,13 +427,19 @@ proc startUCISession* =
continue
case cmd.name:
of "Hash":
if session.transpositionTable[].size() > 0:
if session.debug:
echo &"info string destroying old TT of size {session.hashTableSize} MiB"
session.transpositionTable[].destroy()
session.hashTableSize = cmd.value.parseBiggestUInt()
if session.debug:
echo &"info string set TT hash table size to {session.hashTableSize} MiB"
session.transpositionTable[] = newTranspositionTable(session.hashTableSize * 1024 * 1024)
else:
discard
of Position:
# Due to the way the whole thing is designed, the
# position is actually set when the command is parsed
# rather than when it is processed here
discard
else:
discard