Update evaluation and tuner

This commit is contained in:
Mattia Giambirtone 2024-05-16 14:14:25 +02:00
parent 46be23dbb7
commit f53e149af0
Signed by: nocturn9x
GPG Key ID: 37B83AB6C3BE6514
5 changed files with 78 additions and 47 deletions

View File

@ -47,12 +47,14 @@ Estimated elo: ~2100
- [X] History heuristic
- [X] History gravity
- [ ] History malus
- [ ] History aging
- [X] Killer heuristic
- [X] Null-window search
- [ ] Capture history
- [ ] Continuation history
- [X] Parallel search (lazy SMP)
- [X] Pondering
- [ ] Counter moves
### Eval
@ -60,6 +62,7 @@ Estimated elo: ~2100
- [X] Piece-square tables
- [X] Tempo
- [ ] King safety
- [ ] Pawn shield
- [ ] Mobility
- [ ] Minor piece outpost
- [ ] Bishop pair

View File

@ -32,7 +32,7 @@ type
# (and maluses), as well as the piece values themselves. We
# have one for each game phase (middle and end game) and one
# for each piece (one per color)
psqts: array[PieceColor.White..PieceColor.Black, array[PieceKind.Bishop..PieceKind.Rook, array[Square(0)..Square(63), float]]]
psqts: array[PieceColor.White..PieceColor.Black, array[PieceKind.Bishop..PieceKind.Rook, array[Square(0)..Square(63), tuple[mg, eg: float]]]]
# Tempo bonus for the side to move
tempo: float
@ -199,21 +199,26 @@ const
# First entry is the middle game score, second entry is the endgame score. Half stolen
# and half eyeballed. Tune these!
QUEEN_SEMI_OPEN_FILE_BONUS: array[2, Score] = [-5, 0]
QUEEN_OPEN_FILE_BONUS: array[2, Score] = [10, 0]
ROOK_SEMI_OPEN_FILE_BONUS: array[2, Score] = [40, 0]
ROOK_OPEN_FILE_BONUS: array[2, Score] = [60, 0]
BISHOP_PAIR_BONUS: array[2, Score] = [35, 100]
QUEEN_SEMI_OPEN_FILE_BONUS {.used.}: array[2, Score] = [-5, 0]
QUEEN_OPEN_FILE_BONUS {.used.}: array[2, Score] = [10, 0]
ROOK_SEMI_OPEN_FILE_BONUS {.used.}: array[2, Score] = [40, 0]
ROOK_OPEN_FILE_BONUS {.used.}: array[2, Score] = [60, 0]
BISHOP_PAIR_BONUS {.used.}: array[2, Score] = [35, 100]
# Mobility bonuses (TODO: Refactor this into mobility tables. Need a tuner for that)
MIDDLEGAME_MOBILITY_BONUS: array[PieceKind.Bishop..PieceKind.Rook, Score] = [4, 0, 3, 0, 4, 5]
ENDGAME_MOBILITY_BONUS: array[PieceKind.Bishop..PieceKind.Rook, Score] = [6, 0, 5, 0, 8, 8]
MIDDLEGAME_MOBILITY_BONUS {.used.}: array[PieceKind.Bishop..PieceKind.Rook, Score] = [4, 0, 3, 0, 4, 5]
ENDGAME_MOBILITY_BONUS {.used.}: array[PieceKind.Bishop..PieceKind.Rook, Score] = [6, 0, 5, 0, 8, 8]
# TODO: Don't use these. Make them tapered. Also tune them
# DOUBLED_PAWNS_MALUS: array[9, Score] = [0, -5, -10, -20, -30, -30, -30, -30, -30]
# ISOLATED_PAWN_MALUS: array[9, Score] = [0, -10, -25, -50, -75, -75, -75, -75, -75]
# STRONG_PAWN_BONUS: array[9, Score] = [0, 5, 10, 20, 30, 30, 30, 30, 30]
# PASSED_PAWN_BONUS: array[7, Score] = [0, 120, 80, 50, 30, 15, 15]
MIDDLEGAME_PASSED_PAWN_BONUS {.used.}: array[7, Score] = [0, 120, 80, 50, 30, 15, 15]
ENDGAME_PASSED_PAWN_BONUS {.used.}: array[7, Score] = [0, 250, 150, 50, 30, 15, 15]
MIDDLEGAME_ISOLATED_PAWN_MALUS {.used.}: array[9, Score] = [0, -10, -25, -50, -75, -75, -75, -75, -75]
PASSED_PAWN_BONUS = Score(30)
ENDGAME_ISOLATED_PAWN_MALUS {.used.}: array[9, Score] = [0, -10, -25, -50, -75, -75, -75, -75, -75] #\[0, -40, -55, -90, -125, -125, -125, -125, -125]
var
MIDDLEGAME_VALUE_TABLES: array[PieceColor.White..PieceColor.Black, array[PieceKind.Bishop..PieceKind.Rook, array[Square(0)..Square(63), Score]]]
@ -325,7 +330,7 @@ proc evaluate*(position: Position, mode: static EvalMode, features: Features = F
nonSideToMove = sideToMove.opposite()
middleGamePhase = position.getGamePhase()
occupancy = position.getOccupancy()
pawns {.used.} = position.getBitboard(Pawn, White) or position.getBitboard(Pawn, Black)
pawns: array[PieceColor.White..PieceColor.Black, Bitboard] = [position.getBitboard(Pawn, White), position.getBitboard(Pawn, Black)]
endGamePhase = 24 - middleGamePhase
var
middleGameScores: array[PieceColor.White..PieceColor.Black, Score] = [0, 0]
@ -338,8 +343,8 @@ proc evaluate*(position: Position, mode: static EvalMode, features: Features = F
middleGameScores[piece.color] += MIDDLEGAME_VALUE_TABLES[piece.color][piece.kind][sq]
endGameScores[piece.color] += ENDGAME_VALUE_TABLES[piece.color][piece.kind][sq]
else:
features.psqts[piece.color][piece.kind][sq] = 1.0
features.psqts[piece.color][piece.kind][sq].mg = middleGamePhase / 24
features.psqts[piece.color][piece.kind][sq].mg = endGamePhase / 24
# Final score computation
let
middleGameScore = middleGameScores[sideToMove] - middleGameScores[nonSideToMove]
@ -353,6 +358,49 @@ proc evaluate*(position: Position, mode: static EvalMode, features: Features = F
else:
features.tempo = 1.0
# Pawn structure
when defined(evalPawns):
for color in PieceColor.White..PieceColor.Black:
for pawn in pawns[color]:
if (getPassedPawnMask(color, pawn) and pawns[color.opposite()]) == 0:
middleGameScores[color] += PASSED_PAWN_BONUS
endGameScores[color] += PASSED_PAWN_BONUS * 2
#[
# Isolated pawns
for file in 0..7:
if (pawns[color] and getIsolatedPawnMask(file)) == 0:
let onFile = pawns[color] and getFileMask(file)
middleGameScores[color] += MIDDLEGAME_ISOLATED_PAWN_MALUS[onFile.countSquares()]
endGameScores[color] += ENDGAME_ISOLATED_PAWN_MALUS[onFile.countSquares()]
]#
when defined(evalFiles):
let rooks: array[PieceColor.White..PieceColor.Black, Bitboard] = [position.getBitboard(Rook, White), position.getBitboard(Rook, Black)]
let queens: array[PieceColor.White..PieceColor.Black, Bitboard] = [position.getBitboard(Queen, White), position.getBitboard(Queen, Black)]
for color in PieceColor.White..PieceColor.Black:
when defined(evalFiles):
# Rooks and queens on (semi-)open files
for file in 0..7:
let fileMask = getFileMask(file)
if (fileMask and (pawns[White] or pawns[Black])).countSquares() == 0:
# Open file (no pawns in the way)
for rook in rooks[color] and fileMask:
middleGameScores[color] += ROOK_OPEN_FILE_BONUS[0]
endGameScores[color] += ROOK_OPEN_FILE_BONUS[1]
for queen in queens[color] and fileMask:
middleGameScores[color] += QUEEN_OPEN_FILE_BONUS[0]
endGameScores[color] += QUEEN_OPEN_FILE_BONUS[1]
if (fileMask and pawns[color]).countSquares() == 0 and (fileMask and pawns[color.opposite()]).countSquares() != 0:
# Semi-open file (no friendly pawns and at least one enemy pawn in the way)
for rook in rooks[color] and fileMask:
middleGameScores[color] += ROOK_SEMI_OPEN_FILE_BONUS[0]
endGameScores[color] += ROOK_SEMI_OPEN_FILE_BONUS[1]
for queen in queens[color] and fileMask:
middleGameScores[color] += QUEEN_SEMI_OPEN_FILE_BONUS[0]
endGameScores[color] += QUEEN_SEMI_OPEN_FILE_BONUS[1]
#[
for color in PieceColor.White..PieceColor.Black:
let
@ -364,27 +412,6 @@ proc evaluate*(position: Position, mode: static EvalMode, features: Features = F
friendlyKnights {.used.} = position.getBitboard(Bishop, color)
friendlyKing {.used.} = position.getBitboard(King, color)
when defined(evalFiles):
# Rooks and queens on (semi-)open files
for file in 0..7:
let fileMask = getFileMask(file)
if (fileMask and pawns).countSquares() == 0:
# Open file (no pawns in the way)
for rook in friendlyRooks and fileMask:
middleGameScores[color] += ROOK_OPEN_FILE_BONUS[0]
endGameScores[color] += ROOK_OPEN_FILE_BONUS[1]
for queen in friendlyQueens and fileMask:
middleGameScores[color] += QUEEN_OPEN_FILE_BONUS[0]
endGameScores[color] += QUEEN_OPEN_FILE_BONUS[1]
if (fileMask and friendlyPawns).countSquares() == 0 and (fileMask and enemyPawns).countSquares() == 1:
# Semi-open file (only one enemy pawn in the way)
for rook in friendlyRooks and fileMask:
middleGameScores[color] += ROOK_SEMI_OPEN_FILE_BONUS[0]
endGameScores[color] += ROOK_SEMI_OPEN_FILE_BONUS[1]
for queen in friendlyQueens and fileMask:
middleGameScores[color] += QUEEN_SEMI_OPEN_FILE_BONUS[0]
endGameScores[color] += QUEEN_SEMI_OPEN_FILE_BONUS[1]
when defined(evalBishops):
# Bishop pair
@ -435,9 +462,9 @@ func featureCount*(self: Features): int {.exportpy.} =
## the evaluation
# 2 piece square tables for each of the 6
# pieces for 2 game phases for 2 colors
# plus tempo
return ((64 * 6) * 2) * 2 + 1
# pieces for 2 game phases and 2 colors, plus one extra
# for tempo
return 64 * 6 * 2 * 2 + 1
proc extract*(self: Features, fen: string): Tensor[float] =
@ -453,7 +480,8 @@ proc extract*(self: Features, fen: string): Tensor[float] =
var idx = color.int * len(self.psqts[color]) * len(self.psqts[color][piece])
idx += piece.int * len(self.psqts[color][piece])
idx += square.int
result[0, idx] = self.psqts[color][piece][square]
result[0, idx] = self.psqts[color][piece][square].eg
result[0, idx + 1] = self.psqts[color][piece][square].mg
result[0, ^1] = self.tempo

View File

@ -650,6 +650,14 @@ proc search(self: var SearchManager, depth, ply: int, alpha, beta: Score, isPV:
# This move was too good for us, opponent will not search it
break
else:
when defined(historyPenalty):
# History penalty: apply a penalty to moves that don't fail high.
# We only actually apply this penalty in the event of a beta cutoff,
# because it doesn't really make sense to look at moves we know are
# bad if there are other ones down the list that lead to a fail high
# (the earlier we can cause a beta cutoff the better!)
failedLow.add((move, score))
if score > alpha:
alpha = score
bestMove = move
@ -665,14 +673,6 @@ proc search(self: var SearchManager, depth, ply: int, alpha, beta: Score, isPV:
break
self.pvMoves[ply][i + 1] = pv
self.pvMoves[ply][0] = move
else:
when defined(historyPenalty):
# History penalty: apply a penalty to moves that fail to raise alpha
# because, let's be honest, they kinda suck. We only apply this penalty
# in the event of a beta cutoff, because it doesn't really make sense to
# look at moves we know are bad if there's other ones down the list that
# lead to a fail high (the earlier we can cause a beta cutoff the better!)
failedLow.add((move, score))
else:
discard
# Store the best move in the transposition table so we can find it later

View File

@ -103,7 +103,7 @@ def main(batch_size: int, dataset_path: Path, epoch_size: int):
BATCH_SIZE = 16384
DATASET_PATH = Path.cwd() / "nimfish" / "nimfishpkg" / "resources" / "lichess-big3-resolved.book"
EPOCH_SIZE = 15
EPOCH_SIZE = 10
if __name__ == "__main__":

View File

@ -432,7 +432,7 @@ proc startUCISession* =
session.searchState[].stopPondering()
else:
when not defined(historyPenalty):
# Scale our history coefficients
# History aging
for color in PieceColor.White..PieceColor.Black:
for source in Square(0)..Square(63):
for target in Square(0)..Square(63):