Initial work on tuning via pytorch
This commit is contained in:
parent
9a7139cdd5
commit
133cde53b2
|
@ -0,0 +1,111 @@
|
|||
# Copyright 2024 Mattia Giambirtone & All Contributors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# I couldn't be arsed to write a tuner myself, so I'm using pytorch instead.
|
||||
# Many many many thanks to analog-hors on the Engine Programming Discord
|
||||
# server for providing a starting point to write this script! Also thanks
|
||||
# to @affinelytyped and @jw1912 for the explanations
|
||||
|
||||
import re
|
||||
import json
|
||||
import torch
|
||||
import random
|
||||
import numpy as np
|
||||
# This comes from our Nim module with
|
||||
# the same name
|
||||
from utils import Features
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def load_dataset(path: Path) -> tuple[np.ndarray, list[str]]:
    """
    Loads a .book file at the given path and returns a tuple of
    the outcomes (as a numpy array) and the associated FEN of
    the position for each outcome.

    Every non-empty line is expected to look like ``<FEN> [<outcome>]``,
    where the outcome is a single-digit decimal such as 1.0, 0.5 or 0.0.
    Lines that do not follow this layout are skipped (and counted in a
    message printed to stdout) instead of being partially matched.

    :param path: Location of the .book file to read
    :return: A pair (outcomes, fens). outcomes is a 1-D float array and
        fens is a list of the same length, fens[i] being the position
        whose game result is outcomes[i]
    """

    # Records are parsed one line at a time and anchored at the start of
    # the line, so a FEN can neither swallow the newline left over from
    # the previous record nor begin halfway through a position. The
    # character class covers everything a FEN may contain: all digits
    # (move counters can be 9 or more), the files a-h (en passant
    # squares, which also covers 'b'), piece and castling letters, the
    # side to move, '/', '-' and blanks
    record = re.compile(r"([0-9a-hkqrnpKQRBNPw/\- \t]+?)[ \t]+\[(\d\.\d)\]")

    print(f"Loading positions from '{path}'")
    fens = []
    outcomes = []
    skipped = 0
    for line in path.read_text().splitlines():
        line = line.strip()
        if not line:
            continue
        match = record.match(line)
        if match is None:
            skipped += 1
            continue
        fens.append(match.group(1))
        outcomes.append(float(match.group(2)))
    if skipped:
        print(f"Skipped {skipped} malformed lines")
    print(f"Loaded {len(fens)} positions")
    return np.array(outcomes, dtype=float), fens
|
||||
|
||||
|
||||
def batch_loader(extractor: Features, batch_size: int, dataset: tuple[np.array, list[str]]):
    """
    Lazily produces the training batches fed to the optimizer.

    Each yielded item is a pair of tensors (features, targets): features
    has one row per sampled position and one column per feature reported
    by the extractor, targets has one row per sampled position and a
    single column holding that position's outcome. Positions are drawn
    uniformly at random, with replacement, from the whole dataset. The
    number of batches is the dataset size divided by the batch size,
    rounded down.
    """

    outcomes, fens = dataset
    last_index = len(fens) - 1
    for _ in range(len(outcomes) // batch_size):
        feature_rows = np.zeros((batch_size, extractor.featureCount()), dtype=float)
        target_rows = np.zeros((batch_size, 1), dtype=float)
        for row in range(batch_size):
            # Outcome and FEN share the same index, so a single
            # draw picks both halves of a training sample
            pick = random.randint(0, last_index)
            target_rows[row] = outcomes[pick]
            feature_rows[row] = extractor.extractFeatures(fens[pick])
        yield torch.from_numpy(feature_rows), torch.from_numpy(target_rows)
|
||||
|
||||
|
||||
def main(batch_size: int, dataset_path: Path, epoch_size: int):
    """
    Tunes Nimfish's evaluation with pytorch using the positions
    stored in the provided dataset.

    A bias-free linear layer with one weight per evaluation feature
    (all starting at zero) is fitted with Adam so that the sigmoid of
    its output approaches the recorded outcome of each position. The
    average loss is printed every epoch_size batches and, once the
    batches run out, the learned parameters are saved as "model.json"
    in the directory that contains the dataset.
    """

    extractor = Features()
    data = load_dataset(dataset_path)
    dataset_size = len(data[0])
    feature_count = extractor.featureCount()
    batches = batch_loader(extractor, batch_size, data)

    model = torch.nn.Linear(feature_count, 1, bias=False, dtype=float)
    torch.nn.init.constant_(model.weight, 0)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    print(f"Starting tuning on a dataset of size {dataset_size} with batch size {batch_size}")

    running_loss = 0.0
    for step, (inputs, expected) in enumerate(batches, start=1):
        optimizer.zero_grad()
        predictions = torch.sigmoid(model(inputs))
        # Mean of |prediction - outcome| raised to the power of 2.6
        loss = (predictions - expected).abs().pow(2.6).mean()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if step % epoch_size != 0:
            continue
        print(f"epoch {step // epoch_size}: loss: {running_loss / epoch_size}")
        running_loss = 0.0

    # Tensors are not JSON serializable: turn every
    # parameter into (nested) plain Python lists first
    param_map = {}
    for name, param in model.named_parameters():
        param_map[name] = param.detach().cpu().numpy().tolist()
    output_file = dataset_path.parent / "model.json"
    output_file.write_text(json.dumps(param_map))
|
||||
|
||||
|
||||
# Settings handed over to main() when this file is run as a script

# How many positions make up a single training batch
BATCH_SIZE = 16384
# How many batches are averaged into each loss value that gets printed
EPOCH_SIZE = 2
# The dataset is looked up relative to the current working directory
DATASET_PATH = Path.cwd().joinpath("nimfish", "nimfishpkg", "resources", "lichess-big3-resolved.book")


if __name__ == "__main__":
    main(batch_size=BATCH_SIZE, dataset_path=DATASET_PATH, epoch_size=EPOCH_SIZE)
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
# Copyright 2024 Mattia Giambirtone & All Contributors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
## Evaluation tuning utilities
|
||||
|
||||
|
||||
import ../position
|
||||
import ../pieces
|
||||
import ../movegen
|
||||
import ../magics
|
||||
|
||||
|
||||
import nimpy
|
||||
|
||||
|
||||
let np = pyImport("numpy")
|
||||
|
||||
|
||||
type
    Features* = ref object of PyNimObjectExperimental
        ## The features of our evaluation
        ## represented as a linear system
        ##
        ## Inherits from nimpy's PyNimObjectExperimental; presumably this
        ## is what allows the type to be used from Python (TODO: confirm
        ## against the nimpy documentation)

        # Our piece-square tables contain both positional bonuses
        # (and maluses), as well as the piece values themselves. We
        # have one for each game phase (middle and end game) and one
        # for each piece
        # The nesting is 2 (game phases) x 6 (pieces) x 64 (squares);
        # which phase and which piece each index stands for is not
        # established here (TODO: document the ordering)
        psqts: array[2, array[6, array[Square(0)..Square(63), float]]]
        # Tempo bonus for the side to move
        tempo: float
|
||||
|
||||
|
||||
func featureCount*(self: Features): int {.exportpy.} =
    ## Returns the total number of features (weights)
    ## that make up the evaluation

    # One weight per square for each of the 6 pieces,
    # in each of the 2 game phases, plus the tempo bonus
    const
        squareCount = 64
        pieceCount = 6
        phaseCount = 2
    result = squareCount * pieceCount * phaseCount + 1
|
||||
|
||||
|
||||
proc extractFeatures*(self: Features, fen: string): auto {.exportpy.} =
    ## TODO
    ##
    ## Placeholder implementation: the `fen` argument is not read yet.
    ## For now this returns whatever numpy's `zeros` produces for the
    ## shape (1, featureCount()), i.e. an all-zero row with one entry
    ## per feature

    # `np` is the numpy module loaded through nimpy's pyImport
    result = np.zeros((1, self.featureCount()))
|
Loading…
Reference in New Issue