From 30b3de233f57c7a1a6f52fd3d863ad599ed4fefb Mon Sep 17 00:00:00 2001
From: Mattia Giambirtone
Date: Wed, 15 May 2024 00:26:58 +0200
Subject: [PATCH] Add missing tuner

---
 Chess/nimfish/nimfishpkg/tune.py | 111 +++++++++++++++++++++++++++++++
 1 file changed, 111 insertions(+)
 create mode 100644 Chess/nimfish/nimfishpkg/tune.py

diff --git a/Chess/nimfish/nimfishpkg/tune.py b/Chess/nimfish/nimfishpkg/tune.py
new file mode 100644
index 0000000..5d7c359
--- /dev/null
+++ b/Chess/nimfish/nimfishpkg/tune.py
@@ -0,0 +1,111 @@
+# Copyright 2024 Mattia Giambirtone & All Contributors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# I couldn't be arsed to write a tuner myself, so I'm using pytorch instead.
+# Many many many thanks to analog-hors on the Engine Programming Discord
+# server for providing a starting point to write this script! Also thanks
+# to @affinelytyped and @jw1912 for the explanations
+
+import re
+import json
+import torch
+import random
+import numpy as np
+# This comes from our Nim module with
+# the same name
+from eval import Features
+from pathlib import Path
+
+
+def load_dataset(path: Path) -> tuple[np.ndarray, list[str]]:
+    """
+    Loads a .book file at the given path and returns a tuple of
+    the outcomes (as a numpy array) and the associated FEN of
+    the position for each outcome
+    """
+
+    print(f"Loading positions from '{path}'")
+    content = path.read_text()
+    fens = []
+    outcomes = []
+    for match in re.finditer(r"((?:[rnbqkpRNBQKP1-8]+\/){7}[rnbqkpRNBQKP1-8]+\s[b|w]\s(?:[K|Q|k|q|]{1,4}|-)\s(?:-|[a-h][1-8])\s\d+\s\d+)\s\[(\d\.\d)\]", content):
+        fens.append(match.group(1).strip())
+        outcomes.append(float(match.group(2)))
+    print(f"Loaded {len(fens)} positions")
+    return np.array(outcomes, dtype=float), fens
+
+
+def batch_loader(extractor: Features, batch_size: int, dataset: tuple[np.ndarray, list[str]]):
+    """
+    Generator that yields the data necessary to train the optimizer
+    """
+
+    outcomes, fens = dataset
+    total_size = len(outcomes)
+    num_batches = total_size // batch_size
+    for _ in range(num_batches):
+        targets = np.zeros((batch_size, 1), dtype=float)
+        features = np.zeros((batch_size, extractor.featureCount()), dtype=float)
+        for batch_index in range(batch_size):
+            chosen = random.randint(0, len(fens) - 1)
+            targets[batch_index] = outcomes[chosen]
+            features[batch_index] = extractor.extractFeatures(fens[chosen])
+        yield torch.from_numpy(features), torch.from_numpy(targets)
+
+
+def main(batch_size: int, dataset_path: Path, epoch_size: int):
+    """
+    Uses pytorch to tune Nimfish's evaluation using the provided
+    dataset
+    """
+
+    features = Features()
+    data = load_dataset(dataset_path)
+    dataset_size = len(data[0])
+    feature_count = features.featureCount()
+    dataset = batch_loader(features, batch_size, data)
+    model = torch.nn.Linear(feature_count, 1, bias=False, dtype=float)
+    torch.nn.init.constant_(model.weight, 0)
+    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
+
+    print(f"Starting tuning on a dataset of size {dataset_size} with batch size {batch_size}")
+
+    running_loss = 0.0
+    for i, (features, target) in enumerate(dataset):
+        optimizer.zero_grad()
+        outputs = torch.sigmoid(model(features))
+        loss = torch.mean(torch.abs(outputs - target) ** 2.6)
+        loss.backward()
+        optimizer.step()
+
+        running_loss += loss.item()
+        if (i + 1) % epoch_size == 0:
+            print(f"epoch {(i + 1) // epoch_size}: loss: {running_loss / epoch_size}")
+            running_loss = 0.0
+
+    param_map = {
+        name: param.detach().cpu().numpy().tolist()
+        for name, param in model.named_parameters()
+    }
+    (dataset_path.parent / "model.json").write_text(json.dumps(param_map))
+
+
+BATCH_SIZE = 16384
+DATASET_PATH = Path.cwd() / "nimfish" / "nimfishpkg" / "resources" / "lichess-big3-resolved.book"
+EPOCH_SIZE = 15
+
+
+if __name__ == "__main__":
+    main(BATCH_SIZE, DATASET_PATH, EPOCH_SIZE)
+