Initial work on new merged kings network. Delete third-party code factorizer. Refactor Makefile to move networks out of the main repo. Move book to appropriate directory (bench 5472385)
This commit is contained in:
3
.gitmodules
vendored
Normal file
3
.gitmodules
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
[submodule "networks"]
|
||||
path = networks
|
||||
url = https://git.nocturn9x.space/heimdall-engine/networks
|
||||
230
3rdparty/fcq.cpp
vendored
230
3rdparty/fcq.cpp
vendored
@@ -1,230 +0,0 @@
|
||||
/*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2024 Ciekce
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#define _CRT_SECURE_NO_WARNINGS

#include <algorithm>
#include <array>
#include <cassert>
#include <cerrno>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <iostream>
#include <limits>
#include <memory>
|
||||
|
||||
namespace
{
    // Rounding behaviour used when converting float parameters to int16.
    enum class QuantiseMode
    {
        Truncate,
        Round,
    };

    constexpr auto InFile = "raw.bin";
    constexpr auto OutFile = "factorised.bin";

    // Whether the raw network carries an extra factoriser input bucket.
    constexpr bool Factorised = true;
    // Whether the two L1 halves are multiplied pairwise (halves the output-weight count).
    constexpr bool PairwiseMul = false;

    constexpr bool TransposeOutputWeights = false;

    // Network architecture.
    constexpr std::uint32_t InputSize = 768;
    constexpr std::uint32_t InputBuckets = 16;
    constexpr std::uint32_t L1 = 1280;
    constexpr std::uint32_t OutputBuckets = 8;

    // Parameters are clipped to this magnitude before scaling.
    constexpr float Clip = 1.98F;

    // Quantisation scale factors for the two layers.
    constexpr std::uint32_t L1Q = 255;
    constexpr std::uint32_t OutputQ = 64;

    constexpr QuantiseMode Mode = QuantiseMode::Round;

    // The output file is padded up to a multiple of this block size.
    constexpr std::size_t PaddingBlockSize = 64;

    // ========================================================================

    namespace detail
    {
        // Recursive helper that builds nested std::array types,
        // one array dimension per size parameter.
        template <typename Elem, std::size_t First, std::size_t... Rest>
        struct MultiArrayImpl
        {
            using Type = std::array<typename MultiArrayImpl<Elem, Rest...>::Type, First>;
        };

        template <typename Elem, std::size_t First>
        struct MultiArrayImpl<Elem, First>
        {
            using Type = std::array<Elem, First>;
        };
    }

    // MultiArray<T, A, B, ...> == std::array<std::array<..., B>, A>.
    template <typename T, std::size_t... Ns>
    using MultiArray = typename detail::MultiArrayImpl<T, Ns...>::Type;

    // Both perspectives' accumulators feed the output layer; pairwise
    // multiplication fuses each pair of activations into a single value.
    constexpr auto L1Weights = 2 * L1 / (1 + PairwiseMul);

    // On-disk layout shared by the raw (float) and quantised (int16) nets;
    // only the parameter type and the input-bucket count differ.
    template <typename Param, std::uint32_t Buckets>
    struct Network
    {
        MultiArray<Param, Buckets, InputSize * L1> ftWeights;
        std::array<Param, L1> ftBiases;
        std::array<Param, L1Weights * OutputBuckets> l1Weights;
        std::array<Param, OutputBuckets> l1Biases;
    };

    // The raw net has one extra leading bucket (index 0) for the factoriser.
    using RawNetwork = Network<float, InputBuckets + Factorised>;
    using QuantisedNetwork = Network<std::int16_t, InputBuckets>;

    using RawNetworkUnfactorised = Network<float, InputBuckets>;

    // Clips v to [-Clip, Clip], scales it by Q and converts to int16,
    // rounding or truncating according to Mode.
    template <std::uint32_t Q>
    [[nodiscard]] inline auto quantise(float v)
    {
        const auto clipped = std::clamp(v, -Clip, Clip);
        auto scaled = clipped * static_cast<float>(Q);

        if constexpr (Mode == QuantiseMode::Round)
            scaled = std::round(scaled);

        // With sane Clip/Q combinations this can never exceed int16 range.
        assert(std::abs(scaled) <= static_cast<float>(std::numeric_limits<std::int16_t>::max()));

        return static_cast<std::int16_t>(scaled);
    }

    // Rounds v up to the next multiple of Block.
    template <std::size_t Block>
    [[nodiscard]] inline auto pad(std::size_t v)
    {
        const auto blocks = (v + Block - 1) / Block;
        return blocks * Block;
    }
}
|
||||
|
||||
auto main() -> int
|
||||
{
|
||||
auto raw = std::make_unique<RawNetwork>();
|
||||
|
||||
{
|
||||
std::ifstream in{InFile, std::ios::binary};
|
||||
|
||||
if (!in)
|
||||
{
|
||||
std::cerr << "failed to open source network" << std::endl;
|
||||
std::cerr << std::strerror(errno) << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (!in.read(reinterpret_cast<char *>(raw.get()), sizeof(RawNetwork)))
|
||||
{
|
||||
std::cerr << "failed to load source network" << std::endl;
|
||||
|
||||
if (in.eof())
|
||||
{
|
||||
std::cerr << "Source network too small";
|
||||
if (Factorised && in.gcount() >= sizeof(RawNetworkUnfactorised))
|
||||
std::cerr << " - unfactorised network?";
|
||||
std::cerr << std::endl;
|
||||
}
|
||||
else std::cerr << std::strerror(errno) << std::endl;
|
||||
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
auto quantised = std::make_unique<QuantisedNetwork>();
|
||||
|
||||
for (std::uint32_t bucket = 0; bucket < InputBuckets; ++bucket)
|
||||
{
|
||||
for (std::uint32_t weight = 0; weight < InputSize * L1; ++weight)
|
||||
{
|
||||
auto param = raw->ftWeights[bucket + Factorised][weight];
|
||||
|
||||
if constexpr (Factorised)
|
||||
param += raw->ftWeights[0][weight];
|
||||
|
||||
quantised->ftWeights[bucket][weight] = quantise<L1Q>(param);
|
||||
}
|
||||
}
|
||||
|
||||
for (std::uint32_t bias = 0; bias < L1; ++bias)
|
||||
{
|
||||
quantised->ftBiases[bias] = quantise<L1Q>(raw->ftBiases[bias]);
|
||||
}
|
||||
|
||||
if constexpr (TransposeOutputWeights)
|
||||
{
|
||||
for (std::uint32_t weight = 0; weight < L1Weights; ++weight)
|
||||
{
|
||||
for (std::uint32_t bucket = 0; bucket < OutputBuckets; ++bucket)
|
||||
{
|
||||
const auto src = weight * OutputBuckets + bucket;
|
||||
const auto dst = bucket * L1Weights + weight;
|
||||
|
||||
quantised->l1Weights[dst] = quantise<OutputQ>(raw->l1Weights[src]);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (std::uint32_t weight = 0; weight < L1Weights * OutputBuckets; ++weight)
|
||||
{
|
||||
quantised->l1Weights[weight] = quantise<OutputQ>(raw->l1Weights[weight]);
|
||||
}
|
||||
}
|
||||
|
||||
for (std::uint32_t bias = 0; bias < OutputBuckets; ++bias)
|
||||
{
|
||||
quantised->l1Biases[bias] = quantise<L1Q * OutputQ>(raw->l1Biases[bias]);
|
||||
}
|
||||
|
||||
{
|
||||
std::ofstream out{OutFile, std::ios::binary};
|
||||
|
||||
if (!out.write(reinterpret_cast<const char *>(quantised.get()), sizeof(QuantisedNetwork)))
|
||||
{
|
||||
std::cerr << "failed to write transposed network" << std::endl;
|
||||
std::cerr << std::strerror(errno) << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
if constexpr (PaddingBlockSize > 1)
|
||||
{
|
||||
if (const auto padding = pad<PaddingBlockSize>(sizeof(QuantisedNetwork)) - sizeof(QuantisedNetwork);
|
||||
padding != 0)
|
||||
{
|
||||
static const std::array<std::byte, PaddingBlockSize> empty{};
|
||||
|
||||
if (!out.write(reinterpret_cast<const char *>(empty.data()), static_cast<std::streamsize>(padding)))
|
||||
{
|
||||
std::cerr << "failed to write padding" << std::endl;
|
||||
std::cerr << std::strerror(errno) << std::endl;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
29
Makefile
29
Makefile
@@ -4,30 +4,37 @@
|
||||
|
||||
CC := clang
|
||||
EXE := bin/heimdall
|
||||
EVALFILE := ../hofud-v2.bin
|
||||
EVALFILE := ../networks/files/mistilteinn.bin
|
||||
NET_NAME := $(notdir $(EVALFILE))
|
||||
LD := ld
|
||||
SRCDIR := src
|
||||
LFLAGS := -flto -fuse-ld=$(LD)
|
||||
NFLAGS := --cc:$(CC) --mm:atomicArc -d:useMalloc -o:$(EXE) -d:evalFile=$(EVALFILE)
|
||||
NFLAGS := --panics:on --cc:$(CC) --mm:atomicArc -d:useMalloc -o:$(EXE) --passL:"$(LFLAGS)" -d:evalFile=$(EVALFILE)
|
||||
CFLAGS := -flto -static
|
||||
|
||||
CFLAGS_MODERN := -flto -mtune=haswell -march=haswell -static
|
||||
NFLAGS_MODERN := $(NFLAGS) -d:danger --passC:"$(CFLAGS_MODERN)" --passL:"$(LFLAGS)" -d:simd -d:avx2
|
||||
CFLAGS_MODERN := $(CFLAGS) -mtune=haswell -march=haswell
|
||||
NFLAGS_MODERN := $(NFLAGS) -d:danger --passC:"$(CFLAGS_MODERN)" -d:simd -d:avx2
|
||||
|
||||
CFLAGS_NATIVE:= -flto -mtune=native -march=native -static
|
||||
NFLAGS_NATIVE := $(NFLAGS) -d:danger --passC:"$(CFLAGS_MODERN)" --passL:"$(LFLAGS)" -d:simd -d:avx2
|
||||
CFLAGS_NATIVE:= $(CFLAGS) -mtune=native -march=native
|
||||
NFLAGS_NATIVE := $(NFLAGS) -d:danger --passC:"$(CFLAGS_NATIVE)" -d:simd -d:avx2
|
||||
|
||||
CFLAGS_LEGACY := -flto -mtune=core2 -march=core2 -static
|
||||
NFLAGS_LEGACY := $(NFLAGS) -d:danger --passC:"$(CFLAGS_LEGACY)" --passL:"$(LFLAGS)" -u:simd -u:avx2
|
||||
CFLAGS_LEGACY := $(CFLAGS) -mtune=core2 -march=core2
|
||||
NFLAGS_LEGACY := $(NFLAGS) -d:danger --passC:"$(CFLAGS_LEGACY)" -u:simd -u:avx2
|
||||
|
||||
|
||||
deps:
|
||||
nimble install -d
|
||||
|
||||
modern: deps
|
||||
net:
|
||||
git submodule update --init --recursive
|
||||
cd networks && git fetch origin && git checkout FETCH_HEAD
|
||||
git lfs fetch --include files/$(NET_NAME)
|
||||
|
||||
modern: deps net
|
||||
nim c $(NFLAGS_MODERN) $(SRCDIR)/heimdall.nim
|
||||
|
||||
legacy: deps
|
||||
legacy: deps net
|
||||
nim c $(NFLAGS_LEGACY) $(SRCDIR)/heimdall.nim
|
||||
|
||||
native: deps
|
||||
native: deps net
|
||||
nim c $(NFLAGS_NATIVE) $(SRCDIR)/heimdall.nim
|
||||
@@ -24,6 +24,8 @@ architecture the compile was done on (and is what you want for releases/sharing
|
||||
Or you can grab the latest version from the [releases](https://git.nocturn9x.space/nocturn9x/heimdall/releases) page
|
||||
|
||||
|
||||
**Note**: Unless you know what you're doing and how `nim.cfg` works, you probably don't want to build Heimdall using nimble. Just use the Makefile
|
||||
|
||||
**P.S.**: If you want to install Heimdall on your system you can also run `nimble install` (making sure that nimble's
|
||||
own binary directory is in your system's path), which will build the same executable that a bare `make` would (no
|
||||
legacy/generic installation support as of now)
|
||||
|
||||
1
networks
Submodule
1
networks
Submodule
Submodule networks added at 62058cd6f5
5
nim.cfg
5
nim.cfg
@@ -17,8 +17,9 @@
|
||||
#-d:mimalloc
|
||||
#-d:enableTuning
|
||||
#-d:pinSearchThreads
|
||||
-d:evalFile="../hofud-v2.bin"
|
||||
-d:hlSize=1280
|
||||
-d:evalFile="../mistilteinn.bin"
|
||||
-d:hlSize=1536
|
||||
-d:ftSize=704
|
||||
-d:inputBuckets=16
|
||||
-d:evalNormalizeFactor=298
|
||||
--panics:on
|
||||
|
||||
@@ -74,7 +74,13 @@ proc newEvalState*(networkPath: string = ""): EvalState =
|
||||
func feature(perspective: PieceColor, color: PieceColor, piece: PieceKind, square: Square): int =
|
||||
## Constructs a feature from the given perspective for a piece
|
||||
## of the given type and color on the given square
|
||||
let colorIndex = if perspective == color: 0 else: 1
|
||||
|
||||
# We always use index 0 for the king because we do something called merged kings:
|
||||
# due to the layout of our input buckets (i.e. they don't span more than 2x2 squares),
|
||||
# it is impossible for two kings to be in the same bucket at any given time, so we can
|
||||
# save a bunch of space (about 8%) by only accounting for one king per bucket, shrinking
|
||||
# the size of the feature transformer from 768 inputs to 704
|
||||
let colorIndex = if (perspective == color or piece == King): 0 else: 1
|
||||
let pieceIndex = piece.int
|
||||
let squareIndex = if perspective == White: int(square.flipRank()) else: int(square)
|
||||
|
||||
|
||||
@@ -58,10 +58,13 @@ proc loadNet*(stream: Stream): Network =
|
||||
for i in 0..<HL_SIZE:
|
||||
result.ft.bias[i] = stream.readInt16().toLittleEndian()
|
||||
|
||||
for i in 0..<(HL_SIZE * 2):
|
||||
for j in 0..<NUM_OUTPUT_BUCKETS:
|
||||
# We transpose the output layer for faster CPU inference
|
||||
result.l1.weight[j][i] = stream.readInt16().toLittleEndian()
|
||||
for i in 0..<NUM_OUTPUT_BUCKETS:
|
||||
for j in 0..<(HL_SIZE * 2):
|
||||
# Note to self: bullet already transposes the weights for us
|
||||
# so we don't need to do it manually (this is done because it
|
||||
# allows for faster CPU inference). Just something to keep in
|
||||
# mind!
|
||||
result.l1.weight[i][j] = stream.readInt16().toLittleEndian()
|
||||
|
||||
for i in 0..<NUM_OUTPUT_BUCKETS:
|
||||
result.l1.bias[i] = stream.readInt16().toLittleEndian()
|
||||
|
||||
BIN
src/heimdall/resources/misc/lichess-big3-resolved.book.zstd
(Stored with Git LFS)
BIN
src/heimdall/resources/misc/lichess-big3-resolved.book.zstd
(Stored with Git LFS)
Binary file not shown.
@@ -574,9 +574,6 @@ proc log(self: SearchManager, depth, variation: int, line: array[256, Move], bes
|
||||
proc shouldStop*(self: var SearchManager, inTree=true): bool {.inline.} =
|
||||
## Returns whether searching should
|
||||
## stop
|
||||
if self.expired:
|
||||
# Search limit has expired before
|
||||
return true
|
||||
if self.cancelled():
|
||||
# Search has been cancelled!
|
||||
return true
|
||||
@@ -590,7 +587,6 @@ proc shouldStop*(self: var SearchManager, inTree=true): bool {.inline.} =
|
||||
self.expired = result
|
||||
|
||||
|
||||
|
||||
proc getReduction(self: SearchManager, move: Move, depth, ply, moveNumber: int, isPV: static bool, improving, cutNode: bool): int {.inline.} =
|
||||
## Returns the amount a search depth should be reduced to
|
||||
let moveCount = when isPV: self.parameters.lmrMoveNumber.pv else: self.parameters.lmrMoveNumber.nonpv
|
||||
|
||||
Reference in New Issue
Block a user