ref: 2b768fac5b4bf58983c4873af9217a480f0e9df9
parent: 86cc53a590f0a8c72259dfd4bdba7012cf5c49ea
author: zamfofex <zamfofex@twdb.moe>
date: Fri Oct 25 01:41:56 EDT 2024
switch to MCTS
--- a/README.md
+++ b/README.md
@@ -27,7 +27,6 @@
- [compiling on other systems](#porting-moonfish)
- using moonfish’s tools
- [using “play” and “analyse”](#using-play-and-analyse)
- - [using “book”](#using-book) (for adding simple opening book support to bots without it by wrapping them)
- [using “chat”](#using-chat) and [using “lichess”](#using-lichess) (for integrating with IRC and Lichess)
- [helping improve moonfish!](#contributing-to-moonfish)
- [license](#license)
@@ -36,7 +35,7 @@
---
- simple evaluation based on PSTs
-- alpha/beta pruning search
+- MCTS (Monte Carlo tree search)
- cute custom UCI TUIs
- custom Lichess integration
- custom IRC integration
@@ -47,7 +46,7 @@
These are things that might be resolved eventually.
- the TUIs do not let you underpromote
-- no support for `go infinite`, `go mate`, or `go nodes`
+- no support for `go depth`, `go infinite`, `go mate`, or `go nodes`
- no FEN validation (may lead to potential exploits)
download
@@ -81,13 +80,9 @@
Conversely, you may also invoke your compiler by hand. (Feel free to replace `cc` with your compiler of choice.)
~~~
-cc -ansi -O3 -pthread -D_POSIX_C_SOURCE=199309L -o moonfish chess.c search.c main.c
+cc -ansi -O3 -D_POSIX_C_SOURCE=199309L -o moonfish chess.c search.c main.c -lm
~~~
-Note: If your C implementation doesn’t support pthreads, but supports C11 threads, you may pass in `-Dmoonfish_c11_threads`.
-
-Note: If your C implementation doesn’t support threads at all, you may pass in `-Dmoonfish_no_threads`.
-
using “play” and “analyse”
---
@@ -183,20 +178,19 @@
compiling on Windows
---
-Clone the repository, then open `moonfish.vcxproj` with Visual Studio. Support for [C11 `<threads.h>`][C11 threads in VS] is required. Only the UCI bot will be compiled, not its tools. (You may use a GUI like [cutechess] to try it.)
+Clone the repository, then open `moonfish.vcxproj` with Visual Studio. Only the UCI bot will be compiled, not its tools. (You may use a GUI like [cutechess] to try it.)
Note that [MinGW] compilation is also supported.
[cutechess]: <https://github.com/cutechess/cutechess>
-[C11 threads in VS]: <https://devblogs.microsoft.com/cppblog/c11-threads-in-visual-studio-2022-version-17-8-preview-2/>
[MinGW]: <https://mingw-w64.org>
porting moonfish
---
-The only pieces of functionality the moonfish depends on that are not specified entirely in C89 are pthreads (POSIX threads) and `clock_gettime`. POSIX threads are not required, and may be substituted by C11 threads or even disabled altogether with compile-time macros. (See [“compiling from source”](#compile-from-source)) However, `clock_gettime` is required and cannot be replaced with macros.
+The only piece of functionality the moonfish depends on that is not specified entirely in C89 is `clock_gettime`.
-Porting moonfish to a different platform should be a matter of simply providing a “mostly C89‐compliant” environment alongside `clock_gettime` and pthreads. Of course, moonfish doesn’t make use of *all* C89 features, so it is not necessary to have features that it doesn’t use. [Compiling on 9front](#compiling-on-9front), for example, works through NPE, which provides something close enough to C89 for moonfish to work.
+Porting moonfish to a different platform should be a matter of simply providing a “mostly C89‐compliant” environment alongside `clock_gettime`. Of course, moonfish doesn’t make use of *all* C89 features, so it is not necessary to have features that it doesn’t use. [Compiling on 9front](#compiling-on-9front), for example, works through NPE, which provides something close enough to C89 for moonfish to work.
license
---
--- a/main.c
+++ b/main.c
@@ -15,12 +15,11 @@
char *arg, *arg0;
struct moonfish_move move;
char name[6];
- long int our_time, their_time, *xtime;
- long int score;
- int depth;
+ long int our_time, their_time, *xtime, time;
struct moonfish_chess chess;
char *end;
- long int time;
+ long int count;
+ struct moonfish_options options;
if (argc > 1) {fprintf(stderr, "usage: %s\n", argv[0]);
@@ -49,7 +48,6 @@
our_time = -1;
their_time = -1;
- depth = -1;
time = -1;
for (;;) {@@ -82,27 +80,6 @@
}
}
- if (!strcmp(arg0, "depth")) {-
- arg = strtok(NULL, "\r\n\t ");
-
- if (arg == NULL) {- fprintf(stderr, "%s: malformed 'go depth' command\n", argv[0]);
- return 1;
- }
-
- errno = 0;
- depth = strtol(arg, &end, 10);
- if (errno) {- perror(argv[0]);
- return 1;
- }
- if (*end != 0 || depth < 0 || depth > 1000) {- fprintf(stderr, "%s: malformed depth in 'go' command\n", argv[0]);
- return 1;
- }
- }
-
if (!strcmp(arg0, "movetime")) {arg = strtok(NULL, "\r\n\t ");
@@ -114,11 +91,7 @@
errno = 0;
time = strtol(arg, &end, 10);
- if (errno) {- perror(argv[0]);
- return 1;
- }
- if (*end != 0 || time < 0) {+ if (errno || *end != 0 || time < 0) {fprintf(stderr, "%s: malformed move time in 'go' command\n", argv[0]);
return 1;
}
@@ -125,20 +98,16 @@
}
}
- if (our_time < 0) our_time = 0;
- if (their_time < 0) their_time = 0;
-
- if (depth >= 0) {- score = moonfish_best_move_depth(&chess, &move, depth);
+ if (our_time < 0 && time < 0) {+ fprintf(stderr, "%s: warning: missing constraints in 'go' command\n", argv[0]);
}
- else {- if (time >= 0) score = moonfish_best_move_time(&chess, &move, time);
- else score = moonfish_best_move_clock(&chess, &move, our_time, their_time);
- }
- if (depth < 0) depth = 1;
- printf("info depth %d score cp %ld\n", depth, score);+ options.max_time = time;
+ options.our_time = our_time;
+ count = moonfish_best_move(&chess, &move, &options);
+ printf("info nodes %ld\n", count);+
moonfish_to_uci(&chess, &move, name);
printf("bestmove %s\n", name);continue;
@@ -150,7 +119,7 @@
arg = strtok(NULL, "\r\n\t ");
if (arg == NULL) {- fprintf(stderr, "incomplete 'position' command\n");
+ fprintf(stderr, "%s: incomplete 'position' command\n", argv[0]);
return 1;
}
@@ -158,7 +127,7 @@
arg = strtok(NULL, "\r\n");
if (arg == NULL) {- fprintf(stderr, "incomplete 'position fen' command\n");
+ fprintf(stderr, "%s: incomplete 'position fen' command\n", argv[0]);
return 1;
}
moonfish_from_fen(&chess, arg);
@@ -179,7 +148,7 @@
moonfish_chess(&chess);
}
else {- fprintf(stderr, "malformed 'position' command\n");
+ fprintf(stderr, "%s: malformed 'position' command\n", argv[0]);
return 1;
}
}
@@ -197,6 +166,7 @@
continue;
}
+
if (!strcmp(arg, "uci")) { printf("id name moonfish\n"); printf("id author zamfofex\n");--- a/makefile
+++ b/makefile
@@ -13,10 +13,10 @@
all: moonfish play lichess analyse chat
moonfish: moonfish.h chess.c search.c main.c
- $(cc) $(filter %.c,$^) -o $@ -pthread -D_POSIX_C_SOURCE=199309L
+ $(cc) $(filter %.c,$^) -D_POSIX_C_SOURCE=199309L -o $@ -lm
%: moonfish.h tools/tools.h tools/utils.c chess.c tools/%.c
- $(cc) $(filter %.c,$^) -o $@ $(cflags) -D_POSIX_C_SOURCE=200809L
+ $(cc) $(filter %.c,$^) -D_POSIX_C_SOURCE=200809L -o $@ $(cflags)
play analyse: cflags := -pthread
analyse: tools/pgn.c
@@ -24,7 +24,7 @@
lichess: cflags := -ltls -lssl -lcrypto -lcjson
chat: cflags := -ltls -lssl -lcrypto
learn: search.c
-learn: cflags := -Dmoonfish_no_threads -Dmoonfish_learn
+learn: cflags := -Dmoonfish_learn -lm
clean:
git clean -fdx
--- a/moonfish.h
+++ b/moonfish.h
@@ -82,13 +82,6 @@
#define moonfish_empty 0
#define moonfish_outside 0xFF
-/* for tuning the PST */
-#ifdef moonfish_learn
-#define moonfish_t double
-#else
-#define moonfish_t long int
-#endif
-
/* represents a chess position */
struct moonfish_chess {@@ -118,8 +111,14 @@
unsigned char from, to;
};
+/* represents options for the search */
+struct moonfish_options {+ long int max_time;
+ long int our_time;
+};
+
/* the PST */
-extern moonfish_t moonfish_values[];
+extern double moonfish_values[];
/* initialises the position and sets up the initial position */
/* note: this must be called *first* even if you want to use "moonfish_from_fen" */
@@ -134,21 +133,14 @@
/* this will return the number of moves generated */
int moonfish_moves(struct moonfish_chess *chess, struct moonfish_move *moves, unsigned char from);
-/* tries to find the best move in the given position in at most the given time */
-/* the move is stored in the "move" pointer, and the score for the position is returned */
+/* tries to find the best move in the given position with the given options */
+/* the move is stored in the "move" pointer */
/* the move found is the best for the player whose turn it is on the given position */
-/* likewise, the score returned is from the perspective of the player whose turn it is */
-int moonfish_best_move_time(struct moonfish_chess *chess, struct moonfish_move *move, long int time);
+/* this will return the number of positions that were looked into */
+long int moonfish_best_move(struct moonfish_chess *chess, struct moonfish_move *move, struct moonfish_options *options);
-/* same as above, but tries to optimise the time spent searching for the given time left on each player's clock */
-int moonfish_best_move_clock(struct moonfish_chess *chess, struct moonfish_move *move, long int our_time, long int their_time);
-
-/* tries to find the best move on the given position with a given depth */
-/* similar to "moonfish_best_move_time" and "moonfish_best_move_clock" */
-int moonfish_best_move_depth(struct moonfish_chess *chess, struct moonfish_move *move, int depth);
-
/* returns the depth-zero score for the given position */
-moonfish_t moonfish_score(struct moonfish_chess *chess);
+double moonfish_score(struct moonfish_chess *chess);
/* creates a move from UCI notation */
/* the move is stored in "move" */
--- a/search.c
+++ b/search.c
@@ -4,68 +4,17 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <limits.h>
+#include <math.h>
#ifdef _WIN32
-
#include <windows.h>
-#ifndef __MINGW32__
-#define moonfish_c11_threads
-#endif
-
#else
-
#include <time.h>
-
#endif
-#ifdef moonfish_no_threads
-
-#define thrd_t int
-#define thrd_create(thread, fn, arg) ((*fn)(arg), 0)
-#define thrd_join(thread, ret) 0
-#define moonfish_result_t int
-#define moonfish_value 0
-#define mtx_t int
-#define thrd_success 0
-
-#elif defined(moonfish_c11_threads)
-
-#include <threads.h>
-#define moonfish_result_t int
-#define moonfish_value 0
-
-#else
-
-#include <pthread.h>
-#define thrd_t pthread_t
-#define thrd_create(thread, fn, arg) pthread_create(thread, NULL, fn, arg)
-#define thrd_join pthread_join
-#define moonfish_result_t void *
-#define moonfish_value NULL
-#define mtx_t pthread_mutex_t
-#define thrd_success 0
-
-#endif
-
#include "moonfish.h"
-#define moonfish_omega 0x2000
-
-struct moonfish_thread {- thrd_t thread;
- struct moonfish_analysis *analysis;
- struct moonfish_move move;
- int score;
-};
-
-struct moonfish_analysis {- struct moonfish_chess chess;
- struct moonfish_thread threads[256];
- int score;
- int depth;
- long int time;
-};
-
#ifdef _WIN32
static long int moonfish_clock(void)
@@ -80,7 +29,7 @@
struct timespec ts;
if (clock_gettime(CLOCK_MONOTONIC, &ts)) {- perror(NULL);
+ perror("clock_gettime");exit(1);
}
@@ -89,15 +38,24 @@
#endif
-moonfish_t moonfish_values[] = {0,0,0,0,138,180,159,139,137,167,147,150,135,159,159,167,170,190,176,191,222,260,267,253,313,370,387,366,0,0,0,0,311,363,377,386,366,390,408,413,382,416,436,433,416,448,459,462,431,459,483,483,435,479,491,505,402,418,469,477,307,390,403,431,431,422,411,426,452,467,461,456,466,470,482,483,473,475,483,493,470,485,492,508,489,483,496,505,441,465,476,483,442,451,462,465,653,686,713,726,660,684,687,698,680,703,700,711,709,726,728,729,736,755,757,757,760,781,785,777,780,772,790,785,762,764,759,775,1282,1267,1261,1274,1284,1289,1295,1297,1290,1300,1303,1301,1323,1338,1325,1325,1344,1328,1366,1361,1328,1368,1379,1392,1326,1324,1363,1384,1286,1306,1348,1351,-4,5,-51,-42,-9,-11,-30,-58,-37,-26,-36,-36,-44,-16,-17,-16,-11,14,9,-14,19,50,36,15,26,86,41,36,2,42,42,34};+struct moonfish_node {+ struct moonfish_move move;
+ struct moonfish_node *parent;
+ struct moonfish_node *children;
+ double score;
+ long int visits;
+ int count;
+};
-moonfish_t moonfish_score(struct moonfish_chess *chess)
+double moonfish_values[] = {0,0,0,0,138,180,159,139,137,167,147,150,135,159,159,167,170,190,176,191,222,260,267,253,313,370,387,366,0,0,0,0,311,363,377,386,366,390,408,413,382,416,436,433,416,448,459,462,431,459,483,483,435,479,491,505,402,418,469,477,307,390,403,431,431,422,411,426,452,467,461,456,466,470,482,483,473,475,483,493,470,485,492,508,489,483,496,505,441,465,476,483,442,451,462,465,653,686,713,726,660,684,687,698,680,703,700,711,709,726,728,729,736,755,757,757,760,781,785,777,780,772,790,785,762,764,759,775,1282,1267,1261,1274,1284,1289,1295,1297,1290,1300,1303,1301,1323,1338,1325,1325,1344,1328,1366,1361,1328,1368,1379,1392,1326,1324,1363,1384,1286,1306,1348,1351,-4,5,-51,-42,-9,-11,-30,-58,-37,-26,-36,-36,-44,-16,-17,-16,-11,14,9,-14,19,50,36,15,26,86,41,36,2,42,42,34};+
+double moonfish_score(struct moonfish_chess *chess)
{int x, y;
int x1, y1;
int from;
unsigned char type, color;
- moonfish_t score;
+ double score;
score = 0;
@@ -118,166 +76,152 @@
return score;
}
-static int moonfish_search(struct moonfish_thread *thread, struct moonfish_chess *chess, int alpha, int beta, int depth, long int t0, long int time)
+static void moonfish_discard(struct moonfish_node *node)
{- int score;
int i;
- int x, y;
- int count;
- long int t1, c;
+ if (node->count == 0) return;
+ for (i = 0 ; i < node->count ; i++) moonfish_discard(node->children + i);
+ free(node->children);
+}
+
+static void moonfish_expand(struct moonfish_node *node)
+{struct moonfish_move moves[32];
+ int x, y;
+ int count, i;
- if (depth < 0) {- score = moonfish_score(chess);
- if (!chess->white) score *= -1;
- if (score >= beta) return beta;
- if (score < alpha - 100) return alpha;
- if (score > alpha) alpha = score;
- }
- else {- if (thread->analysis->time >= 0 && time < 5) {- depth = 0;
- }
- }
+ node->count = 0;
+ node->children = NULL;
for (y = 0 ; y < 8 ; y++) { for (x = 0 ; x < 8 ; x++) {- count = moonfish_moves(chess, moves, (x + 1) + (y + 2) * 10);
+ count = moonfish_moves(&node->move.chess, moves, (x + 1) + (y + 2) * 10);
+ if (count == 0) continue;
+
+ node->children = realloc(node->children, (node->count + count) * sizeof *node->children);
+ if (node->children == NULL) {+ perror("realloc");+ exit(1);
+ }
+
for (i = 0 ; i < count ; i++) {-
if (!moonfish_validate(&moves[i].chess)) continue;
-
- if (depth < 0) {- if (chess->board[moves[i].to] == moonfish_empty) {- if (moves[i].chess.board[moves[i].to] == chess->board[moves[i].from]) continue;
- }
- }
- t1 = moonfish_clock();
- c = 2 * time * i / count - t1 + t0;
-
- score = -moonfish_search(thread, &moves[i].chess, -beta, -alpha, depth - 1, t1, time / count + c);
-
- if (score >= beta) return beta;
- if (score > alpha) alpha = score;
+ node->children[node->count].move = moves[i];
+ node->children[node->count].parent = node;
+ node->children[node->count].score = 0;
+ node->children[node->count].visits = 0;
+ node->children[node->count].count = 0;
+ node->count++;
}
}
}
- return alpha;
+ if (node->count == 0 && node->children != NULL) {+ free(node->children);
+ }
}
-static moonfish_result_t moonfish_start_search(void *data)
+static double moonfish_confidence(struct moonfish_node *node)
{- struct moonfish_thread *thread;
-
- thread = data;
-
- thread->score = -moonfish_search(
- thread, &thread->move.chess,
- -moonfish_omega, moonfish_omega,
- thread->analysis->depth, moonfish_clock(), thread->analysis->time
- );
-
- return moonfish_value;
+ if (node->visits == 0) return 1e9;
+ return node->score / node->visits + 1.25 * sqrt(log(node->parent->visits) / node->visits);
}
-static void moonfish_iteration(struct moonfish_analysis *analysis, struct moonfish_move *best_move)
+static struct moonfish_node *moonfish_select(struct moonfish_node *node)
{- int x, y;
- struct moonfish_move moves[32];
- int i, j, count;
+ struct moonfish_node *next;
+ double max_confidence, confidence;
+ int i;
-#ifdef moonfish_no_threads
-
- int total;
- int invalid_count;
-
- if (analysis->time >= 0) {+ while (node->count > 0) {- total = 0;
+ next = NULL;
+ max_confidence = -1e9;
- for (y = 0 ; y < 8 ; y++) {- for (x = 0 ; x < 8 ; x++) {- invalid_count = 0;
- count = moonfish_moves(&analysis->chess, moves, (x + 1) + (y + 2) * 10);
- for (i = 0 ; i < count ; i++) {- if (!moonfish_validate(&moves[i].chess)) invalid_count++;
- }
- total += count - invalid_count;
+ for (i = 0 ; i < node->count ; i++) {+ confidence = moonfish_confidence(node->children + i);
+ if (confidence > max_confidence) {+ next = node->children + i;
+ max_confidence = confidence;
}
}
- analysis->time /= total;
- }
-
-#endif
-
- j = 0;
-
- for (y = 0 ; y < 8 ; y++) {- for (x = 0 ; x < 8 ; x++) {-
- count = moonfish_moves(&analysis->chess, moves, (x + 1) + (y + 2) * 10);
- for (i = 0 ; i < count ; i++) {-
- if (!moonfish_validate(&moves[i].chess)) continue;
-
- analysis->threads[j].analysis = analysis;
- analysis->threads[j].move = moves[i];
-
- if (thrd_create(&analysis->threads[j].thread, &moonfish_start_search, analysis->threads + j) != thrd_success) {- fprintf(stderr, "error creating thread\n");
- exit(1);
- }
-
- j++;
- }
- }
+ node = next;
}
- analysis->score = -2 * moonfish_omega;
- for (i = 0 ; i < j ; i++) {-
- if (thrd_join(analysis->threads[i].thread, NULL) != thrd_success) {- fprintf(stderr, "error joining thread\n");
- exit(1);
- }
-
- if (analysis->threads[i].score > analysis->score) {- *best_move = analysis->threads[i].move;
- analysis->score = analysis->threads[i].score;
- }
- }
+ return node;
}
-int moonfish_best_move_depth(struct moonfish_chess *chess, struct moonfish_move *best_move, int depth)
+static void moonfish_propagate(struct moonfish_node *node, double score)
{- static struct moonfish_analysis analysis;
-
- analysis.chess = *chess;
- analysis.depth = depth;
- analysis.time = -1;
- moonfish_iteration(&analysis, best_move);
- return analysis.score;
+ while (node != NULL) {+ node->visits++;
+ node->score += score;
+ node = node->parent;
+ score = 1 - score;
+ }
}
-int moonfish_best_move_time(struct moonfish_chess *chess, struct moonfish_move *best_move, long int time)
+static void moonfish_search(struct moonfish_node *node, int count)
{- static struct moonfish_analysis analysis;
+ int i;
+ struct moonfish_node *leaf;
+ double score;
- analysis.chess = *chess;
- time -= 125;
- if (time < 10) time = 10;
- analysis.depth = 16;
- analysis.time = time;
- moonfish_iteration(&analysis, best_move);
- return analysis.score;
+ for (i = 0 ; i < count ; i++) {+ leaf = moonfish_select(node);
+ if (moonfish_finished(&leaf->move.chess)) {+ if (moonfish_checkmate(&leaf->move.chess)) moonfish_propagate(leaf, 0.5);
+ else moonfish_propagate(leaf, 0.5);
+ continue;
+ }
+ moonfish_expand(leaf);
+ score = moonfish_score(&leaf->move.chess);
+ if (!leaf->move.chess.white) score *= -1;
+ moonfish_propagate(leaf, 1 / (1 + pow(10, score / 400)));
+ }
}
-int moonfish_best_move_clock(struct moonfish_chess *chess, struct moonfish_move *best_move, long int our_time, long int their_time)
+long int moonfish_best_move(struct moonfish_chess *chess, struct moonfish_move *best_move, struct moonfish_options *options)
{- (void) their_time;
- return moonfish_best_move_time(chess, best_move, our_time / 16);
+ static struct moonfish_node node;
+
+ struct moonfish_node *best_node;
+ long int time, time0;
+ int i;
+ long int best_visits;
+
+ time0 = moonfish_clock();
+ time = LONG_MAX;
+
+ if (options->max_time >= 0 && time > options->max_time) time = options->max_time;
+ if (options->our_time >= 0 && time > options->our_time / 16) time = options->our_time / 16;
+ time -= time / 32 + 125;
+
+ node.move.chess = *chess;
+ node.parent = NULL;
+ node.count = 0;
+ node.score = 0;
+ node.visits = 0;
+
+ moonfish_search(&node, 0x800);
+ while (moonfish_clock() - time0 < time) {+ moonfish_search(&node, 0x2000);
+ }
+
+ best_visits = -1;
+ best_node = NULL;
+
+ for (i = 0 ; i < node.count ; i++) {+ if (node.children[i].visits > best_visits) {+ best_node = node.children + i;
+ best_visits = best_node->visits;
+ }
+ }
+
+ *best_move = best_node->move;
+ moonfish_discard(&node);
+ return node.visits;
}
--
⑨