-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathplayout.c
165 lines (137 loc) · 4.17 KB
/
playout.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
#define DEBUG
#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "board.h"
#include "debug.h"
#include "engine.h"
#include "move.h"
#include "ownermap.h"
#include "playout.h"
/* Whether to set the global debug level to the same value as the playout
 * policy has, in case the two differ. This can make sure that e.g. tactical
 * reading produces the proper level of debug prints during simulations.
 * But it is safe to enable this only in single-threaded instances!
 * (When enabled, play_random_game() overwrites the global debug_level for
 * the duration of the playout and restores it before returning.) */
//#define DEBUGL_BY_PLAYOUT
/* Debug-level test against the playout policy's own debug level rather
 * than the global one. The expansion references a variable named `policy`,
 * so the macro is usable only where such a pointer is in scope. The actual
 * comparison is whatever DEBUGL_() does; it is not defined in this file
 * (presumably in debug.h - confirm). */
#define PLDEBUGL(n) DEBUGL_(policy->debug_level, n)
/* Choose and play a single playout move for @color on board @b.
 *
 * Candidate sources are consulted in a fixed order, each one only if
 * everything before it came up with a pass:
 *   1. setup->prepolicy_hook (optional)
 *   2. policy->choose
 *   3. setup->postpolicy_hook (optional)
 * A non-pass candidate is played with board_play(). If there is no
 * candidate, or the candidate turns out to be illegal, the move is
 * picked by board_play_random() instead, filtered through policy->permit.
 *
 * Returns the coordinate that ended up in @coord: the candidate that was
 * played, or whatever board_play_random() stored there. */
coord_t
play_random_move(struct playout_setup *setup,
		 struct board *b, enum stone color,
		 struct playout_policy *policy)
{
	coord_t coord = pass;

	if (setup->prepolicy_hook)
		coord = setup->prepolicy_hook(policy, setup, b, color);

	if (is_pass(coord))
		coord = policy->choose(policy, setup, b, color);

	if (is_pass(coord) && setup->postpolicy_hook)
		coord = setup->postpolicy_hook(policy, setup, b, color);

	if (!is_pass(coord)) {
		struct move m;
		m.color = color;
		m.coord = coord;

		/* A legal pre-picked move is the common case; we are done. */
		if (board_play(b, &m) >= 0)
			return coord;

		if (PLDEBUGL(4)) {
			fprintf(stderr, "Pre-picked move %d,%d is ILLEGAL:\n",
				coord_x(coord, b), coord_y(coord, b));
			board_print(b, stderr);
		}
		/* Fall through to the random choice below. */
	}

	/* Defer to uniformly random move choice. This must never happen
	 * if the policy is tracking internal board state (unless it
	 * explicitly declares random moves to be acceptable). */
	assert(!policy->setboard || policy->setboard_randomok);
	board_play_random(b, color, &coord, (ppr_permit) policy->permit, policy);

	return coord;
}
/* Play out a whole simulated game on board @b and score the result.
 *
 * @setup:          playout limits and hooks. setup->gamelen caps the total
 *                  number of moves on the board; a non-zero setup->mercymin
 *                  stops the playout early once the capture counts of the
 *                  two colors differ by more than that value.
 * @b:              board to play on; modified in place. b->ps is freed and
 *                  reset to NULL before returning.
 * @starting_color: color to move first.
 * @amafmap:        if non-NULL, each played move (and whether
 *                  board_playing_ko_threat() held after it) is appended.
 * @ownermap:       if non-NULL, filled from the final position.
 * @policy:         move-choosing policy; its setboard() hook, if present,
 *                  is called once before the first move.
 *
 * Moves alternate between the colors until two consecutive passes, the
 * move budget runs out, or the mercy rule triggers.
 *
 * Returns twice the board_fast_score() of the final position converted
 * to int, negated unless @starting_color is S_WHITE. */
int
play_random_game(struct playout_setup *setup,
		 struct board *b, enum stone starting_color,
		 struct playout_amafmap *amafmap,
		 struct board_ownermap *ownermap,
		 struct playout_policy *policy)
{
	assert(setup && policy);
	/* Remaining move budget. This is zero or negative when the board
	 * already holds at least setup->gamelen moves. */
	int gamelen = setup->gamelen - b->moves;

	if (policy->setboard)
		policy->setboard(policy, b);
#ifdef DEBUGL_BY_PLAYOUT
	int debug_level_orig = debug_level;
	debug_level = policy->debug_level;
#endif

	enum stone color = starting_color;

	/* A pass that is already the last move on the board counts as the
	 * first of the two passes that end the game. */
	int passes = is_pass(b->last_move.coord) && b->moves > 0;

	/* Compare against zero explicitly: with a plain truth test a negative
	 * budget would count as "moves left" and the length cap would be
	 * ignored altogether. */
	while (gamelen-- > 0 && passes < 2) {
		coord_t coord = play_random_move(setup, b, color, policy);

#if 0
		/* For UCT, superko test here is downright harmful since
		 * in superko-likely situation we throw away literally
		 * 95% of our playouts; UCT will deal with this fine by
		 * itself. */
		if (unlikely(b->superko_violation)) {
			/* We ignore superko violations that are suicides. These
			 * are common only at the end of the game and are
			 * rather harmless. (They will not go through as a root
			 * move anyway.) */
			if (group_at(b, coord)) {
				if (DEBUGL(3)) {
					fprintf(stderr, "Superko fun at %d,%d in\n", coord_x(coord, b), coord_y(coord, b));
					if (DEBUGL(4))
						board_print(b, stderr);
				}
				return 0;
			} else {
				if (DEBUGL(6)) {
					fprintf(stderr, "Ignoring superko at %d,%d in\n", coord_x(coord, b), coord_y(coord, b));
					board_print(b, stderr);
				}
				b->superko_violation = false;
			}
		}
#endif

		if (PLDEBUGL(7)) {
			fprintf(stderr, "%s %s\n", stone2str(color), coord2sstr(coord, b));
			if (PLDEBUGL(8))
				board_print(b, stderr);
		}

		if (unlikely(is_pass(coord))) {
			passes++;
		} else {
			passes = 0;
		}

		if (amafmap) {
			assert(amafmap->gamelen < MAX_GAMELEN);
			amafmap->is_ko_capture[amafmap->gamelen] = board_playing_ko_threat(b);
			amafmap->game[amafmap->gamelen++] = coord;
		}

		/* Mercy rule: stop as soon as one side is far ahead on captures. */
		if (setup->mercymin && abs(b->captures[S_BLACK] - b->captures[S_WHITE]) > setup->mercymin)
			break;

		color = stone_other(color);
	}

	floating_t score = board_fast_score(b);
	int result = (starting_color == S_WHITE ? score * 2 : - (score * 2));

	if (DEBUGL(6)) {
		fprintf(stderr, "Random playout result: %d (W %f)\n", result, score);
		if (DEBUGL(7))
			board_print(b, stderr);
	}

	if (ownermap)
		board_ownermap_fill(ownermap, b);

	/* Release the playout-specific state hanging off the board.
	 * free(NULL) is a no-op, so no guard is needed. Clear the pointer
	 * afterwards so that stale state can be neither reused nor freed
	 * a second time through this board. */
	free(b->ps);
	b->ps = NULL;

#ifdef DEBUGL_BY_PLAYOUT
	debug_level = debug_level_orig;
#endif

	return result;
}