-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsolver_cpu_opt.inl
362 lines (318 loc) · 14.4 KB
/
solver_cpu_opt.inl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
// Copyright (c) Michael M. Magruder (https://github.com/mikemag)
//
// This source code is licensed under the MIT license found in the
// LICENSE file in the root directory of this source tree.
#include <algorithm>
#include <cassert>
#include <iostream>
#include <iterator>
#include <limits>
#include <ranges>
#include <span>
#include "utils.hpp"
// CPU Implementation w/ Some Optimizations
//
// This is a version that runs on the CPU and is structured very close to the CUDA version. It's much faster than the
// reference impl, and includes the most common gameplay shortcuts.
//
// This plays all games at once, playing a turn on each game and forming a set of next guesses for the next turn.
// Scoring all games subdivides those games with the same score into disjoint regions. We form an id for each region
// made up of a growing list of scores, and sorting the list of games by region id groups the regions together. Then we
// can find a single best guess for each region and play that for all games in the region.
//
// Region ids are chosen to ensure that a stable sort keeps games within each region in their original lexical order.
// Many algorithms pick the first lexical game on ties.
//
// Pseudocode for the algorithm:
// start: all games get the same initial guess
//
// while any games have guesses to play:
// score all games against their next guess, if any, which was given per-region
// append their score to their region id
// if no games have guesses, then we're done
// stable sort all games by region id
// get start and run length for each region by id
// for reach region:
// games with a win at the end of their region id get no new guess
// otherwise, find next guess using the region itself as the possible solutions set PS
template <typename SolverConfig>
std::chrono::nanoseconds SolverCPUFaster<SolverConfig>::playAllGames(uint32_t packedInitialGuess) {
constexpr static bool LOG = SolverConfig::LOG;
auto startTime = chrono::high_resolution_clock::now();
constexpr static int MAX_SUPPORTED_TURNS = 16;
vector<CodewordT>& allCodewords = CodewordT::getAllCodewords();
// Starting case: region ids are empty
regionIDs = vector<RegionIDT>(allCodewords.size());
for (int i = 0; i < regionIDs.size(); i++) regionIDs[i].index = i;
// Space for PS
vector<CodewordT> ps;
ps.reserve(allCodewords.size());
// Space for a next moves vector for each possible turn
nextMovesList.resize(MAX_SUPPORTED_TURNS, vector<CodewordT>(allCodewords.size()));
auto nextMovesIter = nextMovesList.begin();
std::fill(nextMovesIter->begin(), nextMovesIter->end(), CodewordT(packedInitialGuess));
uint depth = 0;
auto regionIDsEnd = regionIDs.end(); // The set of active games contracts as we go
while (true) {
depth++;
if (LOG) printf("\nDepth = %d\n", depth);
// Score all games against their next guess, if any, which was given per-region. Append the score to the game's
// region id.
for (auto rit = regionIDs.begin(); rit != regionIDsEnd; ++rit) {
auto& regionID = *rit;
auto s = allCodewords[regionID.index].score((*nextMovesIter)[regionID.index]);
regionID.append(s.result, depth);
}
counters[find_counter(counterDescs, "Scores")] += regionIDsEnd - regionIDs.begin();
// Advance to a fresh next moves vector
++nextMovesIter;
// Push won games to the end and focus on the remaining games
regionIDsEnd = std::partition(regionIDs.begin(), regionIDsEnd, [](const RegionIDT& r) { return !r.isGameOver(); });
if (LOG) printf("Number of games left: %ld\n", regionIDsEnd - regionIDs.begin());
// If no games need new moves, then we're done
if (regionIDsEnd - regionIDs.begin() == 0) break;
// Sort all games by region id
std::stable_sort(regionIDs.begin(), regionIDsEnd,
[&](const RegionIDT& a, const RegionIDT& b) { return a.value < b.value; });
// Get start and run length for each region
struct RegionBounds {
RegionIDT regionID;
uint32_t start;
uint32_t length;
};
vector<RegionBounds> regions{};
regions.push_back({regionIDs[0], 0, 0});
for (uint32_t i = 0; i < regionIDsEnd - regionIDs.begin(); i++) {
if (regionIDs[i].value == regions.back().regionID.value) {
regions.back().length++;
} else {
regions.push_back({regionIDs[i], i, 1});
}
}
if (LOG) printf("Number of regions: %lu\n", regions.size());
// For reach region, find next guess using the region itself as the possible solutions set PS
for (auto& region : regions) {
CodewordT ng;
if (region.length <= 2) {
ng = allCodewords[region.regionID.index];
} else {
ps.clear();
for (int j = region.start; j < region.start + region.length; j++) {
ps.push_back(allCodewords[regionIDs[j].index]);
}
bool fdoptSuccessful = false;
if (ps.size() < SolverConfig::TOTAL_SCORES) {
fdoptSuccessful = shortcutSmallSets(ps, ng);
counters[find_counter(counterDescs, "Scores")] += ps.size() * ps.size();
}
if (!fdoptSuccessful) {
vector<CodewordT> usedCodewords;
for (auto& previousMoves : nextMovesList) {
usedCodewords.push_back(previousMoves[region.regionID.index]);
}
if constexpr (applySymOpt) {
auto reducedAC = getReducedAC(allCodewords, ps, usedCodewords, depth);
ng = nextGuess(reducedAC, ps, usedCodewords);
assert(ng == nextGuess(allCodewords, ps, usedCodewords));
} else {
ng = nextGuess(allCodewords, ps, usedCodewords);
}
}
}
for (int j = region.start; j < region.start + region.length; j++) {
(*nextMovesIter)[regionIDs[j].index] = ng;
}
}
if (depth == MAX_SUPPORTED_TURNS) {
printf("\nMax depth reached, impl is broken!\n");
break;
}
}
auto endTime = chrono::high_resolution_clock::now();
// Post-process for stats
for (int i = 0; i < regionIDs.size(); i++) {
auto c = regionIDs[i].countMovesPacked();
this->maxDepth = max<size_t>(this->maxDepth, c);
this->totalTurns += c;
}
return endTime - startTime;
}
// These algorithms all rely on splitting the remaining possible guesses into groups or subsets based on their scores
// vs each other.
//
// The core of these comes from the method described by Knuth in [1], which subsets the possibilities by score,
// comparing all remaining codewords (i.e., not yet guessed) to all current possible guesses. This is O(n^2) in the
// number of total codewords, though the size of the possible solutions set does decrease drastically with each play. Of
// course, n = c^PIN_COUNT, so this is very, very expensive for larger games. Only the sizes of the subsets are
// retained; the actual codewords in each subset are unnecessary.
//
// Given these subset sizes, other algorithms can proceed to come up with a rank for each possible guess, and the guess
// with the maximum rank will be used, favoring guesses which are still in the possible solution set on ties.
//
// There's a decent summary of Knuth's overall algorithm on Wikipedia, too:
// https://en.wikipedia.org/wiki/Mastermind_(board_game)
//
// References:
// [1] D.E. Knuth. The computer as Master Mind. Journal of Recreational Mathematics, 9(1):1–6, 1976.
// https://www.cs.uni.edu/~wallingf/teaching/cs3530/resources/knuth-mastermind.pdf
//
// [2] Geoffroy Ville, An Optimal Mastermind (4,7) Strategy and More Results in the Expected Case, March
// 2013, arXiv:1305.1010 [cs.GT].
//
// [3] Barteld Kooi, Yet another mastermind Strategy. International Computer Games Association Journal,
// 28(1):13–20, 2005. https://www.researchgate.net/publication/30485793_Yet_another_Mastermind_strategy
template <typename SolverConfig>
typename SolverConfig::CodewordT SolverCPUFaster<SolverConfig>::nextGuess(const vector<CodewordT>& allCodewords,
const vector<CodewordT>& possibleSolutions,
const vector<CodewordT>& usedCodewords) {
using ALGO = typename SolverConfig::ALGO;
CodewordT bestGuess;
size_t bestRank = 0;
bool bestIsPossibleSolution = false;
int subsetSizes[SolverConfig::MAX_SCORE_SLOTS];
fill(begin(subsetSizes), end(subsetSizes), 0);
for (const auto& g : allCodewords) {
bool isPossibleSolution = false;
for (const auto& ps : possibleSolutions) {
Score r = g.score(ps);
ALGO::accumulateSubsetSize(subsetSizes[r.result]);
if (r == CodewordT::WINNING_SCORE) {
isPossibleSolution = true; // Remember if this guess is in the set of possible solutions
}
}
counters[find_counter(counterDescs, "Scores")] += possibleSolutions.size();
typename ALGO::RankingAccumulatorType rankingAccumulator{};
int totalSubsets = 0;
for (auto& s : subsetSizes) {
if (s > 0) {
ALGO::accumulateRanking(rankingAccumulator, s, possibleSolutions.size());
totalSubsets++;
}
s = 0;
}
// Any fully discriminating guess is an instant winner. Part of the shortcutSmallSets opt below.
if (totalSubsets == possibleSolutions.size()) {
return g;
}
uint32_t rank = ALGO::computeRank(rankingAccumulator, possibleSolutions.size());
if (rank > bestRank || (!bestIsPossibleSolution && isPossibleSolution && rank == bestRank)) {
if (find(usedCodewords.cbegin(), usedCodewords.cend(), g) != usedCodewords.end()) {
continue; // Ignore codewords we've already used
}
bestRank = rank;
bestGuess = g;
bestIsPossibleSolution = isPossibleSolution;
}
}
return bestGuess;
}
// Optimization from [2]: if the possible solution set is smaller than the number of possible scores, and if one
// codeword can fully discriminate all of the possible solutions (i.e., it produces a different score for each one),
// then play it right away since it will tell us the winner.
//
// This is an interesting shortcut. It doesn't change the results of any of the subsetting algorithms at all: average
// turns, max turns, max secret, and the full histograms all remain precisely the same. What does change is the number
// of scores computed, and the runtime.
template <typename SolverConfig>
bool SolverCPUFaster<SolverConfig>::shortcutSmallSets(const vector<CodewordT>& possibleSolutions,
CodewordT& nextGuess) {
bool subsetSizes[SolverConfig::MAX_SCORE_SLOTS];
for (const auto& psa : possibleSolutions) {
fill(begin(subsetSizes), end(subsetSizes), 0);
for (const auto& psb : possibleSolutions) {
Score r = psa.score(psb);
subsetSizes[r.result] = true;
}
int totalSubsets = 0;
for (auto s : subsetSizes) {
if (s) {
totalSubsets++;
}
}
if (totalSubsets == possibleSolutions.size()) {
nextGuess = psa;
return true;
}
}
return false;
}
// Optimization for Symmetry and Case Equivalence
//
// Adapted from Ville[2], section 5.4. See docs/Symmetry_and_Case_Equivalence.ipynb for full details.
// Produce a reduced AC which only contains representative codewords given the zero and free colors in PS.
template <typename SolverConfig>
vector<typename SolverConfig::CodewordT> SolverCPUFaster<SolverConfig>::getReducedAC(
const vector<CodewordT>& allCodewords, const vector<CodewordT>& possibleSolutions,
const vector<CodewordT>& usedCodewords, uint depth) {
// We can form the Zeros set by looking at which colors are present in PS. PS represents all codewords which are
// consistent with the guesses so far, and therefore contains the colors which could still be in the final solution.
// If a color is absent, that color will not be part of the solution, and is therefore part of the Zeros set.
typename CodewordT::CT usedColors = 0;
for (auto& cw : possibleSolutions) {
usedColors |= cw.packedColors();
}
// We can form the Frees set by looking at all colors played to reach this PS.
typename CodewordT::CT playedColors = 0;
for (const auto& cw : usedCodewords) {
playedColors |= cw.packedColors();
}
// The subsetting algorithms expect the Zero and Free sets to be in lexicographical order, so transformed codewords
// are as well.
uint32_t isZero = 0;
uint32_t isFree = 0;
for (uint8_t color = 1; color <= SolverConfig::COLOR_COUNT; color++) {
if ((usedColors & 0xFF) == 0) {
isZero |= (1 << color);
} else if ((playedColors & 0xFF) == 0) {
isFree |= (1 << color);
}
usedColors >>= 8;
playedColors >>= 8;
}
// Sets of size one are useless, as any transformation does nothing.
int zeroSize = popcount(isZero);
int freeSize = popcount(isFree);
if (zeroSize == 1) {
isZero = 0;
zeroSize = 0;
}
if (freeSize == 1) {
isFree = 0;
freeSize = 0;
}
if (zeroSize == 0 && freeSize == 0) {
return allCodewords;
}
// Caching these on playedColors and usedColors. Hit rate is reasonable, but not as high as you'd initially guess.
ACrCacheKey key{isZero, isFree};
if (symACCache.contains(key)) {
return symACCache[key];
}
// The representative codeword we'll use is the lexically first one in each set of equal transformed codewords. That's
// the codeword which doesn't change during transformation, and we can just use it straightaway and toss the rest. We
// don't actually have to form any new codewords, try to sort and unique them, etc. (which, frankly, was my first
// idea.)
vector<CodewordT> reducedAC;
std::copy_if(allCodewords.begin(), allCodewords.end(), std::back_inserter(reducedAC),
[&](auto cw) { return cw.isClassRepresentative(isZero, isFree); });
// Verify AC_r size with the cache.
auto& ACrCache = getACrCache();
int k = SolverConfig::PIN_COUNT * 1000000 + SolverConfig::COLOR_COUNT * 10000 + zeroSize * 100 + freeSize;
if (ACrCache.contains(k)) {
if (ACrCache[k] != reducedAC.size()) {
cout << "ACrCache: incorrect entry, " << k << "=" << ACrCache[k] << " vs actual=" << reducedAC.size() << endl;
}
} else {
cout << "ACrCache: missing entry for key " << k << endl;
}
symACCache[key] = reducedAC;
return reducedAC;
}
template <typename SolverConfig>
void SolverCPUFaster<SolverConfig>::dump() {
Solver::dump<SolverConfig, CodewordT>(regionIDs);
}
template <typename SolverConfig>
vector<uint32_t> SolverCPUFaster<SolverConfig>::getGuessesForGame(uint32_t packedCodeword) {
return Solver::getGuessesForGame<SolverCPUFaster, SolverConfig, CodewordT>(packedCodeword, regionIDs);
}