Move the metal-coreml-stable branch to the v1.15.3-coreml1 tag #3

Merged: 18 commits, Aug 14, 2024
9 changes: 7 additions & 2 deletions cpp/configs/gtp_human5k_example.cfg
@@ -39,8 +39,9 @@ resignMinMovesPerBoardArea = 0.4
  # Note: unless you change other parameters too, by default increasing visits won't do much.
  # If humanSLChosenMoveProp = 1.0 AND humanSLChosenMovePiklLambda is a large number,
  # then KataGo's normal search is ignored except for possibly choosing whether to pass/resign,
- # so more visits will have no effect on play. Still, having some visits is good for
- # ensuring good pass/resign judgment.
+ # so more visits will have no effect on play. Only the raw model is used for move selection.
+ # Still, having some visits is good for ensuring good pass/resign judgment; this is what the 40 visits
+ # are used for, unless you change other parameters to make use of them for move choice too.
  maxVisits = 40
  numSearchThreads = 1
  lagBuffer = 1.0
@@ -64,6 +65,10 @@ delayMoveMax = 10.0
  # Warning: for rank differences > 9 ranks, or drastically mis-matched to the handicap used in the game,
  # this may be out of distribution due to lack of training data and the model might not behave well! Experiment with care.
  # proyear_{YEAR from 1800 to 2023} - imitate historical pros or insei from given year.
+ # NOTE: for pros or high-dan ranks the raw human SL model is NOT expected to match the *strength* of the players imitated,
+ # because it does no search! Go AI reaching high-dan/pro ranks virtually always requires search.
+ # To increase strength and make use of the visits in the search, adjust values like humanSLChosenMovePiklLambda and temperature. See:
+ # https://github.com/lightvector/KataGo/blob/master/docs/Analysis_Engine.md#how-to-get-stronger-human-style-play
  humanSLProfile = preaz_5k

  # The probability that we should play a HUMAN-like move, rather than playing KataGo's move.
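The humanSLChosenMoveProp / humanSLChosenMovePiklLambda behavior described in the comments above (and tuned to 0.08 in the new gtp_human9d_search_example.cfg below) is a piKL-style weighting: candidate moves are weighted by the human SL policy and exponentially downweighted by how much utility the search thinks they lose. A minimal sketch of that weighting follows; the names are hypothetical and this is not KataGo's actual implementation:

```cpp
#include <cmath>
#include <vector>

// Hypothetical sketch of piKL-style move weighting: weight each move by the
// human SL policy times exp(-utilityLoss / lambda), then normalize.
std::vector<double> piklWeights(
  const std::vector<double>& humanPolicy,  // human SL policy per candidate move
  const std::vector<double>& utilityLoss,  // utility lost vs. the best move
  double lambda                            // plays the role of humanSLChosenMovePiklLambda
) {
  std::vector<double> weights(humanPolicy.size());
  double total = 0.0;
  for(size_t i = 0; i < humanPolicy.size(); i++) {
    weights[i] = humanPolicy[i] * std::exp(-utilityLoss[i] / lambda);
    total += weights[i];
  }
  if(total <= 0.0)
    return weights;  // degenerate input, nothing to normalize
  for(size_t i = 0; i < weights.size(); i++)
    weights[i] /= total;  // normalize to a probability distribution
  return weights;
}
```

With a huge lambda, exp(-loss/lambda) is roughly 1 for every move and the raw human policy decides the move, which is why extra visits do not change move choice in the 5k config; with a small lambda, moves the search judges as losing are suppressed and KataGo's evaluations dominate.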
117 changes: 117 additions & 0 deletions cpp/configs/gtp_human9d_search_example.cfg
@@ -0,0 +1,117 @@

# This is an example config for configuring KataGo to attempt to imitate a 9d player and then
# further improve its strength (to possibly modestly superhuman) using KataGo's evaluations and search.

# By contrast, gtp_human5k_example.cfg uses only the raw neural net. Raw neural nets like the
# HumanSL net without search do NOT play accurately enough to reach top human levels.
# If you were to take gtp_human5k_example.cfg and swap out preaz_5k -> preaz_9d,
# despite *attempting* to imitate a 9d player, it would not actually achieve that strength.

# Due to the use of search, this config plays significantly stronger, and depending on how
# you adjust visits and parameters and what `-model` you use for KataGo's model, can probably
# reach 9d strength or higher.

# Using the search greatly reduces the "humanness" of the play in exchange for strength,
# but still retains noticeable bias from the human SL policy and may still play somewhat
# "human-style" openings and moves.

# Running with this config requires giving a human SL model b18c384nbt-humanv0.bin.gz
# on the command line such as:
# ./katago gtp -config gtp_human9d_search_example.cfg -model your_favorite_normal_model_for_katago.bin.gz -human-model b18c384nbt-humanv0.bin.gz
# You can obtain the human model at https://github.com/lightvector/KataGo/releases/tag/v1.15.0

# You can compare the config parameters here with gtp_human5k_example.cfg to see
# which settings differ and why they were changed.
# gtp_human5k_example.cfg also has a bit more explanation on many of the parameters.

logDir = gtp_logs
logAllGTPCommunication = true
logSearchInfo = true
logSearchInfoForChosenMove = false
logToStderr = false

rules = japanese

# Adjusted resignation settings compared to gtp_human5k_example.cfg, to resign a bit more easily.
allowResignation = true
resignThreshold = -0.98 # was -0.99 in gtp_human5k_example.cfg
resignConsecTurns = 10 # was 20 in gtp_human5k_example.cfg
resignMinScoreDifference = 20 # was 40 in gtp_human5k_example.cfg
resignMinMovesPerBoardArea = 0.40

# maxVisits: With a config like this, more visits *will* make it stronger and fewer visits will make it weaker.
# But strength will NOT scale indefinitely upward with visits, unlike KataGo's normal config without human SL.
# This is due to the strong bias of the human SL network.
# You can reduce this number if you are on weaker hardware. It may reduce strength a bit but will still
# provide a huge strength boost over using the humanSL network alone as in gtp_human5k_example.cfg.
# It's NOT recommended to reduce this below about 30-40 visits, since that will result in too little exploration of a variety of moves.
# If you want to adjust strength, see humanSLChosenMovePiklLambda instead.
maxVisits = 400 # 40 in gtp_human5k_example.cfg.

# Having more than one thread speeds up search when visits are larger.
# Make sure the number of threads is much smaller than the number of visits, however.
numSearchThreads = 8 # 1 in gtp_human5k_example.cfg.
lagBuffer = 1.0

# Rough scale in seconds to randomly delay moving, so as not to respond instantly.
# Some moves will delay longer, some moves will delay a little less.
delayMoveScale = 2.0
delayMoveMax = 10.0

# Imitate human amateur 9d players (roughly based on ~KGS ranks)
humanSLProfile = preaz_9d # was preaz_5k in gtp_human5k_example.cfg
humanSLChosenMoveProp = 1.0
humanSLChosenMoveIgnorePass = true

# When a move starts to lose more than 0.08 utility (several percent winrate), downweight it.
# Increase this number to reduce the strength and use the human SL policy more and KataGo's evaluations less.
# Decrease this number to improve strength even further and play less human-like.
# (although below 0.02 you probably are better off going back to a normal KataGo config and scaling visits).
# Since this uses KataGo's judgment, even at large values and with a weak-ranked humanSLProfile
# this may still produce a very strong player.
# Calibrating for a smaller strength gain will take experimentation.
# In addition to increasing this value a lot, e.g. to 0.2, 0.5, 1.0, 2.0, ...,
# you can also try using old/small KataGo nets (e.g. b6c96, b10c128), reducing visits (though reducing below about 30-40
# is NOT recommended), or using the humanSLModel itself as the main "-model".
humanSLChosenMovePiklLambda = 0.08 # was 100000000 in gtp_human5k_example.cfg.

# Spend 80% of visits to explore humanSL moves to ensure they get evaluations to use with humanSLChosenMovePiklLambda
humanSLRootExploreProbWeightless = 0.8 # was 0 in gtp_human5k_example.cfg.
humanSLRootExploreProbWeightful = 0.0
humanSLPlaExploreProbWeightless = 0.0
humanSLPlaExploreProbWeightful = 0.0
humanSLOppExploreProbWeightless = 0.0
humanSLOppExploreProbWeightful = 0.0

humanSLCpuctExploration = 0.50
# Explore moves very widely according to the human SL policy even if they lose up to about 2.0 utility.
# This is not the largest this value can go; you can increase it further to filter even less on winrates
# when deciding which moves to explore.
humanSLCpuctPermanent = 2.0 # was 0.2 in gtp_human5k_example.cfg.

# Reduced temperature settings - reduce the chance of picking unlikely low policy or low-value moves.
chosenMoveTemperatureEarly = 0.70 # was 0.85 in gtp_human5k_example.cfg.
chosenMoveTemperature = 0.25 # was 0.70 in gtp_human5k_example.cfg.
chosenMoveTemperatureHalflife = 30 # was 80 in gtp_human5k_example.cfg.
chosenMoveTemperatureOnlyBelowProb = 1.0 # was 0.01 in gtp_human5k_example.cfg.
chosenMoveSubtract = 0
chosenMovePrune = 0

# Since we're doing more search, increase size of neural net cache a bit for performance.
nnCacheSizePowerOfTwo = 20 # was 17 in gtp_human5k_example.cfg.
nnMutexPoolSizePowerOfTwo = 14

ignorePreRootHistory = false
analysisIgnorePreRootHistory = false

rootNumSymmetriesToSample = 2
useLcbForSelection = false

# Make score utility count a bit more
winLossUtilityFactor = 1.0
staticScoreUtilityFactor = 0.5 # was 0.3 in gtp_human5k_example.cfg.
dynamicScoreUtilityFactor = 0.5 # was 0.0 in gtp_human5k_example.cfg.

useUncertainty = false
subtreeValueBiasFactor = 0.0
useNoisePruning = false
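The chosenMoveTemperature block above decays the sampling temperature from its early-game value toward its late-game value with the given halflife; the formula matches Search::interpolateEarly, visible in the searchhelpers.cpp diff later in this PR. A small worked example under this config's values, assuming the halflife is counted in moves:

```cpp
#include <cmath>
#include <cstdio>

// Illustration of the temperature halflife decay used by the config above,
// mirroring the interpolateEarly formula from the searchhelpers.cpp diff:
//   value + (earlyValue - value) * pow(0.5, turn / halflife)
int main() {
  const double earlyTemp = 0.70;  // chosenMoveTemperatureEarly
  const double temp = 0.25;       // chosenMoveTemperature
  const double halflife = 30.0;   // chosenMoveTemperatureHalflife
  for(int turn = 0; turn <= 120; turn += 30) {
    double t = temp + (earlyTemp - temp) * std::pow(0.5, turn / halflife);
    std::printf("turn %3d: temperature %.4f\n", turn, t);
  }
  // turn 0 -> 0.7000, turn 30 -> 0.4750, turn 60 -> 0.3625, turn 90 -> 0.3063
  return 0;
}
```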
4 changes: 2 additions & 2 deletions cpp/main.cpp
@@ -210,11 +210,11 @@ int main(int argc, const char* const* argv) {


  string Version::getKataGoVersion() {
-   return string("1.15.1-coreml3");
+   return string("1.15.3");
  }

  string Version::getKataGoVersionForHelp() {
-   return string("KataGo v1.15.1-coreml3");
+   return string("KataGo v1.15.3");
  }

  string Version::getKataGoVersionFullInfo() {
2 changes: 1 addition & 1 deletion cpp/neuralnet/cudabackend.cpp
@@ -2237,7 +2237,7 @@ struct Buffers {
    CUDA_ERR("Buffers",cudaMalloc(reinterpret_cast<void**>(&inputGlobalBufFloat), inputGlobalBufBytesFloat));
    CUDA_ERR("Buffers",cudaMalloc(&inputGlobalBuf, inputGlobalBufBytes));
    if(m.numInputMetaChannels > 0) {
-     CUDA_ERR("Buffers",cudaMalloc(&inputMetaBufFloat, inputMetaBufBytesFloat));
+     CUDA_ERR("Buffers",cudaMalloc(reinterpret_cast<void**>(&inputMetaBufFloat), inputMetaBufBytesFloat));
      CUDA_ERR("Buffers",cudaMalloc(&inputMetaBuf, inputMetaBufBytes));
    }
    else {
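For context on this one-line fix: the C entry point for allocation is cudaError_t cudaMalloc(void** devPtr, size_t size), and a float** does not implicitly convert to void** in C++, so the address of a typed device pointer needs an explicit cast; this also makes the line consistent with the inputGlobalBufFloat allocation just above. A minimal standalone illustration, not the KataGo code:

```cpp
#include <cuda_runtime.h>

int main() {
  float* devBuf = nullptr;
  size_t bytes = 1024 * sizeof(float);
  // &devBuf has type float**, which does not implicitly convert to void** in
  // C++, so the conversion is spelled out explicitly against the C signature:
  cudaError_t err = cudaMalloc(reinterpret_cast<void**>(&devBuf), bytes);
  if(err != cudaSuccess)
    return 1;
  cudaFree(devBuf);
  return 0;
}
```

CUDA's runtime header also ships a templated cudaMalloc overload that accepts typed pointer addresses, which is presumably why the uncast call compiled at all; the cast makes the call well-formed against the plain C signature regardless of which overload is in scope.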
5 changes: 5 additions & 0 deletions cpp/neuralnet/dummybackend.cpp
@@ -63,6 +63,11 @@ int NeuralNet::getModelVersion(const LoadedModel* loadedModel) {
    throw StringError("Dummy neural net backend: NeuralNet::getModelVersion unimplemented");
  }

+ int NeuralNet::getNumInputMetaChannels(const LoadedModel* loadedModel) {
+   (void)loadedModel;
+   throw StringError("Dummy neural net backend: NeuralNet::getNumInputMetaChannels unimplemented");
+ }
+
  Rules NeuralNet::getSupportedRules(const LoadedModel* loadedModel, const Rules& desiredRules, bool& supported) {
    (void)loadedModel;
    (void)desiredRules;
15 changes: 14 additions & 1 deletion cpp/search/search.cpp
@@ -41,6 +41,7 @@ SearchThread::SearchThread(int tIdx, const Search& search)
    history(search.rootHistory),
    graphHash(search.rootGraphHash),
    graphPath(),
+   shouldCountPlayout(false),
    rand(makeSeed(search,tIdx)),
    nnResultBuf(),
    statsBuf(),
@@ -1102,7 +1103,11 @@ bool Search::runSinglePlayout(SearchThread& thread, double upperBoundVisitsLeft)
    //Store this value, used for futile-visit pruning this thread's root children selections.
    thread.upperBoundVisitsLeft = upperBoundVisitsLeft;

+   //Prep this value, playoutDescend will set it to true if we do have a playout
+   thread.shouldCountPlayout = false;
+
    bool finishedPlayout = playoutDescend(thread,*rootNode,true);
+   (void)finishedPlayout;

    //Restore thread state back to the root state
    thread.pla = rootPla;
@@ -1111,7 +1116,7 @@ bool Search::runSinglePlayout(SearchThread& thread, double upperBoundVisitsLeft)
    thread.graphHash = rootGraphHash;
    thread.graphPath.clear();

-   return finishedPlayout;
+   return thread.shouldCountPlayout;
  }

  bool Search::playoutDescend(
@@ -1136,6 +1141,7 @@ bool Search::playoutDescend(
      double lead = 0.0;
      double weight = (searchParams.useUncertainty && nnEvaluator->supportsShorttermError()) ? searchParams.uncertaintyMaxWeight : 1.0;
      addLeafValue(node, winLossValue, noResultValue, scoreMean, scoreMeanSq, lead, weight, true, false);
+     thread.shouldCountPlayout = true;
      return true;
    }
    else {
@@ -1146,6 +1152,7 @@ bool Search::playoutDescend(
      double lead = scoreMean;
      double weight = (searchParams.useUncertainty && nnEvaluator->supportsShorttermError()) ? searchParams.uncertaintyMaxWeight : 1.0;
      addLeafValue(node, winLossValue, noResultValue, scoreMean, scoreMeanSq, lead, weight, true, false);
+     thread.shouldCountPlayout = true;
      return true;
    }
  }
@@ -1171,6 +1178,7 @@ bool Search::playoutDescend(
      //Perform the nn evaluation and finish!
      node.initializeChildren();
      node.state.store(SearchNode::STATE_EXPANDED0, std::memory_order_seq_cst);
+     thread.shouldCountPlayout = true;
      return true;
    }
  }
@@ -1221,13 +1229,15 @@ bool Search::playoutDescend(
    //Return TRUE though, so that the parent path we traversed increments its edge visits.
    //We want the search to continue as best it can, so we increment visits so search will still make progress
    //even if this keeps happening in some really bad transposition or something.
+   thread.shouldCountPlayout = true;
    return true;
  }

  if(bestChildIdx <= -1) {
    //This might happen if all moves have been forbidden. The node will just get stuck counting visits without expanding
    //and we won't do any search.
    addCurrentNNOutputAsLeafValue(node,false);
+   thread.shouldCountPlayout = true;
    return true;
  }

@@ -1295,6 +1305,7 @@ bool Search::playoutDescend(
    if(countEdgeVisit && maybeCatchUpEdgeVisits(thread, node, child, nodeState, bestChildIdx)) {
      updateStatsAfterPlayout(node,thread,isRoot);
      child->virtualLosses.fetch_add(-1,std::memory_order_release);
+     thread.shouldCountPlayout = true;
      return true;
    }
  }
@@ -1312,6 +1323,7 @@ bool Search::playoutDescend(
    if(countEdgeVisit && maybeCatchUpEdgeVisits(thread, node, child, nodeState, bestChildIdx)) {
      updateStatsAfterPlayout(node,thread,isRoot);
      child->virtualLosses.fetch_add(-1,std::memory_order_release);
+     thread.shouldCountPlayout = true;
      return true;
    }

@@ -1339,6 +1351,7 @@ bool Search::playoutDescend(
      SearchNodeChildrenReference children = node.getChildren(nodeState);
      children[bestChildIdx].addEdgeVisits(1);
      updateStatsAfterPlayout(node,thread,isRoot);
+     thread.shouldCountPlayout = true;
    }
    child->virtualLosses.fetch_add(-1,std::memory_order_release);
    // If we didn't count an edge visit, none of the parents need to update either.
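The recurring thread.shouldCountPlayout = true lines above implement one pattern: reset a per-thread flag before the descent, set it at every outcome that should consume playout budget, and have runSinglePlayout report the flag rather than playoutDescend's return value, which now only drives edge-visit accounting. A condensed sketch of that control flow, with hypothetical names rather than the real search code:

```cpp
// Condensed, hypothetical sketch of the playout-counting pattern in this diff.
// The descent's return value controls edge-visit accounting for the parents,
// while a separate per-thread flag records whether the work done should count
// against the playout budget; the two are no longer the same signal.
struct Thread {
  bool shouldCountPlayout = false;
};

bool playoutDescend(Thread& thread, int depth) {
  if(depth == 0) {
    // Leaf reached and a value was added: this playout should be counted.
    thread.shouldCountPlayout = true;
    return true;
  }
  // ... child selection and stats updates would happen here ...
  return playoutDescend(thread, depth - 1);
}

bool runSinglePlayout(Thread& thread) {
  thread.shouldCountPlayout = false;     // prep: the descent sets it
  bool finishedPlayout = playoutDescend(thread, 3);
  (void)finishedPlayout;                 // edge-visit signal, not the budget signal
  return thread.shouldCountPlayout;      // count only what the descent flagged
}
```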
5 changes: 5 additions & 0 deletions cpp/search/search.h
@@ -50,6 +50,10 @@ struct SearchThread {
    //The path we trace down the graph as we do a playout
    std::unordered_set<SearchNode*> graphPath;

+   //Tracks whether this thread did something that "should" be counted as a playout
+   //for the purpose of playout limits
+   bool shouldCountPlayout;
+
    Rand rand;

    NNResultBuf nnResultBuf;
@@ -449,6 +453,7 @@ struct Search {

    // LCB helpers
    void getSelfUtilityLCBAndRadius(const SearchNode& parent, const SearchNode* child, int64_t edgeVisits, Loc moveLoc, double& lcbBuf, double& radiusBuf) const;
+   void getSelfUtilityLCBAndRadiusZeroVisits(double& lcbBuf, double& radiusBuf) const;

    //----------------------------------------------------------------------------------------
    // Mirror handling logic
5 changes: 2 additions & 3 deletions cpp/search/searchexplorehelpers.cpp
@@ -364,8 +364,7 @@ void Search::selectBestChildToDescend(

    bool useHumanSL = false;
    if(humanEvaluator != NULL &&
-      (searchParams.humanSLProfile.initialized || !humanEvaluator->requiresSGFMetadata()) &&
-      totalChildWeight > 0
+      (searchParams.humanSLProfile.initialized || !humanEvaluator->requiresSGFMetadata())
    ) {
      const NNOutput* humanOutput = node.getHumanOutput();
      if(humanOutput != NULL) {
@@ -385,7 +384,7 @@ void Search::selectBestChildToDescend(
    }

    double totalHumanProb = weightlessProb + weightfulProb;
-   if(totalHumanProb > 0.0 && totalChildWeight > 1.0 / totalHumanProb) {
+   if(totalHumanProb > 0.0) {
      double r = thread.rand.nextDouble();
      if(r < weightlessProb) {
        useHumanSL = true;
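The surviving condition draws a single uniform number and buckets it against the two probabilities: r < weightlessProb selects weightless human SL exploration, r < weightlessProb + weightfulProb selects weightful, and anything else falls through to normal selection. A standalone sketch of that one-draw bucketing pattern, with illustrative names rather than KataGo's:

```cpp
#include <random>

// One uniform sample r picks "weightless" human SL exploration with
// probability weightlessProb, "weightful" with probability weightfulProb,
// and the normal search policy otherwise.
enum class ExploreMode { Normal, HumanSLWeightless, HumanSLWeightful };

ExploreMode pickExploreMode(std::mt19937_64& rng, double weightlessProb, double weightfulProb) {
  std::uniform_real_distribution<double> dist(0.0, 1.0);
  double r = dist(rng);
  if(r < weightlessProb)
    return ExploreMode::HumanSLWeightless;
  if(r < weightlessProb + weightfulProb)
    return ExploreMode::HumanSLWeightful;
  return ExploreMode::Normal;
}
```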
8 changes: 8 additions & 0 deletions cpp/search/searchhelpers.cpp
@@ -544,6 +544,14 @@ double Search::interpolateEarly(double halflife, double earlyValue, double value
    return value + (earlyValue - value) * pow(0.5, halflives);
  }

+ void Search::getSelfUtilityLCBAndRadiusZeroVisits(double& lcbBuf, double& radiusBuf) const {
+   // Max radius of the entire utility range
+   double utilityRangeRadius = searchParams.winLossUtilityFactor + searchParams.staticScoreUtilityFactor + searchParams.dynamicScoreUtilityFactor;
+   radiusBuf = 2.0 * utilityRangeRadius * searchParams.lcbStdevs;
+   lcbBuf = -radiusBuf;
+   return;
+ }
+
  void Search::getSelfUtilityLCBAndRadius(const SearchNode& parent, const SearchNode* child, int64_t edgeVisits, Loc moveLoc, double& lcbBuf, double& radiusBuf) const {
    int64_t childVisits = child->stats.visits.load(std::memory_order_acquire);
    double scoreMeanAvg = child->stats.scoreMeanAvg.load(std::memory_order_acquire);
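The new zero-visit helper gives an unvisited child the widest possible bound: the radius spans twice the full utility range (the winloss factor plus the static and dynamic score factors) times lcbStdevs, and the LCB is its negation. A quick numeric check under the 9d config's utility factors, with lcbStdevs = 5.0 assumed purely for illustration rather than as KataGo's default:

```cpp
#include <cstdio>

// Numeric check of the zero-visit LCB computation under assumed parameter
// values; lcbStdevs = 5.0 is an assumption for illustration only.
int main() {
  double winLossUtilityFactor = 1.0;       // from gtp_human9d_search_example.cfg
  double staticScoreUtilityFactor = 0.5;   // from gtp_human9d_search_example.cfg
  double dynamicScoreUtilityFactor = 0.5;  // from gtp_human9d_search_example.cfg
  double lcbStdevs = 5.0;                  // assumed value

  double utilityRangeRadius =
    winLossUtilityFactor + staticScoreUtilityFactor + dynamicScoreUtilityFactor;
  double radius = 2.0 * utilityRangeRadius * lcbStdevs;  // 2 * 2.0 * 5.0 = 20.0
  double lcb = -radius;                                  // -20.0
  std::printf("radius = %.1f, lcb = %.1f\n", radius, lcb);
  return 0;
}
```

A bound this wide ensures an unvisited child never looks better than any child with real evaluations when LCB-based selection or reporting is in play.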
37 changes: 31 additions & 6 deletions cpp/search/searchresults.cpp
@@ -295,8 +295,21 @@ bool Search::getPlaySelectionValues(
        maxValue = playSelectionValues[i];
    }

-   if(maxValue <= 1e-50)
-     return false;
+   if(maxValue <= 1e-50) {
+     //If we reached this point we have nonzero many children but the children are all weightless.
+     //In that case, at least set each one to be weighted by its policy.
+     for(int i = 0; i<numChildren; i++) {
+       playSelectionValues[i] = std::max(0.0,(double)policyProbs[getPos(locs[i])]);
+     }
+     //Recompute max
+     for(int i = 0; i<numChildren; i++) {
+       if(playSelectionValues[i] > maxValue)
+         maxValue = playSelectionValues[i];
+     }
+     if(maxValue <= 1e-50) {
+       return false;
+     }
+   }

    //Sanity check - if somehow we had more than this, something must have overflowed or gone wrong
    assert(maxValue < 1e40);
@@ -999,10 +1012,22 @@ void Search::getAnalysisData(
      return;
    assert(numChildren <= NNPos::MAX_NN_POLICY_SIZE);

-   bool alwaysComputeLcb = true;
-   bool success = getPlaySelectionValues(node, scratchLocs, scratchValues, NULL, 1.0, false, alwaysComputeLcb, false, lcbBuf, radiusBuf);
-   if(!success)
-     return;
+   const bool alwaysComputeLcb = true;
+   bool gotPlaySelectionValues = getPlaySelectionValues(node, scratchLocs, scratchValues, NULL, 1.0, false, alwaysComputeLcb, false, lcbBuf, radiusBuf);
+
+   // No play selection values - then fill with values consistent with all 0 visits.
+   // We want it to be possible to get analysis data even when all visits are weightless.
+   if(!gotPlaySelectionValues) {
+     for(int i = 0; i<numChildren; i++) {
+       scratchLocs.push_back(childrenMoveLocs[i]);
+       scratchValues.push_back(0.0);
+     }
+     double lcbBufValue;
+     double radiusBufValue;
+     getSelfUtilityLCBAndRadiusZeroVisits(lcbBufValue,radiusBufValue);
+     std::fill(lcbBuf,lcbBuf+numChildren,lcbBufValue);
+     std::fill(radiusBuf,radiusBuf+numChildren,radiusBufValue);
+   }

    const NNOutput* nnOutput = node.getNNOutput();
    const float* policyProbsFromNN = nnOutput->getPolicyProbsMaybeNoised();
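The first hunk's fallback can be read on its own: when every child's selection value is effectively zero (all visits weightless), rebuild the values from the policy, clamped at zero, and give up only if even the policy is empty. A self-contained sketch with simplified types and hypothetical names, not the real KataGo function:

```cpp
#include <algorithm>
#include <vector>

// If the max selection value is effectively zero, re-derive the values from
// the policy (clamped at zero, since illegal moves can carry negative
// markers), and give up only if even the policy is all zero.
bool fillPlaySelectionValues(
  std::vector<double>& playSelectionValues,
  const std::vector<float>& policyProbs  // one entry per child, pre-aligned
) {
  double maxValue = 0.0;
  for(double v : playSelectionValues)
    maxValue = std::max(maxValue, v);

  if(maxValue <= 1e-50) {
    for(size_t i = 0; i < playSelectionValues.size(); i++)
      playSelectionValues[i] = std::max(0.0, (double)policyProbs[i]);
    for(double v : playSelectionValues)
      maxValue = std::max(maxValue, v);
    if(maxValue <= 1e-50)
      return false;  // nothing usable at all
  }
  return true;
}
```

Together with the second hunk, this means getAnalysisData can still report entries (with zero-visit LCB bounds from getSelfUtilityLCBAndRadiusZeroVisits) instead of silently returning nothing.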
2 changes: 1 addition & 1 deletion cpp/tests/results/analysis/badoverride.txt.log
@@ -22,7 +22,7 @@ rootSymmetryPruning = false
  trtUseFP16 = false

  : Analysis Engine starting...
- : KataGo v1.15.1
+ : KataGo v1.15.3
  : nnRandSeed0 = analysisTest
  : After dedups: nnModelFile0 = tests/models/g170-b6c96-s175395328-d26788732.bin.gz useFP16 false useNHWC false
  : Initializing neural net buffer to be size 19 * 19 allowing smaller boards
2 changes: 1 addition & 1 deletion cpp/tests/results/analysis/badoverride.txt.stderr
@@ -22,7 +22,7 @@ rootSymmetryPruning = false
  trtUseFP16 = false

  : Analysis Engine starting...
- : KataGo v1.15.1
+ : KataGo v1.15.3
  : nnRandSeed0 = analysisTest
  : After dedups: nnModelFile0 = tests/models/g170-b6c96-s175395328-d26788732.bin.gz useFP16 false useNHWC false
  : Initializing neural net buffer to be size 19 * 19 allowing smaller boards
2 changes: 1 addition & 1 deletion cpp/tests/results/analysis/basic.txt.log
@@ -22,7 +22,7 @@ rootSymmetryPruning = false
  trtUseFP16 = false

  : Analysis Engine starting...
- : KataGo v1.15.1
+ : KataGo v1.15.3
  : nnRandSeed0 = analysisTest
  : After dedups: nnModelFile0 = tests/models/g170-b6c96-s175395328-d26788732.bin.gz useFP16 false useNHWC false
  : Initializing neural net buffer to be size 19 * 19 allowing smaller boards