From 5460da828551961ba93f31dd898e6a2dabf17020 Mon Sep 17 00:00:00 2001 From: Viswanath Kuchibhotla Date: Mon, 6 Jan 2025 18:13:46 +0530 Subject: [PATCH] Optimize DFS while marking connected components (#14022) --- .../java/org/apache/lucene/util/hnsw/HnswUtil.java | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswUtil.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswUtil.java index a4b1d0c7c536..7028a81808d6 100644 --- a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswUtil.java +++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswUtil.java @@ -29,6 +29,7 @@ import org.apache.lucene.index.FilterLeafReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.internal.hppc.IntHashSet; import org.apache.lucene.util.FixedBitSet; /** Utilities for use in tests involving HNSW graphs */ @@ -105,7 +106,9 @@ static List components( } else { entryPoint = connectedNodes.nextSetBit(0); } - components.add(new Component(entryPoint, total)); + if (total > 0) { + components.add(new Component(entryPoint, total)); + } if (level == 0) { int nextClear = nextClearBit(connectedNodes, 0); while (nextClear != NO_MORE_DOCS) { @@ -163,6 +166,10 @@ private static Component markRooted( throws IOException { // Start at entry point and search all nodes on this level // System.out.println("markRooted level=" + level + " entryPoint=" + entryPoint); + if (connectedNodes.get(entryPoint)) { + return new Component(entryPoint, 0); + } + IntHashSet nodesInStack = new IntHashSet(); Deque stack = new ArrayDeque<>(); stack.push(entryPoint); int count = 0; @@ -178,7 +185,10 @@ private static Component markRooted( int friendCount = 0; while ((friendOrd = hnswGraph.nextNeighbor()) != NO_MORE_DOCS) { ++friendCount; - stack.push(friendOrd); + if (connectedNodes.get(friendOrd) == false && nodesInStack.contains(friendOrd) == false) { + 
+ stack.push(friendOrd); + nodesInStack.add(friendOrd); + } } if (friendCount < maxConn && notFullyConnected != null) { notFullyConnected.set(node);