From b8d7382b7a26fe082f62ba5ee16b2073c6c7b466 Mon Sep 17 00:00:00 2001 From: James Lu Date: Tue, 10 Sep 2019 13:22:46 +0800 Subject: [PATCH] Ceph: nodes always in OrchestrationStatusStarting When OSD nodes do not finish their prepare jobs and the user removes those nodes manually, the nodes stay in the 'OrchestrationStatusStarting' status forever. Remove their status entries after completeProvisionTimeout expires. Signed-off-by: James Lu --- pkg/operator/ceph/cluster/osd/status.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pkg/operator/ceph/cluster/osd/status.go b/pkg/operator/ceph/cluster/osd/status.go index f8d57ff55a4e..4e1aed968898 100644 --- a/pkg/operator/ceph/cluster/osd/status.go +++ b/pkg/operator/ceph/cluster/osd/status.go @@ -235,6 +235,13 @@ func (c *Cluster) completeOSDsForAllNodes(config *provisionConfig, configOSDs bo currentTimeoutMinutes++ if currentTimeoutMinutes == timeoutMinutes { config.addError("timed out waiting for %d nodes: %+v", remainingNodes.Count(), remainingNodes) + //start to remove remainingNodes waiting timeout. + for remainingNode := range remainingNodes.Iter() { + clearNodeName := k8sutil.TruncateNodeName(orchestrationStatusMapName, remainingNode) + if err := c.kv.ClearStore(clearNodeName); err != nil { + config.addError("failed to clear node %s status with name %s. %+v", remainingNode, clearNodeName, err) + } + } return false } logger.Infof("waiting on orchestration status update from %d remaining nodes", remainingNodes.Count())