From b8d7382b7a26fe082f62ba5ee16b2073c6c7b466 Mon Sep 17 00:00:00 2001 From: James Lu Date: Tue, 10 Sep 2019 13:22:46 +0800 Subject: [PATCH] Ceph: nodes always in OrchestrationStatusStarting When OSD nodes do not finish their prepare jobs and the user removes those nodes manually, the nodes stay in the 'OrchestrationStatusStarting' status forever. Remove their status entries after completeProvisionTimeout expires. Signed-off-by: James Lu --- pkg/operator/ceph/cluster/osd/status.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pkg/operator/ceph/cluster/osd/status.go b/pkg/operator/ceph/cluster/osd/status.go index f8d57ff55a4e..4e1aed968898 100644 --- a/pkg/operator/ceph/cluster/osd/status.go +++ b/pkg/operator/ceph/cluster/osd/status.go @@ -235,6 +235,13 @@ func (c *Cluster) completeOSDsForAllNodes(config *provisionConfig, configOSDs bo currentTimeoutMinutes++ if currentTimeoutMinutes == timeoutMinutes { config.addError("timed out waiting for %d nodes: %+v", remainingNodes.Count(), remainingNodes) + //start to remove remainingNodes waiting timeout. + for remainingNode := range remainingNodes.Iter() { + clearNodeName := k8sutil.TruncateNodeName(orchestrationStatusMapName, remainingNode) + if err := c.kv.ClearStore(clearNodeName); err != nil { + config.addError("failed to clear node %s status with name %s. %+v", remainingNode, clearNodeName, err) + } + } return false } logger.Infof("waiting on orchestration status update from %d remaining nodes", remainingNodes.Count())