refactor: add fallback/recovery additions [prototype] #223

Closed · wants to merge 7 commits
5 changes: 1 addition & 4 deletions internal/datasourcev2/polling_http_request.go
@@ -72,13 +72,10 @@ func (r *pollingRequester) Request() (*fdv2proto.ChangeSet, error) {
r.loggers.Debug("Polling LaunchDarkly for feature flag updates")
}

body, cached, err := r.makeRequest(endpoints.PollingRequestPath)
@cwaldren-ld (Contributor, Author) commented on Dec 11, 2024:

This is a hack, needed because our current data sources are started/stopped rather than re-instantiated. The issue: if we had an error response (say the payload was malformed, or it was just an HTTP error) and the next poll serves the same cached response, we'd get back an empty changeset (.NoChanges()) with no error (nil).

This means that if the previous Data Source Status was something like VALID (set by a previous synchronizer), and we then start up polling and it receives the same response as last time, we would never update the state to INTERRUPTED based on this error.

If the data source were instead re-instantiated, there would be no "previous state" for the new run. We'd get the error, update the status, then get the error again (cached) and not update the status - which would be correct from the data system's point of view, since nothing has changed.
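
As an illustration of one conceivable alternative to this hack (a hedged sketch with hypothetical names, not the SDK's actual requester): instead of dropping the cached short-circuit entirely, the requester could remember the outcome of its last non-cached poll and replay it whenever the HTTP layer reports a cached response, so a persistent error keeps reaching the status reporter even though the data source is only started/stopped.

package main

import (
	"errors"
	"fmt"
)

// lastOutcome records the result of the most recent non-cached poll.
// All names here are hypothetical; this is not the SDK's real requester.
type lastOutcome struct {
	body []byte
	err  error
}

// replayingPoller wraps a raw request function (analogous to makeRequest) and,
// when the HTTP layer reports a cached response, replays the previous outcome
// instead of silently reporting "no changes". A repeated error therefore keeps
// surfacing on every poll, even though the poller is never re-instantiated.
type replayingPoller struct {
	request func() (body []byte, cached bool, err error)
	last    *lastOutcome
}

func (p *replayingPoller) Poll() ([]byte, error) {
	body, cached, err := p.request()
	if cached && p.last != nil {
		return p.last.body, p.last.err
	}
	p.last = &lastOutcome{body: body, err: err}
	return body, err
}

func main() {
	calls := 0
	p := &replayingPoller{request: func() ([]byte, bool, error) {
		calls++
		if calls == 1 {
			// First poll fails (e.g. malformed payload / HTTP error).
			return nil, false, errors.New("malformed payload")
		}
		// Subsequent polls return the identical, cached response.
		return nil, true, nil
	}}
	for i := 0; i < 3; i++ {
		if _, err := p.Poll(); err != nil {
			fmt.Printf("poll %d: error surfaced again: %v\n", i, err)
		}
	}
}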

body, _, err := r.makeRequest(endpoints.PollingRequestPath)
if err != nil {
return nil, err
}
if cached {
return fdv2proto.NewChangeSetBuilder().NoChanges(), nil
}

var payload fdv2proto.PollingPayload
if err = json.Unmarshal(body, &payload); err != nil {
1 change: 1 addition & 0 deletions internal/datasourcev2/streaming_data_source.go
@@ -287,6 +287,7 @@ func (sp *StreamProcessor) consumeStream(stream *es.Stream, closeWhenReady chan<
sp.setInitializedAndNotifyClient(true, closeWhenReady)

default:
processedEvent = false
@cwaldren-ld (Contributor, Author):

This seems incorrect in both the fdv1 and fdv2 sources. If we get an unrecognized event, then we don't want to set the data source state to valid. That would clear any existing error.

Member:

I'm wondering if this shouldn't be a tri-bool, with slightly different handling, where:

  1. We only set it to true if we have processed a valid event.
  2. Failure to decode an expected event would set it to false.
  3. An unknown event type would be ignored, with no change to this value, as a sort of forward-compatible guard against new event types we may want to add.
  4. We only execute line 294 if it is true, avoiding both the false state and the new third (null?) state.

@cwaldren-ld (Contributor, Author):

Seems like a good idea.

enum ProcessingState {
    EVENT_DECODED,
    EVENT_IGNORED,
    EVENT_MALFORMED
}

or similar.
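
For reference, a Go rendering of that tri-state idea might look like the following (a hedged sketch; names and event types are illustrative placeholders, not the SDK's actual implementation):

package datasourcev2

// ProcessingState classifies what happened to a single stream event.
type ProcessingState int

const (
	// EventDecoded: a recognized event was decoded successfully; the data
	// source may be reported as valid.
	EventDecoded ProcessingState = iota
	// EventIgnored: an unknown event type was skipped, as a forward-compatible
	// guard; the current data source status is left untouched.
	EventIgnored
	// EventMalformed: a recognized event failed to decode; the status should
	// be reported as interrupted with an error.
	EventMalformed
)

// classifyEvent is a stand-in for the switch in consumeStream. The event
// names below are placeholders, not the real fdv2 protocol events.
func classifyEvent(eventType string, decode func() error) ProcessingState {
	switch eventType {
	case "example-put", "example-delete":
		if err := decode(); err != nil {
			return EventMalformed
		}
		return EventDecoded
	default:
		return EventIgnored
	}
}

The switch in consumeStream would then mark the source valid only for EventDecoded, report an error for EventMalformed, and do nothing for EventIgnored.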

sp.loggers.Infof("Unexpected event found in stream: %s", event.Event())
}

140 changes: 114 additions & 26 deletions internal/datasystem/fdv2_datasystem.go
@@ -6,8 +6,6 @@
"sync"
"time"

"github.com/launchdarkly/go-server-sdk/v7/internal/fdv2proto"

"github.com/launchdarkly/go-sdk-common/v3/ldlog"
"github.com/launchdarkly/go-server-sdk/v7/interfaces"
"github.com/launchdarkly/go-server-sdk/v7/internal"
@@ -72,6 +70,9 @@
// Protects status.
mu sync.Mutex
status interfaces.DataSourceStatus

fallbackCond func() bool
recoveryCond func() bool
}

// NewFDv2 creates a new instance of the FDv2 data system. The first argument indicates if the system is enabled or
@@ -111,6 +112,24 @@
fdv2.primarySync = cfg.Synchronizers.Primary
fdv2.secondarySync = cfg.Synchronizers.Secondary
fdv2.disabled = disabled
@cwaldren-ld (Contributor, Author):

The power of conditions is the chaining I've shown here. We can have an arbitrary number of conditions and hook them up, which I can see being useful in the (far) future. For now, it'd probably be fine to define some preset conditions with a couple of knobs.

fdv2.fallbackCond = func() bool {
status := fdv2.getStatus()
fdv2.loggers.Debugf("Status: %s", status.String())
interruptedAtRuntime := status.State == interfaces.DataSourceStateInterrupted && time.Since(status.StateSince) > 1*time.Minute
cannotInitialize := status.State == interfaces.DataSourceStateInitializing && time.Since(status.StateSince) > 10*time.Second
healthyForTooLong := status.State == interfaces.DataSourceStateValid && time.Since(status.StateSince) > 30*time.Second
@cwaldren-ld (Contributor, Author):

healthyForTooLong is an interesting one. We want it in the recoveryCond in order to prevent the SDK from using the secondary for too long - presumably we want to switch back to the primary because it is more efficient/better for [reasons].

I put it in the fallbackCond to cause a flip-flop pattern for demo purposes; we probably wouldn't actually want that condition in there. It could be useful in a chaos-monkey sense, though: every so often, check that your backup is functioning.


return interruptedAtRuntime || cannotInitialize || healthyForTooLong
}
fdv2.recoveryCond = func() bool {
status := fdv2.getStatus()
fdv2.loggers.Debugf("Status: %s", status.String())

interruptedAtRuntime := status.State == interfaces.DataSourceStateInterrupted && time.Since(status.StateSince) > 1*time.Minute
healthyForTooLong := status.State == interfaces.DataSourceStateValid && time.Since(status.StateSince) > 5*time.Minute
cannotInitialize := status.State == interfaces.DataSourceStateInitializing && time.Since(status.StateSince) > 10*time.Second
return interruptedAtRuntime || healthyForTooLong || cannotInitialize
}
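
Following the first comment on this hunk, the two closures above could be collapsed into the "preset conditions with a couple of knobs" idea. A hedged sketch (helper names are hypothetical; only the status fields already used in this diff are assumed):

package datasystem

import (
	"time"

	"github.com/launchdarkly/go-server-sdk/v7/interfaces"
)

// stateFor returns a condition that is true once the data source has been in
// the given state for at least the given duration. getStatus would be
// fdv2.getStatus in this prototype.
func stateFor(getStatus func() interfaces.DataSourceStatus,
	state interfaces.DataSourceState, d time.Duration) func() bool {
	return func() bool {
		s := getStatus()
		return s.State == state && time.Since(s.StateSince) > d
	}
}

// anyOf chains conditions together: the result is true if any of them is true.
func anyOf(conds ...func() bool) func() bool {
	return func() bool {
		for _, c := range conds {
			if c() {
				return true
			}
		}
		return false
	}
}

fallbackCond could then be expressed as anyOf(stateFor(fdv2.getStatus, interfaces.DataSourceStateInterrupted, time.Minute), stateFor(fdv2.getStatus, interfaces.DataSourceStateInitializing, 10*time.Second)), with the durations as the knobs.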

if cfg.Store != nil && !disabled {
// If there's a persistent Store, we should provide a status monitor and inform Store that it's present.
@@ -158,16 +177,19 @@
return len(f.initializers) > 0 || f.primarySync != nil
}

func (f *FDv2) run(ctx context.Context, closeWhenReady chan struct{}) {

Check failure on line 180 in internal/datasystem/fdv2_datasystem.go (GitHub Actions, all workflow jobs): unnecessary leading newline (whitespace)
selector := f.runInitializers(ctx, closeWhenReady)

f.UpdateStatus(interfaces.DataSourceStateInitializing, interfaces.DataSourceErrorInfo{})

f.runInitializers(ctx, closeWhenReady)

if f.hasDataSources() && f.dataStoreStatusProvider.IsStatusMonitoringEnabled() {
f.launchTask(func() {
f.runPersistentStoreOutageRecovery(ctx, f.dataStoreStatusProvider.AddStatusListener())
})
}

f.runSynchronizers(ctx, closeWhenReady, selector)
f.runSynchronizers(ctx, closeWhenReady)
}

func (f *FDv2) runPersistentStoreOutageRecovery(ctx context.Context, statuses <-chan interfaces.DataStoreStatus) {
@@ -189,12 +211,12 @@
}
}

func (f *FDv2) runInitializers(ctx context.Context, closeWhenReady chan struct{}) fdv2proto.Selector {
func (f *FDv2) runInitializers(ctx context.Context, closeWhenReady chan struct{}) {
for _, initializer := range f.initializers {
f.loggers.Infof("Attempting to initialize via %s", initializer.Name())
basis, err := initializer.Fetch(ctx)
if errors.Is(err, context.Canceled) {
return fdv2proto.NoSelector()
return
}
if err != nil {
f.loggers.Warnf("Initializer %s failed: %v", initializer.Name(), err)
@@ -205,35 +227,97 @@
f.readyOnce.Do(func() {
close(closeWhenReady)
})
return basis.Selector
}
return fdv2proto.NoSelector()
}

func (f *FDv2) runSynchronizers(ctx context.Context, closeWhenReady chan struct{}, selector fdv2proto.Selector) {
// If the SDK was configured with no synchronizer, then (assuming no initializer succeeded), we should
// trigger the ready signal to let the call to MakeClient unblock immediately.
// The Go SDK is architected in such a way that a "closeWhenReady" channel is passed into the Data System by means
// of MakeClient. MakeClient is able to block the user until the client reaches a terminal state in regard to
// its initialization status - either initialized with data, or run out of time initializing, or some fatal error.
//
// By closing this channel, the SDK's data sources indicate that MakeClient should unblock.
//
// In the FDv2 world, we have the possibility that a synchronizer fails or we fall back to a secondary synchronizer.
// Perhaps we've already closed the channel, and now a new synchronizer is attempting to do the same.
//
// In that case, we need to guarantee that the channel is closed only once. To do this, we "wrap" channel that is passed
Member:

Suggested change:
- // In that case, we need to guarantee that the channel is closed only once. To do this, we "wrap" channel that is passed
+ // In that case, we need to guarantee that the channel is closed only once. To do this, we "wrap" the channel that is passed

// directly into the synchronizer with another channel. The wrapper is responsible for actually closing the underlying
// closeWhenReady, and it uses a sync.Once to ensure that happens only once.
//
// This design could be improved significantly. Some ideas:
// 1) Refactor the SDK to listen to Data Source status rather than the special closeWhenReady channel. It's unclear why
// this isn't currently the case, but there may be good reasons.
// 2) Use callbacks. Somewhat equivalent to (1), but instead of needing to wrap the channel, we just proxy directly to
// calling the sync.Once.
// 3) Make the channel multi-use. Instead of the contract being "close when ready", have it be "send a value when ready".
func (f *FDv2) closeChannelWrapper(ctx context.Context, closeWhenReady chan struct{}) chan struct{} {
ch := make(chan struct{})
go func() {
select {
case <-ctx.Done():
return
case <-ch:
f.readyOnce.Do(func() {
close(closeWhenReady)
})
}
}()
return ch
}
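
As an aside on idea (3) in the comment above ("send a value when ready"): a buffered channel plus a non-blocking send would also give the signal-only-once guarantee without a wrapper or sync.Once. A minimal hypothetical sketch:

package main

import "fmt"

// signalReady performs a non-blocking send on a buffered readiness channel.
// Because the channel has capacity 1 and the signal is dropped if it is full,
// any number of data sources can call this without panicking or blocking,
// which removes the need for the closeChannelWrapper/sync.Once pattern.
func signalReady(ready chan<- struct{}) {
	select {
	case ready <- struct{}{}:
	default: // someone already signaled; nothing to do
	}
}

func main() {
	ready := make(chan struct{}, 1)

	// Several synchronizers (or initializers) may all report readiness.
	signalReady(ready)
	signalReady(ready)
	signalReady(ready)

	<-ready // MakeClient-style wait unblocks exactly once
	fmt.Println("client ready")
}

The trade-off is that, unlike close, a sent value wakes only one waiter, so this only works if exactly one goroutine (the MakeClient path) consumes the readiness signal.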

func (f *FDv2) runSynchronizers(ctx context.Context, closeWhenReady chan struct{}) {
// If the SDK was configured with no synchronizer, then (assuming no initializer succeeded, which would have
// already closed the channel), we should close it now so that MakeClient unblocks.
if f.primarySync == nil {
f.readyOnce.Do(func() {
close(closeWhenReady)
})
return
}

// We can't pass closeWhenReady to the data source, because it might have already been closed.
// Instead, create a "proxy" channel just for the data source; if that is closed, we close the real one
// using the sync.Once.
ready := make(chan struct{})
f.primarySync.Sync(ready, selector)
f.launchTask(func() {
for {
f.loggers.Debugf("Primary synchronizer %s is starting", f.primarySync.Name())
f.primarySync.Sync(f.closeChannelWrapper(ctx, closeWhenReady), f.store.Selector())
if err := f.evaluateCond(ctx, f.fallbackCond); errors.Is(err, context.Canceled) {
return
}
if err := f.primarySync.Close(); err != nil {
f.loggers.Errorf("Primary synchronizer %s failed to gracefully close: %v", f.primarySync.Name(), err)
}
f.loggers.Debugf("Fallback condition met")
f.loggers.Debugf("Secondary synchronizer %s is starting", f.secondarySync.Name())

f.UpdateStatus(interfaces.DataSourceStateInterrupted, interfaces.DataSourceErrorInfo{})

f.secondarySync.Sync(f.closeChannelWrapper(ctx, closeWhenReady), f.store.Selector())
if err := f.evaluateCond(ctx, f.recoveryCond); errors.Is(err, context.Canceled) {
return
}
if err := f.secondarySync.Close(); err != nil {
f.loggers.Errorf("Secondary synchronizer %s failed to gracefully close: %v", f.secondarySync.Name(), err)
}
f.loggers.Debugf("Recovery condition met")
select {
case <-ctx.Done():
return
default:
}
}
})
}

func (f *FDv2) evaluateCond(ctx context.Context, cond func() bool) error {
ticker := time.NewTicker(10 * time.Second)
Member:

This hard-coded 10-second timer is what limits the resolution of the fallback/recovery conditions, right?

@cwaldren-ld (Contributor, Author) replied on Dec 12, 2024:

Correct. I could see an alternative where this is event-triggered: we'd have a "timer event" and a "data source status event" (and anything else that can be used as a condition).

But then we'd need to hold a map of timers and hook into the data source status broadcasters... it just doesn't seem worth the complexity compared to a predictable "tick" that polls whatever data is needed.

defer ticker.Stop()
for {
select {
case <-ready:
f.readyOnce.Do(func() {
close(closeWhenReady)
})
case <-ctx.Done():
return
return ctx.Err()
case <-ticker.C:
if cond() {
return nil
}
f.loggers.Debugf("Condition check succeeded, continue with current synchronizer")
}
}
}
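
For comparison with the thread above about the hard-coded ticker, a hedged sketch of the event-triggered variant described (and rejected) there. statusChanged is a hypothetical channel that would be fed by the data source status broadcaster:

package datasystem

import (
	"context"
	"time"
)

// evaluateCondEventDriven re-checks the condition whenever the data source
// status changes, while a coarser ticker still covers the purely time-based
// parts of the condition (e.g. "interrupted for more than a minute").
func evaluateCondEventDriven(ctx context.Context, cond func() bool,
	statusChanged <-chan struct{}) error {
	ticker := time.NewTicker(time.Minute)
	defer ticker.Stop()
	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-statusChanged:
			// A status transition may satisfy the condition immediately.
		case <-ticker.C:
			// Time passing may satisfy a duration-based condition.
		}
		if cond() {
			return nil
		}
	}
}
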
@@ -302,13 +386,17 @@
}

//nolint:revive // DataSourceStatusReporter method.
func (f *FDv2) UpdateStatus(status interfaces.DataSourceState, err interfaces.DataSourceErrorInfo) {
func (f *FDv2) UpdateStatus(state interfaces.DataSourceState, err interfaces.DataSourceErrorInfo) {
@cwaldren-ld (Contributor, Author):

The equivalent of this function in fdv1 is here: https://github.com/launchdarkly/go-server-sdk/blob/v7/internal/datasource/data_source_update_sink_impl.go#L157

The minimal implementation I wrote here is for demo purposes. We may need to adopt the other one to be backwards-compatible.

f.mu.Lock()
defer f.mu.Unlock()
f.status = interfaces.DataSourceStatus{
State: status,
LastError: err,
StateSince: time.Now(),

if state != f.status.State {
f.status.State = state
f.status.StateSince = time.Now()
}

if err != f.status.LastError {
f.status.LastError = err
}
}
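
On the backwards-compatibility note above: one behavior the linked fdv1 sink is believed to implement (consistent with the documented DataSourceState semantics) is that an Interrupted report arriving while the source is still Initializing stays Initializing. A hypothetical sketch of layering that onto the minimal UpdateStatus above, meant to live alongside it in fdv2_datasystem.go:

// updateStatusCompat is a sketch (hypothetical name): an Interrupted report
// that arrives before the source has ever become Valid is kept as
// Initializing, mirroring the fdv1 sink's handling.
func (f *FDv2) updateStatusCompat(state interfaces.DataSourceState, err interfaces.DataSourceErrorInfo) {
	f.mu.Lock()
	defer f.mu.Unlock()

	if state == interfaces.DataSourceStateInterrupted &&
		f.status.State == interfaces.DataSourceStateInitializing {
		// The source has not produced valid data yet, so it is still initializing.
		state = interfaces.DataSourceStateInitializing
	}

	if state != f.status.State {
		f.status.State = state
		f.status.StateSince = time.Now()
	}
	if err != f.status.LastError {
		f.status.LastError = err
	}
}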
