diff --git a/CHANGELOG.md b/CHANGELOG.md index ee4fd54408..216a3d40ee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,8 @@ All notable changes to `src-cli` are documented in this file. ### Changed +- Uploading changeset specs from `src batch apply` and `src batch preview` will now retry up to 5 times by default before failing. This can be controlled through the `-retries` flag. [#30431](https://github.com/sourcegraph/sourcegraph/issues/30431) + ### Fixed ### Removed diff --git a/cmd/src/batch_common.go b/cmd/src/batch_common.go index 0ee98b9bb7..d5c49e2b95 100644 --- a/cmd/src/batch_common.go +++ b/cmd/src/batch_common.go @@ -75,6 +75,7 @@ type batchExecuteFlags struct { file string keepLogs bool parallelism int + retries int timeout time.Duration workspace string cleanArchives bool @@ -122,6 +123,10 @@ func newBatchExecuteFlags(flagSet *flag.FlagSet, workspaceExecution bool, cacheD &caf.parallelism, "j", runtime.GOMAXPROCS(0), "The maximum number of parallel jobs. Default is GOMAXPROCS.", ) + flagSet.IntVar( + &caf.retries, "retries", 5, + "The maximum number of retries that will be made when sending a changeset spec.", + ) flagSet.DurationVar( &caf.timeout, "timeout", 60*time.Minute, "The maximum duration a single batch spec step can take.", @@ -427,7 +432,7 @@ func executeBatchSpec(ctx context.Context, ui ui.ExecUI, opts executeBatchSpecOp ui.UploadingChangesetSpecs(len(specs)) for i, spec := range specs { - id, err := svc.CreateChangesetSpec(ctx, spec) + id, err := svc.CreateChangesetSpec(ctx, spec, opts.flags.retries) if err != nil { return err } diff --git a/internal/batches/service/service.go b/internal/batches/service/service.go index f89245e55b..c946f4851a 100644 --- a/internal/batches/service/service.go +++ b/internal/batches/service/service.go @@ -10,6 +10,7 @@ import ( "path" "strings" "sync" + "time" "github.com/grafana/regexp" @@ -24,6 +25,7 @@ import ( "github.com/sourcegraph/src-cli/internal/batches/executor" "github.com/sourcegraph/src-cli/internal/batches/graphql" "github.com/sourcegraph/src-cli/internal/batches/repozip" + "github.com/sourcegraph/src-cli/internal/retrier" ) type Service struct { @@ -139,20 +141,31 @@ mutation CreateChangesetSpec($spec: String!) { } ` -func (svc *Service) CreateChangesetSpec(ctx context.Context, spec *batcheslib.ChangesetSpec) (graphql.ChangesetSpecID, error) { +func (svc *Service) CreateChangesetSpec(ctx context.Context, spec *batcheslib.ChangesetSpec, maxRetries int) (graphql.ChangesetSpecID, error) { + return svc.createChangesetSpec(ctx, spec, maxRetries, 5*time.Second, 1.6) +} + +func (svc *Service) createChangesetSpec(ctx context.Context, spec *batcheslib.ChangesetSpec, maxRetries int, retryWait time.Duration, multiplier float64) (graphql.ChangesetSpecID, error) { raw, err := json.Marshal(spec) if err != nil { return "", errors.Wrap(err, "marshalling changeset spec JSON") } + req := svc.newRequest(createChangesetSpecMutation, map[string]interface{}{ + "spec": string(raw), + }) var result struct { CreateChangesetSpec struct { ID string } } - if ok, err := svc.newRequest(createChangesetSpecMutation, map[string]interface{}{ - "spec": string(raw), - }).Do(ctx, &result); err != nil || !ok { + if err := retrier.Retry(func() error { + ok, err := req.Do(ctx, &result) + if !ok { + return errors.New("no data is available") + } + return err + }, maxRetries, retryWait, multiplier); err != nil { return "", err } diff --git a/internal/batches/service/service_test.go b/internal/batches/service/service_test.go index d6be4e14b7..bb863101e4 100644 --- a/internal/batches/service/service_test.go +++ b/internal/batches/service/service_test.go @@ -698,3 +698,40 @@ func TestEnsureDockerImages(t *testing.T) { }) } + +const testCreateChangesetSpecFailureResult = ` +{ + "data": { + "createChangesetSpec": { + "id": [] + } + } +} +` + +const testCreateChangesetSpecSuccessResult = ` +{ + "data": { + "createChangesetSpec": { + "id": "foo" + } + } +} +` + +func TestCreateChangesetSpec(t *testing.T) { + ctx := context.Background() + spec := &batcheslib.ChangesetSpec{} + + // Force a retry by sending a failure back first. + client, done := mockGraphQLClient( + testCreateChangesetSpecFailureResult, + testCreateChangesetSpecSuccessResult, + ) + t.Cleanup(done) + + svc := Service{client: client} + id, err := svc.createChangesetSpec(ctx, spec, 5, 0, 0) + assert.Nil(t, err) + assert.EqualValues(t, "foo", id) +} diff --git a/internal/retrier/retrier.go b/internal/retrier/retrier.go new file mode 100644 index 0000000000..07d80fc6c0 --- /dev/null +++ b/internal/retrier/retrier.go @@ -0,0 +1,32 @@ +package retrier + +import ( + "time" + + "github.com/sourcegraph/sourcegraph/lib/errors" +) + +// Retry retries the given function up to max times if it returns an error. +// base and multiplier may be specified to use an exponential backoff between +// retries; if no backoff is required, set both to 0. +// +// The first time the function returns nil, nil will immediately be returned +// from Retry. +func Retry(f func() error, max int, base time.Duration, multiplier float64) error { + var errs errors.MultiError + wait := base + + for { + err := f() + if err == nil { + return nil + } + + errs = errors.Append(errs, err) + if len(errs.Errors()) > max { + return errs + } + time.Sleep(wait) + wait = time.Duration(multiplier * float64(wait)) + } +} diff --git a/internal/retrier/retrier_test.go b/internal/retrier/retrier_test.go new file mode 100644 index 0000000000..57ab52da32 --- /dev/null +++ b/internal/retrier/retrier_test.go @@ -0,0 +1,89 @@ +package retrier + +import ( + "strconv" + "testing" + "time" + + "github.com/sourcegraph/sourcegraph/lib/errors" + "github.com/stretchr/testify/assert" +) + +func TestRetry(t *testing.T) { + knownErr := errors.New("error!") + + t.Run("no backoff", func(t *testing.T) { + for i := 0; i < 100; i += 10 { + t.Run(strconv.Itoa(i), func(t *testing.T) { + t.Run("immediate success", func(t *testing.T) { + retries := -1 + err := Retry(func() error { + retries += 1 + return nil + }, i, 0, 0) + + assert.Nil(t, err) + assert.Equal(t, 0, retries) + }) + + t.Run("delayed success", func(t *testing.T) { + retries := -1 + want := i / 2 + err := Retry(func() error { + retries += 1 + if retries >= want { + return nil + } + return knownErr + }, i, 0, 0) + + assert.Nil(t, err) + assert.Equal(t, want, retries) + }) + + t.Run("no success", func(t *testing.T) { + retries := -1 + err := Retry(func() error { + retries += 1 + return knownErr + }, i, 0, 0) + + assert.NotNil(t, err) + assert.ErrorIs(t, err, knownErr) + assert.Equal(t, i, retries) + }) + }) + } + }) + + t.Run("backoff", func(t *testing.T) { + base := 1 * time.Millisecond + want := []time.Duration{ + 0, // We don't test anything on the first try. + base, + 2 * base, + 4 * base, + 8 * base, + 16 * base, + } + + retries := -1 + var last time.Time + err := Retry(func() error { + retries += 1 + if retries > 0 { + now := time.Now() + assert.GreaterOrEqual(t, now.Sub(last), want[retries]) + last = now + } else { + last = time.Now() + } + + return knownErr + }, 5, base, 2) + + assert.NotNil(t, err) + assert.ErrorIs(t, err, knownErr) + assert.Equal(t, 5, retries) + }) +}