Skip to content

Commit

Permalink
add failed state for create job
Browse files Browse the repository at this point in the history
  • Loading branch information
RidRisR committed Jan 7, 2025
1 parent 6e12192 commit 7c14894
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 10 deletions.
27 changes: 18 additions & 9 deletions pkg/controller/compactbackup/compact_backup_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ import (
"k8s.io/utils/ptr"
)

// maxInterval is the upper bound applied to the retry backoff interval.
const maxInterval = 6 * time.Minute

// Controller controls backup.
type Controller struct {
deps *controller.Dependencies
Expand Down Expand Up @@ -281,21 +285,21 @@ func (c *Controller) sync(key string) (err error) {
return nil
}

ok, err := c.precheckCompact(compact)
ok, err := c.checkJobStatus(compact)
if err != nil {
return err
}
if !ok {
klog.Infof("Compact %s/%s is not allowed, skip", ns, name)
klog.Infof("Compact %s/%s is not allowed to create new job, skip", ns, name)
return nil
}

err = c.doCompact(compact.DeepCopy())
err = c.createCompactJob(compact.DeepCopy())
c.statusUpdater.OnCreateJob(context.TODO(), compact, err)
return err
}

func (c *Controller) doCompact(compact *v1alpha1.CompactBackup) error {
func (c *Controller) createCompactJob(compact *v1alpha1.CompactBackup) error {
ns := compact.GetNamespace()
name := compact.GetName()
compactJobName := compact.GetName()
Expand Down Expand Up @@ -474,10 +478,10 @@ func (c *Controller) makeCompactJob(compact *v1alpha1.CompactBackup) (*batchv1.J
return job, "", nil
}

// precheckCompact checks if doCompact is allowed to run
// checkJobStatus checks if createCompactJob is allowed to run.
// createCompactJob is allowed only if no other compact job exists.
// If the existing job failed, the compact status is updated.
func (c *Controller) precheckCompact(compact *v1alpha1.CompactBackup) (bool, error) {
func (c *Controller) checkJobStatus(compact *v1alpha1.CompactBackup) (bool, error) {
ns := compact.GetNamespace()
name := compact.GetName()

Expand Down Expand Up @@ -530,18 +534,23 @@ func (c *Controller) allowCompact(compact *v1alpha1.CompactBackup) bool {
ns := compact.GetNamespace()
name := compact.GetName()

// 10**(attempts-1)
// Backoff is 10*2^(attempts-1) seconds, capped at maxInterval; no delay when attempts <= 1.
expBackoff := func(attempts int) time.Duration {
if attempts <= 1 {
return 0
}
return 10 * time.Duration(math.Pow(10, float64(attempts-1))) * time.Second
interval := time.Duration(10 * int(math.Pow(2, float64(attempts-1)))) * time.Second
if interval > maxInterval {
return maxInterval
}
return interval
}

attempts := len(compact.Status.RetryStatus)
if attempts > 0 {
lastRetry := compact.Status.RetryStatus[attempts-1]
if lastRetry.RetryNum >= int(compact.Spec.MaxRetryTimes) {
if lastRetry.RetryNum > int(compact.Spec.MaxRetryTimes) {
c.statusUpdater.OnJobFailed(context.TODO(), compact, "create job failed, reached max retry times")
return false
}
backoff := expBackoff(attempts)
Expand Down
20 changes: 20 additions & 0 deletions tests/e2e/br/br.go
Original file line number Diff line number Diff line change
Expand Up @@ -1176,9 +1176,29 @@ var _ = ginkgo.Describe("Backup and Restore", func() {
compact.Spec.StartTs = fullBackup.Status.CommitTs
compact.Spec.EndTs = currentTS
compact.Spec.S3 = logBackup.Spec.S3
compact.Spec.MaxRetryTimes = 2
})
framework.ExpectNoError(err)
})

ginkgo.It("test backoff when create job failed", func() {
_, cancel := context.WithCancel(context.Background())
defer cancel()

ginkgo.By("Create RBAC for backup")
err := createRBAC(f)
framework.ExpectNoError(err)

compactName := "compact-backup"
ginkgo.By("Start a compact backup")
_, err = createCompactBackupAndWaitForComplete(f, compactName, "No_Such_Cluster", func(compact *v1alpha1.CompactBackup) {
compact.Spec.StartTs = "1"
compact.Spec.EndTs = "1"
compact.Spec.S3 = nil
compact.Spec.MaxRetryTimes = 2
})
framework.ExpectError(err, "create job failed, reached max retry times")
})
})
})

Expand Down
2 changes: 1 addition & 1 deletion tests/e2e/br/framework/br/wait.go
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ func WaitForCompactComplete(c versioned.Interface, ns, name string, timeout time
switch cpbk.Status.State {
case string(v1alpha1.BackupComplete):
return true, nil
case string(v1alpha1.BackupCleanFailed):
case string(v1alpha1.BackupFailed):
return false, fmt.Errorf("Compact failed: %s", cpbk.Status.Message)
default:
log.Logf("the current status is: %s %s", cpbk.Status.State, cpbk.Status.Progress)
Expand Down

0 comments on commit 7c14894

Please sign in to comment.