Skip to content

Commit

Permalink
Attempt to refresh credentials if we get access exceptions (#2890)
Browse files Browse the repository at this point in the history
* restore the old refreshing step

* try a mechanism to refresh credentials

* widen the scope a little
  • Loading branch information
hawkowl authored May 4, 2023
1 parent 606f547 commit 3e8fe29
Show file tree
Hide file tree
Showing 4 changed files with 142 additions and 4 deletions.
5 changes: 3 additions & 2 deletions pkg/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import (
"github.com/Azure/ARO-RP/pkg/util/billing"
"github.com/Azure/ARO-RP/pkg/util/dns"
"github.com/Azure/ARO-RP/pkg/util/encryption"
"github.com/Azure/ARO-RP/pkg/util/refreshable"
"github.com/Azure/ARO-RP/pkg/util/storage"
"github.com/Azure/ARO-RP/pkg/util/subnet"
)
Expand All @@ -59,7 +60,7 @@ type manager struct {
billing billing.Manager
doc *api.OpenShiftClusterDocument
subscriptionDoc *api.SubscriptionDocument
fpAuthorizer autorest.Authorizer
fpAuthorizer refreshable.Authorizer
localFpAuthorizer autorest.Authorizer
metricsEmitter metrics.Emitter

Expand Down Expand Up @@ -122,7 +123,7 @@ func New(ctx context.Context, log *logrus.Entry, _env env.Interface, db database
return nil, err
}

fpAuthorizer, err := _env.FPAuthorizer(subscriptionDoc.Subscription.Properties.TenantID, _env.Environment().ResourceManagerScope)
fpAuthorizer, err := refreshable.NewAuthorizer(_env, subscriptionDoc.Subscription.Properties.TenantID)
if err != nil {
return nil, err
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/cluster/install.go
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ func setFieldCreatedByHive(createdByHive bool) database.OpenShiftClusterDocument

func (m *manager) bootstrap() []steps.Step {
s := []steps.Step{
steps.Action(m.validateResources),
steps.AuthorizationRetryingAction(m.fpAuthorizer, m.validateResources),
steps.Action(m.ensureACRToken),
steps.Action(m.ensureInfraID),
steps.Action(m.ensureSSHKey),
Expand All @@ -242,7 +242,7 @@ func (m *manager) bootstrap() []steps.Step {
steps.Action(m.ensureResourceGroup),
steps.Action(m.enableServiceEndpoints),
steps.Action(m.setMasterSubnetPolicies),
steps.Action(m.deployBaseResourceTemplate),
steps.AuthorizationRetryingAction(m.fpAuthorizer, m.deployBaseResourceTemplate),
steps.Action(m.attachNSGs),
steps.Action(m.updateAPIIPEarly),
steps.Action(m.createOrUpdateRouterIPEarly),
Expand Down
45 changes: 45 additions & 0 deletions pkg/util/refreshable/refreshable.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package refreshable

// Copyright (c) Microsoft Corporation.
// Licensed under the Apache License 2.0.

import (
"github.com/Azure/go-autorest/autorest"

"github.com/Azure/ARO-RP/pkg/env"
)

// Authorizer is an autorest.Authorizer that can additionally be rebuilt on
// demand through Rebuild.
type Authorizer interface {
autorest.Authorizer
// Rebuild replaces the underlying authorizer with a newly created one and
// reports any error hit while creating it.
Rebuild() error
}

// authorizer is this package's Authorizer implementation. It holds the most
// recently built autorest.Authorizer together with the inputs Rebuild needs to
// build a replacement.
type authorizer struct {
// auth is the current authorizer; WithAuthorization delegates to it and
// Rebuild overwrites it on success.
auth autorest.Authorizer
// env supplies FPAuthorizer and the ResourceManagerScope used by Rebuild.
env env.Interface
// tenantID is passed to env.FPAuthorizer on every Rebuild.
tenantID string
}

// Rebuild asks the environment for a new first-party authorizer for the stored
// tenant, scoped to the environment's ResourceManagerScope. On success the new
// authorizer replaces the one currently held; on failure the held authorizer
// is left untouched and the error is returned.
func (a *authorizer) Rebuild() error {
	scope := a.env.Environment().ResourceManagerScope

	fresh, err := a.env.FPAuthorizer(a.tenantID, scope)
	if err == nil {
		a.auth = fresh
	}
	return err
}

// WithAuthorization satisfies autorest.Authorizer by handing back the
// PrepareDecorator produced by whichever authorizer is currently held.
func (a *authorizer) WithAuthorization() autorest.PrepareDecorator {
	current := a.auth
	return current.WithAuthorization()
}

// NewAuthorizer creates an Authorizer that can be rebuilt when needed to force
// token recreation.
//
// The initial authorizer is built immediately via Rebuild, using _env and
// tenantID. If that first build fails, the error is returned together with a
// nil Authorizer, so callers can never end up holding a wrapper whose inner
// authorizer is unset (which would panic on first use).
func NewAuthorizer(_env env.Interface, tenantID string) (Authorizer, error) {
	a := &authorizer{
		env:      _env,
		tenantID: tenantID,
	}
	if err := a.Rebuild(); err != nil {
		// Return a literal nil rather than the half-initialised value.
		return nil, err
	}
	return a, nil
}
92 changes: 92 additions & 0 deletions pkg/util/steps/refreshing.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
package steps

// Copyright (c) Microsoft Corporation.
// Licensed under the Apache License 2.0.

import (
"context"
"errors"
"fmt"
"time"

"github.com/sirupsen/logrus"
"k8s.io/apimachinery/pkg/util/wait"

"github.com/Azure/ARO-RP/pkg/util/azureerrors"
"github.com/Azure/ARO-RP/pkg/util/refreshable"
)

// ErrWantRefresh is a sentinel error carrying the message "want refresh".
// NOTE(review): nothing in the visible code returns or checks it — confirm it
// is still used before relying on it.
var ErrWantRefresh = errors.New("want refresh")

// AuthorizationRetryingAction wraps action in a Step that, when the action
// fails with an Azure unauthorized-client or AuthorizationFailed error,
// rebuilds the authorizer r and runs the action again. Retrying stops once the
// step's retry timeout has elapsed; any other error is returned directly. The
// timeout and poll interval are left at their zero values here, so the step's
// built-in defaults apply.
func AuthorizationRetryingAction(r refreshable.Authorizer, action actionFunction) Step {
	step := new(authorizationRefreshingActionStep)
	step.f = action
	step.auth = r
	return step
}

// authorizationRefreshingActionStep is the Step returned by
// AuthorizationRetryingAction: an action paired with the authorizer to rebuild
// when that action fails with an authorization error.
type authorizationRefreshingActionStep struct {
// f is the wrapped action that run invokes.
f actionFunction
// auth is rebuilt by run whenever f fails with a recognised auth error.
auth refreshable.Authorizer
// retryTimeout bounds how long run keeps retrying; when zero, run uses
// 10 minutes.
retryTimeout time.Duration
// pollInterval is the delay between attempts; when zero, run uses
// 30 seconds.
pollInterval time.Duration
}

// run executes the wrapped action immediately and, while it keeps failing with
// an Azure authorization error, rebuilds the authorizer and tries again every
// pollInterval until retryTimeout has elapsed.
//
// A zero retryTimeout defaults to 10 minutes and a zero pollInterval to 30
// seconds. A nil result, a non-authorization error, or an error from
// rebuilding the authorizer ends the polling and is returned as-is.
func (s *authorizationRefreshingActionStep) run(ctx context.Context, log *logrus.Entry) error {
	// ARM role caching can be 5 minutes, so retry for longer than that unless
	// told otherwise.
	retryTimeout := s.retryTimeout
	if retryTimeout == 0 {
		retryTimeout = 10 * time.Minute
	}

	// Fall back to a default interval when none has been configured.
	pollInterval := s.pollInterval
	if pollInterval == 0 {
		pollInterval = 30 * time.Second
	}

	timeoutCtx, cancel := context.WithTimeout(ctx, retryTimeout)
	defer cancel()

	attempt := func() (bool, error) {
		// The action gets the outer context on purpose: the timeout only
		// governs whether we retry, it must not cut short a running action.
		err := s.f(ctx)

		// Success, or the retry window has already closed: stop here and hand
		// back whatever the action returned.
		if err == nil || timeoutCtx.Err() != nil {
			return true, err
		}

		// Only an unauthorized client (AADSTS700016) or "AuthorizationFailed"
		// (likely role propagation delay) is worth a refresh; anything else is
		// final.
		if !azureerrors.IsUnauthorizedClientError(err) &&
			!azureerrors.HasAuthorizationFailedError(err) {
			return true, err
		}

		log.Printf("auth error, refreshing and retrying: %v", err)

		// A failed rebuild aborts the poll with that error; a successful one
		// lets the next tick retry the action.
		return false, s.auth.Rebuild()
	}

	return wait.PollImmediateUntil(pollInterval, attempt, timeoutCtx.Done())
}

// String renders the step as "[AuthorizationRetryingAction <name>]", where
// <name> is the FriendlyName of the wrapped action.
func (s *authorizationRefreshingActionStep) String() string {
	name := FriendlyName(s.f)
	return fmt.Sprintf("[AuthorizationRetryingAction %s]", name)
}

// metricsName builds the metrics identifier for this step: the fixed prefix
// "authorizationretryingaction." followed by the short form of the wrapped
// action's friendly name.
func (s *authorizationRefreshingActionStep) metricsName() string {
	friendly := FriendlyName(s.f)
	short := shortName(friendly)
	return fmt.Sprintf("authorizationretryingaction.%s", short)
}

0 comments on commit 3e8fe29

Please sign in to comment.