Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Periodically clean up cached bundles directory #5976

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .changelog/5737.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
go/runtime/bundle: Cleanup bundles on runtime upgrade
5 changes: 5 additions & 0 deletions go/common/version/version.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ func (v Version) Cmp(other Version) int {
return int(v.Patch) - int(other.Patch)
}

// Less checks if the version is strictly less than the given version.
func (v Version) Less(other Version) bool {
return v.Cmp(other) < 0
}

// ToU64 returns the version as platform-dependent uint64.
func (v Version) ToU64() uint64 {
return (uint64(v.Major) << 32) | (uint64(v.Minor) << 16) | (uint64(v.Patch))
Expand Down
24 changes: 24 additions & 0 deletions go/common/version/version_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -269,3 +269,27 @@ func TestProtocolVersionCompatible(t *testing.T) {
require.Equal(t, v.isCompatible, Versions.Compatible(v.versions()), v.msg)
}
}

func TestLess(t *testing.T) {
martintomazic marked this conversation as resolved.
Show resolved Hide resolved
for _, tc := range []struct {
name string
v1 Version
v2 Version
expected bool
}{
{"v1 less than v2 (major)", Version{1, 1, 1}, Version{2, 1, 1}, true},
{"v1 less than v2 (minor)", Version{1, 1, 1}, Version{1, 2, 1}, true},
{"v1 less than v2 (patch)", Version{1, 1, 1}, Version{1, 1, 2}, true},
{"v1 greater than v2 (major)", Version{1, 1, 1}, Version{0, 1, 1}, false},
{"v1 greater than v2 (minor)", Version{1, 1, 1}, Version{1, 0, 1}, false},
{"v1 greater than v2 (patch)", Version{1, 1, 1}, Version{1, 1, 0}, false},
{"v1 equal to v2", Version{1, 1, 1}, Version{1, 1, 1}, false},
} {
t.Run(tc.name, func(t *testing.T) {
result := tc.v1.Less(tc.v2)
if result != tc.expected {
t.Errorf("Less(%v, %v) = %v, want %v", tc.v1, tc.v2, result, tc.expected)
}
})
}
}
70 changes: 70 additions & 0 deletions go/oasis-test-runner/scenario/e2e/runtime/runtime_upgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,15 @@ import (
"sync/atomic"
"time"

"github.com/oasisprotocol/oasis-core/go/common/crypto/hash"
"github.com/oasisprotocol/oasis-core/go/common/logging"
cmSync "github.com/oasisprotocol/oasis-core/go/common/sync"
consensus "github.com/oasisprotocol/oasis-core/go/consensus/api"
"github.com/oasisprotocol/oasis-core/go/oasis-test-runner/env"
"github.com/oasisprotocol/oasis-core/go/oasis-test-runner/oasis"
"github.com/oasisprotocol/oasis-core/go/oasis-test-runner/oasis/cli"
"github.com/oasisprotocol/oasis-core/go/oasis-test-runner/scenario"
registry "github.com/oasisprotocol/oasis-core/go/registry/api"
"github.com/oasisprotocol/oasis-core/go/runtime/bundle"
)

Expand Down Expand Up @@ -106,9 +109,76 @@ func (sc *runtimeUpgradeImpl) Run(ctx context.Context, childEnv *env.Env) error
// Run client again.
sc.Logger.Info("starting a second client to check if runtime works")
sc.Scenario.TestClient = NewTestClient().WithSeed("seed2").WithScenario(InsertRemoveEncWithSecretsScenarioV2)

// Ensure that after upgrade, every compute worker had its old exploded
// bundle removed from its bundles dir.
for _, worker := range sc.Net.ComputeWorkers() {
if err := sc.verifyBundleDir(ctx, worker); err != nil {
return fmt.Errorf("failed to clean %s's bundle dir after upgrade: %w", worker.Name, err)
}
}

return sc.RunTestClientAndCheckLogs(ctx, childEnv)
}

func (sc *runtimeUpgradeImpl) verifyBundleDir(ctx context.Context, worker *oasis.Compute) error {
sc.Logger.Info("ensuring cached exploded bundle for version 0.0.0 was removed",
"worker", worker.Name,
)

// There should be only one exploded bundle dir.
bundlesDir := bundle.ExplodedPath(worker.DataDir())
entries, err := os.ReadDir(bundlesDir)
if err != nil {
peternose marked this conversation as resolved.
Show resolved Hide resolved
return err
}
if n := len(entries); n != 1 {
return fmt.Errorf("unexpected number of dir entries: expected 1, got %d", n)
}
entry := entries[0]
if !entry.IsDir() {
return fmt.Errorf("%s is not a dir", entry.Name())
}

// Ensure exploded cached bundle is for the latest version (0.1.0).
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This contradicts the logged info comment above, which should be improved.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"ensuring cached exploded bundle for version 0.0.0 was removed" ?

Up it says was removed for 0.0.0 here that that is left for 0.1.0?

Happy to simplify this, agree may be weird?

// We do so by getting the manifest hash of the active deployment,
// and ensuring there is a dir with corresponding hash.

// Fetch registry descriptor.
rt, err := sc.Net.Controller().Registry.GetRuntime(ctx, &registry.GetRuntimeQuery{
Height: consensus.HeightLatest,
ID: KeyValueRuntimeID,
})
if err != nil {
return fmt.Errorf("failed to get runtime descriptor: %w", err)
}
// Fetch current epoch.
epoch, err := sc.Net.Controller().Beacon.GetEpoch(ctx, consensus.HeightLatest)
if err != nil {
return fmt.Errorf("failed to get current epoch: %w", err)
}
// Get active deployment manifest hash.
active := rt.ActiveDeployment(epoch)
if active == nil {
return fmt.Errorf("missing active descriptor for epoch %v", epoch)
}
manifestHash := active.BundleChecksum

// Ensure single entry name (exploded bundle dir) equals to fetched manifest hash.
var want hash.Hash
if err = want.UnmarshalBinary(manifestHash); err != nil {
return fmt.Errorf("failed to unmarshal active deployment manifest hash")
}
var got hash.Hash
if err := got.UnmarshalHex(entry.Name()); err != nil {
return fmt.Errorf("failed to unmarshal dir name to hash")
}
if !want.Equal(&got) {
return fmt.Errorf("unexpected folder name: want %v, got %v", want, got)
}
return nil
}

type bundleServer struct {
startOne cmSync.One

Expand Down
114 changes: 104 additions & 10 deletions go/runtime/bundle/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@ import (
"github.com/oasisprotocol/oasis-core/go/common/crypto/hash"
"github.com/oasisprotocol/oasis-core/go/common/logging"
cmSync "github.com/oasisprotocol/oasis-core/go/common/sync"
"github.com/oasisprotocol/oasis-core/go/common/version"
"github.com/oasisprotocol/oasis-core/go/config"
"github.com/oasisprotocol/oasis-core/go/runtime/bundle/component"
)

const (
Expand All @@ -46,6 +48,12 @@ type ManifestStore interface {
// AddManifest adds the provided manifest, whose components were extracted
// to the specified directory, to the store.
AddManifest(manifest *Manifest, dir string) error

// RemoveManifest removes a manifest with provided hash.
RemoveManifest(hash hash.Hash) bool

// Manifests returns all known manifests (regular and detached).
Manifests() []ExplodedManifest
}

// Manager is responsible for managing bundles.
Expand All @@ -62,8 +70,9 @@ type Manager struct {
runtimeBaseURLs map[common.Namespace][]string
globalBaseURLs []string

downloadCh chan struct{}
triggerCh chan struct{}
downloadQueue map[common.Namespace][]hash.Hash
cleanupQueue map[common.Namespace]version.Version

client *http.Client
store ManifestStore
Expand Down Expand Up @@ -119,8 +128,9 @@ func NewManager(dataDir string, runtimeIDs []common.Namespace, store ManifestSto
runtimeIDs: runtimes,
globalBaseURLs: globalBaseURLs,
runtimeBaseURLs: runtimeBaseURLs,
downloadCh: make(chan struct{}, 1),
triggerCh: make(chan struct{}, 1),
downloadQueue: make(map[common.Namespace][]hash.Hash),
cleanupQueue: make(map[common.Namespace]version.Version),
client: &client,
store: store,
logger: *logger,
Expand Down Expand Up @@ -192,21 +202,22 @@ func (m *Manager) run(ctx context.Context) {
for {
select {
case <-ticker.C:
case <-m.downloadCh:
case <-m.triggerCh:
case <-ctx.Done():
m.logger.Info("stopping")
return
}

m.Download()
m.download()
m.clean()
}
}

// Queue updates the checksums of bundles pending download for the given runtime.
// Download updates the checksums of bundles pending download for the given runtime.
//
// Any existing checksums in the download queue for the given runtime are removed
// and replaced with the given ones.
func (m *Manager) Queue(runtimeID common.Namespace, manifestHashes []hash.Hash) {
func (m *Manager) Download(runtimeID common.Namespace, manifestHashes []hash.Hash) {
// Download bundles only for the configured runtimes.
if _, ok := m.runtimeIDs[runtimeID]; !ok {
return
Expand Down Expand Up @@ -236,20 +247,44 @@ func (m *Manager) Queue(runtimeID common.Namespace, manifestHashes []hash.Hash)
}
m.downloadQueue[runtimeID] = hashes

// Trigger immediate download of new bundles.
// Trigger immediate download and clean-up of stale bundles.
select {
case m.downloadCh <- struct{}{}:
case m.triggerCh <- struct{}{}:
default:
}
}

// Download tries to download bundles in the queue.
func (m *Manager) Download() {
// Cleanup initiates the clean-up of regular bundles for the specified runtime,
// removing versions lower than the specified one.
func (m *Manager) Cleanup(runtimeID common.Namespace, version version.Version) {
m.mu.Lock()
defer m.mu.Unlock()

m.cleanupQueue[runtimeID] = version

// Trigger immediate download and clean-up of stale bundles.
select {
case m.triggerCh <- struct{}{}:
default:
}
}

// download tries to download bundles in the queue.
func (m *Manager) download() {
for runtimeID := range m.runtimeIDs {
m.downloadBundles(runtimeID)
}
}

// clean removes outdated manifest and deletes corresponding
// exploded bundles for runtimes in the clean-up queue.
func (m *Manager) clean() {
m.logger.Info("cleaning regular bundles")
for runtimeID := range m.runtimeIDs {
m.cleanStaleBundles(runtimeID)
}
}

func (m *Manager) downloadBundles(runtimeID common.Namespace) {
// Try to download queued bundles.
m.mu.RLock()
Expand Down Expand Up @@ -437,6 +472,65 @@ func (m *Manager) fetchBundle(url string) (string, error) {
return file.Name(), nil
}

func (m *Manager) cleanStaleBundles(runtimeID common.Namespace) {
m.mu.Lock()
maxVersion, ok := m.cleanupQueue[runtimeID]
m.mu.Unlock()

if !ok {
return
}

m.logger.Info("starting clean-up of stale bundles",
"id", runtimeID,
"max_version", maxVersion,
)

for _, manifest := range m.store.Manifests() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to loop twice, once over runtime IDs and then over manifests?

Copy link
Contributor Author

@martintomazic martintomazic Jan 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Technically we only have few runtimeIDs and also manifests. I would argue is more readable now even though "inefficient"?

Maybe the store should have:
* RegularManifests(runtimeID common.Namespace) []ExplodedManifest instead of Manifests
* since these are already indexed inside registry.
* Or we expose GetVersions from the registry instead of Manifests
* Note in these case RemoveManifest should accept runtimeID and version instead of a hash. A bit annoying since this is no longer generic with hash -> not able to remove detached albeit not used anywhere.

What would you prefer?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I prefer not to loop twice. Not because there is a performance issue, but because the code could be more readable and simpler.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree. Depending on the outcome of this thread it might not be possible though / would make it more complicated ?

if manifest.ID != runtimeID {
continue
}

if manifest.IsDetached() {
continue
}

if ronl := manifest.GetComponentByID(component.ID_RONL); !ronl.Version.Less(maxVersion) {
continue
}

m.cleanStaleBundle(manifest)
}

// Remove runtime ID from the clean-up queue if queued version is less or
// equal to the version we have just finished clean-up for.
// This is safe since download and clean are executed sequentially.
m.mu.Lock()
defer m.mu.Unlock()
if maxVersion.Less(m.cleanupQueue[runtimeID]) {
return
}
delete(m.cleanupQueue, runtimeID)
}

func (m *Manager) cleanStaleBundle(manifest ExplodedManifest) {
m.logger.Info("cleaning bundle",
"manifest_hash", manifest.Hash().Hex(),
)

if ok := m.store.RemoveManifest(manifest.Hash()); !ok {
m.logger.Debug("manifest already removed from the store",
"manifest_hash", manifest.Hash().Hex(),
)
}

if err := m.removeBundle(manifest.ExplodedDataDir); err != nil {
m.logger.Error("failed to remove exploded bundle",
"err", err,
)
}
}

func (m *Manager) loadManifests() (map[string]*Manifest, error) {
m.logger.Info("loading manifests")

Expand Down
8 changes: 8 additions & 0 deletions go/runtime/bundle/manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@ func (r *mockStore) AddManifest(manifest *Manifest, _ string) error {
return nil
}

func (r *mockStore) Manifests() []ExplodedManifest {
panic("not implemented")
}

func (r *mockStore) RemoveManifest(_ hash.Hash) bool {
panic("not implemented")
}

func TestRegisterManifest(t *testing.T) {
store := newMockStore()
manager, err := NewManager("", nil, store)
Expand Down
9 changes: 9 additions & 0 deletions go/runtime/bundle/manifest.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,15 @@ const (
manifestName = manifestPath + "/MANIFEST.MF"
)

// ExplodedManifest is manifest with corresponding exploded bundle dir.
type ExplodedManifest struct {
*Manifest

// ExplodedDataDir is the path to the data directory where the bundle
// represented by manifest has been extracted.
ExplodedDataDir string
}

// Manifest is a deserialized runtime bundle manifest.
type Manifest struct {
// Name is the optional human readable runtime name.
Expand Down
Loading
Loading