From 1596a6a78ee165189ea7ab401939c83f827f9d12 Mon Sep 17 00:00:00 2001 From: Vikas Bhansali <64532198+vibhansa-msft@users.noreply.github.com> Date: Tue, 1 Aug 2023 16:23:54 +0530 Subject: [PATCH] Multiple bug fixes (#1213) * if 'du' is not found, skip checking for disk usage in LRU policy * for unmount check for fusermount3 rather then fusermount * add more lru policy logs to detect why deletions are done * v1 flag for file-cache-timeout-in-seconds is not honored while creating cache policy --- CHANGELOG.md | 8 ++- azure-pipeline-templates/e2e-tests.yml | 15 ++++++ blobfuse2-nightly.yaml | 12 ++--- cmd/health-monitor_stop_all.go | 4 +- cmd/unmount.go | 30 +++++++---- cmd/unmount_test.go | 3 -- component/azstorage/azauth_test.go | 27 ++++++++++ component/file_cache/cache_policy.go | 34 +++++++++--- component/file_cache/cache_policy_test.go | 2 +- component/file_cache/file_cache.go | 13 ++++- component/file_cache/file_cache_test.go | 66 ++++++++++++++++++++++- component/file_cache/lru_policy.go | 17 +++++- component/libfuse/libfuse.go | 4 -- 13 files changed, 197 insertions(+), 38 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6803e3489..c68e92ab7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,9 +4,13 @@ - Added support to read OAuth token from a user given file. **Bug Fixes** -- Fix priority level check of components to reject invalid pipeline entries. -- [#1196](https://github.com/Azure/azure-storage-fuse/issues/1196) 100% CPU usage in 2.0.4 fixed +- Fixed priority level check of components to reject invalid pipeline entries. +- [#1196](https://github.com/Azure/azure-storage-fuse/issues/1196) 100% CPU usage in 2.0.4 fixed. - [#1207](https://github.com/Azure/azure-storage-fuse/issues/1207) Fix log-rotate script. +- Unmount command was looking for `fusermount` while on fuse3 systems it should be looking for `fusermount3`. +- If `du` command is not found skip checking for disk usage in LRU cache-eviction policy. +- V1 flag of `file-cache-timeout-in-seconds` not interpreted correctly by V2 and causing eviction policy to assume its 0. +- If `du` is not found on standard path try paths where it can potentially be found. ## 2.0.4 (2023-07-03) **Features** diff --git a/azure-pipeline-templates/e2e-tests.yml b/azure-pipeline-templates/e2e-tests.yml index 39c8766d9..3bddab122 100755 --- a/azure-pipeline-templates/e2e-tests.yml +++ b/azure-pipeline-templates/e2e-tests.yml @@ -48,6 +48,21 @@ steps: exit $? displayName: 'Verify Mount' + - script: | + out=`pidstat -p \`pidof blobfuse2\` -u 2 5 | tail -n 1` + outData=($out) + cpu=${outData[7]} + echo "CPU is :" $cpu + + if [[ $cpu > 5 ]] + then + echo "CPU usage is high" + exit 1 + else + echo "CPU usage is low" + fi + displayName: 'CPU Usage test' + - task: Go@0 inputs: command: 'test' diff --git a/blobfuse2-nightly.yaml b/blobfuse2-nightly.yaml index 2ef4f21fd..2469e0e32 100755 --- a/blobfuse2-nightly.yaml +++ b/blobfuse2-nightly.yaml @@ -1699,6 +1699,12 @@ stages: timeoutInMinutes: 3 continueOnError: false + - script: | + sudo ps -aux | grep bfusemon + displayName: 'Print bfusemon Process info' + env: + mount_dir: $(MOUNT_DIR) + - script: | touch $(MOUNT_DIR)/h1.txt echo "123456" > $(MOUNT_DIR)/h2.txt @@ -1717,12 +1723,6 @@ stages: displayName: "Print bfusemon output" workingDirectory: $(WORK_DIR) - - script: | - sudo ps -aux | grep bfusemon - displayName: 'Print bfusemon Process info' - env: - mount_dir: $(MOUNT_DIR) - - template: 'azure-pipeline-templates/cleanup.yml' parameters: working_dir: $(WORK_DIR) diff --git a/cmd/health-monitor_stop_all.go b/cmd/health-monitor_stop_all.go index a348e899f..dc4b0085b 100644 --- a/cmd/health-monitor_stop_all.go +++ b/cmd/health-monitor_stop_all.go @@ -38,6 +38,8 @@ import ( "os/exec" "github.com/spf13/cobra" + + hmcommon "github.com/Azure/azure-storage-fuse/v2/tools/health-monitor/common" ) var healthMonStopAll = &cobra.Command{ @@ -57,7 +59,7 @@ var healthMonStopAll = &cobra.Command{ // Attempts to kill all health monitors func stopAll() error { - cliOut := exec.Command("killall", "bfusemon") + cliOut := exec.Command("killall", hmcommon.BfuseMon) _, err := cliOut.Output() if err != nil { return err diff --git a/cmd/unmount.go b/cmd/unmount.go index 2fbfaa1b6..d8b25005c 100644 --- a/cmd/unmount.go +++ b/cmd/unmount.go @@ -70,7 +70,7 @@ var unmountCmd = &cobra.Command{ } else { err := unmountBlobfuse2(args[0]) if err != nil { - return fmt.Errorf("failed to unmount %s [%s]", args[0], err.Error()) + return err } } @@ -87,17 +87,27 @@ var unmountCmd = &cobra.Command{ // Attempts to unmount the directory and returns true if the operation succeeded func unmountBlobfuse2(mntPath string) error { - cliOut := exec.Command("fusermount", "-u", mntPath) + unmountCmd := []string{"fusermount3", "fusermount"} + var errb bytes.Buffer - cliOut.Stderr = &errb - _, err := cliOut.Output() - if err != nil { - log.Err("unmountBlobfuse2 : failed to unmount (%s : %s)", err.Error(), errb.String()) - return fmt.Errorf("%s", errb.String()) - } else { - fmt.Println("Successfully unmounted", mntPath) - return nil + var err error + for _, umntCmd := range unmountCmd { + cliOut := exec.Command(umntCmd, "-u", mntPath) + cliOut.Stderr = &errb + _, err = cliOut.Output() + + if err == nil { + fmt.Println("Successfully unmounted", mntPath) + return nil + } + + if !strings.Contains(err.Error(), "executable file not found") { + log.Err("unmountBlobfuse2 : failed to unmount (%s : %s)", err.Error(), errb.String()) + break + } } + + return fmt.Errorf("%s", errb.String()+" "+err.Error()) } func init() { diff --git a/cmd/unmount_test.go b/cmd/unmount_test.go index 181327305..936d0bcde 100644 --- a/cmd/unmount_test.go +++ b/cmd/unmount_test.go @@ -191,9 +191,6 @@ func (suite *unmountTestSuite) TestUnmountCmdValidArg() { _, err = executeCommandC(rootCmd, "unmount", mountDirectory5+"*") suite.assert.Nil(err) - lst, _ = unmountCmd.ValidArgsFunction(nil, nil, "") - suite.assert.Empty(lst) - lst, _ = unmountCmd.ValidArgsFunction(nil, nil, "abcd") suite.assert.Empty(lst) } diff --git a/component/azstorage/azauth_test.go b/component/azstorage/azauth_test.go index 82e4142c6..80e32c8e8 100644 --- a/component/azstorage/azauth_test.go +++ b/component/azstorage/azauth_test.go @@ -710,6 +710,33 @@ func (suite *authTestSuite) TestBlockInvalidSpn() { } } +func (suite *authTestSuite) TestBlockInvalidTokenPathSpn() { + defer suite.cleanupTest() + + _ = os.WriteFile("newtoken.txt", []byte("abcdef"), 0777) + defer os.Remove("newtoken.txt") + + stgConfig := AzStorageConfig{ + container: storageTestConfigurationParameters.BlockContainer, + authConfig: azAuthConfig{ + AuthMode: EAuthType.SPN(), + AccountType: EAccountType.BLOCK(), + AccountName: storageTestConfigurationParameters.BlockAccount, + ClientID: storageTestConfigurationParameters.SpnClientId, + TenantID: storageTestConfigurationParameters.SpnTenantId, + ClientSecret: "", + Endpoint: generateEndpoint(false, storageTestConfigurationParameters.BlockAccount, EAccountType.BLOCK()), + OAuthTokenFilePath: "newtoken.txt", + }, + } + assert := assert.New(suite.T()) + stg := NewAzStorageConnection(stgConfig) + if stg == nil { + assert.Fail("TestBlockInvalidSpn : Failed to create Storage object") + } + _ = stg.SetupPipeline() +} + func (suite *authTestSuite) TestBlockSpn() { defer suite.cleanupTest() stgConfig := AzStorageConfig{ diff --git a/component/file_cache/cache_policy.go b/component/file_cache/cache_policy.go index 9ef42b149..eec5b8bdb 100644 --- a/component/file_cache/cache_policy.go +++ b/component/file_cache/cache_policy.go @@ -77,36 +77,56 @@ type cachePolicy interface { Name() string // The name of the policy } +var duPath []string = []string{"/usr/bin/du", "/usr/local/bin/du", "/usr/sbin/du", "/usr/local/sbin/du", "/sbin/du", "/bin/du"} +var selectedDuPath string = "" + // getUsage: The current cache usage in MB -func getUsage(path string) float64 { +func getUsage(path string) (float64, error) { log.Trace("cachePolicy::getCacheUsage : %s", path) var currSize float64 var out bytes.Buffer + if selectedDuPath == "" { + selectedDuPath = "-" + for _, dup := range duPath { + _, err := os.Stat(dup) + if err == nil { + selectedDuPath = dup + break + } + } + } + + if selectedDuPath == "-" { + log.Err("cachePolicy::getCacheUsage : error finding du in any configured path") + return 0, fmt.Errorf("failed to find du") + } + // du - estimates file space usage // https://man7.org/linux/man-pages/man1/du.1.html // Note: We cannot just pass -BM as a parameter here since it will result in less accurate estimates of the size of the path // (i.e. du will round up to 1M if the path is smaller than 1M). - cmd := exec.Command("du", "-sh", path) + cmd := exec.Command(selectedDuPath, "-sh", path) cmd.Stdout = &out err := cmd.Run() if err != nil { log.Err("cachePolicy::getCacheUsage : error running du [%s]", err.Error()) - return 0 + return 0, err } size := strings.Split(out.String(), "\t")[0] if size == "0" { - return 0 + return 0, fmt.Errorf("failed to parse du output") } + // some OS's use "," instead of "." that will not work for float parsing - replace it size = strings.Replace(size, ",", ".", 1) parsed, err := strconv.ParseFloat(size[:len(size)-1], 64) if err != nil { log.Err("cachePolicy::getCacheUsage : error parsing folder size [%s]", err.Error()) - return 0 + return 0, fmt.Errorf("failed to parse du output") } switch size[len(size)-1] { @@ -121,7 +141,7 @@ func getUsage(path string) float64 { } log.Debug("cachePolicy::getCacheUsage : current cache usage : %fMB", currSize) - return currSize + return currSize, nil } // getUsagePercentage: The current cache usage as a percentage of the maxSize @@ -130,7 +150,7 @@ func getUsagePercentage(path string, maxSize float64) float64 { return 0 } - currSize := getUsage(path) + currSize, _ := getUsage(path) usagePercent := (currSize / float64(maxSize)) * 100 log.Debug("cachePolicy::getUsagePercentage : current cache usage : %f%%", usagePercent) diff --git a/component/file_cache/cache_policy_test.go b/component/file_cache/cache_policy_test.go index 505a4cdc9..d9acebb19 100644 --- a/component/file_cache/cache_policy_test.go +++ b/component/file_cache/cache_policy_test.go @@ -69,7 +69,7 @@ func (suite *cachePolicyTestSuite) TestGetUsage() { f, _ := os.Create(cache_path + "/test") data := make([]byte, 1024*1024) f.Write(data) - result := getUsage(cache_path) + result, _ := getUsage(cache_path) suite.assert.Equal(float64(1), math.Floor(result)) } diff --git a/component/file_cache/file_cache.go b/component/file_cache/file_cache.go index 735f54972..236458965 100644 --- a/component/file_cache/file_cache.go +++ b/component/file_cache/file_cache.go @@ -309,6 +309,8 @@ func (c *FileCache) Configure(_ bool) error { // OnConfigChange : If component has registered, on config file change this method is called func (c *FileCache) OnConfigChange() { + log.Trace("FileCache::OnConfigChange : %s", c.Name()) + conf := FileCacheOptions{} err := config.UnmarshalKey(compName, &conf) if err != nil { @@ -332,7 +334,10 @@ func (c *FileCache) StatFs() (*syscall.Statfs_t, bool, error) { if maxCacheSize == 0 { return nil, false, nil } - usage := getUsage(c.tmpPath) * MB + + usage, _ := getUsage(c.tmpPath) + usage = usage * MB + available := maxCacheSize - usage statfs := &syscall.Statfs_t{} err := syscall.Statfs("/", statfs) @@ -364,7 +369,7 @@ func (c *FileCache) GetPolicyConfig(conf FileCacheOptions) cachePolicyConfig { maxEviction: conf.MaxEviction, highThreshold: float64(conf.HighThreshold), lowThreshold: float64(conf.LowThreshold), - cacheTimeout: uint32(conf.Timeout), + cacheTimeout: uint32(c.cacheTimeout), maxSizeMB: conf.MaxSizeMB, fileLocks: c.fileLocks, policyTrace: conf.EnablePolicyTrace, @@ -1464,6 +1469,10 @@ func init() { config.BindPFlag(compName+".upload-modified-only", uploadModifiedOnly) uploadModifiedOnly.Hidden = true + cachePolicy := config.AddStringFlag("file-cache-policy", "lru", "Cache eviction policy.") + config.BindPFlag(compName+".policy", cachePolicy) + cachePolicy.Hidden = true + syncToFlush := config.AddBoolFlag("sync-to-flush", false, "Sync call on file will force a upload of the file.") config.BindPFlag(compName+".sync-to-flush", syncToFlush) diff --git a/component/file_cache/file_cache_test.go b/component/file_cache/file_cache_test.go index 92fa43e59..e0ea18e9a 100644 --- a/component/file_cache/file_cache_test.go +++ b/component/file_cache/file_cache_test.go @@ -100,7 +100,7 @@ func (suite *fileCacheTestSuite) SetupTest() { rand := randomString(8) suite.cache_path = filepath.Join(home_dir, "file_cache"+rand) suite.fake_storage_path = filepath.Join(home_dir, "fake_storage"+rand) - defaultConfig := fmt.Sprintf("file_cache:\n path: %s\n offload-io: true\n\nloopbackfs:\n path: %s", suite.cache_path, suite.fake_storage_path) + defaultConfig := fmt.Sprintf("file_cache:\n path: %s\n offload-io: true\n timeout-sec: 0\n\nloopbackfs:\n path: %s", suite.cache_path, suite.fake_storage_path) log.Debug(defaultConfig) // Delete the temp directories created @@ -189,6 +189,70 @@ func (suite *fileCacheTestSuite) TestConfig() { suite.assert.Equal(suite.fileCache.cleanupOnStart, cleanupOnStart) } +func (suite *fileCacheTestSuite) TestConfigPolicyTimeout() { + defer suite.cleanupTest() + suite.cleanupTest() // teardown the default file cache generated + policy := "lfu" + maxSizeMb := 1024 + cacheTimeout := 60 + maxDeletion := 10 + highThreshold := 90 + lowThreshold := 10 + createEmptyFile := true + allowNonEmptyTemp := true + cleanupOnStart := true + config := fmt.Sprintf("file_cache:\n path: %s\n offload-io: true\n policy: %s\n max-size-mb: %d\n timeout-sec: %d\n max-eviction: %d\n high-threshold: %d\n low-threshold: %d\n create-empty-file: %t\n allow-non-empty-temp: %t\n cleanup-on-start: %t", + suite.cache_path, policy, maxSizeMb, cacheTimeout, maxDeletion, highThreshold, lowThreshold, createEmptyFile, allowNonEmptyTemp, cleanupOnStart) + suite.setupTestHelper(config) // setup a new file cache with a custom config (teardown will occur after the test as usual) + + suite.assert.Equal(suite.fileCache.Name(), "file_cache") + suite.assert.Equal(suite.fileCache.tmpPath, suite.cache_path) + suite.assert.Equal(suite.fileCache.policy.Name(), policy) + + suite.assert.EqualValues(suite.fileCache.policy.(*lfuPolicy).maxSizeMB, maxSizeMb) + suite.assert.EqualValues(suite.fileCache.policy.(*lfuPolicy).maxEviction, maxDeletion) + suite.assert.EqualValues(suite.fileCache.policy.(*lfuPolicy).highThreshold, highThreshold) + suite.assert.EqualValues(suite.fileCache.policy.(*lfuPolicy).lowThreshold, lowThreshold) + suite.assert.EqualValues(suite.fileCache.policy.(*lfuPolicy).cacheTimeout, cacheTimeout) + + suite.assert.Equal(suite.fileCache.createEmptyFile, createEmptyFile) + suite.assert.Equal(suite.fileCache.allowNonEmpty, allowNonEmptyTemp) + suite.assert.EqualValues(suite.fileCache.cacheTimeout, cacheTimeout) + suite.assert.Equal(suite.fileCache.cleanupOnStart, cleanupOnStart) +} + +func (suite *fileCacheTestSuite) TestConfigPolicyDefaultTimeout() { + defer suite.cleanupTest() + suite.cleanupTest() // teardown the default file cache generated + policy := "lfu" + maxSizeMb := 1024 + cacheTimeout := defaultFileCacheTimeout + maxDeletion := 10 + highThreshold := 90 + lowThreshold := 10 + createEmptyFile := true + allowNonEmptyTemp := true + cleanupOnStart := true + config := fmt.Sprintf("file_cache:\n path: %s\n offload-io: true\n policy: %s\n max-size-mb: %d\n max-eviction: %d\n high-threshold: %d\n low-threshold: %d\n create-empty-file: %t\n allow-non-empty-temp: %t\n cleanup-on-start: %t", + suite.cache_path, policy, maxSizeMb, maxDeletion, highThreshold, lowThreshold, createEmptyFile, allowNonEmptyTemp, cleanupOnStart) + suite.setupTestHelper(config) // setup a new file cache with a custom config (teardown will occur after the test as usual) + + suite.assert.Equal(suite.fileCache.Name(), "file_cache") + suite.assert.Equal(suite.fileCache.tmpPath, suite.cache_path) + suite.assert.Equal(suite.fileCache.policy.Name(), policy) + + suite.assert.EqualValues(suite.fileCache.policy.(*lfuPolicy).maxSizeMB, maxSizeMb) + suite.assert.EqualValues(suite.fileCache.policy.(*lfuPolicy).maxEviction, maxDeletion) + suite.assert.EqualValues(suite.fileCache.policy.(*lfuPolicy).highThreshold, highThreshold) + suite.assert.EqualValues(suite.fileCache.policy.(*lfuPolicy).lowThreshold, lowThreshold) + suite.assert.EqualValues(suite.fileCache.policy.(*lfuPolicy).cacheTimeout, cacheTimeout) + + suite.assert.Equal(suite.fileCache.createEmptyFile, createEmptyFile) + suite.assert.Equal(suite.fileCache.allowNonEmpty, allowNonEmptyTemp) + suite.assert.EqualValues(suite.fileCache.cacheTimeout, cacheTimeout) + suite.assert.Equal(suite.fileCache.cleanupOnStart, cleanupOnStart) +} + func (suite *fileCacheTestSuite) TestConfigZero() { defer suite.cleanupTest() suite.cleanupTest() // teardown the default file cache generated diff --git a/component/file_cache/lru_policy.go b/component/file_cache/lru_policy.go index f2028eb63..5528877a1 100644 --- a/component/file_cache/lru_policy.go +++ b/component/file_cache/lru_policy.go @@ -75,6 +75,9 @@ type lruPolicy struct { // Channel to check for file eviction based on file-cache timeout cacheTimeoutMonitor <-chan time.Time + + // DU utility was found on the path or not + duPresent bool } const ( @@ -99,6 +102,7 @@ func NewLRUPolicy(cfg cachePolicyConfig) cachePolicy { name: "##", usage: -1, }, + duPresent: false, } return obj @@ -118,10 +122,21 @@ func (p *lruPolicy) StartPolicy() error { p.deleteEvent = make(chan string, 1000) p.validateChan = make(chan string, 10000) - p.diskUsageMonitor = time.Tick(time.Duration(DiskUsageCheckInterval * time.Minute)) + _, err := getUsage(p.tmpPath) + if err == nil { + p.duPresent = true + } else { + log.Err("lruPolicy::StartPolicy : 'du' command not found, disabling disk usage checks") + } + + if p.duPresent { + p.diskUsageMonitor = time.Tick(time.Duration(DiskUsageCheckInterval * time.Minute)) + } // Only start the timeoutMonitor if evictTime is non-zero. // If evictTime=0, we delete on invalidate so there is no need for a timeout monitor signal to be sent. + log.Info("lruPolicy::StartPolicy : Policy set with %v timeout", p.cacheTimeout) + if p.cacheTimeout != 0 { p.cacheTimeoutMonitor = time.Tick(time.Duration(time.Duration(p.cacheTimeout) * time.Second)) } diff --git a/component/libfuse/libfuse.go b/component/libfuse/libfuse.go index 7305248b0..74b0692d8 100644 --- a/component/libfuse/libfuse.go +++ b/component/libfuse/libfuse.go @@ -302,10 +302,6 @@ func (lf *Libfuse) Configure(_ bool) error { return nil } -// OnConfigChange : If component has registered, on config file change this method is called -func (lf *Libfuse) OnConfigChange() { -} - // ------------------------- Factory ------------------------------------------- // Pipeline will call this method to create your object, initialize your variables here