diff --git a/Build-COSFPkgs.ps1 b/Build-COSFPkgs.ps1 index bab7a4e7..ef7eeaf4 100644 --- a/Build-COSFPkgs.ps1 +++ b/Build-COSFPkgs.ps1 @@ -23,11 +23,11 @@ function Build-SFPkg { try { Push-Location $scriptPath - Build-SFPkg "Microsoft.ServiceFabricApps.ClusterObserver.Linux.SelfContained.2.2.7" "$scriptPath\bin\release\ClusterObserver\linux-x64\self-contained\ClusterObserverType" - Build-SFPkg "Microsoft.ServiceFabricApps.ClusterObserver.Linux.FrameworkDependent.2.2.7" "$scriptPath\bin\release\ClusterObserver\linux-x64\framework-dependent\ClusterObserverType" + Build-SFPkg "Microsoft.ServiceFabricApps.ClusterObserver.Linux.SelfContained.2.2.8" "$scriptPath\bin\release\ClusterObserver\linux-x64\self-contained\ClusterObserverType" + Build-SFPkg "Microsoft.ServiceFabricApps.ClusterObserver.Linux.FrameworkDependent.2.2.8" "$scriptPath\bin\release\ClusterObserver\linux-x64\framework-dependent\ClusterObserverType" - Build-SFPkg "Microsoft.ServiceFabricApps.ClusterObserver.Windows.SelfContained.2.2.7" "$scriptPath\bin\release\ClusterObserver\win-x64\self-contained\ClusterObserverType" - Build-SFPkg "Microsoft.ServiceFabricApps.ClusterObserver.Windows.FrameworkDependent.2.2.7" "$scriptPath\bin\release\ClusterObserver\win-x64\framework-dependent\ClusterObserverType" + Build-SFPkg "Microsoft.ServiceFabricApps.ClusterObserver.Windows.SelfContained.2.2.8" "$scriptPath\bin\release\ClusterObserver\win-x64\self-contained\ClusterObserverType" + Build-SFPkg "Microsoft.ServiceFabricApps.ClusterObserver.Windows.FrameworkDependent.2.2.8" "$scriptPath\bin\release\ClusterObserver\win-x64\framework-dependent\ClusterObserverType" } finally { Pop-Location diff --git a/Build-SFPkgs.ps1 b/Build-SFPkgs.ps1 index 7290c7dc..6bd4a717 100644 --- a/Build-SFPkgs.ps1 +++ b/Build-SFPkgs.ps1 @@ -23,11 +23,11 @@ function Build-SFPkg { try { Push-Location $scriptPath - Build-SFPkg "Microsoft.ServiceFabricApps.FabricObserver.Linux.SelfContained.3.2.13" 
"$scriptPath\bin\release\FabricObserver\linux-x64\self-contained\FabricObserverType" - Build-SFPkg "Microsoft.ServiceFabricApps.FabricObserver.Linux.FrameworkDependent.3.2.13" "$scriptPath\bin\release\FabricObserver\linux-x64\framework-dependent\FabricObserverType" + Build-SFPkg "Microsoft.ServiceFabricApps.FabricObserver.Linux.SelfContained.3.2.14" "$scriptPath\bin\release\FabricObserver\linux-x64\self-contained\FabricObserverType" + Build-SFPkg "Microsoft.ServiceFabricApps.FabricObserver.Linux.FrameworkDependent.3.2.14" "$scriptPath\bin\release\FabricObserver\linux-x64\framework-dependent\FabricObserverType" - Build-SFPkg "Microsoft.ServiceFabricApps.FabricObserver.Windows.SelfContained.3.2.13" "$scriptPath\bin\release\FabricObserver\win-x64\self-contained\FabricObserverType" - Build-SFPkg "Microsoft.ServiceFabricApps.FabricObserver.Windows.FrameworkDependent.3.2.13" "$scriptPath\bin\release\FabricObserver\win-x64\framework-dependent\FabricObserverType" + Build-SFPkg "Microsoft.ServiceFabricApps.FabricObserver.Windows.SelfContained.3.2.14" "$scriptPath\bin\release\FabricObserver\win-x64\self-contained\FabricObserverType" + Build-SFPkg "Microsoft.ServiceFabricApps.FabricObserver.Windows.FrameworkDependent.3.2.14" "$scriptPath\bin\release\FabricObserver\win-x64\framework-dependent\FabricObserverType" } finally { Pop-Location diff --git a/ClusterObserver.nuspec.template b/ClusterObserver.nuspec.template index 976b802c..ee38a3b6 100644 --- a/ClusterObserver.nuspec.template +++ b/ClusterObserver.nuspec.template @@ -2,10 +2,11 @@ %PACKAGE_ID% - 2.2.7 + 2.2.8 -- Bug Fixes. -- Performance and Code improvements. +- *Breaking Change*: Telemetry configuration settings are now required to be overridden in ApplicationManifest.xml to support versionless, parameter-only application upgrades for telemetry settings. See [Issue 292](https://github.com/microsoft/service-fabric-observer/issues/292) for details. 
Just move your related settings' Value strings from Settings.xml to ApplicationManifest.xml app parameter (the names of these settings are the same). +- Bug fix in app param update for log path and max archive lifetime settings. +- Updated nuget package dependencies to latest versions. Microsoft MIT diff --git a/ClusterObserver/ClusterObserver.cs b/ClusterObserver/ClusterObserver.cs index f3f86b7a..1c0eb228 100644 --- a/ClusterObserver/ClusterObserver.cs +++ b/ClusterObserver/ClusterObserver.cs @@ -104,7 +104,7 @@ public ClusterObserver(StatelessServiceContext serviceContext, bool ignoreDefaul public override async Task ObserveAsync(CancellationToken token) { if (!IsEnabled || (!IsTelemetryEnabled && !IsEtwEnabled) - || (RunInterval > TimeSpan.MinValue && DateTime.Now.Subtract(LastRunDateTime) < RunInterval)) + || (RunInterval > TimeSpan.Zero && DateTime.Now.Subtract(LastRunDateTime) < RunInterval)) { return; } @@ -816,7 +816,7 @@ private async Task ProcessEntityHealthAsync(EntityHealth entityHea Description = healthEvent.HealthInformation.Description, HealthState = healthEvent.HealthInformation.HealthState, Source = healthEvent.HealthInformation.SourceId, - ObserverName = ClusterObserverConstants.ClusterObserverName + ObserverName = ObserverName }; // Telemetry. @@ -856,7 +856,7 @@ private async Task ProcessEntityHealthAsync(EntityHealth entityHea Description = healthEvent.HealthInformation.Description, HealthState = healthEvent.HealthInformation.HealthState, Source = healthEvent.HealthInformation.SourceId, - ObserverName = ClusterObserverConstants.ClusterObserverName + ObserverName = ObserverName }; // Telemetry. 
@@ -909,7 +909,7 @@ await FabricClientInstance.QueryManager.GetDeployedServicePackageListAsync( Description = healthEvent.HealthInformation.Description, HealthState = healthEvent.HealthInformation.HealthState, Source = healthEvent.HealthInformation.SourceId, - ObserverName = ClusterObserverConstants.ClusterObserverName + ObserverName = ObserverName }; // Telemetry. @@ -987,7 +987,7 @@ await FabricClientInstance.QueryManager.GetDeployedServicePackageListAsync( Description = healthEvent.HealthInformation.Description, HealthState = healthEvent.HealthInformation.HealthState, Source = healthEvent.HealthInformation.SourceId, - ObserverName = ClusterObserverConstants.ClusterObserverName + ObserverName = ObserverName }; // Telemetry. @@ -1044,7 +1044,7 @@ await FabricClientInstance.QueryManager.GetReplicaListAsync( HealthState = healthEvent.HealthInformation.HealthState, ServiceKind = serviceKind, Source = healthEvent.HealthInformation.SourceId, - ObserverName = ClusterObserverConstants.ClusterObserverName + ObserverName = ObserverName }; // Telemetry. @@ -1104,7 +1104,7 @@ await FabricClientInstance.QueryManager.GetReplicaListAsync( Description = healthEvent.HealthInformation.Description, HealthState = healthEvent.HealthInformation.HealthState, Source = healthEvent.HealthInformation.SourceId, - ObserverName = ClusterObserverConstants.ClusterObserverName + ObserverName = ObserverName }; // Telemetry. 
diff --git a/ClusterObserver/ClusterObserver.csproj b/ClusterObserver/ClusterObserver.csproj index fe803570..0fe0ac49 100644 --- a/ClusterObserver/ClusterObserver.csproj +++ b/ClusterObserver/ClusterObserver.csproj @@ -10,8 +10,8 @@ win-x64;linux-x64 True ClusterObserver - 2.2.7 - 2.2.7 + 2.2.8 + 2.2.8 true false ClusterObserver.Program diff --git a/ClusterObserver/ClusterObserverManager.cs b/ClusterObserver/ClusterObserverManager.cs index 529bd501..a9bcece2 100644 --- a/ClusterObserver/ClusterObserverManager.cs +++ b/ClusterObserver/ClusterObserverManager.cs @@ -39,7 +39,7 @@ public sealed class ClusterObserverManager : IDisposable private bool appParamsUpdating; // Folks often use their own version numbers. This is for internal diagnostic telemetry. - private const string InternalVersionNumber = "2.2.7"; + private const string InternalVersionNumber = "2.2.8"; public bool EnableOperationalTelemetry { @@ -129,9 +129,21 @@ public ClusterObserverManager(ServiceProvider serviceProvider, CancellationToken } LogPath = logFolderBasePath; + _ = bool.TryParse(GetConfigSettingValue(ObserverConstants.EnableETWProvider, null), out bool enableEtwProvider); + EtwEnabled = enableEtwProvider; - // This logs error/warning/info messages for ObserverManager. - Logger = new Logger(ClusterObserverConstants.ClusterObserverManagerName, logFolderBasePath); + // ObserverManager logger EnableVerboseLogging. + _ = bool.TryParse(GetConfigSettingValue(ObserverConstants.EnableVerboseLoggingParameter, null), out bool enableVerboseLogging); + + // Log archive lifetime. + _ = int.TryParse(GetConfigSettingValue(ObserverConstants.MaxArchivedLogFileLifetimeDaysParameter, null), out int maxArchivedLogFileLifetimeDays); + + // This logs error/warning/info messages for ClusterObserverManager (local text log and optionally ETW). 
+ Logger = new Logger(ClusterObserverConstants.ClusterObserverManagerName, logFolderBasePath, maxArchivedLogFileLifetimeDays) + { + EnableETWLogging = EtwEnabled, + EnableVerboseLogging = enableVerboseLogging + }; SetPropertiesFromConfigurationParameters(); } @@ -186,12 +198,38 @@ private void SetPropertiesFromConfigurationParameters(ConfigurationSettings sett observerExecTimeout = TimeSpan.FromSeconds(result); } - // Logger - if (bool.TryParse(GetConfigSettingValue(ObserverConstants.EnableVerboseLoggingParameter, settings), out bool enableVerboseLogging)) + // Logger settings - Overrides. Config update. \\ + + // settings are not null if this is running due to a config update. Could also check for isConfigurationUpdateInProgress. + if (settings != null && Logger != null) { + // ObserverManager logger EnableETWLogging - Override. + _ = bool.TryParse(GetConfigSettingValue(ObserverConstants.EnableETWProvider, settings), out bool enableEtwProvider); + EtwEnabled = enableEtwProvider; + Logger.EnableETWLogging = enableEtwProvider; + + // ObserverManager logger EnableVerboseLogging - Override. + _ = bool.TryParse(GetConfigSettingValue(ObserverConstants.EnableVerboseLoggingParameter, settings), out bool enableVerboseLogging); Logger.EnableVerboseLogging = enableVerboseLogging; + + // ObserverManager/Observer logger MaxArchiveLifetimeDays - Override. Parsed from the MaxArchivedLogFileLifetimeDays setting (same key the constructor reads). + _ = int.TryParse(GetConfigSettingValue(ObserverConstants.MaxArchivedLogFileLifetimeDaysParameter, settings), out int maxArchiveLifetimeDays); + Logger.MaxArchiveFileLifetimeDays = maxArchiveLifetimeDays; + + // ObserverManager/Observer logger ObserverLogPath - Override. + string loggerBasePath = GetConfigSettingValue(ObserverConstants.ObserverLogPathParameter, settings); + + if (!string.IsNullOrWhiteSpace(loggerBasePath)) + { + Logger.LogFolderBasePath = loggerBasePath; + } + + // This will reset existing logger instance's config state and employ updated settings immediately. See Logger.cs. 
+ Logger.InitializeLoggers(true); } + // End Logger settings - Overrides. \\ + if (int.TryParse(GetConfigSettingValue(ClusterObserverConstants.ObserverLoopSleepTimeSecondsParameter, settings), out int execFrequency)) { ObserverExecutionLoopSleepSeconds = execFrequency; @@ -220,55 +258,54 @@ private void SetPropertiesFromConfigurationParameters(ConfigurationSettings sett TelemetryEnabled = telemEnabled; } - if (TelemetryEnabled) + if (!TelemetryEnabled) { - string telemetryProviderType = GetConfigSettingValue(ClusterObserverConstants.TelemetryProviderTypeParameter, settings); + return; + } - if (string.IsNullOrWhiteSpace(telemetryProviderType)) - { - TelemetryEnabled = false; - return; - } + string telemetryProviderType = GetConfigSettingValue(ClusterObserverConstants.TelemetryProviderTypeParameter, settings); - if (!Enum.TryParse(telemetryProviderType, out TelemetryProviderType telemetryProvider)) - { - TelemetryEnabled = false; - return; - } + if (string.IsNullOrWhiteSpace(telemetryProviderType)) + { + TelemetryEnabled = false; + return; + } - switch (telemetryProvider) - { - case TelemetryProviderType.AzureLogAnalytics: - - string logAnalyticsLogType = GetConfigSettingValue(ObserverConstants.LogAnalyticsLogTypeParameter, settings) ?? 
"Application"; - string logAnalyticsSharedKey = GetConfigSettingValue(ObserverConstants.LogAnalyticsSharedKeyParameter, settings); - string logAnalyticsWorkspaceId = GetConfigSettingValue(ObserverConstants.LogAnalyticsWorkspaceIdParameter, settings); + if (!Enum.TryParse(telemetryProviderType, out TelemetryProviderType telemetryProvider)) + { + TelemetryEnabled = false; + return; + } - if (string.IsNullOrWhiteSpace(logAnalyticsSharedKey) || string.IsNullOrWhiteSpace(logAnalyticsWorkspaceId)) - { - TelemetryEnabled = false; - return; - } + switch (telemetryProvider) + { + case TelemetryProviderType.AzureLogAnalytics: - TelemetryClient = new LogAnalyticsTelemetry( - logAnalyticsWorkspaceId, - logAnalyticsSharedKey, - logAnalyticsLogType); - break; - - case TelemetryProviderType.AzureApplicationInsights: - - string aiConnString = GetConfigSettingValue(ObserverConstants.AppInsightsConnectionString, settings); - - if (string.IsNullOrWhiteSpace(aiConnString)) - { - TelemetryEnabled = false; - return; - } + string logAnalyticsLogType = GetConfigSettingValue(ObserverConstants.LogAnalyticsLogTypeParameter, settings) ?? 
"Application"; + string logAnalyticsSharedKey = GetConfigSettingValue(ObserverConstants.LogAnalyticsSharedKeyParameter, settings); + string logAnalyticsWorkspaceId = GetConfigSettingValue(ObserverConstants.LogAnalyticsWorkspaceIdParameter, settings); - TelemetryClient = new AppInsightsTelemetry(aiConnString); - break; - } + if (string.IsNullOrWhiteSpace(logAnalyticsSharedKey) || string.IsNullOrWhiteSpace(logAnalyticsWorkspaceId)) + { + TelemetryEnabled = false; + return; + } + + TelemetryClient = new LogAnalyticsTelemetry(logAnalyticsWorkspaceId, logAnalyticsSharedKey, logAnalyticsLogType); + break; + + case TelemetryProviderType.AzureApplicationInsights: + + string aiConnString = GetConfigSettingValue(ObserverConstants.AppInsightsConnectionString, settings); + + if (string.IsNullOrWhiteSpace(aiConnString)) + { + TelemetryEnabled = false; + return; + } + + TelemetryClient = new AppInsightsTelemetry(aiConnString); + break; } } @@ -574,28 +611,33 @@ private async Task RunAsync() private async void CodePackageActivationContext_ConfigurationPackageModifiedEvent(object sender, PackageModifiedEventArgs e) { - Logger.LogWarning("Application Parameter upgrade started..."); - try { + Logger.LogWarning("Application Parameter upgrade started..."); + appParamsUpdating = true; await StopAsync(isAppParamUpdate: true); var newSettings = e.NewPackage.Settings; + // ClusterObserverManager settings. + SetPropertiesFromConfigurationParameters(newSettings); + // ClusterObserver and plugin observer settings. 
foreach (var observer in Observers) { string configSectionName = observer.ConfigurationSettings.ConfigSection.Name; observer.ConfigPackage = e.NewPackage; observer.ConfigurationSettings = new ConfigSettings(newSettings, configSectionName); - observer.ObserverLogger.EnableVerboseLogging = observer.ConfigurationSettings.EnableVerboseLogging; + observer.InitializeObserverLoggingInfra(isConfigUpdate: true); // Reset last run time so the observer restarts (if enabled) after the app parameter update completes. observer.LastRunDateTime = DateTime.MinValue; } - // ClusterObserverManager settings. - SetPropertiesFromConfigurationParameters(newSettings); + // Refresh CO CancellationTokenSources. + cts = new CancellationTokenSource(); + linkedSFRuntimeObserverTokenSource = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, this.token); + Logger.LogWarning("Application Parameter upgrade completed..."); } catch (Exception ex) when (ex is not OutOfMemoryException) { @@ -607,21 +649,17 @@ private async void CodePackageActivationContext_ConfigurationPackageModifiedEven HealthMessage = $"Error updating ClusterObserver with new configuration settings:{Environment.NewLine}{ex}", NodeName = FabricServiceContext.NodeContext.NodeName, State = HealthState.Ok, - Property = "Configuration_Upate_Error", + Property = "CO_Configuration_Upate_Error", EmitLogEvent = true }; ObserverHealthReporter healthReporter = new(Logger); healthReporter.ReportHealthToServiceFabric(healthReport); } - - // Refresh CO CancellationTokenSources. 
- cts?.Dispose(); - linkedSFRuntimeObserverTokenSource?.Dispose(); - cts = new CancellationTokenSource(); - linkedSFRuntimeObserverTokenSource = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, this.token); - Logger.LogWarning("Application Parameter upgrade completed..."); - appParamsUpdating = false; + finally + { + appParamsUpdating = false; + } } private void Dispose(bool disposing) diff --git a/ClusterObserver/PackageRoot/Config/Settings.xml b/ClusterObserver/PackageRoot/Config/Settings.xml index cb44f382..dbb6d99f 100644 --- a/ClusterObserver/PackageRoot/Config/Settings.xml +++ b/ClusterObserver/PackageRoot/Config/Settings.xml @@ -21,26 +21,35 @@ + - + - - - - - - + + + + + + + - - - - - - + + + + - + - + + + + + + diff --git a/ClusterObserver/PackageRoot/Data/Plugins/Readme.txt b/ClusterObserver/PackageRoot/Data/Plugins/Readme.txt index d2747f7b..4ad27483 100644 --- a/ClusterObserver/PackageRoot/Data/Plugins/Readme.txt +++ b/ClusterObserver/PackageRoot/Data/Plugins/Readme.txt @@ -68,5 +68,5 @@ cd C:\Users\me\source\repos\service-fabric-observer ./Build-FabricObserver ./Build-NugetPackages -The output from the above commands, FabricObserver platform-specific nupkgs and a package you have to use for plugin authoring named Microsoft.ServiceFabricApps.FabricObserver.Extensibility.3.2.13.nupkg, would be located in +The output from the above commands, FabricObserver platform-specific nupkgs and a package you have to use for plugin authoring named Microsoft.ServiceFabricApps.FabricObserver.Extensibility.3.2.14.nupkg, would be located in C:\Users\me\source\repos\service-fabric-observer\bin\release\FabricObserver\Nugets. 
\ No newline at end of file diff --git a/ClusterObserver/PackageRoot/ServiceManifest.xml b/ClusterObserver/PackageRoot/ServiceManifest.xml index 3eb0299c..43b4e5ca 100644 --- a/ClusterObserver/PackageRoot/ServiceManifest.xml +++ b/ClusterObserver/PackageRoot/ServiceManifest.xml @@ -1,6 +1,6 @@  @@ -11,7 +11,7 @@ - + ClusterObserver @@ -21,11 +21,11 @@ - + - + diff --git a/ClusterObserver/Readme.md b/ClusterObserver/Readme.md index de11e459..221f0385 100644 --- a/ClusterObserver/Readme.md +++ b/ClusterObserver/Readme.md @@ -1,4 +1,4 @@ -### ClusterObserver 2.2.7 +### ClusterObserver 2.2.8 #### This version - and all subsequent versions - requires SF Runtime >= 9.0 and targets .NET 6 ClusterObserver (CO) is a stateless singleton Service Fabric .NET 6 service that runs on one node in a cluster. CO observes cluster health (aggregated) @@ -30,7 +30,7 @@ Application Parameter Upgrade Example: ```Powershell $appName = "fabric:/ClusterObserver" -$appVersion = "2.2.7" +$appVersion = "2.2.8" $application = Get-ServiceFabricApplication -ApplicationName $appName @@ -161,7 +161,7 @@ Start-ServiceFabricApplicationUpgrade -ApplicationName $appName -ApplicationType ``` XML - + @@ -190,7 +190,7 @@ Start-ServiceFabricApplicationUpgrade -ApplicationName $appName -ApplicationType should match the Name and Version attributes of the ServiceManifest element defined in the ServiceManifest.xml file. 
--> - + diff --git a/ClusterObserver/Utilities/ClusterObserverConstants.cs b/ClusterObserver/Utilities/ClusterObserverConstants.cs index 6cdb03a1..753ccf84 100644 --- a/ClusterObserver/Utilities/ClusterObserverConstants.cs +++ b/ClusterObserver/Utilities/ClusterObserverConstants.cs @@ -19,6 +19,7 @@ public static class ClusterObserverConstants public const string ClusterObserverETWEventName = "ClusterObserverDataEvent"; public const string DefaultEventSourceProviderName = "ClusterObserverETWProvider"; public const string FabricObserverName = "FabricObserver"; + public const string MaxArchivedLogFileLifetimeDays = "MaxArchivedLogFileLifetimeDays"; // The name of the package that contains this Observer's configuration. public const string ObserverConfigurationPackageName = "Config"; diff --git a/ClusterObserverApp/ApplicationPackageRoot/ApplicationManifest.xml b/ClusterObserverApp/ApplicationPackageRoot/ApplicationManifest.xml index f7b9f90f..1bca1d7c 100644 --- a/ClusterObserverApp/ApplicationPackageRoot/ApplicationManifest.xml +++ b/ClusterObserverApp/ApplicationPackageRoot/ApplicationManifest.xml @@ -1,19 +1,34 @@  - + + + + + + - + + + + + + + + + - + + - + @@ -28,7 +43,7 @@ should match the Name and Version attributes of the ServiceManifest element defined in the ServiceManifest.xml file. --> - + @@ -36,12 +51,20 @@ + - + + + + + + + +
diff --git a/Documentation/Deployment/service-fabric-cluster-observer.json b/Documentation/Deployment/service-fabric-cluster-observer.json index b05dbab9..958c15c5 100644 --- a/Documentation/Deployment/service-fabric-cluster-observer.json +++ b/Documentation/Deployment/service-fabric-cluster-observer.json @@ -11,7 +11,7 @@ }, "applicationTypeVersionClusterObserver": { "type": "string", - "defaultValue": "2.2.7", + "defaultValue": "2.2.8", "metadata": { "description": "Provide the app version number of ClusterObserver. This must be identical to the version specified in the corresponding sfpkg." } diff --git a/Documentation/Deployment/service-fabric-cluster-observer.v2.2.7.parameters.json b/Documentation/Deployment/service-fabric-cluster-observer.v2.2.8.parameters.json similarity index 90% rename from Documentation/Deployment/service-fabric-cluster-observer.v2.2.7.parameters.json rename to Documentation/Deployment/service-fabric-cluster-observer.v2.2.8.parameters.json index b0d523c0..a6e4ad41 100644 --- a/Documentation/Deployment/service-fabric-cluster-observer.v2.2.7.parameters.json +++ b/Documentation/Deployment/service-fabric-cluster-observer.v2.2.8.parameters.json @@ -6,7 +6,7 @@ "value": "" }, "applicationTypeVersionClusterObserver": { - "value": "2.2.7" + "value": "2.2.8" }, "packageUrlClusterObserver": { "value": "" diff --git a/Documentation/Deployment/service-fabric-observer.json b/Documentation/Deployment/service-fabric-observer.json index 9b5b612d..546bddc3 100644 --- a/Documentation/Deployment/service-fabric-observer.json +++ b/Documentation/Deployment/service-fabric-observer.json @@ -11,16 +11,16 @@ }, "applicationTypeVersionFabricObserver": { "type": "string", - "defaultValue": "3.2.13", + "defaultValue": "3.2.14", "metadata": { - "description": "Provide the app version number of FabricObserver. This must be identical to the version, 3.2.13, in the referenced sfpkg specified in packageUrlFabricObserver." 
+ "description": "Provide the app version number of FabricObserver. This must be identical to the version, 3.2.14, in the referenced sfpkg specified in packageUrlFabricObserver." } }, "packageUrlFabricObserver": { "type": "string", "defaultValue": "", "metadata": { - "description": "This has to be a public accessible URL for the sfpkg file which contains the FabricObserver app package. Example: https://github.com/microsoft/service-fabric-observer/releases/download/[xxxxxxxx]/Microsoft.ServiceFabricApps.FabricObserver.Windows.SelfContained.3.2.13.sfpkg" + "description": "This has to be a public accessible URL for the sfpkg file which contains the FabricObserver app package. Example: https://github.com/microsoft/service-fabric-observer/releases/download/[xxxxxxxx]/Microsoft.ServiceFabricApps.FabricObserver.Windows.SelfContained.3.2.14.sfpkg" } } }, diff --git a/Documentation/Deployment/service-fabric-observer.v3.2.13.parameters.json b/Documentation/Deployment/service-fabric-observer.v3.2.13.parameters.json index 6700cd6e..8ad6fdc6 100644 --- a/Documentation/Deployment/service-fabric-observer.v3.2.13.parameters.json +++ b/Documentation/Deployment/service-fabric-observer.v3.2.13.parameters.json @@ -6,7 +6,7 @@ "value": "" }, "applicationTypeVersionFabricObserver": { - "value": "3.2.13" + "value": "3.2.14" }, "packageUrlFabricObserver": { "value": "" diff --git a/Documentation/OperationalTelemetry.md b/Documentation/OperationalTelemetry.md index 6164b898..4cc237e9 100644 --- a/Documentation/OperationalTelemetry.md +++ b/Documentation/OperationalTelemetry.md @@ -18,7 +18,7 @@ As with most of FabricObserver's application settings, you can also do this with Connect-ServiceFabricCluster ... 
$appParams = @{ "ObserverManagerEnableOperationalFOTelemetry" = "false"; } -Start-ServiceFabricApplicationUpgrade -ApplicationName fabric:/FabricObserver -ApplicationParameter $appParams -ApplicationTypeVersion 3.2.13 -UnMonitoredAuto +Start-ServiceFabricApplicationUpgrade -ApplicationName fabric:/FabricObserver -ApplicationParameter $appParams -ApplicationTypeVersion 3.2.14 -UnMonitoredAuto ``` @@ -44,7 +44,7 @@ Here is a full example of exactly what is sent in one of these telemetry events, "ClusterId": "00000000-1111-1111-0000-00f00d000d", "ClusterType": "SFRP", "NodeNameHash": "3e83569d4c6aad78083cd081215dafc81e5218556b6a46cb8dd2b183ed0095ad", - "FOVersion": "3.2.13", + "FOVersion": "3.2.14", "HasPlugins": "False", "SFRuntimeVersion":"9.0.1028.9590" "UpTime": "1.00:30:18.8058379", diff --git a/Documentation/Plugins.md b/Documentation/Plugins.md index 1eb8111f..a3d0c3ab 100644 --- a/Documentation/Plugins.md +++ b/Documentation/Plugins.md @@ -72,5 +72,5 @@ cd C:\Users\me\source\repos\service-fabric-observer ./Build-FabricObserver ./Build-NugetPackages ``` -The output from the above commands contains FabricObserver platform-specific nupkgs and a nupkg you have to use for plugin authoring named Microsoft.ServiceFabricApps.FabricObserver.Extensibility.3.2.13.nupkg. Nuget packages will be located in +The output from the above commands contains FabricObserver platform-specific nupkgs and a nupkg you have to use for plugin authoring named Microsoft.ServiceFabricApps.FabricObserver.Extensibility.3.2.14.nupkg. Nuget packages will be located in C:\Users\me\source\repos\service-fabric-observer\bin\release\FabricObserver\Nugets. 
\ No newline at end of file diff --git a/Documentation/Using.md b/Documentation/Using.md index 5851f8a4..067cfb8b 100644 --- a/Documentation/Using.md +++ b/Documentation/Using.md @@ -710,7 +710,7 @@ $appParams = @{ "FabricSystemObserverEnabled" = "true"; "FabricSystemObserverMem Then execute the application upgrade with ```Powershell -Start-ServiceFabricApplicationUpgrade -ApplicationName fabric:/FabricObserver -ApplicationTypeVersion 3.2.13 -ApplicationParameter $appParams -Monitored -FailureAction rollback +Start-ServiceFabricApplicationUpgrade -ApplicationName fabric:/FabricObserver -ApplicationTypeVersion 3.2.14 -ApplicationParameter $appParams -Monitored -FailureAction rollback ``` **Important**: This action will overwrite previous app paramemter changes that were made in an earlier application upgrade, for example. If you want to preserve any earlier changes, then you will need to @@ -718,7 +718,7 @@ supply those parameter values again along with the new ones. You do this in the ```PowerShell $appName = "fabric:/FabricObserver" -$appVersion = "3.2.13" +$appVersion = "3.2.14" $application = Get-ServiceFabricApplication -ApplicationName $appName $appParamCollection = $application.ApplicationParameters $applicationParameterMap = @{} diff --git a/FabricObserver.Extensibility.nuspec.template b/FabricObserver.Extensibility.nuspec.template index 4b2aa67c..64388e55 100644 --- a/FabricObserver.Extensibility.nuspec.template +++ b/FabricObserver.Extensibility.nuspec.template @@ -2,9 +2,14 @@ %PACKAGE_ID% - 3.2.13 + 3.2.14 -Note: This is library is required for observer plugins that target FabricObserver 3.2.13. +This version is required for observer plugins that target FabricObserver 3.2.14 or ClusterObserver 2.2.8. +#### Changes +- Updated Logger implementation to fix app param update bug related to log path and max archive lifetime settings not being honored. 
+- Updated ObserverBase to include guard rails to prevent high CPU consumption if an observer's MonitorDuration or MonitorSleepTimeMilliseconds settings are misconfigured (too high or too low, respectively). +- Performance improvements. +- Updated nuget package dependencies to latest versions. Microsoft MIT @@ -13,7 +18,7 @@ Note: This is library is required for observer plugins that target FabricObserve icon.png foextlib.md en-US - This package contains the FabricObserver Extensibility library (.NET 6) for use in building FabricObserver and ClusterObserver plugins. NOTE: This is no longer a .NET Standard 2.0 library. + This package contains the FabricObserver Extensibility library (.NET 6) for use in building FabricObserver and ClusterObserver observers and observer plugins. You need to ensure you use the same versions of this library and CO/FO if you are building observer plugins. diff --git a/FabricObserver.Extensibility/FabricObserver.Extensibility.csproj b/FabricObserver.Extensibility/FabricObserver.Extensibility.csproj index 5f82fdbb..07d9f72a 100644 --- a/FabricObserver.Extensibility/FabricObserver.Extensibility.csproj +++ b/FabricObserver.Extensibility/FabricObserver.Extensibility.csproj @@ -5,20 +5,20 @@ FabricObserver Copyright © 2023 FabricObserver - 3.2.13 - 3.2.13 + 3.2.14 + 3.2.14 CA1416 - - + + - - + + - - - + + + diff --git a/FabricObserver.Extensibility/ObserverBase.cs b/FabricObserver.Extensibility/ObserverBase.cs index 7e64a1ed..d6e5fb6e 100644 --- a/FabricObserver.Extensibility/ObserverBase.cs +++ b/FabricObserver.Extensibility/ObserverBase.cs @@ -33,6 +33,17 @@ public abstract class ObserverBase : IDisposable private bool disposed; private ConcurrentDictionary ServiceDumpCountDictionary; private readonly object lockObj = new(); + + private bool IsTelemetryProviderEnabled + { + get; set; + } + + protected ITelemetryProvider TelemetryClient + { + get; set; + } + public volatile bool HasActiveFabricErrorOrWarning; public volatile int 
CurrentErrorCount; public volatile int CurrentWarningCount; @@ -230,14 +241,6 @@ public bool EnableCsvLogging } } - /// - /// The maximum number of days an archived observer log file will be stored. After this time, it will be deleted from disk. - /// - public int MaxLogArchiveFileLifetimeDays - { - get; set; - } - /// /// The maximum number of days a csv file produced by CsvLogger will be stored. After this time, it will be deleted from disk. /// @@ -261,11 +264,6 @@ public ConcurrentQueue ServiceNames get; set; } = new ConcurrentQueue(); - public string ServiceNamesLogPath - { - get; set; - } - public int MonitoredServiceProcessCount { get; set; @@ -278,13 +276,13 @@ public int MonitoredAppCount public TimeSpan RunInterval { - get => ConfigurationSettings?.RunInterval ?? TimeSpan.MinValue; + get => ConfigurationSettings?.RunInterval ?? TimeSpan.FromMinutes(1); set { if (ConfigurationSettings != null) { - ConfigurationSettings.RunInterval = value; + ConfigurationSettings.RunInterval = value > TimeSpan.Zero ? value : TimeSpan.FromMinutes(1); } } } @@ -320,40 +318,31 @@ public bool UseCircularBuffer } } - public TimeSpan MonitorDuration + public TimeSpan CpuMonitorDuration { - get => ConfigurationSettings?.MonitorDuration ?? TimeSpan.MinValue; + get => ConfigurationSettings?.CpuMonitorDuration ?? TimeSpan.FromSeconds(3); set { if (ConfigurationSettings != null) { - ConfigurationSettings.MonitorDuration = value; + ConfigurationSettings.CpuMonitorDuration = value; } } } - public TimeSpan MonitorSleepDuration + public TimeSpan CpuMonitorLoopSleepDuration { - get => ConfigurationSettings?.MonitorSleepDuration ?? TimeSpan.MinValue; + get => ConfigurationSettings?.CpuMonitorSleepDuration ?? TimeSpan.FromMilliseconds(1000); set { if (ConfigurationSettings != null) { - ConfigurationSettings.MonitorSleepDuration = value; + // Prevent bad values. + ConfigurationSettings.CpuMonitorSleepDuration = value >= TimeSpan.FromMilliseconds(500) ? 
value : TimeSpan.FromMilliseconds(1000); } } } - protected bool IsTelemetryProviderEnabled - { - get; set; - } - - protected ITelemetryProvider TelemetryClient - { - get; set; - } - public bool IsEtwProviderEnabled { get; set; @@ -389,18 +378,30 @@ protected ObserverBase(FabricClient fabricClient, StatelessServiceContext servic ConfigPackage = serviceContext.CodePackageActivationContext.GetConfigurationPackageObject("Config"); CodePackage = serviceContext.CodePackageActivationContext.GetCodePackageObject("Code"); FabricServiceContext = serviceContext; - - SetObserverStaticConfiguration(); + ConfigurationSettings = new ConfigSettings(ConfigPackage.Settings, ConfigurationSectionName); if (ObserverName == ObserverConstants.AppObserverName) { ServiceNames.Enqueue(ServiceName.OriginalString); } + InitializeObserverLoggingInfra(); + + HealthReporter = new ObserverHealthReporter(ObserverLogger); + + IsObserverWebApiAppDeployed = + bool.TryParse( + GetSettingParameterValue( + ObserverConstants.ObserverManagerConfigurationSectionName, + ObserverConstants.ObserverWebApiEnabled), out bool obsWeb) && obsWeb && IsObserverWebApiAppInstalled(); + } + + public void InitializeObserverLoggingInfra(bool isConfigUpdate = false) + { // Observer Logger setup. string logFolderBasePath; string observerLogPath = GetSettingParameterValue(ObserverConstants.ObserverManagerConfigurationSectionName, ObserverConstants.ObserverLogPathParameter); - + if (!string.IsNullOrWhiteSpace(observerLogPath)) { logFolderBasePath = observerLogPath; @@ -411,21 +412,30 @@ protected ObserverBase(FabricClient fabricClient, StatelessServiceContext servic logFolderBasePath = logFolderBase; } - ObserverLogger = new Logger(ObserverName, logFolderBasePath, MaxLogArchiveFileLifetimeDays) - { - EnableETWLogging = IsEtwProviderEnabled - }; + // Archive file lifetime - ObserverLogger files. 
+ _ = int.TryParse( + GetSettingParameterValue( + ObserverConstants.ObserverManagerConfigurationSectionName, + ObserverConstants.MaxArchivedLogFileLifetimeDaysParameter), out int maxFileArchiveLifetime); - ConfigurationSettings = new ConfigSettings(ConfigPackage.Settings, ConfigurationSectionName); - ObserverLogger.EnableVerboseLogging = ConfigurationSettings.EnableVerboseLogging; - HealthReporter = new ObserverHealthReporter(ObserverLogger); - IsObserverWebApiAppDeployed = - bool.TryParse( - GetSettingParameterValue( - ObserverConstants.ObserverManagerConfigurationSectionName, - ObserverConstants.ObserverWebApiEnabled), out bool obsWeb) && obsWeb && IsObserverWebApiAppInstalled(); + SetObserverEtwTelemetryConfiguration(); - ServiceNamesLogPath = Path.Combine(ObserverLogger.LogFolderBasePath, ObserverName, "ServiceNames", "Services.txt"); + if (ObserverLogger == null) + { + ObserverLogger = new Logger(ObserverName, logFolderBasePath, maxFileArchiveLifetime > 0 ? maxFileArchiveLifetime : 7) + { + EnableETWLogging = IsEtwProviderEnabled && ConfigurationSettings.IsObserverEtwEnabled, + EnableVerboseLogging = ConfigurationSettings.EnableVerboseLogging + }; + } + else if (isConfigUpdate) + { + ObserverLogger.EnableETWLogging = IsEtwProviderEnabled && ConfigurationSettings.IsObserverEtwEnabled; + ObserverLogger.EnableVerboseLogging = ConfigurationSettings.EnableVerboseLogging; + ObserverLogger.LogFolderBasePath = logFolderBasePath; + ObserverLogger.MaxArchiveFileLifetimeDays = maxFileArchiveLifetime > 0 ? maxFileArchiveLifetime : 7; + ObserverLogger.InitializeLoggers(isConfigUpdate); + } } /// @@ -1468,11 +1478,11 @@ public TimeSpan GetHealthReportTTL() { return TimeSpan.FromSeconds(obsSleepTime) .Add(TimeSpan.FromMinutes(TtlAddMinutes)) - .Add(RunInterval > TimeSpan.MinValue ? RunInterval : TimeSpan.Zero); + .Add(RunInterval > TimeSpan.Zero ? 
RunInterval : TimeSpan.Zero); } return DateTime.Now.Subtract(LastRunDateTime) - .Add(TimeSpan.FromSeconds(RunDuration > TimeSpan.MinValue ? RunDuration.TotalSeconds : 0)) + .Add(TimeSpan.FromSeconds(RunDuration > TimeSpan.Zero ? RunDuration.TotalSeconds : 0)) .Add(TimeSpan.FromSeconds(obsSleepTime)); } @@ -1491,31 +1501,23 @@ protected virtual void Dispose(bool disposing) } } - // Non-App parameters settings (set in Settings.xml only). - private void SetObserverStaticConfiguration() + public void SetObserverEtwTelemetryConfiguration() { - // Archive file lifetime - ObserverLogger files. - if (int.TryParse( - GetSettingParameterValue(ObserverConstants.ObserverManagerConfigurationSectionName, ObserverConstants.MaxArchivedLogFileLifetimeDaysParameter), out int maxFileArchiveLifetime)) - { - MaxLogArchiveFileLifetimeDays = maxFileArchiveLifetime; - } - - // ETW + // ETW Provider. if (bool.TryParse( GetSettingParameterValue(ObserverConstants.ObserverManagerConfigurationSectionName, ObserverConstants.EnableETWProvider), out bool etwProviderEnabled)) { IsEtwProviderEnabled = etwProviderEnabled; } - // Telemetry. + // Telemetry Provider. 
if (bool.TryParse( - GetSettingParameterValue(ObserverConstants.ObserverManagerConfigurationSectionName, ObserverConstants.TelemetryEnabled), out bool telemEnabled)) + GetSettingParameterValue(ObserverConstants.ObserverManagerConfigurationSectionName, ObserverConstants.TelemetryProviderEnabled), out bool telemEnabled)) { IsTelemetryProviderEnabled = telemEnabled; } - if (!IsTelemetryProviderEnabled) + if (!IsTelemetryProviderEnabled || !IsTelemetryEnabled) { return; } @@ -1524,15 +1526,9 @@ private void SetObserverStaticConfiguration() GetSettingParameterValue( ObserverConstants.ObserverManagerConfigurationSectionName, ObserverConstants.TelemetryProviderType); - if (string.IsNullOrWhiteSpace(telemetryProviderType)) - { - IsTelemetryProviderEnabled = false; - return; - } - - if (!Enum.TryParse(telemetryProviderType, out TelemetryProviderType telemetryProvider)) + if (string.IsNullOrWhiteSpace(telemetryProviderType) || !Enum.TryParse(telemetryProviderType, out TelemetryProviderType telemetryProvider)) { - IsTelemetryProviderEnabled = false; + IsTelemetryEnabled = false; return; } @@ -1554,7 +1550,7 @@ private void SetObserverStaticConfiguration() if (string.IsNullOrWhiteSpace(logAnalyticsWorkspaceId) || string.IsNullOrWhiteSpace(logAnalyticsSharedKey)) { - IsTelemetryProviderEnabled = false; + IsTelemetryEnabled = false; return; } @@ -1568,7 +1564,7 @@ private void SetObserverStaticConfiguration() if (string.IsNullOrWhiteSpace(aiConnString)) { - IsTelemetryProviderEnabled = false; + IsTelemetryEnabled = false; return; } @@ -1577,7 +1573,7 @@ private void SetObserverStaticConfiguration() default: - IsTelemetryProviderEnabled = false; + IsTelemetryEnabled = false; break; } } diff --git a/FabricObserver.Extensibility/Utilities/ConfigSettings.cs b/FabricObserver.Extensibility/Utilities/ConfigSettings.cs index a9b0365a..8b2f6245 100644 --- a/FabricObserver.Extensibility/Utilities/ConfigSettings.cs +++ b/FabricObserver.Extensibility/Utilities/ConfigSettings.cs @@ -13,20 
+13,23 @@ namespace FabricObserver.Observers.Utilities { public class ConfigSettings { + // Default run interval is 1m. public TimeSpan RunInterval { get; set; - } + } = TimeSpan.FromMinutes(1); - public TimeSpan MonitorDuration + // Default monitor duration is 4s. + public TimeSpan CpuMonitorDuration { get; set; - } + } = TimeSpan.FromSeconds(4); - public TimeSpan MonitorSleepDuration + // Default monitor sleep duration is 1000ms. + public TimeSpan CpuMonitorSleepDuration { get; set; - } + } = TimeSpan.FromMilliseconds(1000); // Default enablement for any observer is enabled (true). public bool IsEnabled @@ -148,7 +151,7 @@ private void SetConfigSettings() ObserverConstants.MonitorDurationParameter), out TimeSpan monitorDuration)) { - MonitorDuration = monitorDuration; + CpuMonitorDuration = monitorDuration; } // Monitor sleep duration. @@ -157,7 +160,7 @@ private void SetConfigSettings() ObserverConstants.MonitorSleepDurationParameter), out int monitorSleepDuration)) { - MonitorSleepDuration = TimeSpan.FromMilliseconds(monitorSleepDuration); + CpuMonitorSleepDuration = TimeSpan.FromMilliseconds(monitorSleepDuration); } // Async cluster operation timeout setting.. 
@@ -236,7 +239,7 @@ public void UpdateConfigSettings(IEnumerable props) { if (TimeSpan.TryParse(prop.Value, out TimeSpan monitorDuration)) { - MonitorDuration = monitorDuration; + CpuMonitorDuration = monitorDuration; } } @@ -245,7 +248,7 @@ public void UpdateConfigSettings(IEnumerable props) { if (TimeSpan.TryParse(prop.Value, out TimeSpan monitorSleepDuration)) { - MonitorSleepDuration = monitorSleepDuration; + CpuMonitorSleepDuration = monitorSleepDuration; } } diff --git a/FabricObserver.Extensibility/Utilities/CpuUtilization/WindowsCpuUtilizationProvider.cs b/FabricObserver.Extensibility/Utilities/CpuUtilization/WindowsCpuUtilizationProvider.cs index 85dc2ffc..5afc9ebe 100644 --- a/FabricObserver.Extensibility/Utilities/CpuUtilization/WindowsCpuUtilizationProvider.cs +++ b/FabricObserver.Extensibility/Utilities/CpuUtilization/WindowsCpuUtilizationProvider.cs @@ -15,30 +15,26 @@ public class WindowsCpuUtilizationProvider : CpuUtilizationProvider // \Processor(_Total)\% Processor Time // This counter includes all processors on the system. The value range is 0 - 100. 
- private static PerformanceCounter systemCpuPerfCtr = null; - - private static PerformanceCounter SystemMemoryPerfCtr + private static PerformanceCounter systemCpuPerfCounter = null; + + private static PerformanceCounter SystemCpuPerfCounter { get { - if (systemCpuPerfCtr == null) - { - systemCpuPerfCtr = new(ProcessorCategoryName, ProcessorTimePct, ProcessorTimeInstanceName); - } - - return systemCpuPerfCtr; + systemCpuPerfCounter ??= new(ProcessorCategoryName, ProcessorTimePct, ProcessorTimeInstanceName); + return systemCpuPerfCounter; } } public override float GetProcessorTimePercentage() { - return SystemMemoryPerfCtr.NextValue(); + return SystemCpuPerfCounter.NextValue(); } public override void Dispose() { - systemCpuPerfCtr?.Dispose(); - systemCpuPerfCtr = null; + systemCpuPerfCounter?.Dispose(); + systemCpuPerfCounter = null; } } } diff --git a/FabricObserver.Extensibility/Utilities/Logger.cs b/FabricObserver.Extensibility/Utilities/Logger.cs index 89f10f8c..c8cb461e 100644 --- a/FabricObserver.Extensibility/Utilities/Logger.cs +++ b/FabricObserver.Extensibility/Utilities/Logger.cs @@ -7,6 +7,7 @@ using System.Diagnostics.Tracing; using System.Fabric.Health; using System.IO; +using System.Linq; using System.Threading; using FabricObserver.Observers.Utilities.Telemetry; using NLog; @@ -64,9 +65,9 @@ public string FilePath /// The maximum number of days that archive files will be stored. /// 0 means there is no limit set. 
/// - private int MaxArchiveFileLifetimeDays + public int MaxArchiveFileLifetimeDays { - get; + get; set; } /// @@ -160,7 +161,7 @@ public void LogEtw(string eventName, T eventData) ServiceEventSource.Current.Write(new { data }, eventName, keywords); } - private void InitializeLoggers() + public void InitializeLoggers(bool isConfigUpdate = false) { string logFolderBase; @@ -210,14 +211,13 @@ private void InitializeLoggers() } FilePath = file; - - var targetName = loggerName + "LogFile"; - + string targetName = loggerName + "LogFile"; LogManager.Configuration ??= new LoggingConfiguration(); + FileTarget target; - if ((FileTarget)LogManager.Configuration?.FindTargetByName(targetName) == null) + if (LogManager.Configuration.FindTargetByName(targetName) is not FileTarget) { - var target = new FileTarget + target = new FileTarget { Name = targetName, ConcurrentWrites = true, @@ -232,11 +232,43 @@ private void InitializeLoggers() }; LogManager.Configuration.AddTarget(loggerName + "LogFile", target); - var ruleInfo = new LoggingRule(loggerName, NLog.LogLevel.Debug, target); + LoggingRule ruleInfo = new(loggerName, NLog.LogLevel.Debug, target); LogManager.Configuration.LoggingRules.Add(ruleInfo); - LogManager.ReconfigExistingLoggers(); + } + else if (isConfigUpdate) + { + // Remove existing config. + LogManager.Configuration.RemoveTarget(targetName); + + target = new FileTarget + { + Name = targetName, + ConcurrentWrites = true, + EnableFileDelete = true, + FileName = file, + Layout = "${longdate}--${uppercase:${level}}--${message}", + OpenFileCacheTimeout = 5, + ArchiveEvery = FileArchivePeriod.Day, + ArchiveNumbering = ArchiveNumberingMode.DateAndSequence, + MaxArchiveDays = MaxArchiveFileLifetimeDays <= 0 ? 
7 : MaxArchiveFileLifetimeDays, + AutoFlush = true + }; + LogManager.Configuration.AddTarget(targetName, target); + LoggingRule rule; + + if (LogManager.Configuration.LoggingRules.Any(r => r.LoggerNamePattern == loggerName)) + { + rule = LogManager.Configuration.LoggingRules.First(r => r.LoggerNamePattern == loggerName); + rule.Targets.Add(target); + } + else + { + rule = new(loggerName, NLog.LogLevel.Debug, target); + LogManager.Configuration.LoggingRules.Add(rule); + } } + LogManager.ReconfigExistingLoggers(); TimeSource.Current = new AccurateUtcTimeSource(); OLogger = LogManager.GetLogger(loggerName); diff --git a/FabricObserver.Extensibility/Utilities/NativeMethods.cs b/FabricObserver.Extensibility/Utilities/NativeMethods.cs index 9a64e2de..b788e0b1 100644 --- a/FabricObserver.Extensibility/Utilities/NativeMethods.cs +++ b/FabricObserver.Extensibility/Utilities/NativeMethods.cs @@ -43,11 +43,11 @@ public static class NativeMethods "taskhostw.exe", "TextInputHost.exe", "wininit.exe", "winlogon.exe", "WmiPrvSE.exe", "WUDFHost.exe", "vmcompute.exe", "vmms.exe", "vmwp.exe", "vmmem" }; - private static readonly string[] ignoreFabricSystemServicesList = new string[] + private static readonly string[] fabricSystemServicesList = new string[] { "EventStore.Service.exe", "Fabric.exe", "FabricHost.exe", "FabricApplicationGateway.exe", "FabricCAS.exe", "FabricDCA.exe", "FabricDnsService.exe", "FabricFAS.exe", "FabricGateway.exe", - "FabricHost.exe", "FabricIS.exe", "FabricRM.exe", "FabricUS.exe" + "FabricHost.exe", "FabricImage.exe", "FabricIS.exe", "FabricRM.exe", "FabricUS.exe" }; // These are only read from concurrently. These do not need to be ConcurrentDictionaries. @@ -1332,8 +1332,8 @@ public static bool RefreshSFUserProcessDataCache(bool getChildProcesses = false) result.Add((procName.Replace(".exe", string.Empty), (int)pid)); } - // We only care about FabricHost's direct children, which are SF system service processes. 
- if (!FindInStringArray(ignoreFabricSystemServicesList, procName)) + // We only care about FabricHost's direct descendants that are SF system service processes. + if (!FindInStringArray(fabricSystemServicesList, procName)) { continue; } @@ -1534,7 +1534,7 @@ public static void ClearSFUserProcessDataCache() // Filter out the procs we know are not the droids we're looking for just by name or pid. if (procEntry.th32ProcessID == 0 || FindInStringArray(ignoreProcessList, procEntry.szExeFile) - || FindInStringArray(ignoreFabricSystemServicesList, procEntry.szExeFile)) + || FindInStringArray(fabricSystemServicesList, procEntry.szExeFile)) { continue; } @@ -1967,7 +1967,7 @@ internal static bool GetSytemPerformanceInfo(ref PerformanceInformation pi) string procName = Path.GetFileName(snapshot.ImageFileName); // We don't care about SF system service procs. - if (FindInStringArray(ignoreFabricSystemServicesList, procName)) + if (FindInStringArray(fabricSystemServicesList, procName)) { continue; } diff --git a/FabricObserver.Extensibility/Utilities/ObserverConstants.cs b/FabricObserver.Extensibility/Utilities/ObserverConstants.cs index 8553dc10..ef7af357 100644 --- a/FabricObserver.Extensibility/Utilities/ObserverConstants.cs +++ b/FabricObserver.Extensibility/Utilities/ObserverConstants.cs @@ -164,7 +164,7 @@ public sealed class ObserverConstants // Telemetry Settings Parameters. 
public const string AiKey = "AppInsightsInstrumentationKey"; public const string AppInsightsConnectionString = "AppInsightsConnectionString"; - public const string TelemetryEnabled = "EnableTelemetryProvider"; + public const string TelemetryProviderEnabled = "EnableTelemetryProvider"; public const string TelemetryProviderType = "TelemetryProvider"; public const string LogAnalyticsLogTypeParameter = "LogAnalyticsLogType"; public const string LogAnalyticsSharedKeyParameter = "LogAnalyticsSharedKey"; diff --git a/FabricObserver.Extensibility/Utilities/ProcessInfo/WindowsProcessInfoProvider.cs b/FabricObserver.Extensibility/Utilities/ProcessInfo/WindowsProcessInfoProvider.cs index 1ac15418..3e160855 100644 --- a/FabricObserver.Extensibility/Utilities/ProcessInfo/WindowsProcessInfoProvider.cs +++ b/FabricObserver.Extensibility/Utilities/ProcessInfo/WindowsProcessInfoProvider.cs @@ -37,7 +37,7 @@ private static string WinFabDbCategoryName { try { - return ServiceFabricConfiguration.Instance.FabricVersion.StartsWith("1") ? "MSExchange Database" : "Windows Fabric Database"; + return ServiceFabricConfiguration.Instance.FabricVersion.StartsWith('1') ? "MSExchange Database" : "Windows Fabric Database"; } catch (ArgumentException ae) { @@ -319,6 +319,10 @@ categoryName and counterName are never null (they are const strings). using PerformanceCounter LvidCounter = new(WinFabDbCategoryName, LVIDCounterName, internalProcName, true); float result = LvidCounter.NextValue(); + + // DEBUG + //ProcessInfoLogger.LogInfo($"GetProcessKvsLvidsUsagePercentage: {internalProcName} LVIDs used: {result}. 
CategoryName: {WinFabDbCategoryName}"); + double usedPct = (double)(result * 100) / int.MaxValue; return usedPct; } diff --git a/FabricObserver.Extensibility/Utilities/ServiceFabric/FabricClientUtilities.cs b/FabricObserver.Extensibility/Utilities/ServiceFabric/FabricClientUtilities.cs index f274832f..3453164d 100644 --- a/FabricObserver.Extensibility/Utilities/ServiceFabric/FabricClientUtilities.cs +++ b/FabricObserver.Extensibility/Utilities/ServiceFabric/FabricClientUtilities.cs @@ -68,7 +68,7 @@ public static FabricClient FabricClientSingleton { // This call with throw an ObjectDisposedException if fabricClient was disposed by, say, a plugin or if the runtime // disposed of it for some reason (FO replica restart, for example). This is just a test to ensure it is not in a disposed state. - if (fabricClient.Settings.HealthReportSendInterval > TimeSpan.MinValue) + if (fabricClient.Settings.HealthReportSendInterval > TimeSpan.Zero) { return fabricClient; } diff --git a/FabricObserver.Extensibility/Utilities/Telemetry/AppInsightsTelemetry.cs b/FabricObserver.Extensibility/Utilities/Telemetry/AppInsightsTelemetry.cs index 423b410a..73cbbc2e 100644 --- a/FabricObserver.Extensibility/Utilities/Telemetry/AppInsightsTelemetry.cs +++ b/FabricObserver.Extensibility/Utilities/Telemetry/AppInsightsTelemetry.cs @@ -336,7 +336,14 @@ public Task ReportHealthAsync(TelemetryDataBase telemetryData, CancellationToken if (properties != null) { - telemetryClient.TrackEvent("FabricObserver.EntityHealthData", properties, metric); + string source = "FabricObserver"; + + if (telemetryData.ObserverName == "ClusterObserver") + { + source = "ClusterObserver"; + } + + telemetryClient.TrackEvent($"{source}.EntityHealthData", properties, metric); } } catch (Exception e) when (e is not OutOfMemoryException) diff --git a/FabricObserver.nuspec.template b/FabricObserver.nuspec.template index 918cdc7f..d0fdd23a 100644 --- a/FabricObserver.nuspec.template +++ b/FabricObserver.nuspec.template @@ -2,9 
+2,12 @@ %PACKAGE_ID% - 3.2.13 + 3.2.14 -- Added feature for applications to set AppObserver thresholds in Application Manifest. This release only allows for specifying RG memory threshold settings for AppObserver in FO's ApplicationManifest. Support for other AppObserver Application Parameter settings will come in future releases. +- *Breaking Change*: Telemetry configuration settings are now required to be overridden in ApplicationManifest.xml to support versionless, parameter-only application upgrades for telemetry settings. See [Issue 292](https://github.com/microsoft/service-fabric-observer/issues/292) for details. Just move your related settings' Value strings from Settings.xml to ApplicationManifest.xml app parameter (the names of these settings are the same). +- Bug fixes and performance improvements to address AppObserver (in concurrency mode) consuming high CPU if misconfigured. There are now throttles and guardrails in place that will override any related user configuration that is deemed "dangerous" with respect to CPU consumption. +- Bug fix in app param update for log path and max archive lifetime settings. +- Updated nuget package dependencies to latest versions. Microsoft MIT @@ -13,7 +16,7 @@ icon.png fonuget.md en-US - This package contains the FabricObserver(FO) Application - built for .NET 6.0 and SF Runtime 9.x. FO a highly configurable and extensible resource usage watchdog service that is designed to be run in Azure Service Fabric Windows and Linux clusters. This package contains the entire application and can be used to build .NET Standard 2.0 observer plugins. NOTE: If you want to target .NET 6 for your plugins, then you must use Microsoft.ServiceFabricApps.FabricObserver.Extensibility.3.2.13 nuget package to build them. + This package contains the FabricObserver(FO) Application - built for .NET 6.0 and SF Runtime 9.x. 
FO a highly configurable and extensible resource usage watchdog service that is designed to be run in Azure Service Fabric Windows and Linux clusters. This package contains the entire application and can be used to build .NET Standard 2.0 observer plugins. NOTE: If you want to target .NET 6 for your plugins, then you must use Microsoft.ServiceFabricApps.FabricObserver.Extensibility.3.2.14 nuget package to build them. diff --git a/FabricObserver.sln b/FabricObserver.sln index 491785e5..9374ff23 100644 --- a/FabricObserver.sln +++ b/FabricObserver.sln @@ -35,8 +35,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution README.md = README.md SECURITY.md = SECURITY.md Documentation\Deployment\service-fabric-cluster-observer.json = Documentation\Deployment\service-fabric-cluster-observer.json - Documentation\Deployment\service-fabric-cluster-observer.v2.2.7.parameters.json = Documentation\Deployment\service-fabric-cluster-observer.v2.2.7.parameters.json Documentation\Deployment\service-fabric-observer.json = Documentation\Deployment\service-fabric-observer.json + Documentation\Deployment\service-fabric-cluster-observer.v2.2.8.parameters.json = Documentation\Deployment\service-fabric-cluster-observer.v2.2.8.parameters.json Documentation\Deployment\service-fabric-observer.v3.2.13.parameters.json = Documentation\Deployment\service-fabric-observer.v3.2.13.parameters.json Documentation\Using.md = Documentation\Using.md EndProjectSection diff --git a/FabricObserver/FabricObserver.csproj b/FabricObserver/FabricObserver.csproj index 72842c2e..2d0dca0e 100644 --- a/FabricObserver/FabricObserver.csproj +++ b/FabricObserver/FabricObserver.csproj @@ -11,8 +11,8 @@ True Copyright © 2022 FabricObserver - 3.2.13 - 3.2.13 + 3.2.14 + 3.2.14 true true FabricObserver.Program @@ -30,9 +30,9 @@ - - - + + + @@ -72,7 +72,7 @@ PreserveNewest - PreserveNewest + Always PreserveNewest diff --git a/FabricObserver/NLog.config b/FabricObserver/NLog.config index 
966f81ae..84f76440 100644 --- a/FabricObserver/NLog.config +++ b/FabricObserver/NLog.config @@ -1,38 +1,35 @@ - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/FabricObserver/Observers/AppObserver.cs b/FabricObserver/Observers/AppObserver.cs index f15e0267..35fe0d72 100644 --- a/FabricObserver/Observers/AppObserver.cs +++ b/FabricObserver/Observers/AppObserver.cs @@ -186,14 +186,14 @@ public override async Task ObserveAsync(CancellationToken token) { ObserverLogger.LogInfo($"Started ObserveAsync."); - if (RunInterval > TimeSpan.MinValue && DateTime.Now.Subtract(LastRunDateTime) < RunInterval) + if (RunInterval > TimeSpan.Zero && DateTime.Now.Subtract(LastRunDateTime) < RunInterval) { - ObserverLogger.LogInfo($"RunInterval ({RunInterval}) has not elapsed. Exiting."); + ObserverLogger.LogInfo($"ObserveAsync: RunInterval ({RunInterval}) has not elapsed. Exiting."); return; } Token = token; - stopwatch.Start(); + stopwatch.Restart(); try { @@ -203,7 +203,6 @@ public override async Task ObserveAsync(CancellationToken token) { ObserverLogger.LogWarning("AppObserver was unable to initialize correctly due to misconfiguration. " + "Please check your AppObserver configuration settings."); - stopwatch.Stop(); stopwatch.Reset(); CleanUp(); LastRunDateTime = DateTime.Now; @@ -227,26 +226,36 @@ public override async Task ObserveAsync(CancellationToken token) throw; } - ParallelLoopResult result = await MonitorDeployedAppsAsync(token); - - if (result.IsCompleted) - { - await ReportAsync(token); + try + { + ParallelLoopResult result = await MonitorDeployedAppsAsync(token); + + if (result.IsCompleted) + { + await ReportAsync(token); + } + + stopwatch.Stop(); + RunDuration = stopwatch.Elapsed; + + if (EnableVerboseLogging) + { + ObserverLogger.LogInfo($"Run Duration ({ReplicaOrInstanceList?.Count} service processes observed) {(parallelOptions.MaxDegreeOfParallelism == 1 ? 
"without" : "with")} " + + $"Parallel Processing (MaxDegreeOfParallelism: {parallelOptions.MaxDegreeOfParallelism}): {RunDuration}."); + } + + ObserverLogger.LogInfo($"Completed ObserveAsync."); + } + catch (Exception e) when (e is not OutOfMemoryException) + { + ObserverLogger.LogError($"Unhandled exception in ObserveAsync: {e.Message}"); + throw; } - - stopwatch.Stop(); - RunDuration = stopwatch.Elapsed; - - if (EnableVerboseLogging) - { - ObserverLogger.LogInfo($"Run Duration ({ReplicaOrInstanceList?.Count} service processes observed) {(parallelOptions.MaxDegreeOfParallelism == 1 ? "without" : "with")} " + - $"Parallel Processing (MaxDegreeOfParallelism: {parallelOptions.MaxDegreeOfParallelism}): {RunDuration}."); + finally + { + CleanUp(); + LastRunDateTime = DateTime.Now; } - - CleanUp(); - stopwatch.Reset(); - ObserverLogger.LogInfo($"Completed ObserveAsync."); - LastRunDateTime = DateTime.Now; } public override Task ReportAsync(CancellationToken token) @@ -2524,9 +2533,9 @@ any processes that the service process (parent) created/spawned (children). */ { capacity = DataCapacity > 0 ? DataCapacity : 5; } - else if (MonitorDuration > TimeSpan.MinValue) + else if (CpuMonitorDuration > TimeSpan.Zero) { - capacity = MonitorDuration.Seconds * 4; + capacity = CpuMonitorDuration.Seconds * 4; } // CPU @@ -2743,11 +2752,6 @@ any processes that the service process (parent) created/spawned (children). */ exceptions.Enqueue(e); } }); - - if (!exceptions.IsEmpty) - { - throw new AggregateException(exceptions); - } // Perf string threads = string.Empty; @@ -2756,10 +2760,16 @@ any processes that the service process (parent) created/spawned (children). 
*/ threadData.Clear(); threadData = null; execTimer.Stop(); - ObserverLogger.LogInfo("Completed MonitorDeployedAppsAsync."); ObserverLogger.LogInfo($"MonitorDeployedAppsAsync Execution time: {execTimer.Elapsed}{threads}"); - execTimer = null; - + execTimer = null; + + if (!exceptions.IsEmpty) + { + ObserverLogger.LogInfo("Completed MonitorDeployedAppsAsync with one or more exceptions inside task. Throwing inner exceptions aggregate."); + throw new AggregateException(exceptions); + } + + ObserverLogger.LogInfo("Completed MonitorDeployedAppsAsync."); return Task.FromResult(result); } @@ -2809,19 +2819,6 @@ private void ComputeResourceUsage( return; } - TimeSpan duration = TimeSpan.FromSeconds(1); - TimeSpan sleep = TimeSpan.FromMilliseconds(50); - - if (MonitorDuration > TimeSpan.MinValue) - { - duration = MonitorDuration; - } - - if (MonitorSleepDuration > TimeSpan.MinValue) - { - sleep = MonitorSleepDuration; - } - // Handles/FDs if (checkHandles) { @@ -3133,10 +3130,18 @@ private void ComputeResourceUsage( // CPU \\ + SafeProcessHandle procHandle = null; ICpuUsage cpuUsage; if (IsWindows) { + procHandle = NativeMethods.GetSafeProcessHandle(procId); + + if (procHandle == null || procHandle.IsClosed || procHandle.IsInvalid) + { + return; + } + cpuUsage = new CpuUsageWin32(); } else @@ -3144,96 +3149,121 @@ private void ComputeResourceUsage( cpuUsage = new CpuUsageProcess(); } - Stopwatch timer = Stopwatch.StartNew(); - SafeProcessHandle procHandle = null; - - if (IsWindows) - { - procHandle = NativeMethods.GetSafeProcessHandle(procId); + // CpuMonitorDuration can't be set to greater than 10s. + TimeSpan cpuMonitorDuration = CpuMonitorDuration <= TimeSpan.FromSeconds(10) ? CpuMonitorDuration : TimeSpan.FromSeconds(10); - if (procHandle == null || procHandle.IsClosed || procHandle.IsInvalid) - { - return; - } + // CpuMonitorLoopSleepDuration can't be set to less than 500 milliseconds. 
+ TimeSpan cpuMonitorLoopSleepTime = CpuMonitorLoopSleepDuration; + + // At least one value is needed to compute CPU Time % (in fact, more than one is best on Windows). If the user misconfigures sleep time to be greater than monitor duration, + // then we'll just set it to 1000 ms. + if (cpuMonitorLoopSleepTime > cpuMonitorDuration) + { + cpuMonitorLoopSleepTime = TimeSpan.FromMilliseconds(1000); } - while (timer.Elapsed <= duration) + // Limit potential for high CPU usage by throttling max duration when monitoring CPU usage with multiple threads. + if (EnableConcurrentMonitoring) { - if (token.IsCancellationRequested) + if (cpuMonitorDuration >= TimeSpan.FromSeconds(5)) { - state.Stop(); + cpuMonitorDuration = TimeSpan.FromSeconds(5); + + // Always force 1s sleep time for concurrent monitoring when duration is >= 5s. + cpuMonitorLoopSleepTime = TimeSpan.FromMilliseconds(1000); } + } - if (checkCpu || (MonitorResourceGovernanceLimits && repOrInst.RGCpuEnabled && rgCpuPercentThreshold > 0)) - { - double cpu = cpuUsage.GetCurrentCpuUsagePercentage(procId, IsWindows ? procName : null, procHandle); + Stopwatch timer = Stopwatch.StartNew(); - // Process id is no longer mapped to expected process name or some internal error occured that is non-retryable. Ignore this process. - // See CpuUsageProcess.cs/CpuUsageWin32.cs impls. - if (cpu == -1) + try + { +#if DEBUG + ObserverLogger.LogInfo($"ComputeResourceUsage: Entering CPU monitor while loop. MonitorDuration = {CpuMonitorDuration}. CpuMonitorLoopSleepDuration = {CpuMonitorLoopSleepDuration}."); +#endif + while (timer.Elapsed <= cpuMonitorDuration) + { + if (token.IsCancellationRequested) { - return; + state.Stop(); } - // CPU (all cores) \\ - if (checkCpu) + if (checkCpu || (MonitorResourceGovernanceLimits && repOrInst.RGCpuEnabled && rgCpuPercentThreshold > 0)) { - if (procId == parentPid) + double cpu = cpuUsage.GetCurrentCpuUsagePercentage(procId, IsWindows ?
procName : null, procHandle); + + // Process id is no longer mapped to expected process name or some internal error occured that is non-retryable. Ignore this process. + // See CpuUsageProcess.cs/CpuUsageWin32.cs impls. + if (cpu == -1) { - AllAppCpuData[id].AddData(cpu); + return; } - else + + // CPU (all cores) \\ + if (checkCpu) { - // Add new child proc entry if not already present in dictionary. - _ = AllAppCpuData.TryAdd( - $"{id}:{procName}{procId}", - new FabricResourceUsageData( - ErrorWarningProperty.CpuTime, + if (procId == parentPid) + { + AllAppCpuData[id].AddData(cpu); + } + else + { + // Add new child proc entry if not already present in dictionary. + _ = AllAppCpuData.TryAdd( $"{id}:{procName}{procId}", - capacity, - UseCircularBuffer, - EnableConcurrentMonitoring)); + new FabricResourceUsageData( + ErrorWarningProperty.CpuTime, + $"{id}:{procName}{procId}", + capacity, + UseCircularBuffer, + EnableConcurrentMonitoring)); - AllAppCpuData[$"{id}:{procName}{procId}"].AddData(cpu); + AllAppCpuData[$"{id}:{procName}{procId}"].AddData(cpu); + } } - } - if (MonitorResourceGovernanceLimits && repOrInst.RGCpuEnabled && repOrInst.RGAppliedCpuLimitCores > 0 && rgCpuPercentThreshold > 0) - { - double pct = cpu * Environment.ProcessorCount / repOrInst.RGAppliedCpuLimitCores; - - if (procId == parentPid) - { - AllAppRGCpuUsagePercent[id].AddData(pct); - } - else + if (MonitorResourceGovernanceLimits && repOrInst.RGCpuEnabled && repOrInst.RGAppliedCpuLimitCores > 0 && rgCpuPercentThreshold > 0) { - // Add new child proc entry if not already present in dictionary. - _ = AllAppRGCpuUsagePercent.TryAdd( - $"{id}:{procName}{procId}", - new FabricResourceUsageData( - ErrorWarningProperty.RGCpuUsagePercent, + double pct = cpu * Environment.ProcessorCount / repOrInst.RGAppliedCpuLimitCores; + + if (procId == parentPid) + { + AllAppRGCpuUsagePercent[id].AddData(pct); + } + else + { + // Add new child proc entry if not already present in dictionary. 
+ _ = AllAppRGCpuUsagePercent.TryAdd( $"{id}:{procName}{procId}", - capacity, - UseCircularBuffer, - EnableConcurrentMonitoring)); + new FabricResourceUsageData( + ErrorWarningProperty.RGCpuUsagePercent, + $"{id}:{procName}{procId}", + capacity, + UseCircularBuffer, + EnableConcurrentMonitoring)); - AllAppRGCpuUsagePercent[$"{id}:{procName}{procId}"].AddData(pct); + AllAppRGCpuUsagePercent[$"{id}:{procName}{procId}"].AddData(pct); + } } } - } - Thread.Sleep(sleep); - } - - if (IsWindows) + Thread.Sleep(cpuMonitorLoopSleepTime); + } +#if DEBUG + ObserverLogger.LogInfo($"ComputeResourceUsage: Exiting CPU monitoring while loop. Ran for {timer.Elapsed}."); +#endif + } + finally { - procHandle?.Dispose(); - procHandle = null; + if (IsWindows) + { + procHandle?.Dispose(); + procHandle = null; + } + + timer.Stop(); + timer = null; } - - timer.Stop(); - timer = null; }); } diff --git a/FabricObserver/Observers/AzureStorageUploadObserver.cs b/FabricObserver/Observers/AzureStorageUploadObserver.cs index 4a8fb0f0..449d2ecc 100644 --- a/FabricObserver/Observers/AzureStorageUploadObserver.cs +++ b/FabricObserver/Observers/AzureStorageUploadObserver.cs @@ -82,8 +82,9 @@ public override async Task ObserveAsync(CancellationToken token) } // If set, this observer will only run during the supplied interval. - if (RunInterval > TimeSpan.MinValue && DateTime.Now.Subtract(LastRunDateTime) < RunInterval) + if (RunInterval > TimeSpan.Zero && DateTime.Now.Subtract(LastRunDateTime) < RunInterval) { + ObserverLogger.LogInfo($"ObserveAsync: RunInterval ({RunInterval}) has not elapsed. 
Exiting."); return; } diff --git a/FabricObserver/Observers/CertificateObserver.cs b/FabricObserver/Observers/CertificateObserver.cs index 46a3bc93..1eedf6b1 100644 --- a/FabricObserver/Observers/CertificateObserver.cs +++ b/FabricObserver/Observers/CertificateObserver.cs @@ -86,8 +86,9 @@ public SecurityConfiguration SecurityConfiguration public override async Task ObserveAsync(CancellationToken token) { // Only run once per specified time in Settings.xml. (default is already set to 1 day for CertificateObserver) - if (RunInterval > TimeSpan.MinValue && DateTime.Now.Subtract(LastRunDateTime) < RunInterval) + if (RunInterval > TimeSpan.Zero && DateTime.Now.Subtract(LastRunDateTime) < RunInterval) { + ObserverLogger.LogInfo($"ObserveAsync: RunInterval ({RunInterval}) has not elapsed. Exiting."); return; } @@ -192,7 +193,7 @@ public override async Task ReportAsync(CancellationToken token) NodeName = NodeName, HealthMessage = "All cluster and monitored app certificates are healthy.", State = HealthState.Ok, - HealthReportTimeToLive = RunInterval > TimeSpan.MinValue ? RunInterval : HealthReportTimeToLive + HealthReportTimeToLive = RunInterval > TimeSpan.Zero ? RunInterval : HealthReportTimeToLive }; HasActiveFabricErrorOrWarning = false; @@ -231,7 +232,7 @@ public override async Task ReportAsync(CancellationToken token) NodeName = NodeName, HealthMessage = healthMessage, State = HealthState.Warning, - HealthReportTimeToLive = RunInterval > TimeSpan.MinValue ? RunInterval : HealthReportTimeToLive + HealthReportTimeToLive = RunInterval > TimeSpan.Zero ? 
RunInterval : HealthReportTimeToLive }; HasActiveFabricErrorOrWarning = true; diff --git a/FabricObserver/Observers/ContainerObserver.cs b/FabricObserver/Observers/ContainerObserver.cs index 9b305202..029ca6f3 100644 --- a/FabricObserver/Observers/ContainerObserver.cs +++ b/FabricObserver/Observers/ContainerObserver.cs @@ -62,8 +62,9 @@ public ContainerObserver(StatelessServiceContext context) : base(null, context) public override async Task ObserveAsync(CancellationToken token) { // If set, this observer will only run during the supplied interval. - if (RunInterval > TimeSpan.MinValue && DateTime.Now.Subtract(LastRunDateTime) < RunInterval) + if (RunInterval > TimeSpan.Zero && DateTime.Now.Subtract(LastRunDateTime) < RunInterval) { + ObserverLogger.LogInfo($"ObserveAsync: RunInterval ({RunInterval}) has not elapsed. Exiting."); return; } diff --git a/FabricObserver/Observers/DiskObserver.cs b/FabricObserver/Observers/DiskObserver.cs index a026209e..f0c84078 100644 --- a/FabricObserver/Observers/DiskObserver.cs +++ b/FabricObserver/Observers/DiskObserver.cs @@ -82,8 +82,9 @@ public DiskObserver(StatelessServiceContext context) : base(null, context) public override async Task ObserveAsync(CancellationToken token) { // If set, this observer will only run during the supplied interval. - if (RunInterval > TimeSpan.MinValue && DateTime.Now.Subtract(LastRunDateTime) < RunInterval) + if (RunInterval > TimeSpan.Zero && DateTime.Now.Subtract(LastRunDateTime) < RunInterval) { + ObserverLogger.LogInfo($"ObserveAsync: RunInterval ({RunInterval}) has not elapsed. 
Exiting."); return; } diff --git a/FabricObserver/Observers/FabricSystemObserver.cs b/FabricObserver/Observers/FabricSystemObserver.cs index 28c7546e..cdb4ec44 100644 --- a/FabricObserver/Observers/FabricSystemObserver.cs +++ b/FabricObserver/Observers/FabricSystemObserver.cs @@ -83,6 +83,7 @@ public FabricSystemObserver(StatelessServiceContext context) : base(null, contex "FabricDnsService", "FabricFAS", "FabricGateway", + "FabricImage", "FabricHost", "FabricIS", "FabricRM" @@ -183,8 +184,9 @@ public override async Task ObserveAsync(CancellationToken token) } // If set, this observer will only run during the supplied interval. - if (RunInterval > TimeSpan.MinValue && DateTime.Now.Subtract(LastRunDateTime) < RunInterval) + if (RunInterval > TimeSpan.Zero && DateTime.Now.Subtract(LastRunDateTime) < RunInterval) { + ObserverLogger.LogInfo($"ObserveAsync: RunInterval ({RunInterval}) has not elapsed. Exiting."); return; } @@ -355,7 +357,7 @@ public override Task ReportAsync(CancellationToken token) return Task.CompletedTask; } - // Windows Event Log + // OBSOLETE: Windows Event Log if (IsWindows && IsObserverWebApiAppDeployed && monitorWinEventLog) { // SF Eventlog Errors? @@ -588,9 +590,9 @@ private void Initialize() { frudCapacity = DataCapacity > 0 ? 
DataCapacity : 5; } - else if (MonitorDuration > TimeSpan.MinValue) + else if (CpuMonitorDuration > TimeSpan.Zero) { - frudCapacity = (int)MonitorDuration.TotalSeconds * 4; + frudCapacity = (int)CpuMonitorDuration.TotalSeconds * 4; } stopwatch ??= new Stopwatch(); @@ -1010,7 +1012,7 @@ private async Task GetProcessInfoAsync(string procName, CancellationToken token) // KVS LVIDs if (EnableKvsLvidMonitoring && (dotnetArg == "Fabric" || dotnetArg == "FabricRM")) { - double lvidPct = ProcessInfoProvider.Instance.GetProcessKvsLvidsUsagePercentage(dotnetArg, Token); + double lvidPct = ProcessInfoProvider.Instance.GetProcessKvsLvidsUsagePercentage(dotnetArg, token); // GetProcessKvsLvidsUsedPercentage internally handles exceptions and will always return -1 when it fails. if (lvidPct > -1) @@ -1025,7 +1027,7 @@ private async Task GetProcessInfoAsync(string procName, CancellationToken token) // Memory MB if (MemErrorUsageThresholdMb > 0 || MemWarnUsageThresholdMb > 0) { - float processMem = ProcessInfoProvider.Instance.GetProcessWorkingSetMb(procId, dotnetArg, Token, checkPrivateWorkingSet); + float processMem = ProcessInfoProvider.Instance.GetProcessWorkingSetMb(procId, dotnetArg, token, checkPrivateWorkingSet); if (allMemData.ContainsKey(dotnetArg)) { @@ -1041,33 +1043,26 @@ private async Task GetProcessInfoAsync(string procName, CancellationToken token) if (IsWindows) { cpuUsage = new CpuUsageWin32(); + procHandle = NativeMethods.GetSafeProcessHandle(procId); } else { cpuUsage = new CpuUsageProcess(); } - TimeSpan duration = TimeSpan.FromSeconds(1); - TimeSpan sleep = TimeSpan.FromMilliseconds(150); - - if (MonitorDuration > TimeSpan.MinValue) - { - duration = MonitorDuration; - } - - if (MonitorSleepDuration > TimeSpan.MinValue) - { - sleep = MonitorSleepDuration; - } - Stopwatch timer = Stopwatch.StartNew(); - - if (IsWindows) + TimeSpan cpuMonitorDuration = CpuMonitorDuration; + TimeSpan cpuMonitorLoopSleepTime = CpuMonitorLoopSleepDuration; + + // At least one value 
is needed to compute CPU Time % (in fact, more than one is best on Windows). If the user misconfigures sleep time to be greater than monitor duration, + // then we'll just set it to 1000 ms. + if (cpuMonitorLoopSleepTime > cpuMonitorDuration) { - procHandle = NativeMethods.GetSafeProcessHandle(procId); + // CpuMonitorDuration can't be set to less than 1 second. + cpuMonitorLoopSleepTime = TimeSpan.FromMilliseconds(1000); } - while (timer.Elapsed <= duration) + while (timer.Elapsed <= cpuMonitorDuration) { token.ThrowIfCancellationRequested(); @@ -1084,7 +1079,7 @@ private async Task GetProcessInfoAsync(string procName, CancellationToken token) } } - await Task.Delay(sleep, Token); + await Task.Delay(cpuMonitorLoopSleepTime, token); } catch (Exception e) when (e is not (OperationCanceledException or TaskCanceledException)) { diff --git a/FabricObserver/Observers/NetworkObserver.cs b/FabricObserver/Observers/NetworkObserver.cs index ad77913e..b9a52676 100644 --- a/FabricObserver/Observers/NetworkObserver.cs +++ b/FabricObserver/Observers/NetworkObserver.cs @@ -41,18 +41,15 @@ public sealed class NetworkObserver : ObserverBase TargetApp = "fabric:/test", Endpoints = new List { - new Endpoint - { + new() { HostName = "www.microsoft.com", Port = 443 }, - new Endpoint - { + new() { HostName = "www.facebook.com", Port = 443 }, - new Endpoint - { + new() { HostName = "www.google.com", Port = 443 } @@ -79,8 +76,9 @@ public NetworkObserver(StatelessServiceContext context) : base(null, context) public override async Task ObserveAsync(CancellationToken token) { // If set, this observer will only run during the supplied interval. - if (RunInterval > TimeSpan.MinValue && DateTime.Now.Subtract(LastRunDateTime) < RunInterval) + if (RunInterval > TimeSpan.Zero && DateTime.Now.Subtract(LastRunDateTime) < RunInterval) { + ObserverLogger.LogInfo($"ObserveAsync: RunInterval ({RunInterval}) has not elapsed. 
Exiting."); return; } diff --git a/FabricObserver/Observers/NodeObserver.cs b/FabricObserver/Observers/NodeObserver.cs index bfd4a0b3..d44587f0 100644 --- a/FabricObserver/Observers/NodeObserver.cs +++ b/FabricObserver/Observers/NodeObserver.cs @@ -144,8 +144,9 @@ public NodeObserver(StatelessServiceContext context) : base(null, context) public override async Task ObserveAsync(CancellationToken token) { // If set, this observer will only run during the supplied interval. - if (RunInterval > TimeSpan.MinValue && DateTime.Now.Subtract(LastRunDateTime) < RunInterval) + if (RunInterval > TimeSpan.Zero && DateTime.Now.Subtract(LastRunDateTime) < RunInterval) { + ObserverLogger.LogInfo($"ObserveAsync: RunInterval ({RunInterval}) has not elapsed. Exiting."); return; } @@ -428,9 +429,9 @@ private void InitializeDataContainers() { frudCapacity = DataCapacity > 0 ? DataCapacity : 4; } - else if (MonitorDuration > TimeSpan.MinValue) + else if (CpuMonitorDuration > TimeSpan.Zero) { - frudCapacity = (int)MonitorDuration.TotalSeconds * 4; + frudCapacity = (int)CpuMonitorDuration.TotalSeconds * 4; } if (CpuTimeData == null && (CpuErrorUsageThresholdPct > 0 || CpuWarningUsageThresholdPct > 0)) @@ -690,20 +691,7 @@ private async Task ComputeMachineResourceUsage(CancellationToken token) int firewalls = NetworkUsage.GetActiveFirewallRulesCount(); FirewallData.AddData(firewalls); } - - TimeSpan duration = TimeSpan.FromSeconds(10); - TimeSpan sleep = TimeSpan.FromMilliseconds(1000); - - if (MonitorDuration > TimeSpan.MinValue) - { - duration = MonitorDuration; - } - - if (MonitorSleepDuration > TimeSpan.MinValue) - { - sleep = MonitorSleepDuration; - } - + // OS-level file handle monitoring only makes sense for Linux, where the Maximum system-wide number of handles the kernel will allocate is a user-configurable setting. // Windows does not have a configurable setting for Max Handles as the number of handles available to the system is dynamic (even if the max per process is not). 
// As such, for Windows, GetMaximumConfiguredFileHandlesCount always return -1, by design. Also, GetTotalAllocatedFileHandlesCount is not implemented for Windows (just returns -1). @@ -793,28 +781,23 @@ private async Task ComputeMachineResourceUsage(CancellationToken token) } // No need to proceed. - if (CpuTimeData == null) + if (CpuTimeData == null || (CpuErrorUsageThresholdPct <= 0 && CpuWarningUsageThresholdPct <= 0)) { return; } // Warm up counter. _ = CpuUtilizationProvider.Instance.GetProcessorTimePercentage(); - await Task.Delay(sleep, Token); + await Task.Delay(CpuMonitorLoopSleepDuration, token); timer.Start(); - while (timer.Elapsed <= duration) + while (timer.Elapsed <= CpuMonitorDuration) { token.ThrowIfCancellationRequested(); - - // CPU - if (CpuErrorUsageThresholdPct > 0 || CpuWarningUsageThresholdPct > 0) - { - CpuTimeData.AddData(CpuUtilizationProvider.Instance.GetProcessorTimePercentage()); - } - - await Task.Delay(sleep, Token); + + CpuTimeData.AddData(CpuUtilizationProvider.Instance.GetProcessorTimePercentage()); + await Task.Delay(CpuMonitorLoopSleepDuration, token); } timer.Stop(); diff --git a/FabricObserver/Observers/OSObserver.cs b/FabricObserver/Observers/OSObserver.cs index e28c39ea..d2140f16 100644 --- a/FabricObserver/Observers/OSObserver.cs +++ b/FabricObserver/Observers/OSObserver.cs @@ -55,8 +55,9 @@ public OSObserver(StatelessServiceContext context) : base(null, context) public override async Task ObserveAsync(CancellationToken token) { // If set, this observer will only run during the supplied interval. - if (RunInterval > TimeSpan.MinValue && DateTime.Now.Subtract(LastRunDateTime) < RunInterval) + if (RunInterval > TimeSpan.Zero && DateTime.Now.Subtract(LastRunDateTime) < RunInterval) { + ObserverLogger.LogInfo($"ObserveAsync: RunInterval ({RunInterval}) has not elapsed. 
Exiting."); return; } diff --git a/FabricObserver/Observers/ObserverManager.cs b/FabricObserver/Observers/ObserverManager.cs index 56a8019e..7ccd4a2f 100644 --- a/FabricObserver/Observers/ObserverManager.cs +++ b/FabricObserver/Observers/ObserverManager.cs @@ -23,6 +23,7 @@ using System.Runtime; using FabricObserver.Utilities.ServiceFabric; using ConfigurationSettings = System.Fabric.Description.ConfigurationSettings; +using Microsoft.VisualBasic; namespace FabricObserver.Observers { @@ -46,7 +47,6 @@ private List Observers private readonly string sfVersion; private readonly bool isWindows; private readonly ConfigurationPackage configurationPackage; - private System.Fabric.Description.ConfigurationSection configurationSection; private volatile bool shutdownSignaled; private DateTime StartDateTime; private bool isConfigurationUpdateInProgress; @@ -54,7 +54,7 @@ private List Observers private CancellationTokenSource linkedSFRuntimeObserverTokenSource; // Folks often use their own version numbers. This is for internal diagnostic telemetry. 
- private const string InternalVersionNumber = "3.2.13"; + private const string InternalVersionNumber = "3.2.14"; private static FabricClient FabricClientInstance => FabricClientUtilities.FabricClientSingleton; @@ -91,11 +91,6 @@ private TimeSpan ObserverExecutionTimeout get; set; } = TimeSpan.FromMinutes(30); - private int MaxArchivedLogFileLifetimeDays - { - get; - } - private DateTime LastTelemetrySendDate { get; set; @@ -111,7 +106,7 @@ public static StatelessServiceContext FabricServiceContext get; set; } - public static bool TelemetryEnabled + public static bool TelemetryProviderEnabled { get; set; } @@ -163,7 +158,6 @@ public ObserverManager(IServiceProvider serviceProvider, CancellationToken token isWindows = OperatingSystem.IsWindows(); sfVersion = GetServiceFabricRuntimeVersion(); configurationPackage = FabricServiceContext.CodePackageActivationContext.GetConfigurationPackageObject("Config"); - configurationSection = configurationPackage.Settings.Sections[ObserverConstants.ObserverManagerConfigurationSectionName]; // Observer Logger setup. 
string logFolderBasePath; @@ -179,12 +173,17 @@ public ObserverManager(IServiceProvider serviceProvider, CancellationToken token logFolderBasePath = logFolderBase; } - if (int.TryParse(GetConfigSettingValue(ObserverConstants.MaxArchivedLogFileLifetimeDaysParameter, null), out int maxArchivedLogFileLifetimeDays)) + _ = int.TryParse(GetConfigSettingValue(ObserverConstants.MaxArchivedLogFileLifetimeDaysParameter, null), out int maxArchivedLogFileLifetimeDays); + _ = bool.TryParse(GetConfigSettingValue(ObserverConstants.EnableETWProvider, null), out bool enableEtwProvider); + _ = bool.TryParse(GetConfigSettingValue(ObserverConstants.EnableVerboseLoggingParameter, null), out bool enableVerboseLogging); + EtwEnabled = enableEtwProvider; + + Logger = new Logger("ObserverManager", logFolderBasePath, maxArchivedLogFileLifetimeDays) { - MaxArchivedLogFileLifetimeDays = maxArchivedLogFileLifetimeDays; - } + EnableETWLogging = enableEtwProvider, + EnableVerboseLogging = enableVerboseLogging + }; - Logger = new Logger("ObserverManager", logFolderBasePath, MaxArchivedLogFileLifetimeDays); SetPropertiesFromConfigurationParameters(); Observers = serviceProvider.GetServices().ToList(); HealthReporter = new ObserverHealthReporter(Logger); @@ -196,11 +195,16 @@ private string GetServiceFabricRuntimeVersion() try { var config = ServiceFabricConfiguration.Instance; - return config.FabricVersion; + return config.FabricVersion.Trim(); } - catch (Exception e) when (e is not (OperationCanceledException or TaskCanceledException)) + catch (Exception e) when (e is not OutOfMemoryException) { - Logger.LogWarning($"GetServiceFabricRuntimeVersion failure:{Environment.NewLine}{e.Message}"); + if (e is TaskCanceledException or OperationCanceledException) + { + throw; + } + + Logger.LogWarning($"GetServiceFabricRuntimeVersion failure: {e.Message}"); } return null; @@ -228,64 +232,71 @@ public async Task StartObserversAsync() // Observers run sequentially. See RunObservers impl. 
while (true) { - if (!isConfigurationUpdateInProgress && (shutdownSignaled || runAsyncToken.IsCancellationRequested)) + try { - await ShutDownAsync(); - break; - } + if (!isConfigurationUpdateInProgress && (shutdownSignaled || runAsyncToken.IsCancellationRequested)) + { + await ShutDownAsync(); + break; + } - await RunObserversAsync(); + await RunObserversAsync(); - // Identity-agnostic internal operational telemetry sent to Service Fabric team (only) for use in - // understanding generic behavior of FH in the real world (no PII). This data is sent once a day and will be retained for no more - // than 90 days. - if (FabricObserverOperationalTelemetryEnabled && !(shutdownSignaled || runAsyncToken.IsCancellationRequested) - && DateTime.UtcNow.Subtract(LastTelemetrySendDate) >= OperationalTelemetryRunInterval) - { - try + // Identity-agnostic internal operational telemetry sent to Service Fabric team (only) for use in + // understanding generic behavior of FH in the real world (no PII). This data is sent once a day and will be retained for no more + // than 90 days. 
+ if (FabricObserverOperationalTelemetryEnabled && !(shutdownSignaled || runAsyncToken.IsCancellationRequested) + && DateTime.UtcNow.Subtract(LastTelemetrySendDate) >= OperationalTelemetryRunInterval) { - using var telemetryEvents = new TelemetryEvents(nodeName); - var foData = GetFabricObserverInternalTelemetryData(); - - if (foData != null) + try { - string filepath = Path.Combine(Logger.LogFolderBasePath, $"fo_operational_telemetry.log"); + using var telemetryEvents = new TelemetryEvents(nodeName); + var foData = GetFabricObserverInternalTelemetryData(); - if (telemetryEvents.EmitFabricObserverOperationalEvent(foData, OperationalTelemetryRunInterval, filepath)) + if (foData != null) { - LastTelemetrySendDate = DateTime.UtcNow; - ResetInternalErrorWarningDataCounters(); + string filepath = Path.Combine(Logger.LogFolderBasePath, $"fo_operational_telemetry.log"); + + if (telemetryEvents.EmitFabricObserverOperationalEvent(foData, OperationalTelemetryRunInterval, filepath)) + { + LastTelemetrySendDate = DateTime.UtcNow; + ResetInternalErrorWarningDataCounters(); + } } } + catch (Exception ex) when (ex is not OutOfMemoryException) + { + // Telemetry is non-critical and should *not* take down FO. + Logger.LogWarning($"Unable to send internal diagnostic telemetry: {ex.Message}"); + } } - catch (Exception ex) when (ex is not OutOfMemoryException) + + // Check for new version once a day. + if (!(shutdownSignaled || runAsyncToken.IsCancellationRequested) && DateTime.UtcNow.Subtract(LastVersionCheckDateTime) >= OperationalTelemetryRunInterval) { - // Telemetry is non-critical and should *not* take down FO. - Logger.LogWarning($"Unable to send internal diagnostic telemetry: {ex.Message}"); + await CheckGithubForNewVersionAsync(); + LastVersionCheckDateTime = DateTime.UtcNow; } - } - // Check for new version once a day. 
- if (!(shutdownSignaled || runAsyncToken.IsCancellationRequested) && DateTime.UtcNow.Subtract(LastVersionCheckDateTime) >= OperationalTelemetryRunInterval) - { - await CheckGithubForNewVersionAsync(); - LastVersionCheckDateTime = DateTime.UtcNow; - } + // Time to take a nap before running observers again. 30 seconds is the minimum sleep time. + if (ObserverExecutionLoopSleepSeconds >= 30) + { + await Task.Delay(TimeSpan.FromSeconds(ObserverExecutionLoopSleepSeconds), runAsyncToken); + } + else + { + // Prevent loop spinning. Let threads drain (in the case of AppObserver monitoring with concurrent Tasks). Be conservative here. + await Task.Delay(TimeSpan.FromSeconds(30), runAsyncToken); + } - if (ObserverExecutionLoopSleepSeconds > 0) - { - await Task.Delay(TimeSpan.FromSeconds(ObserverExecutionLoopSleepSeconds), runAsyncToken); + // All observers have run at this point. Try and empty the trash now. + GCSettings.LargeObjectHeapCompactionMode = GCLargeObjectHeapCompactionMode.CompactOnce; + GC.Collect(2, GCCollectionMode.Forced, true, true); } - else if (Observers.Count == 1) + catch (Exception e) when (e is FabricException or TimeoutException) { - // This protects against loop spinning when you run FO with one observer enabled and no sleep time set. - await Task.Delay(TimeSpan.FromSeconds(5), runAsyncToken); - } - // All observers have run at this point. Try and empty the trash now. - GCSettings.LargeObjectHeapCompactionMode = GCLargeObjectHeapCompactionMode.CompactOnce; - GC.Collect(2, GCCollectionMode.Forced, true, true); - await Task.Delay(TimeSpan.FromSeconds(5), runAsyncToken); + } } } catch (Exception e) when (e is OperationCanceledException or TaskCanceledException) @@ -308,7 +319,7 @@ public async Task StartObserversAsync() await ShutDownAsync(); // Telemetry. 
- if (TelemetryEnabled) + if (TelemetryProviderEnabled) { var telemetryData = new NodeTelemetryData() { @@ -319,23 +330,23 @@ public async Task StartObserversAsync() Source = ObserverConstants.ObserverManagerName }; - await TelemetryClient.ReportHealthAsync(telemetryData, runAsyncToken); + await TelemetryClient.ReportHealthAsync(telemetryData, CancellationToken.None); } // ETW. if (EtwEnabled) { Logger.LogEtw( - ObserverConstants.FabricObserverETWEventName, - new - { - Description = message, - HealthState = "Error", - Metric = $"{ObserverConstants.FabricObserverName}_ServiceHealth", - NodeName = nodeName, - ObserverName = ObserverConstants.ObserverManagerName, - Source = ObserverConstants.FabricObserverName - }); + ObserverConstants.FabricObserverETWEventName, + new + { + Description = message, + HealthState = "Error", + Metric = $"{ObserverConstants.FabricObserverName}_ServiceHealth", + NodeName = nodeName, + ObserverName = ObserverConstants.ObserverManagerName, + Source = ObserverConstants.FabricObserverName + }); } // Operational telemetry sent to FO developer for use in understanding generic behavior of FO in the real world (no PII). 
@@ -440,7 +451,7 @@ public async Task ClearHealthReportsAsync(string configUpdateLinux) var healthReporter = new ObserverHealthReporter(Logger); healthReporter.ReportHealthToServiceFabric(healthReport); } - catch (FabricException) + catch (Exception e) when (e is FabricException or TimeoutException) { } @@ -478,7 +489,7 @@ public async Task ClearHealthReportsAsync(string configUpdateLinux) var healthReporter = new ObserverHealthReporter(Logger); healthReporter.ReportHealthToServiceFabric(healthReport); } - catch (FabricException) + catch (Exception e) when (e is FabricException or TimeoutException) { } @@ -506,7 +517,7 @@ public async Task ClearHealthReportsAsync(string configUpdateLinux) var healthReporter = new ObserverHealthReporter(Logger); healthReporter.ReportHealthToServiceFabric(healthReport); } - catch (FabricException) + catch (Exception e) when (e is FabricException or TimeoutException) { } @@ -544,7 +555,7 @@ public async Task ClearHealthReportsAsync(string configUpdateLinux) var healthReporter = new ObserverHealthReporter(Logger); healthReporter.ReportHealthToServiceFabric(healthReport); } - catch (FabricException) + catch (Exception e) when (e is FabricException or TimeoutException) { } @@ -564,29 +575,36 @@ public async Task ClearHealthReportsAsync(string configUpdateLinux) observer.ObserverName == ObserverConstants.NodeObserverName || observer.ObserverName == ObserverConstants.OSObserverName) { - var nodeHealth = await FabricClientInstance.HealthManager.GetNodeHealthAsync(observer.NodeName); - var fabricObserverNodeHealthEvents = nodeHealth.HealthEvents?.Where(s => s.HealthInformation.SourceId.Contains(observer.ObserverName)); - - if (fabricObserverNodeHealthEvents != null && fabricObserverNodeHealthEvents.Any()) + try { - healthReport.EntityType = EntityType.Machine; + var nodeHealth = await FabricClientInstance.HealthManager.GetNodeHealthAsync(observer.NodeName); + var fabricObserverNodeHealthEvents = nodeHealth.HealthEvents?.Where(s => 
s.HealthInformation.SourceId.Contains(observer.ObserverName)); - foreach (var evt in fabricObserverNodeHealthEvents) + if (fabricObserverNodeHealthEvents != null && fabricObserverNodeHealthEvents.Any()) { - try - { - healthReport.Property = evt.HealthInformation.Property; - healthReport.SourceId = evt.HealthInformation.SourceId; + healthReport.EntityType = EntityType.Machine; - var healthReporter = new ObserverHealthReporter(Logger); - healthReporter.ReportHealthToServiceFabric(healthReport); - } - catch (FabricException) + foreach (var evt in fabricObserverNodeHealthEvents) { + try + { + healthReport.Property = evt.HealthInformation.Property; + healthReport.SourceId = evt.HealthInformation.SourceId; + var healthReporter = new ObserverHealthReporter(Logger); + healthReporter.ReportHealthToServiceFabric(healthReport); + } + catch (Exception e) when (e is FabricException or TimeoutException) + { + + } } } } + catch (Exception e) when (e is FabricException or TimeoutException) + { + + } } // Reset warning/error states. @@ -630,7 +648,7 @@ private async Task RemoveObserverManagerHealthReportsAsync() var healthReporter = new ObserverHealthReporter(Logger); healthReporter.ReportHealthToServiceFabric(healthReport); } - catch (FabricException) + catch (Exception e) when (e is FabricException or TimeoutException) { } @@ -656,7 +674,7 @@ private async Task RemoveObserverManagerHealthReportsAsync() var healthReporter = new ObserverHealthReporter(Logger); healthReporter.ReportHealthToServiceFabric(healthReport); } - catch (FabricException) + catch (Exception e) when (e is FabricException or TimeoutException) { } @@ -883,10 +901,10 @@ ObserverConstants.NetworkObserverName or /// Contains the information necessary for setting new config params from updated package. 
private async void CodePackageActivationContext_ConfigurationPackageModifiedEvent(object sender, PackageModifiedEventArgs e) { - Logger.LogWarning("Application Parameter upgrade started..."); - try { + Logger.LogWarning("Application Parameter upgrade started..."); + // For Linux, we need to restart the FO process due to the Linux Capabilities impl that enables us to run docker and netstat commands as elevated user (FO Linux should always be run as standard user on Linux). // During an upgrade event, SF touches the cap binaries which removes the cap settings so we need to run the FO app setup script again to reset them. if (!isWindows) @@ -902,20 +920,25 @@ private async void CodePackageActivationContext_ConfigurationPackageModifiedEven await StopObserversAsync(false).ConfigureAwait(false); var newSettings = e.NewPackage.Settings; + // ObserverManager settings. + SetPropertiesFromConfigurationParameters(newSettings); + // Observer settings. foreach (var observer in Observers) { string configSectionName = observer.ConfigurationSettings.ConfigSection.Name; observer.ConfigPackage = e.NewPackage; observer.ConfigurationSettings = new ConfigSettings(newSettings, configSectionName); - observer.ObserverLogger.EnableVerboseLogging = observer.ConfigurationSettings.EnableVerboseLogging; - + observer.InitializeObserverLoggingInfra(isConfigUpdate: true); + // Reset last run time so the observer restarts (if enabled) after the app parameter update completes. observer.LastRunDateTime = DateTime.MinValue; } - // ObserverManager settings. - SetPropertiesFromConfigurationParameters(newSettings); + // Refresh FO CancellationTokenSources. 
+ cts = new CancellationTokenSource(); + linkedSFRuntimeObserverTokenSource = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, runAsyncToken); + Logger.LogWarning("Application Parameter upgrade completed..."); } catch (Exception ex) when (ex is not OutOfMemoryException) { @@ -927,20 +950,16 @@ private async void CodePackageActivationContext_ConfigurationPackageModifiedEven HealthMessage = $"Error updating FabricObserver with new configuration settings:{Environment.NewLine}{ex}", NodeName = FabricServiceContext.NodeContext.NodeName, State = HealthState.Ok, - Property = "Configuration_Upate_Error", + Property = "FO_Configuration_Upate_Error", EmitLogEvent = true }; HealthReporter.ReportHealthToServiceFabric(healthReport); } - - // Refresh FO CancellationTokenSources. - cts?.Dispose(); - linkedSFRuntimeObserverTokenSource?.Dispose(); - cts = new CancellationTokenSource(); - linkedSFRuntimeObserverTokenSource = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, runAsyncToken); - Logger.LogWarning("Application Parameter upgrade completed..."); - isConfigurationUpdateInProgress = false; + finally + { + isConfigurationUpdateInProgress = false; + } } /// @@ -957,32 +976,44 @@ private void SetPropertiesFromConfigurationParameters(ConfigurationSettings sett IsLvidCounterEnabled = IsLVIDPerfCounterEnabled(settings); } - // ETW. - if (bool.TryParse(GetConfigSettingValue(ObserverConstants.EnableETWProvider, settings), out bool etwEnabled)) - { - EtwEnabled = etwEnabled; - - if (Logger != null) - { - Logger.EnableETWLogging = etwEnabled; - } - } - // Maximum time, in seconds, that an observer can run - Override. if (int.TryParse(GetConfigSettingValue(ObserverConstants.ObserverExecutionTimeout, settings), out int timeoutSeconds)) { ObserverExecutionTimeout = TimeSpan.FromSeconds(timeoutSeconds); } - // ObserverManager verbose logging - Override. 
- if (bool.TryParse(GetConfigSettingValue(ObserverConstants.EnableVerboseLoggingParameter, settings), out bool enableVerboseLogging)) + // Logger settings - Overrides. Config update. \\ + + // settings are not null if this is running due to a config update. Could also check for isConfigurationUpdateInProgress. + if (settings != null && Logger != null) { - if (Logger != null) + // ObserverManager logger EnableETWLogging - Override. + _ = bool.TryParse(GetConfigSettingValue(ObserverConstants.EnableETWProvider, settings), out bool enableEtwProvider); + EtwEnabled = enableEtwProvider; + Logger.EnableETWLogging = enableEtwProvider; + + // ObserverManager logger EnableVerboseLogging - Override. + _ = bool.TryParse(GetConfigSettingValue(ObserverConstants.EnableVerboseLoggingParameter, settings), out bool enableVerboseLogging); + Logger.EnableVerboseLogging = enableVerboseLogging; + + // ObserverManager/Observer logger MaxArchiveLifetimeDays - Override. Falls back to 7 days when the setting is missing or is not a positive integer. + _ = int.TryParse(GetConfigSettingValue(ObserverConstants.MaxArchivedLogFileLifetimeDaysParameter, settings), out int maxArchiveLifetimeDays); + Logger.MaxArchiveFileLifetimeDays = maxArchiveLifetimeDays > 0 ? maxArchiveLifetimeDays : 7; + + // ObserverManager/Observer logger ObserverLogPath - Override. + string loggerBasePath = GetConfigSettingValue(ObserverConstants.ObserverLogPathParameter, settings); + + if (!string.IsNullOrWhiteSpace(loggerBasePath)) { - Logger.EnableVerboseLogging = enableVerboseLogging; + Logger.LogFolderBasePath = loggerBasePath; } + + // This will reset existing logger instance's config state and employ updated settings immediately. See Logger.cs. + Logger.InitializeLoggers(true); } + // End Logger settings - Overrides. 
\\ + if (int.TryParse(GetConfigSettingValue(ObserverConstants.ObserverLoopSleepTimeSeconds, settings), out int execFrequency)) { ObserverExecutionLoopSleepSeconds = execFrequency; @@ -992,7 +1023,7 @@ private void SetPropertiesFromConfigurationParameters(ConfigurationSettings sett // This only makes sense when you have the FabricObserverWebApi app installed. string fqdn = GetConfigSettingValue(ObserverConstants.Fqdn, settings); - if (!string.IsNullOrEmpty(fqdn)) + if (!string.IsNullOrWhiteSpace(fqdn)) { Fqdn = fqdn; } @@ -1020,12 +1051,12 @@ private void SetPropertiesFromConfigurationParameters(ConfigurationSettings sett } // Telemetry (AppInsights, LogAnalytics, etc) - Override - if (bool.TryParse(GetConfigSettingValue(ObserverConstants.TelemetryEnabled, settings), out bool telemEnabled)) + if (bool.TryParse(GetConfigSettingValue(ObserverConstants.TelemetryProviderEnabled, settings), out bool telemProviderEnabled)) { - TelemetryEnabled = telemEnabled; + TelemetryProviderEnabled = telemProviderEnabled; } - if (!TelemetryEnabled) + if (!TelemetryProviderEnabled) { return; } @@ -1034,13 +1065,13 @@ private void SetPropertiesFromConfigurationParameters(ConfigurationSettings sett if (string.IsNullOrEmpty(telemetryProviderType)) { - TelemetryEnabled = false; + TelemetryProviderEnabled = false; return; } if (!Enum.TryParse(telemetryProviderType, out TelemetryProviderType telemetryProvider)) { - TelemetryEnabled = false; + TelemetryProviderEnabled = false; return; } @@ -1054,32 +1085,29 @@ private void SetPropertiesFromConfigurationParameters(ConfigurationSettings sett if (string.IsNullOrEmpty(logAnalyticsWorkspaceId) || string.IsNullOrEmpty(logAnalyticsSharedKey)) { - TelemetryEnabled = false; + TelemetryProviderEnabled = false; return; } - TelemetryClient = new LogAnalyticsTelemetry( - logAnalyticsWorkspaceId, - logAnalyticsSharedKey, - logAnalyticsLogType); + TelemetryClient = new LogAnalyticsTelemetry(logAnalyticsWorkspaceId, logAnalyticsSharedKey, 
logAnalyticsLogType); break; case TelemetryProviderType.AzureApplicationInsights: - string aiKey = GetConfigSettingValue(ObserverConstants.AiKey, settings); + string aiConnString = GetConfigSettingValue(ObserverConstants.AppInsightsConnectionString, settings); - if (string.IsNullOrEmpty(aiKey)) + if (string.IsNullOrEmpty(aiConnString)) { - TelemetryEnabled = false; + TelemetryProviderEnabled = false; return; } - TelemetryClient = new AppInsightsTelemetry(aiKey); + TelemetryClient = new AppInsightsTelemetry(aiConnString); break; default: - TelemetryEnabled = false; + TelemetryProviderEnabled = false; break; } } @@ -1159,7 +1187,7 @@ private async Task RunObserversAsync() linkedSFRuntimeObserverTokenSource = CancellationTokenSource.CreateLinkedTokenSource(cts.Token, runAsyncToken); // Telemetry. - if (TelemetryEnabled) + if (TelemetryProviderEnabled) { var telemetryData = new NodeTelemetryData() { @@ -1359,7 +1387,7 @@ private async Task CheckGithubForNewVersionAsync() }; // Telemetry. - if (TelemetryEnabled) + if (TelemetryProviderEnabled) { await TelemetryClient?.ReportHealthAsync(telemetryData, runAsyncToken); } @@ -1383,7 +1411,7 @@ private async Task CheckGithubForNewVersionAsync() private bool IsLVIDPerfCounterEnabled(ConfigurationSettings settings = null) { - if (!isWindows /*|| ServiceFabricConfiguration.Instance.FabricVersion.StartsWith("10")*/) + if (!isWindows) { return false; } @@ -1392,7 +1420,7 @@ private bool IsLVIDPerfCounterEnabled(ConfigurationSettings settings = null) if (IsLvidCounterEnabled) { // DEBUG - Logger.LogInfo("IsLVIDPerfCounterEnabled: Counter has already been determined to be enabled. Not running the check again.."); + Logger.LogInfo("IsLVIDPerfCounterEnabled: Counter has already been determined to be enabled. 
Not running the check again."); return true; } @@ -1412,15 +1440,17 @@ private bool IsLVIDPerfCounterEnabled(ConfigurationSettings settings = null) } // DEBUG - Logger.LogInfo("IsLVIDPerfCounterEnabled: Running check since a supported observer is enabled for LVID monitoring."); + Logger.LogInfo($"IsLVIDPerfCounterEnabled: Running check since a supported observer is enabled for LVID monitoring. Detected Fabric version is {sfVersion}"); string categoryName = "Windows Fabric Database"; - - if (sfVersion.StartsWith("1")) - { + if (sfVersion.StartsWith('1')) + { categoryName = "MSExchange Database"; } + // DEBUG + Logger.LogInfo($"IsLVIDPerfCounterEnabled: using '{categoryName}' for CategoryName."); + // If there is corrupted state on the machine with respect to performance counters, an AV can occur (in native code, then wrapped in AccessViolationException) // when calling PerformanceCounterCategory.Exists below. This is actually a symptom of a problem that extends beyond just this counter category.. // *Do not catch AV exception*. FO will crash, of course, but that is safer than pretending nothing is wrong. @@ -1432,7 +1462,7 @@ private bool IsLVIDPerfCounterEnabled(ConfigurationSettings settings = null) } catch (Exception e) when (e is ArgumentException or InvalidOperationException or UnauthorizedAccessException or Win32Exception) { - Logger.LogWarning($"IsLVIDPerfCounterEnabled: Failed to determine LVID perf counter state: {e.Message}"); + Logger.LogWarning($"IsLVIDPerfCounterEnabled: Failed to determine LVID perf counter state: {e.Message}. 
Detected SF Runtime Version: {sfVersion}"); } return false; diff --git a/FabricObserver/Observers/SFConfigurationObserver.cs b/FabricObserver/Observers/SFConfigurationObserver.cs index 0c2f49c2..4fb5ca45 100644 --- a/FabricObserver/Observers/SFConfigurationObserver.cs +++ b/FabricObserver/Observers/SFConfigurationObserver.cs @@ -54,8 +54,9 @@ public SFConfigurationObserver(StatelessServiceContext context) : base(null, con public override async Task ObserveAsync(CancellationToken token) { - if (!IsObserverWebApiAppDeployed || (RunInterval > TimeSpan.MinValue && DateTime.Now.Subtract(LastRunDateTime) < RunInterval)) + if (!IsObserverWebApiAppDeployed || (RunInterval > TimeSpan.Zero && DateTime.Now.Subtract(LastRunDateTime) < RunInterval)) { + ObserverLogger.LogInfo($"ObserveAsync: RunInterval ({RunInterval}) has not elapsed. Exiting."); return; } diff --git a/FabricObserver/PackageRoot/Config/Settings.xml b/FabricObserver/PackageRoot/Config/Settings.xml index a324a12c..52c01e9a 100644 --- a/FabricObserver/PackageRoot/Config/Settings.xml +++ b/FabricObserver/PackageRoot/Config/Settings.xml @@ -8,7 +8,7 @@ - + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - -
diff --git a/FabricObserver/PackageRoot/Data/Plugins/Readme.txt b/FabricObserver/PackageRoot/Data/Plugins/Readme.txt index fad8a443..e61ec389 100644 --- a/FabricObserver/PackageRoot/Data/Plugins/Readme.txt +++ b/FabricObserver/PackageRoot/Data/Plugins/Readme.txt @@ -7,8 +7,8 @@ Note that the observer API lives in its own library, FabricObserver.Extensibilit 1. Create a new .NET 6 Library project. 2. Install the same version of the Microsoft.ServiceFabricApps.FabricObserver.Extensibility nupkg from https://www.nuget.org/profiles/ServiceFabricApps as the version of FabricObserver you are deploying. - E.g., 3.2.13 if you are going to deploy FO 3.2.13. - NOTE: You can also consume the entire FabricObserver 3.2.13 nupkg to build your plugin. Please see the SampleObserverPlugin project's csproj file for more information. + E.g., 3.2.14 if you are going to deploy FO 3.2.14. + NOTE: You can also consume the entire FabricObserver 3.2.14 nupkg to build your plugin. Please see the SampleObserverPlugin project's csproj file for more information. 3. Write an observer! @@ -68,5 +68,5 @@ cd C:\Users\me\source\repos\service-fabric-observer ./Build-FabricObserver ./Build-NugetPackages -The output from the above commands contains FabricObserver platform-specific nupkgs and a package you have to use for plugin authoring named Microsoft.ServiceFabricApps.FabricObserver.Extensibility.3.2.13.nupkg. Nupkg files from above command would be located in +The output from the above commands contains FabricObserver platform-specific nupkgs and a package you have to use for plugin authoring named Microsoft.ServiceFabricApps.FabricObserver.Extensibility.3.2.14.nupkg. Nupkg files from above command would be located in C:\Users\me\source\repos\service-fabric-observer\bin\release\FabricObserver\Nugets. 
\ No newline at end of file diff --git a/FabricObserver/PackageRoot/ServiceManifest.xml b/FabricObserver/PackageRoot/ServiceManifest.xml index e7646843..6f62098b 100644 --- a/FabricObserver/PackageRoot/ServiceManifest.xml +++ b/FabricObserver/PackageRoot/ServiceManifest.xml @@ -1,6 +1,6 @@  @@ -9,7 +9,7 @@ This name must match the string used in RegisterServiceType call in Program.cs. --> - + install_lvid_perfcounter.bat @@ -25,10 +25,10 @@ - + - + \ No newline at end of file diff --git a/FabricObserver/PackageRoot/ServiceManifest_linux.xml b/FabricObserver/PackageRoot/ServiceManifest_linux.xml index 26226e2a..445d1db7 100644 --- a/FabricObserver/PackageRoot/ServiceManifest_linux.xml +++ b/FabricObserver/PackageRoot/ServiceManifest_linux.xml @@ -1,6 +1,6 @@  @@ -11,7 +11,7 @@ - + setcaps.sh @@ -27,10 +27,10 @@ - + - + \ No newline at end of file diff --git a/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml b/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml index e42477a5..ed202917 100644 --- a/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml +++ b/FabricObserverApp/ApplicationPackageRoot/ApplicationManifest.xml @@ -1,30 +1,39 @@  - + - + + + + + + + + - + + - - - - - - + + + + + + + + + @@ -35,9 +44,9 @@ - + - + @@ -48,9 +57,9 @@ - - + @@ -60,12 +69,12 @@ - + - + @@ -76,17 +85,21 @@ - - - - - + + + + + - + @@ -169,7 +182,7 @@ - + @@ -236,7 +249,7 @@ should match the Name and Version attributes of the ServiceManifest element defined in the ServiceManifest.xml file. --> - + @@ -419,6 +432,13 @@ + + + + + + + @@ -440,4 +460,4 @@ - + \ No newline at end of file diff --git a/FabricObserverTests/ObserverTests.cs b/FabricObserverTests/ObserverTests.cs index 9ab5f647..071ed74a 100644 --- a/FabricObserverTests/ObserverTests.cs +++ b/FabricObserverTests/ObserverTests.cs @@ -101,6 +101,9 @@ public static async Task TestClassStartUp(TestContext testContext) Guid.NewGuid(), long.MaxValue); + // Ensure clean test app state. 
+ await RemoveTestApplicationsAsync(); + // Install required SF test applications. await DeployTestAppsAppsAsync(); } @@ -112,6 +115,9 @@ private static async Task DeployTestAppsAppsAsync() await DeployVotingAppAsync(); await DeployCpuStressAppAsync(); await DeployPortTestAppAsync(); + + // Wait a little extra time for apps to be fully in ready state. + await Task.Delay(5000); } [ClassCleanup] @@ -207,20 +213,6 @@ private static ConfigurationPackage BuildConfigurationPackageFromSettingsFile(st private static async Task DeployHealthMetricsAppAsync() { string appName = "fabric:/HealthMetrics"; - - // If fabric:/HealthMetrics is already installed, exit. - var deployedTestApp = - await FabricClientSingleton.QueryManager.GetDeployedApplicationListAsync( - NodeName, - new Uri(appName), - TimeSpan.FromSeconds(30), - Token); - - if (deployedTestApp?.Count > 0) - { - return; - } - string appType = "HealthMetricsType"; string appVersion = "1.0.0.0"; string serviceName1 = "fabric:/HealthMetrics/BandActorService"; @@ -302,20 +294,6 @@ await FabricClientSingleton.QueryManager.GetDeployedApplicationListAsync( private static async Task DeployTestApp42Async() { string appName = "fabric:/TestApp42"; - - // If fabric:/TestApp42 is already installed, exit. - var deployedTestApp = - await FabricClientSingleton.QueryManager.GetDeployedApplicationListAsync( - NodeName, - new Uri(appName), - TimeSpan.FromSeconds(30), - Token); - - if (deployedTestApp?.Count > 0) - { - return; - } - string appType = "TestApp42Type"; string appVersion = "1.0.0"; @@ -367,20 +345,6 @@ await FabricClientSingleton.QueryManager.GetDeployedApplicationListAsync( private static async Task DeployVotingAppAsync() { string appName = "fabric:/Voting"; - - // If fabric:/Voting is already installed, exit. 
- var deployedTestApp = - await FabricClientSingleton.QueryManager.GetDeployedApplicationListAsync( - NodeName, - new Uri(appName), - TimeSpan.FromSeconds(30), - Token); - - if (deployedTestApp?.Count > 0) - { - return; - } - string appType = "VotingType"; string appVersion = "1.0.0"; @@ -432,20 +396,6 @@ await FabricClientSingleton.QueryManager.GetDeployedApplicationListAsync( private static async Task DeployCpuStressAppAsync() { string appName = "fabric:/CpuStress"; - - // If fabric:/Voting is already installed, exit. - var deployedTestApp = - await FabricClientSingleton.QueryManager.GetDeployedApplicationListAsync( - NodeName, - new Uri(appName), - TimeSpan.FromSeconds(30), - Token); - - if (deployedTestApp?.Count > 0) - { - return; - } - string appType = "CpuStressType"; string appVersion = "1.0.0"; @@ -497,20 +447,6 @@ await FabricClientSingleton.QueryManager.GetDeployedApplicationListAsync( private static async Task DeployPortTestAppAsync() { string appName = "fabric:/PortTest"; - - // If fabric:/Voting is already installed, exit. - var deployedTestApp = - await FabricClientSingleton.QueryManager.GetDeployedApplicationListAsync( - NodeName, - new Uri(appName), - TimeSpan.FromSeconds(30), - Token); - - if (deployedTestApp?.Count > 0) - { - return; - } - string appType = "PortTestType"; string appVersion = "1.0.0"; @@ -761,6 +697,8 @@ private static async Task RemoveTestApplicationsAsync() // Un-provision the application type. 
await FabricClientSingleton.ApplicationManager.UnprovisionApplicationAsync(appType, appVersion); } + + await Task.Delay(5000); } private static async Task EnsureTestServicesExistAsync(string appName, int numServices = 0) @@ -786,7 +724,7 @@ private static async Task EnsureTestServicesExistAsync(string appName, int public void AppObserver_Constructor_Test() { ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var obs = new AppObserver(TestServiceContext); @@ -800,7 +738,7 @@ public void AppObserver_Constructor_Test() public void AzureStorageUploadObserver_Constructor_Test() { ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var obs = new AzureStorageUploadObserver(TestServiceContext); @@ -814,7 +752,7 @@ public void AzureStorageUploadObserver_Constructor_Test() public void CertificateObserver_Constructor_test() { ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var obs = new CertificateObserver(TestServiceContext); @@ -828,7 +766,7 @@ public void CertificateObserver_Constructor_test() public void ContainerObserver_Constructor_test() { ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var obs = new ContainerObserver(TestServiceContext); @@ -842,7 +780,7 @@ public void ContainerObserver_Constructor_test() public void DiskObserver_Constructor_Test() { ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + 
ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; ObserverManager.ObserverWebAppDeployed = true; @@ -857,7 +795,7 @@ public void DiskObserver_Constructor_Test() public void FabricSystemObserver_Constructor_Test() { ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var obs = new FabricSystemObserver(TestServiceContext); @@ -871,7 +809,7 @@ public void FabricSystemObserver_Constructor_Test() public void NetworkObserver_Constructor_Test() { ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; ObserverManager.ObserverWebAppDeployed = true; @@ -886,7 +824,7 @@ public void NetworkObserver_Constructor_Test() public void NodeObserver_Constructor_Test() { ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var obs = new NodeObserver(TestServiceContext); @@ -900,7 +838,7 @@ public void NodeObserver_Constructor_Test() public void OSObserver_Constructor_Test() { ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var obs = new OSObserver(TestServiceContext); @@ -917,7 +855,7 @@ public void SFConfigurationObserver_Constructor_Test() ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; ObserverManager.ObserverWebAppDeployed = true; @@ -937,7 +875,7 @@ public void SFConfigurationObserver_Constructor_Test() public async Task 
AppObserver_InitializeAsync_MalformedTargetAppValue_GeneratesWarning() { ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var obs = new AppObserver(TestServiceContext) @@ -955,7 +893,7 @@ public async Task AppObserver_InitializeAsync_MalformedTargetAppValue_GeneratesW public async Task AppObserver_InitializeAsync_InvalidJson_GeneratesWarning() { ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var obs = new AppObserver(TestServiceContext) @@ -973,7 +911,7 @@ public async Task AppObserver_InitializeAsync_InvalidJson_GeneratesWarning() public async Task AppObserver_InitializeAsync_NoConfigFound_GeneratesWarning() { ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var obs = new AppObserver(TestServiceContext) @@ -993,7 +931,7 @@ public async Task AppObserver_InitializeAsync_NoConfigFound_GeneratesWarning() public async Task AppObserver_InitializeAsync_TargetAppType_ServiceExcludeList_EnsureExcluded() { ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var obs = new AppObserver(TestServiceContext) @@ -1011,7 +949,7 @@ public async Task AppObserver_InitializeAsync_TargetAppType_ServiceExcludeList_E public async Task AppObserver_InitializeAsync_TargetApp_ServiceExcludeList_EnsureExcluded() { ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var 
obs = new AppObserver(TestServiceContext) @@ -1029,7 +967,7 @@ public async Task AppObserver_InitializeAsync_TargetApp_ServiceExcludeList_Ensur public async Task AppObserver_InitializeAsync_TargetAppType_ServiceIncludeList_EnsureIncluded() { ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var obs = new AppObserver(TestServiceContext) @@ -1047,7 +985,7 @@ public async Task AppObserver_InitializeAsync_TargetAppType_ServiceIncludeList_E public async Task AppObserver_InitializeAsync_TargetApp_ServiceIncludeList_EnsureIncluded() { ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var obs = new AppObserver(TestServiceContext) @@ -1067,7 +1005,7 @@ public async Task AppObserver_InitializeAsync_TargetApp_ServiceIncludeList_Ensur public async Task AppObserver_InitializeAsync_TargetAppType_MultiServiceExcludeList_EnsureNotExcluded() { ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var obs = new AppObserver(TestServiceContext) @@ -1090,7 +1028,7 @@ public async Task AppObserver_InitializeAsync_TargetAppType_MultiServiceExcludeL public async Task AppObserver_InitializeAsync_TargetApp_MultiServiceExcludeList_EnsureNotExcluded() { ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var obs = new AppObserver(TestServiceContext) @@ -1113,7 +1051,7 @@ public async Task AppObserver_InitializeAsync_TargetApp_MultiServiceExcludeList_ public async Task 
AppObserver_InitializeAsync_TargetAppType_MultiServiceIncludeList_EnsureIncluded() { ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var obs = new AppObserver(TestServiceContext) @@ -1131,7 +1069,7 @@ public async Task AppObserver_InitializeAsync_TargetAppType_MultiServiceIncludeL public async Task AppObserver_InitializeAsync_TargetApp_MultiServiceIncludeList_EnsureIncluded() { ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var obs = new AppObserver(TestServiceContext) @@ -1159,7 +1097,7 @@ public async Task AppObserver_ObserveAsync_Successful_IsHealthy() var startDateTime = DateTime.Now; ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = true; using var obs = new AppObserver(TestServiceContext) @@ -1185,7 +1123,7 @@ public async Task AppObserver_ObserveAsync_Successful_WarningsGenerated() var startDateTime = DateTime.Now; ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = true; using var obs = new AppObserver(TestServiceContext) @@ -1211,7 +1149,7 @@ public async Task AppObserver_ObserveAsync_PrivateBytes_Successful_WarningsGener var startDateTime = DateTime.Now; ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = true; using var obs = new AppObserver(TestServiceContext) @@ -1239,7 +1177,7 @@ public async Task AppObserver_ObserveAsync_Successful_RGMemoryLimitWarningGenera var 
startDateTime = DateTime.Now; ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var obs = new AppObserver(TestServiceContext) @@ -1334,7 +1272,7 @@ public async Task AppObserver_ObserveAsync_Successful_RGCpuLimitWarningGenerated var startDateTime = DateTime.Now; ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = true; using var obs = new AppObserver(TestServiceContext) @@ -1344,6 +1282,8 @@ public async Task AppObserver_ObserveAsync_Successful_RGCpuLimitWarningGenerated await obs.ObserveAsync(Token); + await Task.Delay(5000); + // observer ran to completion with no errors. Assert.IsTrue(obs.LastRunDateTime > startDateTime); @@ -1360,7 +1300,7 @@ public async Task AppObserver_ObserveAsync_Successful_RGLimit_Validate_Multiple_ var startDateTime = DateTime.Now; ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var obs = new AppObserver(TestServiceContext); @@ -1491,7 +1431,7 @@ public async Task AppObserver_ObserveAsync_OldConfigStyle_Successful_WarningsGen var startDateTime = DateTime.Now; ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var obs = new AppObserver(TestServiceContext) @@ -1517,7 +1457,7 @@ public async Task AppObserver_ObserveAsync_OldConfigStyle_Successful_NoWarningsG var startDateTime = DateTime.Now; ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = 
false; using var obs = new AppObserver(TestServiceContext) @@ -1861,7 +1801,7 @@ public async Task AppObserver_DumpProcessOnWarning_SuccessfulDumpCreation() var startDateTime = DateTime.Now; ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var obs = new AppObserver(TestServiceContext) @@ -1902,7 +1842,7 @@ public async Task AppObserver_DumpProcessOnError_SuccessfulDumpCreation() var startDateTime = DateTime.Now; ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var obs = new AppObserver(TestServiceContext) @@ -1944,7 +1884,7 @@ public async Task ContainerObserver_ObserveAsync_Successful_IsHealthy() var startDateTime = DateTime.Now; ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var obs = new ContainerObserver(TestServiceContext) @@ -2140,7 +2080,7 @@ public async Task CertificateObserver_validCerts() var startDateTime = DateTime.Now; ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var obs = new CertificateObserver(TestServiceContext); @@ -2189,7 +2129,7 @@ public async Task CertificateObserver_expiredAndexpiringCerts() var startDateTime = DateTime.Now; ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var obs = new CertificateObserver(TestServiceContext); @@ -2245,13 +2185,13 @@ public async Task 
NodeObserver_Integer_Greater_Than_100_CPU_Warn_Threshold_No_Fa var startDateTime = DateTime.Now; ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var obs = new NodeObserver(TestServiceContext) { DataCapacity = 2, - MonitorDuration = TimeSpan.FromSeconds(1), + CpuMonitorDuration = TimeSpan.FromSeconds(1), CpuWarningUsageThresholdPct = 10000 }; @@ -2273,13 +2213,13 @@ public async Task NodeObserver_Negative_Integer_CPU_Mem_Ports_Firewalls_Values_N var startDateTime = DateTime.Now; ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var obs = new NodeObserver(TestServiceContext) { DataCapacity = 2, - MonitorDuration = TimeSpan.FromSeconds(1), + CpuMonitorDuration = TimeSpan.FromSeconds(1), CpuWarningUsageThresholdPct = -1000, MemWarningUsageThresholdMb = -2500, EphemeralPortsRawErrorThreshold = -42, @@ -2302,13 +2242,13 @@ public async Task NodeObserver_Negative_Integer_Thresholds_CPU_Mem_Ports_Firewal var startDateTime = DateTime.Now; ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var obs = new NodeObserver(TestServiceContext) { DataCapacity = 2, - MonitorDuration = TimeSpan.FromSeconds(1), + CpuMonitorDuration = TimeSpan.FromSeconds(1), CpuWarningUsageThresholdPct = -1000, MemWarningUsageThresholdMb = -2500, EphemeralPortsRawErrorThreshold = -42, @@ -2338,7 +2278,7 @@ public async Task OSObserver_ObserveAsync_Successful_IsHealthy_NoWarningsOrError var startDateTime = DateTime.Now; ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = 
false; ObserverManager.EtwEnabled = true; using var obs = new OSObserver(TestServiceContext) @@ -2378,7 +2318,7 @@ public async Task OSObserver_IsWindowsDevCluster_True() var startDateTime = DateTime.Now; ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var obs = new OSObserver(TestServiceContext) @@ -2410,7 +2350,7 @@ public async Task DiskObserver_ObserveAsync_Successful_IsHealthy_NoWarningsOrErr var startDateTime = DateTime.Now; ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = true; var warningDictionary = new Dictionary @@ -2422,7 +2362,7 @@ public async Task DiskObserver_ObserveAsync_Successful_IsHealthy_NoWarningsOrErr { // This is required since output files are only created if fo api app is also deployed to cluster.. 
IsObserverWebApiAppDeployed = true, - MonitorDuration = TimeSpan.FromSeconds(1), + CpuMonitorDuration = TimeSpan.FromSeconds(1), FolderSizeMonitoringEnabled = true, FolderSizeConfigDataWarning = warningDictionary, IsEtwProviderEnabled = true @@ -2456,7 +2396,7 @@ public async Task DiskObserver_ObserveAsync_Successful_IsHealthy_WarningsOrError var startDateTime = DateTime.Now; ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = true; var warningDictionary = new Dictionary @@ -2575,13 +2515,13 @@ public async Task NodeObserver_ObserveAsync_Successful_IsHealthy_NoWarningsOrErr var startDateTime = DateTime.Now; ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = true; using var obs = new NodeObserver(TestServiceContext) { IsEnabled = true, - MonitorDuration = TimeSpan.FromSeconds(5), + CpuMonitorDuration = TimeSpan.FromSeconds(5), CpuWarningUsageThresholdPct = 90, // This will generate Warning for sure. 
ActivePortsWarningThreshold = 10000, MemoryWarningLimitPercent = 90, @@ -2607,12 +2547,12 @@ public async Task NodeObserver_ObserveAsync_Successful_IsHealthy_WarningsOrError var startDateTime = DateTime.Now; ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = true; using var obs = new NodeObserver(TestServiceContext) { - MonitorDuration = TimeSpan.FromSeconds(1), + CpuMonitorDuration = TimeSpan.FromSeconds(1), DataCapacity = 5, UseCircularBuffer = false, IsEtwProviderEnabled = true, @@ -2638,7 +2578,7 @@ public async Task SFConfigurationObserver_ObserveAsync_Successful_IsHealthy() var startDateTime = DateTime.Now; ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var obs = new SFConfigurationObserver(TestServiceContext) @@ -2675,15 +2615,10 @@ public async Task SFConfigurationObserver_ObserveAsync_Successful_IsHealthy() [TestMethod] public async Task FabricSystemObserver_ObserveAsync_Successful_IsHealthy_NoWarningsOrErrors() { - var nodeList = await FabricClientSingleton.QueryManager.GetNodeListAsync(); - - // This is meant to be run on your dev machine's one node test cluster. 
- Assert.IsTrue(nodeList?.Count == 1); - var startDateTime = DateTime.Now; ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = true; using var obs = new FabricSystemObserver(TestServiceContext) @@ -2710,15 +2645,10 @@ public async Task FabricSystemObserver_ObserveAsync_Successful_IsHealthy_NoWarni [TestMethod] public async Task FabricSystemObserver_ObserveAsync_Successful_IsHealthy_MemoryWarningsOrErrorsDetected() { - var nodeList = await FabricClientSingleton.QueryManager.GetNodeListAsync(); - - // This is meant to be run on your dev machine's one node test cluster. - Assert.IsTrue(nodeList?.Count == 1); - var startDateTime = DateTime.Now; ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = true; using var obs = new FabricSystemObserver(TestServiceContext) @@ -2741,20 +2671,15 @@ public async Task FabricSystemObserver_ObserveAsync_Successful_IsHealthy_MemoryW [TestMethod] public async Task FabricSystemObserver_ObserveAsync_Successful_IsHealthy_ActiveTcpPortsWarningsOrErrorsDetected() { - var nodeList = await FabricClientSingleton.QueryManager.GetNodeListAsync(); - - // This is meant to be run on your dev machine's one node test cluster. 
- Assert.IsTrue(nodeList?.Count == 1); - var startDateTime = DateTime.Now; ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var obs = new FabricSystemObserver(TestServiceContext) { - MonitorDuration = TimeSpan.FromSeconds(1), + CpuMonitorDuration = TimeSpan.FromSeconds(1), ActiveTcpPortCountWarning = 3 }; @@ -2774,20 +2699,15 @@ public async Task FabricSystemObserver_ObserveAsync_Successful_IsHealthy_ActiveT [TestMethod] public async Task FabricSystemObserver_ObserveAsync_Successful_IsHealthy_EphemeralPortsWarningsOrErrorsDetected() { - var nodeList = await FabricClientSingleton.QueryManager.GetNodeListAsync(); - - // This is meant to be run on your dev machine's one node test cluster. - Assert.IsTrue(nodeList?.Count == 1); - var startDateTime = DateTime.Now; ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var obs = new FabricSystemObserver(TestServiceContext) { - MonitorDuration = TimeSpan.FromSeconds(1), + CpuMonitorDuration = TimeSpan.FromSeconds(1), ActiveEphemeralPortCountWarning = 1 }; @@ -2807,20 +2727,15 @@ public async Task FabricSystemObserver_ObserveAsync_Successful_IsHealthy_Ephemer [TestMethod] public async Task FabricSystemObserver_ObserveAsync_Successful_IsHealthy_HandlesWarningsOrErrorsDetected() { - var nodeList = await FabricClientSingleton.QueryManager.GetNodeListAsync(); - - // This is meant to be run on your dev machine's one node test cluster. 
- Assert.IsTrue(nodeList?.Count == 1); - var startDateTime = DateTime.Now; ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var obs = new FabricSystemObserver(TestServiceContext) { - MonitorDuration = TimeSpan.FromSeconds(1), + CpuMonitorDuration = TimeSpan.FromSeconds(1), AllocatedHandlesWarning = 100 }; @@ -2840,21 +2755,16 @@ public async Task FabricSystemObserver_ObserveAsync_Successful_IsHealthy_Handles [TestMethod] public async Task FabricSystemObserver_Negative_Integer_CPU_Warn_Threshold_No_Unhandled_Exception() { - var nodeList = await FabricClientSingleton.QueryManager.GetNodeListAsync(); - - // This is meant to be run on your dev machine's one node test cluster. - Assert.IsTrue(nodeList?.Count == 1); - var startDateTime = DateTime.Now; ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var obs = new FabricSystemObserver(TestServiceContext) { - MonitorDuration = TimeSpan.FromSeconds(1), + CpuMonitorDuration = TimeSpan.FromSeconds(1), CpuWarnUsageThresholdPct = -42 }; @@ -2873,20 +2783,15 @@ public async Task FabricSystemObserver_Negative_Integer_CPU_Warn_Threshold_No_Un [TestMethod] public async Task FabricSystemObserver_Integer_Greater_Than_100_CPU_Warn_Threshold_No_Unhandled_Exception() { - var nodeList = await FabricClientSingleton.QueryManager.GetNodeListAsync(); - - // This is meant to be run on your dev machine's one node test cluster. 
- Assert.IsTrue(nodeList?.Count == 1); - var startDateTime = DateTime.Now; ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var obs = new FabricSystemObserver(TestServiceContext) { - MonitorDuration = TimeSpan.FromSeconds(1), + CpuMonitorDuration = TimeSpan.FromSeconds(1), CpuWarnUsageThresholdPct = 420 }; @@ -3154,6 +3059,9 @@ public async Task AppObserver_ETW_PrivateBytes_Multiple_CodePackages_ValuesAreNo { using var foEtwListener = new FabricObserverEtwListener(_logger); await AppObserver_ObserveAsync_PrivateBytes_Successful_WarningsGenerated(); + + await Task.Delay(5000); + List telemData = foEtwListener.foEtwConverter.ServiceTelemetryData; Assert.IsNotNull(telemData); @@ -3214,7 +3122,7 @@ public async Task AppObserver_ETW_RGMemoryLimitPercent_Warning() telemData = telemData.Where( t => t.ApplicationName == "fabric:/Voting" && t.HealthState == HealthState.Warning).ToList(); - // 2 service code packages + 2 helper code packages (VotingData) * 1 metric = 4 warnings... + // 2 service code packages + 2 helper code packages (VotingData) * 1 metric = 4 warnings. 
Assert.IsTrue(telemData.All(t => t.Metric == ErrorWarningProperty.RGMemoryUsagePercent && telemData.Count == 4)); } @@ -3499,13 +3407,13 @@ public async Task NodeObserver_ETW_EventData_IsNodeSnapshotTelemetryData() using var foEtwListener = new FabricObserverEtwListener(_logger); var startDateTime = DateTime.Now; ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = true; using var obs = new NodeObserver(TestServiceContext) { IsEnabled = true, - MonitorDuration = TimeSpan.FromSeconds(5), + CpuMonitorDuration = TimeSpan.FromSeconds(5), CpuWarningUsageThresholdPct = 90, ActivePortsWarningThreshold = 10000, MemoryWarningLimitPercent = 90, @@ -3584,14 +3492,12 @@ public async Task OSObserver_ETW_EventData_IsMachineTelemetryData() public async Task AppObserver_Detects_Monitors_Multiple_Helper_CodePackages() { ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var obs = new AppObserver(TestServiceContext) { - JsonConfigPath = Path.Combine(Environment.CurrentDirectory, "PackageRoot", "Config", "AppObserver.config.json"), - EnableConcurrentMonitoring = true, - EnableChildProcessMonitoring = true + JsonConfigPath = Path.Combine(Environment.CurrentDirectory, "PackageRoot", "Config", "AppObserver.config.json") }; var startDateTime = DateTime.Now; @@ -3622,14 +3528,12 @@ public async Task AppObserver_Detects_Monitors_Multiple_Helper_CodePackages() public async Task AppObserver_ChildProcs_Detects_SingleDescendant_Ports_Warning() { ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var obs = new AppObserver(TestServiceContext) { - JsonConfigPath = 
Path.Combine(Environment.CurrentDirectory, "PackageRoot", "Config", "AppObserver.config.single-app-target-warning-ports.json"), - EnableConcurrentMonitoring = true, - EnableChildProcessMonitoring = true + JsonConfigPath = Path.Combine(Environment.CurrentDirectory, "PackageRoot", "Config", "AppObserver.config.single-app-target-warning-ports.json") }; var startDateTime = DateTime.Now; @@ -3642,7 +3546,7 @@ public async Task AppObserver_ChildProcs_Detects_SingleDescendant_Ports_Warning( // observer ran to completion with no errors. Assert.IsTrue(obs.LastRunDateTime > startDateTime); - // observer detected no warning conditions. + // observer detected warning condition. Assert.IsTrue(obs.HasActiveFabricErrorOrWarning); // observer did not have any internal errors during run. @@ -3653,7 +3557,7 @@ public async Task AppObserver_ChildProcs_Detects_SingleDescendant_Ports_Warning( public async Task AppObserver_ChildProcs_Detects_SingleDescendant_Ports_NoWarning() { ObserverManager.FabricServiceContext = TestServiceContext; - ObserverManager.TelemetryEnabled = false; + ObserverManager.TelemetryProviderEnabled = false; ObserverManager.EtwEnabled = false; using var obs = new AppObserver(TestServiceContext) diff --git a/FabricObserverTests/PackageRoot/Config/Settings.xml b/FabricObserverTests/PackageRoot/Config/Settings.xml index c9af6742..b53d7694 100644 --- a/FabricObserverTests/PackageRoot/Config/Settings.xml +++ b/FabricObserverTests/PackageRoot/Config/Settings.xml @@ -27,21 +27,21 @@
- + + + - - + + - - @@ -81,8 +81,8 @@ - - + + diff --git a/README.md b/README.md index 64709215..4c1a347a 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -## FabricObserver 3.2.13 +## FabricObserver 3.2.14 [![Deploy to Azure](https://aka.ms/deploytoazurebutton)](https://portal.azure.com/#create/Microsoft.Template/uri/https%3A%2F%2Fraw.githubusercontent.com%2Fmicrosoft%2Fservice-fabric-observer%2Fmain%2FDocumentation%2FDeployment%2Fservice-fabric-observer.json) @@ -87,7 +87,7 @@ see [FOAzurePipeline.yaml](/FOAzurePipeline.yaml) for msazure devops build tasks .net6 installed (if you deploy VM images from Azure gallery, then they will not have .net6 installed), then you must deploy the SelfContained package. ### Deploy FabricObserver -**Note: You must deploy this version (3.2.13) to clusters that are running SF 9.0 and above. This version also requires .NET 6.** +**Note: You must deploy this version (3.2.14) to clusters that are running SF 9.0 and above. This version also requires .NET 6.** You can deploy FabricObserver (and ClusterObserver) using Visual Studio (if you build the sources yourself), PowerShell or ARM. Please note that this version of FabricObserver no longer supports the DefaultServices node in ApplicationManifest.xml. This means that should you deploy using PowerShell, you must create an instance of the service as the last command in your script. This was done to support ARM deployment, specifically. The StartupServices.xml file you see in the FabricHealerApp project now contains the service information once held in ApplicationManifest's DefaultServices node. Note that this information is primarily useful for deploying from Visual Studio. 
@@ -127,15 +127,15 @@ Connect-ServiceFabricCluster -ConnectionEndpoint @('sf-win-cluster.westus2.cloud #Copy $path contents (FO app package) to server: -Copy-ServiceFabricApplicationPackage -ApplicationPackagePath $path -CompressPackage -ApplicationPackagePathInImageStore FO3213 -TimeoutSec 1800 +Copy-ServiceFabricApplicationPackage -ApplicationPackagePath $path -CompressPackage -ApplicationPackagePathInImageStore FO3214 -TimeoutSec 1800 #Register FO ApplicationType: -Register-ServiceFabricApplicationType -ApplicationPathInImageStore FO3213 +Register-ServiceFabricApplicationType -ApplicationPathInImageStore FO3214 #Create FO application (if not already deployed at lesser version): -New-ServiceFabricApplication -ApplicationName fabric:/FabricObserver -ApplicationTypeName FabricObserverType -ApplicationTypeVersion 3.2.13 +New-ServiceFabricApplication -ApplicationName fabric:/FabricObserver -ApplicationTypeName FabricObserverType -ApplicationTypeVersion 3.2.14 #Create the Service instances (-1 means all nodes, which is what is required for FO): @@ -143,7 +143,7 @@ New-ServiceFabricService -Stateless -PartitionSchemeSingleton -ApplicationName f #OR if updating existing version: -Start-ServiceFabricApplicationUpgrade -ApplicationName fabric:/FabricObserver -ApplicationTypeVersion 3.2.13 -Monitored -FailureAction rollback +Start-ServiceFabricApplicationUpgrade -ApplicationName fabric:/FabricObserver -ApplicationTypeVersion 3.2.14 -Monitored -FailureAction rollback ``` ## Observer Model diff --git a/SampleObserverPlugin/SampleNewObserver.cs b/SampleObserverPlugin/SampleNewObserver.cs index 8f114821..394d707c 100644 --- a/SampleObserverPlugin/SampleNewObserver.cs +++ b/SampleObserverPlugin/SampleNewObserver.cs @@ -57,7 +57,7 @@ public override async Task ObserveAsync(CancellationToken token) { // If set, this observer will only run during the supplied interval. // See Settings.xml, CertificateObserverConfiguration section, RunInterval parameter for an example. 
- if (RunInterval > TimeSpan.MinValue && DateTime.Now.Subtract(LastRunDateTime) < RunInterval) + if (RunInterval > TimeSpan.Zero && DateTime.Now.Subtract(LastRunDateTime) < RunInterval) { return; } diff --git a/SampleObserverPlugin/SampleObserverPlugin.csproj b/SampleObserverPlugin/SampleObserverPlugin.csproj index 076ead5c..634d0fd3 100644 --- a/SampleObserverPlugin/SampleObserverPlugin.csproj +++ b/SampleObserverPlugin/SampleObserverPlugin.csproj @@ -19,18 +19,19 @@ - - + - + - + \ No newline at end of file diff --git a/TelemetryLib/TelemetryLib.csproj b/TelemetryLib/TelemetryLib.csproj index 84e22522..a3c9f06c 100644 --- a/TelemetryLib/TelemetryLib.csproj +++ b/TelemetryLib/TelemetryLib.csproj @@ -11,7 +11,7 @@ TelemetryLib - + diff --git a/XmlDiffPatchSF/Program.cs b/XmlDiffPatchSF/Program.cs index 90757c4a..0a5343e7 100644 --- a/XmlDiffPatchSF/Program.cs +++ b/XmlDiffPatchSF/Program.cs @@ -28,7 +28,7 @@ private static void Main(string[] args) "preceding the file extension.\n\n" + "**Note, if you have observer plugins, then you must supply true for [mergeExistingNodes] as the last argument to pull over your plugin settings as part of the merge.**.\n\n" + "Example:\n\n" + - "DiffPatchXml \"C:\\repos\\FO\\3.1.26\\configs\\ApplicationManifest.xml\" \"C:\\repos\\FO\\3.2.13\\configs\\ApplicationManifest.xml\"\n"); + "DiffPatchXml \"C:\\repos\\FO\\3.1.26\\configs\\ApplicationManifest.xml\" \"C:\\repos\\FO\\3.2.14\\configs\\ApplicationManifest.xml\"\n"); return; } diff --git a/conuget.md b/conuget.md index 39a7bdda..515e04d1 100644 --- a/conuget.md +++ b/conuget.md @@ -1,5 +1,5 @@ -### ClusterObserver 2.2.7 -#### This version requires SF Runtime >= 9.0 and targets .NET 6. .NET Core 3.1 is no longer supported. +### ClusterObserver 2.2.8 +#### This version requires SF Runtime >= 9.0 and targets .NET 6. 
[ClusterObserver (CO)](https://github.com/microsoft/service-fabric-observer/tree/main/ClusterObserver) is a stateless singleton Service Fabric .NET 6 service that runs on one node in a cluster. CO observes cluster health (aggregated) and sends telemetry when a cluster is in Error or Warning. CO shares a very small subset of FabricObserver's (FO) code. It is designed to be completely independent from FO sources, diff --git a/foextlib.md b/foextlib.md index fc37aba4..acef99fc 100644 --- a/foextlib.md +++ b/foextlib.md @@ -1,4 +1,4 @@ -## FabricObserver Extensibility Library 3.2.13 +## FabricObserver Extensibility Library 3.2.14 FabricObserver.Extensibility is a .NET 6 library for building custom observers that extend FabricObserver's capabilities to match your needs. A custom observer is managed just like a built-in observer. diff --git a/fonuget.md b/fonuget.md index 71a8ce4e..9311e288 100644 --- a/fonuget.md +++ b/fonuget.md @@ -1,4 +1,4 @@ -## FabricObserver 3.2.13 +## FabricObserver 3.2.14 [**FabricObserver (FO)**](https://github.com/microsoft/service-fabric-observer) is a production-ready watchdog service with an easy-to-use extensibility model, written as a stateless, singleton Service Fabric **.NET 6** application that by default