Skip to content

Commit

Permalink
FO 3.1.12
Browse files Browse the repository at this point in the history
FO 3.1.12
  • Loading branch information
GitTorre authored May 20, 2021
2 parents 863386d + a974c41 commit 8bfd6d3
Show file tree
Hide file tree
Showing 46 changed files with 1,090 additions and 990 deletions.
8 changes: 4 additions & 4 deletions Build-SFPkgs.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@ function Build-SFPkg {
try {
Push-Location $scriptPath

Build-SFPkg "Microsoft.ServiceFabricApps.FabricObserver.Linux.SelfContained.3.1.11" "$scriptPath\bin\release\FabricObserver\linux-x64\self-contained\FabricObserverType"
Build-SFPkg "Microsoft.ServiceFabricApps.FabricObserver.Linux.FrameworkDependent.3.1.11" "$scriptPath\bin\release\FabricObserver\linux-x64\framework-dependent\FabricObserverType"
Build-SFPkg "Microsoft.ServiceFabricApps.FabricObserver.Linux.SelfContained.3.1.12" "$scriptPath\bin\release\FabricObserver\linux-x64\self-contained\FabricObserverType"
Build-SFPkg "Microsoft.ServiceFabricApps.FabricObserver.Linux.FrameworkDependent.3.1.12" "$scriptPath\bin\release\FabricObserver\linux-x64\framework-dependent\FabricObserverType"

Build-SFPkg "Microsoft.ServiceFabricApps.FabricObserver.Windows.SelfContained.3.1.11" "$scriptPath\bin\release\FabricObserver\win-x64\self-contained\FabricObserverType"
Build-SFPkg "Microsoft.ServiceFabricApps.FabricObserver.Windows.FrameworkDependent.3.1.11" "$scriptPath\bin\release\FabricObserver\win-x64\framework-dependent\FabricObserverType"
Build-SFPkg "Microsoft.ServiceFabricApps.FabricObserver.Windows.SelfContained.3.1.12" "$scriptPath\bin\release\FabricObserver\win-x64\self-contained\FabricObserverType"
Build-SFPkg "Microsoft.ServiceFabricApps.FabricObserver.Windows.FrameworkDependent.3.1.12" "$scriptPath\bin\release\FabricObserver\win-x64\framework-dependent\FabricObserverType"
}
finally {
Pop-Location
Expand Down
19 changes: 9 additions & 10 deletions ClusterObserver/ClusterObserver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ public string ObserverName
get;
}


private StatelessServiceContext FabricServiceContext
{
get;
Expand Down Expand Up @@ -123,12 +122,12 @@ private async Task ReportClusterHealthAsync(CancellationToken token)
try
{
// Monitor node status.
await MonitorNodeStatusAsync(token).ConfigureAwait(false);
await MonitorNodeStatusAsync(token).ConfigureAwait(true);

// Check for active repairs in the cluster.
if (ConfigSettings.MonitorRepairJobStatus)
{
var repairsInProgress = await GetRepairTasksCurrentlyProcessingAsync(token).ConfigureAwait(false);
var repairsInProgress = await GetRepairTasksCurrentlyProcessingAsync(token).ConfigureAwait(true);
string repairState = string.Empty;

if (repairsInProgress?.Count > 0)
Expand Down Expand Up @@ -246,7 +245,7 @@ private async Task ReportClusterHealthAsync(CancellationToken token)
case HealthEvaluationKind.Nodes:
try
{
await ProcessNodeHealthAsync(clusterHealth.NodeHealthStates, token).ConfigureAwait(false);
await ProcessNodeHealthAsync(clusterHealth.NodeHealthStates, token).ConfigureAwait(true);
}
catch (Exception e) when (e is FabricException || e is TimeoutException)
{
Expand All @@ -259,7 +258,7 @@ private async Task ReportClusterHealthAsync(CancellationToken token)
case HealthEvaluationKind.SystemApplication:
try
{
await ProcessApplicationHealthAsync(clusterHealth.ApplicationHealthStates, token).ConfigureAwait(false);
await ProcessApplicationHealthAsync(clusterHealth.ApplicationHealthStates, token).ConfigureAwait(true);
}
catch (Exception e) when (e is FabricException || e is TimeoutException)
{
Expand All @@ -270,7 +269,7 @@ private async Task ReportClusterHealthAsync(CancellationToken token)
default:
try
{
await ProcessGenericEntityHealthAsync(evaluation, token).ConfigureAwait(false);
await ProcessGenericEntityHealthAsync(evaluation, token).ConfigureAwait(true);
}
catch (Exception e) when (e is FabricException || e is TimeoutException)
{
Expand Down Expand Up @@ -345,7 +344,7 @@ private async Task ProcessApplicationHealthAsync(IList<ApplicationHealthState> a
ApplicationHealth appHealth = await FabricClientInstance.HealthManager.GetApplicationHealthAsync(
healthState.ApplicationName,
ConfigSettings.AsyncTimeout,
token).ConfigureAwait(false);
token).ConfigureAwait(true);
if (appHealth == null)
{
continue;
Expand Down Expand Up @@ -474,7 +473,7 @@ private async Task ProcessApplicationHealthAsync(IList<ApplicationHealthState> a
private async Task ProcessNodeHealthAsync(IEnumerable<NodeHealthState> nodeHealthStates, CancellationToken token)
{
// Check cluster upgrade status.
int udInClusterUpgrade = await UpgradeChecker.GetUdsWhereFabricUpgradeInProgressAsync(FabricClientInstance, token).ConfigureAwait(false);
int udInClusterUpgrade = await UpgradeChecker.GetUdsWhereFabricUpgradeInProgressAsync(FabricClientInstance, token).ConfigureAwait(true);
var supportedNodeHealthStates = nodeHealthStates.Where( a => a.AggregatedHealthState == HealthState.Warning || a.AggregatedHealthState == HealthState.Error);

foreach (var node in supportedNodeHealthStates)
Expand Down Expand Up @@ -523,7 +522,7 @@ private async Task ProcessNodeHealthAsync(IEnumerable<NodeHealthState> nodeHealt
}

var targetNodeList =
await FabricClientInstance.QueryManager.GetNodeListAsync(node.NodeName, ConfigSettings.AsyncTimeout, token).ConfigureAwait(false);
await FabricClientInstance.QueryManager.GetNodeListAsync(node.NodeName, ConfigSettings.AsyncTimeout, token).ConfigureAwait(true);

Node targetNode = null;

Expand Down Expand Up @@ -619,7 +618,7 @@ private async Task MonitorNodeStatusAsync(CancellationToken token)
// If a node's NodeStatus is Disabling, Disabled, or Down
// for at or above the specified maximum time (in Settings.xml),
// then CO will emit a Warning signal.
var nodeList = await FabricClientInstance.QueryManager.GetNodeListAsync(null, ConfigSettings.AsyncTimeout, token).ConfigureAwait(false);
var nodeList = await FabricClientInstance.QueryManager.GetNodeListAsync(null, ConfigSettings.AsyncTimeout, token).ConfigureAwait(true);

// Are any of the nodes that were previously in non-Up status, now Up?
if (NodeStatusDictionary.Count > 0)
Expand Down
14 changes: 6 additions & 8 deletions ClusterObserver/ClusterObserverManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ public static bool TelemetryEnabled

public static bool EtwEnabled
{
get => bool.TryParse(GetConfigSettingValue(ObserverConstants.EnableEventSourceProvider), out etwEnabled) && etwEnabled;
get => bool.TryParse(GetConfigSettingValue(ObserverConstants.EnableETWProvider), out etwEnabled) && etwEnabled;

set => etwEnabled = value;
}
Expand Down Expand Up @@ -146,7 +146,7 @@ private async void ShutdownHandler(object sender, ConsoleCancelEventArgs console
return;
}

await Task.Delay(shutdownGracePeriodInSeconds).ConfigureAwait(false);
await Task.Delay(shutdownGracePeriodInSeconds).ConfigureAwait(true);

shutdownSignaled = true;
await StopAsync();
Expand Down Expand Up @@ -253,14 +253,12 @@ public async Task StartAsync()
if (!appParamsUpdating && (shutdownSignaled || token.IsCancellationRequested))
{
Logger.LogInfo("Shutdown signaled. Stopping.");
await StopAsync().ConfigureAwait(false);
await StopAsync().ConfigureAwait(true);
break;
}

await RunObserverAync().ConfigureAwait(false);
await RunObserverAync().ConfigureAwait(true);
await Task.Delay(TimeSpan.FromSeconds(ObserverExecutionLoopSleepSeconds > 0 ? ObserverExecutionLoopSleepSeconds : 10), token);

Logger.Flush();
}
}
catch (Exception e) when (e is OperationCanceledException || e is TaskCanceledException)
Expand Down Expand Up @@ -311,7 +309,7 @@ public async Task StopAsync()
shutdownSignaled = true;
}

await SignalAbortToRunningObserverAsync().ConfigureAwait(false);
await SignalAbortToRunningObserverAsync().ConfigureAwait(true);
}

private Task SignalAbortToRunningObserverAsync()
Expand Down Expand Up @@ -377,7 +375,7 @@ private async Task RunObserverAync()
{
string observerHealthWarning = $"{observer.ObserverName} has exceeded its specified run time of {observerExecTimeout.TotalSeconds} seconds. Aborting.";

await SignalAbortToRunningObserverAsync().ConfigureAwait(false);
await SignalAbortToRunningObserverAsync().ConfigureAwait(true);

Logger.LogWarning(observerHealthWarning);

Expand Down
2 changes: 1 addition & 1 deletion ClusterObserver/PackageRoot/Config/Settings.xml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
will be locally logged. This is the recommended setting. Note that file logging is generally
only useful for FabricObserverWebApi, which is an optional log reader service that ships in this repo. -->
<Parameter Name="EnableVerboseLogging" Value="false" />
<Parameter Name="EnableEventSourceProvider" Value="true" />
<Parameter Name="EnableETWProvider" Value="true" />
<!-- Required: Whether the Observer should send all of its monitoring data and Warnings/Errors to the configured Telemetry service. This can be overridden by the setting
in the ClusterObserverConfiguration section. The idea there is that you can do an application parameter update and turn this feature on and off. -->
<Parameter Name="EnableTelemetry" Value="true" />
Expand Down
2 changes: 1 addition & 1 deletion ClusterObserver/Utilities/ObserverConstants.cs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,6 @@ public static class ObserverConstants
public const string Undefined = "Undefined";
public const string ClusterTypePaasV1 = "PaasV1";
public const string ClusterTypeStandalone = "Standalone";
public const string EnableEventSourceProvider = "EnableEventSourceProvider";
public const string EnableETWProvider = "EnableETWProvider";
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ public Task ReportMetricAsync(string role, Guid partition, string name, long val
/// <returns>A <see cref="Task"/> representing the asynchronous operation.</returns>
public async Task ReportMetricAsync(string role, long id, string name, long value, CancellationToken cancellationToken)
{
await ReportMetricAsync(role, id.ToString(), name, value, 1, value, value, value, 0.0, null, cancellationToken).ConfigureAwait(false);
await ReportMetricAsync(role, id.ToString(), name, value, 1, value, value, value, 0.0, null, cancellationToken).ConfigureAwait(true);
}

/// <summary>
Expand Down
8 changes: 4 additions & 4 deletions ClusterObserver/Utilities/Telemetry/LogAnalyticsTelemetry.cs
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,8 @@ private async Task SendTelemetryAsync(string payload, CancellationToken token)
}

retries++;
await Task.Delay(1000).ConfigureAwait(false);
await SendTelemetryAsync(payload, token).ConfigureAwait(false);
await Task.Delay(1000).ConfigureAwait(true);
await SendTelemetryAsync(payload, token).ConfigureAwait(true);
}
else
{
Expand Down Expand Up @@ -194,7 +194,7 @@ public async Task ReportHealthAsync(
osPlatform = RuntimeInformation.IsOSPlatform(OSPlatform.Windows) ? "Windows" : "Linux"
});

await SendTelemetryAsync(jsonPayload, cancellationToken).ConfigureAwait(false);
await SendTelemetryAsync(jsonPayload, cancellationToken).ConfigureAwait(true);
}

public async Task ReportHealthAsync(TelemetryData telemetryData, CancellationToken cancellationToken)
Expand All @@ -206,7 +206,7 @@ public async Task ReportHealthAsync(TelemetryData telemetryData, CancellationTok

string jsonPayload = JsonConvert.SerializeObject(telemetryData);

await SendTelemetryAsync(jsonPayload, cancellationToken).ConfigureAwait(false);
await SendTelemetryAsync(jsonPayload, cancellationToken).ConfigureAwait(true);
}

// TODO - Implement functions below as you need them.
Expand Down
4 changes: 2 additions & 2 deletions Documentation/Plugins.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,11 @@ Create a new .NET Standard 2.0 library project, install the nupkg you need for y
You can find the Microsoft-signed packages in the nuget.org gallery [here](https://www.nuget.org/profiles/ServiceFabricApps) or just run this in the package manager console:

```
Install-Package Microsoft.ServiceFabricApps.FabricObserver.Windows.SelfContained -Version 3.1.8
Install-Package Microsoft.ServiceFabricApps.FabricObserver.Windows.SelfContained -Version 3.1.12
or for Linux:
Install-Package Microsoft.ServiceFabricApps.FabricObserver.Linux.SelfContained -Version 3.1.8
Install-Package Microsoft.ServiceFabricApps.FabricObserver.Linux.SelfContained -Version 3.1.12
```

Note:
Expand Down
2 changes: 1 addition & 1 deletion Documentation/Using.md
Original file line number Diff line number Diff line change
Expand Up @@ -539,7 +539,7 @@ $appParams = @{ "FabricSystemObserverEnabled" = "true"; "FabricSystemObserverMem
Then execute the application upgrade with

```Powershell
Start-ServiceFabricApplicationUpgrade -ApplicationName fabric:/FabricObserver -ApplicationTypeVersion 3.1.8 -ApplicationParameter $appParams -Monitored -FailureAction rollback
Start-ServiceFabricApplicationUpgrade -ApplicationName fabric:/FabricObserver -ApplicationTypeVersion 3.1.12 -ApplicationParameter $appParams -Monitored -FailureAction rollback
```

Note: On *Linux*, this will restart FO processes (one at a time, UD Walk with safety checks) due to the way Linux Capabilities work. In a nutshell, for any kind of application upgrade, we have to re-run the FO setup script to get the Capabilities in place. For Windows, FO processes will NOT be restarted.
35 changes: 19 additions & 16 deletions FabricObserver.Extensibility/ObserverBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ public abstract class ObserverBase : IObserver
private const int TtlAddMinutes = 5;
private const string FabricSystemAppName = "fabric:/System";
private const int MaxDumps = 5;
private readonly Dictionary<string, int> serviceDumpCountDictionary = new Dictionary<string, int>();
private Dictionary<string, int> serviceDumpCountDictionary;
private string SFLogRoot;
private string dumpsPath;
private bool disposed;
Expand Down Expand Up @@ -225,7 +225,7 @@ public TimeSpan AsyncClusterOperationTimeoutSeconds

public int DataCapacity
{
get => ConfigurationSettings?.DataCapacity ?? 30;
get => ConfigurationSettings?.DataCapacity ?? 10;

set
{
Expand Down Expand Up @@ -575,13 +575,13 @@ e is UnauthorizedAccessException ||
/// <param name="replicaOrInstance">Replica or Instance information contained in a type.</param>
/// <param name="dumpOnError">Whether or not to dump process if Error threshold has been reached.</param>
public void ProcessResourceDataReportHealth<T>(
FabricResourceUsageData<T> data,
T thresholdError,
T thresholdWarning,
TimeSpan healthReportTtl,
HealthReportType healthReportType = HealthReportType.Node,
ReplicaOrInstanceMonitoringInfo replicaOrInstance = null,
bool dumpOnError = false) where T : struct
FabricResourceUsageData<T> data,
T thresholdError,
T thresholdWarning,
TimeSpan healthReportTtl,
HealthReportType healthReportType = HealthReportType.Node,
ReplicaOrInstanceMonitoringInfo replicaOrInstance = null,
bool dumpOnError = false) where T : struct
{
if (data == null)
{
Expand Down Expand Up @@ -664,7 +664,7 @@ public void ProcessResourceDataReportHealth<T>(
// Enable this for your observer if you want to send data to ApplicationInsights or LogAnalytics for each resource usage observation it makes per specified metric.
if (IsTelemetryEnabled)
{
_ = TelemetryClient?.ReportMetricAsync(telemetryData, Token).ConfigureAwait(false);
_ = TelemetryClient?.ReportMetricAsync(telemetryData, Token).ConfigureAwait(true);
}

// ETW - This is informational, per reading EventSource tracing, healthstate is irrelevant here.
Expand Down Expand Up @@ -719,7 +719,7 @@ public void ProcessResourceDataReportHealth<T>(

if (IsTelemetryEnabled)
{
_ = TelemetryClient?.ReportMetricAsync(telemetryData, Token).ConfigureAwait(false);
_ = TelemetryClient?.ReportMetricAsync(telemetryData, Token).ConfigureAwait(true);
}

if (IsEtwEnabled)
Expand Down Expand Up @@ -749,6 +749,11 @@ public void ProcessResourceDataReportHealth<T>(
// part of the base class for future use, like for FSO.
if (replicaOrInstance != null && dumpOnError && RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
{
if (serviceDumpCountDictionary == null)
{
serviceDumpCountDictionary = new Dictionary<string, int>(5);
}

try
{
int pid = (int)replicaOrInstance.HostProcessId;
Expand All @@ -775,9 +780,6 @@ public void ProcessResourceDataReportHealth<T>(
}
}
}

// Ignore these, it just means no dmp will be created.This is not
// critical to FO. Log as info, not warning.
catch (Exception e) when (e is ArgumentException || e is InvalidOperationException || e is Win32Exception)
{
ObserverLogger.LogInfo($"Unable to generate dmp file:{Environment.NewLine}{e}");
Expand Down Expand Up @@ -906,7 +908,7 @@ public void ProcessResourceDataReportHealth<T>(
// Send Health Report as Telemetry event (perhaps it signals an Alert from App Insights, for example.).
if (IsTelemetryEnabled)
{
_ = TelemetryClient?.ReportHealthAsync(telemetryData, Token).ConfigureAwait(false);
_ = TelemetryClient?.ReportHealthAsync(telemetryData, Token).ConfigureAwait(true);
}

// ETW.
Expand Down Expand Up @@ -965,6 +967,7 @@ public void ProcessResourceDataReportHealth<T>(

// Clean up sb.
_ = healthMessage.Clear();
healthMessage = null;
}
else
{
Expand Down Expand Up @@ -1046,8 +1049,8 @@ public void ProcessResourceDataReportHealth<T>(
if (data.Data is List<T> list)
{
// List<T> impl.
list.Clear();
list.TrimExcess();
list.Clear();
}
else
{
Expand Down
Loading

0 comments on commit 8bfd6d3

Please sign in to comment.