Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[LiveMetrics] report process metrics CPU Total and Committed Memory #42213

Merged
merged 13 commits into from
Mar 5, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

### Features Added

- Report metrics "CPU Total" and "Committed Memory".
TimothyMothra marked this conversation as resolved.
Show resolved Hide resolved
([#42213](https://github.com/Azure/azure-sdk-for-net/pull/42213))

### Breaking Changes

### Bugs Fixed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,5 +195,14 @@ public void DroppedDocument(DocumentIngressDocumentType documentType)

/// <summary>
/// Writes event 12 (Warning level), reporting that a document was dropped.
/// </summary>
/// <param name="documentType">Value substituted for <c>{0}</c> in the event message.</param>
[Event(12, Message = "Document was dropped. DocumentType: {0}. Not user actionable.", Level = EventLevel.Warning)]
public void DroppedDocument(string documentType) => WriteEvent(12, documentType);

/// <summary>
/// Writes event 13 (Error level), reporting that the CPU counter could not be calculated
/// because the elapsed time between two collections was negative.
/// </summary>
/// <param name="previousCollectedTime">Value substituted for <c>{0}</c> in the event message.</param>
/// <param name="recentCollectedTime">Value substituted for <c>{1}</c> in the event message.</param>
// The second placeholder must be {1}; using {0} twice would print the previous time for both values.
[Event(13, Message = "Failure to calculate CPU Counter. Unexpected negative timespan: PreviousCollectedTime: {0}. RecentCollectedTime: {1}. Not user actionable.", Level = EventLevel.Error)]
public void ProcessCountersUnexpectedNegativeTimeSpan(long previousCollectedTime, long recentCollectedTime) => WriteEvent(13, previousCollectedTime, recentCollectedTime);

/// <summary>
/// Writes event 14 (Error level), reporting that the CPU counter could not be calculated
/// because the difference between two collected values was negative.
/// </summary>
/// <param name="previousCollectedValue">Value substituted for <c>{0}</c> in the event message.</param>
/// <param name="recentCollectedValue">Value substituted for <c>{1}</c> in the event message.</param>
// The second placeholder must be {1}; using {0} twice would print the previous value for both fields.
[Event(14, Message = "Failure to calculate CPU Counter. Unexpected negative value: PreviousCollectedValue: {0}. RecentCollectedValue: {1}. Not user actionable.", Level = EventLevel.Error)]
public void ProcessCountersUnexpectedNegativeValue(long previousCollectedValue, long recentCollectedValue) => WriteEvent(14, previousCollectedValue, recentCollectedValue);

/// <summary>
/// Writes event 15 (Verbose level) with the inputs and results of a CPU counter calculation.
/// </summary>
/// <param name="period">Value substituted for <c>{0}</c> (Period) in the event message.</param>
/// <param name="diffValue">Value substituted for <c>{1}</c> (DiffValue) in the event message.</param>
/// <param name="calculatedValue">Value substituted for <c>{2}</c> (CalculatedValue) in the event message.</param>
/// <param name="processorCount">Value substituted for <c>{3}</c> (ProcessorCount) in the event message.</param>
/// <param name="normalizedValue">Value substituted for <c>{4}</c> (NormalizedValue) in the event message.</param>
[Event(15, Message = "Calculated Cpu Counter: Period: {0}. DiffValue: {1}. CalculatedValue: {2}. ProcessorCount: {3}. NormalizedValue: {4}", Level = EventLevel.Verbose)]
public void ProcessCountersCpuCounter(long period, long diffValue, double calculatedValue, int processorCount, double normalizedValue) => WriteEvent(15, period, diffValue, calculatedValue, processorCount, normalizedValue);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ internal static class MetricId
// EXCEPTIONS
internal const string ExceptionsPerSecondMetricIdValue = @"\ApplicationInsights\Exceptions/Sec";

// PERFORMANCE COUNTERS
// PROCESS METRICS
internal const string MemoryCommittedBytesMetricIdValue = @"\Memory\Committed Bytes";
internal const string ProcessorTimeMetricIdValue = @"\Processor(_Total)\% Processor Time";
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@ internal partial class Manager
internal readonly DoubleBuffer _documentBuffer = new();
internal static bool? s_isAzureWebApp = null;

//private readonly PerformanceCounter _performanceCounter_ProcessorTime = new(categoryName: "Processor", counterName: "% Processor Time", instanceName: "_Total");
//private readonly PerformanceCounter _performanceCounter_CommittedBytes = new(categoryName: "Memory", counterName: "Committed Bytes");
private readonly int _processorCount = Environment.ProcessorCount;
private DateTimeOffset _cachedCollectedTime = DateTimeOffset.MinValue;
private long _cachedCollectedValue = 0;

public MonitoringDataPoint GetDataPoint()
{
Expand Down Expand Up @@ -91,32 +92,47 @@ public MonitoringDataPoint GetDataPoint()
dataPoint.Metrics.Add(metricPoint);
}

// TODO: Reenable Perf Counters
//foreach (var metricPoint in CollectPerfCounters())
//{
// dataPoint.Metrics.Add(metricPoint);
//}
foreach (var metricPoint in CollectProcessMetrics())
{
dataPoint.Metrics.Add(metricPoint);
}

return dataPoint;
}

//public IEnumerable<Models.MetricPoint> CollectPerfCounters()
//{
// // PERFORMANCE COUNTERS
// yield return new Models.MetricPoint
// {
// Name = LiveMetricConstants.MetricId.MemoryCommittedBytesMetricIdValue,
// Value = _performanceCounter_CommittedBytes.NextValue(),
// Weight = 1
// };

// yield return new Models.MetricPoint
// {
// Name = LiveMetricConstants.MetricId.ProcessorTimeMetricIdValue,
// Value = _performanceCounter_ProcessorTime.NextValue(),
// Weight = 1
// };
//}
/// <summary>
/// Collect metrics for the current process.
/// </summary>
/// <remarks>
/// For Memory:
/// <see href="https://learn.microsoft.com/dotnet/api/system.diagnostics.process.privatememorysize64"/>.
/// "The amount of memory, in bytes, allocated for the associated process that cannot be shared with other processes.".
///
/// For CPU:
/// <see href="https://learn.microsoft.com/dotnet/api/system.diagnostics.process.totalprocessortime"/>.
/// "A TimeSpan that indicates the amount of time that the associated process has spent utilizing the CPU. This value is the sum of the UserProcessorTime and the PrivilegedProcessorTime.".
/// </remarks>
/// <returns>
/// A lazily evaluated sequence that always yields the committed memory metric and, when
/// <see cref="TryCalculateCPUCounter"/> succeeds, also yields the processor time metric.
/// </returns>
public IEnumerable<Models.MetricPoint> CollectProcessMetrics()
{
    // Process is IDisposable. The using declaration releases it when the iterator
    // runs to completion or when the enumerator is disposed (e.g. by foreach).
    using var process = Process.GetCurrentProcess();

    yield return new Models.MetricPoint
    {
        Name = LiveMetricConstants.MetricId.MemoryCommittedBytesMetricIdValue,
        Value = process.PrivateMemorySize64,
        Weight = 1
    };

    // The CPU metric is omitted when no value could be calculated (e.g. first sample after a reset).
    if (TryCalculateCPUCounter(process, out var processorValue))
    {
        yield return new Models.MetricPoint
        {
            Name = LiveMetricConstants.MetricId.ProcessorTimeMetricIdValue,
            Value = Convert.ToSingle(processorValue),
            Weight = 1
        };
    }
}

/// <summary>
/// Searches for the environment variable specific to Azure Web App.
Expand Down Expand Up @@ -149,5 +165,67 @@ public MonitoringDataPoint GetDataPoint()

return s_isAzureWebApp;
}

/// <summary>
/// Restores the cached CPU sample (collection time and collected value) to its initial state:
/// <see cref="DateTimeOffset.MinValue"/> and zero.
/// </summary>
private void ResetCachedValues()
{
    (_cachedCollectedTime, _cachedCollectedValue) = (DateTimeOffset.MinValue, 0L);
}

/// <summary>
/// Calculate the CPU usage as the diff between two ticks divided by the period of time, and then divided by the number of processors.
/// <code>((change in ticks / period) / number of processors)</code>
/// </summary>
/// <remarks>
/// The current sample (processor ticks and UTC time) is written to the cache before any
/// validation, so it becomes the baseline for the next call even when this call returns false.
/// </remarks>
/// <param name="process">The process whose <see cref="Process.TotalProcessorTime"/> is sampled.</param>
/// <param name="normalizedValue">
/// When this method returns true, the CPU usage percentage divided by the processor count;
/// otherwise the default value.
/// </param>
/// <returns>
/// False when there is no previous sample, or when the elapsed time or the tick difference is negative;
/// otherwise true.
/// </returns>
private bool TryCalculateCPUCounter(Process process, out double normalizedValue)
{
    var previousCollectedValue = _cachedCollectedValue;
    var previousCollectedTime = _cachedCollectedTime;

    var recentCollectedValue = _cachedCollectedValue = process.TotalProcessorTime.Ticks;
    var recentCollectedTime = _cachedCollectedTime = DateTimeOffset.UtcNow;

    // MinValue marks "no previous sample" (initial state or after a reset); nothing to diff against yet.
    if (previousCollectedTime == DateTimeOffset.MinValue)
    {
        Debug.WriteLine($"{nameof(TryCalculateCPUCounter)} DateTimeOffset.MinValue");
        normalizedValue = default;
        return false;
    }

    var period = recentCollectedTime.Ticks - previousCollectedTime.Ticks;
    if (period < 0)
    {
        // Not likely to happen but being safe here in case of clock issues in multi-core.
        LiveMetricsExporterEventSource.Log.ProcessCountersUnexpectedNegativeTimeSpan(
            previousCollectedTime: previousCollectedTime.Ticks,
            recentCollectedTime: recentCollectedTime.Ticks);
        Debug.WriteLine($"{nameof(TryCalculateCPUCounter)} period less than zero");
        normalizedValue = default;
        return false;
    }

    var diff = recentCollectedValue - previousCollectedValue;
    if (diff < 0)
    {
        LiveMetricsExporterEventSource.Log.ProcessCountersUnexpectedNegativeValue(
            previousCollectedValue: previousCollectedValue,
            recentCollectedValue: recentCollectedValue);
        Debug.WriteLine($"{nameof(TryCalculateCPUCounter)} diff less than zero");
        normalizedValue = default;
        return false;
    }

    // Avoid dividing by zero when both samples carry the same timestamp.
    period = period != 0 ? period : 1;
    double calculatedValue = diff * 100.0 / period;
    normalizedValue = calculatedValue / _processorCount;

    // Log the values actually used in the calculation (elapsed period and tick diff),
    // matching the "Period" / "DiffValue" labels in the event message.
    LiveMetricsExporterEventSource.Log.ProcessCountersCpuCounter(
        period: period,
        diffValue: diff,
        calculatedValue: calculatedValue,
        processorCount: _processorCount,
        normalizedValue: normalizedValue);
    return true;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ private void SetPingState()
// This is used in determining if we should Backoff.
// If we've been in another state for X amount of time, that may exceed our maximum interval and immediately trigger a Backoff.
_lastSuccessfulPing = DateTimeOffset.UtcNow;

// Must reset the metrics cache here.
ResetCachedValues();
}

private void SetPostState()
Expand Down