Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- https://learn.microsoft.com/dotnet/fundamentals/package-validation/diagnostic-ids -->
<!--
  Baseline API-compatibility suppressions for Microsoft.Extensions.Diagnostics.ResourceMonitoring.dll.
  Every entry below suppresses diagnostic CP0002 for one property accessor on
  ResourceMonitoringOptions, repeated once per target framework folder
  (lib/net462, lib/net8.0, lib/net9.0). Left and Right name the same assembly path in each entry,
  and IsBaselineSuppression is true throughout.
  NOTE(review): CP0002 is understood to be the "member is missing" diagnostic; confirm against the
  diagnostic-id list linked above before adding or removing entries.
-->
<Suppressions xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<!-- ResourceMonitoringOptions.UseDeltaNrPeriodsForCpuCalculation: getter and setter, for net462, net8.0 and net9.0. -->
<Suppression>
<DiagnosticId>CP0002</DiagnosticId>
<Target>M:Microsoft.Extensions.Diagnostics.ResourceMonitoring.ResourceMonitoringOptions.get_UseDeltaNrPeriodsForCpuCalculation</Target>
<Left>lib/net462/Microsoft.Extensions.Diagnostics.ResourceMonitoring.dll</Left>
<Right>lib/net462/Microsoft.Extensions.Diagnostics.ResourceMonitoring.dll</Right>
<IsBaselineSuppression>true</IsBaselineSuppression>
</Suppression>
<Suppression>
<DiagnosticId>CP0002</DiagnosticId>
<Target>M:Microsoft.Extensions.Diagnostics.ResourceMonitoring.ResourceMonitoringOptions.set_UseDeltaNrPeriodsForCpuCalculation(System.Boolean)</Target>
<Left>lib/net462/Microsoft.Extensions.Diagnostics.ResourceMonitoring.dll</Left>
<Right>lib/net462/Microsoft.Extensions.Diagnostics.ResourceMonitoring.dll</Right>
<IsBaselineSuppression>true</IsBaselineSuppression>
</Suppression>
<Suppression>
<DiagnosticId>CP0002</DiagnosticId>
<Target>M:Microsoft.Extensions.Diagnostics.ResourceMonitoring.ResourceMonitoringOptions.get_UseDeltaNrPeriodsForCpuCalculation</Target>
<Left>lib/net8.0/Microsoft.Extensions.Diagnostics.ResourceMonitoring.dll</Left>
<Right>lib/net8.0/Microsoft.Extensions.Diagnostics.ResourceMonitoring.dll</Right>
<IsBaselineSuppression>true</IsBaselineSuppression>
</Suppression>
<Suppression>
<DiagnosticId>CP0002</DiagnosticId>
<Target>M:Microsoft.Extensions.Diagnostics.ResourceMonitoring.ResourceMonitoringOptions.set_UseDeltaNrPeriodsForCpuCalculation(System.Boolean)</Target>
<Left>lib/net8.0/Microsoft.Extensions.Diagnostics.ResourceMonitoring.dll</Left>
<Right>lib/net8.0/Microsoft.Extensions.Diagnostics.ResourceMonitoring.dll</Right>
<IsBaselineSuppression>true</IsBaselineSuppression>
</Suppression>
<Suppression>
<DiagnosticId>CP0002</DiagnosticId>
<Target>M:Microsoft.Extensions.Diagnostics.ResourceMonitoring.ResourceMonitoringOptions.get_UseDeltaNrPeriodsForCpuCalculation</Target>
<Left>lib/net9.0/Microsoft.Extensions.Diagnostics.ResourceMonitoring.dll</Left>
<Right>lib/net9.0/Microsoft.Extensions.Diagnostics.ResourceMonitoring.dll</Right>
<IsBaselineSuppression>true</IsBaselineSuppression>
</Suppression>
<Suppression>
<DiagnosticId>CP0002</DiagnosticId>
<Target>M:Microsoft.Extensions.Diagnostics.ResourceMonitoring.ResourceMonitoringOptions.set_UseDeltaNrPeriodsForCpuCalculation(System.Boolean)</Target>
<Left>lib/net9.0/Microsoft.Extensions.Diagnostics.ResourceMonitoring.dll</Left>
<Right>lib/net9.0/Microsoft.Extensions.Diagnostics.ResourceMonitoring.dll</Right>
<IsBaselineSuppression>true</IsBaselineSuppression>
</Suppression>

<!-- ResourceMonitoringOptions.CalculateCpuUsageWithoutHostDelta: getter and setter, for net462, net8.0 and net9.0. -->
<Suppression>
<DiagnosticId>CP0002</DiagnosticId>
<Target>M:Microsoft.Extensions.Diagnostics.ResourceMonitoring.ResourceMonitoringOptions.get_CalculateCpuUsageWithoutHostDelta</Target>
<Left>lib/net462/Microsoft.Extensions.Diagnostics.ResourceMonitoring.dll</Left>
<Right>lib/net462/Microsoft.Extensions.Diagnostics.ResourceMonitoring.dll</Right>
<IsBaselineSuppression>true</IsBaselineSuppression>
</Suppression>
<Suppression>
<DiagnosticId>CP0002</DiagnosticId>
<Target>M:Microsoft.Extensions.Diagnostics.ResourceMonitoring.ResourceMonitoringOptions.set_CalculateCpuUsageWithoutHostDelta(System.Boolean)</Target>
<Left>lib/net462/Microsoft.Extensions.Diagnostics.ResourceMonitoring.dll</Left>
<Right>lib/net462/Microsoft.Extensions.Diagnostics.ResourceMonitoring.dll</Right>
<IsBaselineSuppression>true</IsBaselineSuppression>
</Suppression>
<Suppression>
<DiagnosticId>CP0002</DiagnosticId>
<Target>M:Microsoft.Extensions.Diagnostics.ResourceMonitoring.ResourceMonitoringOptions.get_CalculateCpuUsageWithoutHostDelta</Target>
<Left>lib/net8.0/Microsoft.Extensions.Diagnostics.ResourceMonitoring.dll</Left>
<Right>lib/net8.0/Microsoft.Extensions.Diagnostics.ResourceMonitoring.dll</Right>
<IsBaselineSuppression>true</IsBaselineSuppression>
</Suppression>
<Suppression>
<DiagnosticId>CP0002</DiagnosticId>
<Target>M:Microsoft.Extensions.Diagnostics.ResourceMonitoring.ResourceMonitoringOptions.set_CalculateCpuUsageWithoutHostDelta(System.Boolean)</Target>
<Left>lib/net8.0/Microsoft.Extensions.Diagnostics.ResourceMonitoring.dll</Left>
<Right>lib/net8.0/Microsoft.Extensions.Diagnostics.ResourceMonitoring.dll</Right>
<IsBaselineSuppression>true</IsBaselineSuppression>
</Suppression>
<Suppression>
<DiagnosticId>CP0002</DiagnosticId>
<Target>M:Microsoft.Extensions.Diagnostics.ResourceMonitoring.ResourceMonitoringOptions.get_CalculateCpuUsageWithoutHostDelta</Target>
<Left>lib/net9.0/Microsoft.Extensions.Diagnostics.ResourceMonitoring.dll</Left>
<Right>lib/net9.0/Microsoft.Extensions.Diagnostics.ResourceMonitoring.dll</Right>
<IsBaselineSuppression>true</IsBaselineSuppression>
</Suppression>
<Suppression>
<DiagnosticId>CP0002</DiagnosticId>
<Target>M:Microsoft.Extensions.Diagnostics.ResourceMonitoring.ResourceMonitoringOptions.set_CalculateCpuUsageWithoutHostDelta(System.Boolean)</Target>
<Left>lib/net9.0/Microsoft.Extensions.Diagnostics.ResourceMonitoring.dll</Left>
<Right>lib/net9.0/Microsoft.Extensions.Diagnostics.ResourceMonitoring.dll</Right>
<IsBaselineSuppression>true</IsBaselineSuppression>
</Suppression>
</Suppressions>
Original file line number Diff line number Diff line change
Expand Up @@ -17,23 +17,16 @@ internal sealed class LinuxUtilizationProvider : ISnapshotProvider
{
private const double One = 1.0;
private const long Hundred = 100L;
private const double CpuLimitThreshold110Percent = 1.1;

// Meters to track CPU utilization threshold exceedances
private readonly Counter<long>? _cpuUtilizationLimit100PercentExceededCounter;
private readonly Counter<long>? _cpuUtilizationLimit110PercentExceededCounter;

private readonly bool _useDeltaNrPeriods;
private readonly object _cpuLocker = new();
private readonly object _memoryLocker = new();
private readonly ILogger<LinuxUtilizationProvider> _logger;
private readonly ILinuxUtilizationParser _parser;
private readonly ulong _memoryLimit;
private readonly long _cpuPeriodsInterval;
private readonly TimeSpan _cpuRefreshInterval;
private readonly TimeSpan _memoryRefreshInterval;
private readonly TimeProvider _timeProvider;
private readonly double _scaleRelativeToCpuLimit;
private readonly double _scaleRelativeToCpuRequest;
private readonly double _scaleRelativeToCpuRequestForTrackerApi;

private readonly TimeSpan _retryInterval = TimeSpan.FromMinutes(5);
Expand All @@ -42,18 +35,11 @@ internal sealed class LinuxUtilizationProvider : ISnapshotProvider

private DateTimeOffset _refreshAfterCpu;
private DateTimeOffset _refreshAfterMemory;

// Track the actual timestamp when we read CPU values
private DateTimeOffset _lastCpuMeasurementTime;

private double _cpuPercentage = double.NaN;
private double _lastCpuCoresUsed = double.NaN;
private double _memoryPercentage;
private long _previousCgroupCpuTime;
private long _previousHostCpuTime;
private long _cpuUtilizationLimit100PercentExceeded;
private long _cpuUtilizationLimit110PercentExceeded;
private long _cpuPeriodsInterval;
private long _previousCgroupCpuPeriodCounter;
public SystemResources Resources { get; }

Expand All @@ -66,7 +52,6 @@ public LinuxUtilizationProvider(IOptions<ResourceMonitoringOptions> options, ILi
DateTimeOffset now = _timeProvider.GetUtcNow();
_cpuRefreshInterval = options.Value.CpuConsumptionRefreshInterval;
_memoryRefreshInterval = options.Value.MemoryConsumptionRefreshInterval;
_useDeltaNrPeriods = options.Value.UseDeltaNrPeriodsForCpuCalculation;
_refreshAfterCpu = now;
_refreshAfterMemory = now;
_memoryLimit = _parser.GetAvailableMemoryInBytes();
Expand All @@ -76,8 +61,8 @@ public LinuxUtilizationProvider(IOptions<ResourceMonitoringOptions> options, ILi
float hostCpus = _parser.GetHostCpuCount();
float cpuLimit = _parser.GetCgroupLimitedCpus();
float cpuRequest = _parser.GetCgroupRequestCpu();
_scaleRelativeToCpuLimit = hostCpus / cpuLimit;
_scaleRelativeToCpuRequest = hostCpus / cpuRequest;
float scaleRelativeToCpuLimit = hostCpus / cpuLimit;
float scaleRelativeToCpuRequest = hostCpus / cpuRequest;
_scaleRelativeToCpuRequestForTrackerApi = hostCpus; // the division by cpuRequest is performed later on in the ResourceUtilization class

#pragma warning disable CA2000 // Dispose objects before losing scope
Expand All @@ -87,46 +72,40 @@ public LinuxUtilizationProvider(IOptions<ResourceMonitoringOptions> options, ILi
var meter = meterFactory.Create(ResourceUtilizationInstruments.MeterName);
#pragma warning restore CA2000 // Dispose objects before losing scope

if (options.Value.CalculateCpuUsageWithoutHostDelta)
if (options.Value.UseLinuxCalculationV2)
{
cpuLimit = _parser.GetCgroupLimitV2();

// Try to get the CPU request from cgroup
cpuRequest = _parser.GetCgroupRequestCpuV2();

// Get Cpu periods interval from cgroup
_cpuPeriodsInterval = _parser.GetCgroupPeriodsIntervalInMicroSecondsV2();
(_previousCgroupCpuTime, _previousCgroupCpuPeriodCounter) = _parser.GetCgroupCpuUsageInNanosecondsAndCpuPeriodsV2();

// Initialize the counters
_cpuUtilizationLimit100PercentExceededCounter = meter.CreateCounter<long>("cpu_utilization_limit_100_percent_exceeded");
_cpuUtilizationLimit110PercentExceededCounter = meter.CreateCounter<long>("cpu_utilization_limit_110_percent_exceeded");

_ = meter.CreateObservableGauge(
ResourceUtilizationInstruments.ContainerCpuLimitUtilization,
() => GetMeasurementWithRetry(() => CpuUtilizationLimit(cpuLimit)),
"1");

_ = meter.CreateObservableGauge(
name: ResourceUtilizationInstruments.ContainerCpuRequestUtilization,
observeValues: () => GetMeasurementWithRetry(() => CpuUtilizationWithoutHostDelta() / cpuRequest),
observeValues: () => GetMeasurementWithRetry(() => CpuUtilizationRequest(cpuRequest)),
unit: "1");
}
else
{
_ = meter.CreateObservableGauge(
name: ResourceUtilizationInstruments.ContainerCpuLimitUtilization,
observeValues: () => GetMeasurementWithRetry(() => CpuUtilization() * _scaleRelativeToCpuLimit),
observeValues: () => GetMeasurementWithRetry(() => CpuUtilization() * scaleRelativeToCpuLimit),
unit: "1");

_ = meter.CreateObservableGauge(
name: ResourceUtilizationInstruments.ContainerCpuRequestUtilization,
observeValues: () => GetMeasurementWithRetry(() => CpuUtilization() * _scaleRelativeToCpuRequest),
observeValues: () => GetMeasurementWithRetry(() => CpuUtilization() * scaleRelativeToCpuRequest),
unit: "1");

_ = meter.CreateObservableGauge(
name: ResourceUtilizationInstruments.ProcessCpuUtilization,
observeValues: () => GetMeasurementWithRetry(() => CpuUtilization() * _scaleRelativeToCpuRequest),
observeValues: () => GetMeasurementWithRetry(() => CpuUtilization() * scaleRelativeToCpuRequest),
unit: "1");
}

Expand All @@ -148,10 +127,9 @@ public LinuxUtilizationProvider(IOptions<ResourceMonitoringOptions> options, ILi
_logger.SystemResourcesInfo(cpuLimit, cpuRequest, _memoryLimit, _memoryLimit);
}

public double CpuUtilizationWithoutHostDelta()
public double CpuUtilizationV2()
{
DateTimeOffset now = _timeProvider.GetUtcNow();
double actualElapsedNanoseconds = (now - _lastCpuMeasurementTime).TotalNanoseconds;
lock (_cpuLocker)
{
if (now < _refreshAfterCpu)
Expand All @@ -160,79 +138,34 @@ public double CpuUtilizationWithoutHostDelta()
}
}

var (cpuUsageTime, cpuPeriodCounter) = _parser.GetCgroupCpuUsageInNanosecondsAndCpuPeriodsV2();
(long cpuUsageTime, long cpuPeriodCounter) = _parser.GetCgroupCpuUsageInNanosecondsAndCpuPeriodsV2();
lock (_cpuLocker)
{
if (now >= _refreshAfterCpu)
if (now < _refreshAfterCpu)
{
long deltaCgroup = cpuUsageTime - _previousCgroupCpuTime;
double coresUsed;

if (_useDeltaNrPeriods)
{
long deltaPeriodCount = cpuPeriodCounter - _previousCgroupCpuPeriodCounter;
long deltaCpuPeriodInNanoseconds = deltaPeriodCount * _cpuPeriodsInterval * 1000;

if (deltaCgroup > 0 && deltaPeriodCount > 0)
{
coresUsed = deltaCgroup / (double)deltaCpuPeriodInNanoseconds;

_logger.CpuUsageDataV2(cpuUsageTime, _previousCgroupCpuTime, deltaCpuPeriodInNanoseconds, coresUsed);

_lastCpuCoresUsed = coresUsed;
_refreshAfterCpu = now.Add(_cpuRefreshInterval);
_previousCgroupCpuTime = cpuUsageTime;
_previousCgroupCpuPeriodCounter = cpuPeriodCounter;
}
}
else
{
if (deltaCgroup > 0)
{
coresUsed = deltaCgroup / actualElapsedNanoseconds;

_logger.CpuUsageDataV2(cpuUsageTime, _previousCgroupCpuTime, actualElapsedNanoseconds, coresUsed);

_lastCpuCoresUsed = coresUsed;
_refreshAfterCpu = now.Add(_cpuRefreshInterval);
_previousCgroupCpuTime = cpuUsageTime;

// Update the timestamp for next calculation
_lastCpuMeasurementTime = now;
}
}
return _lastCpuCoresUsed;
}
}

return _lastCpuCoresUsed;
}
long deltaCgroup = cpuUsageTime - _previousCgroupCpuTime;
long deltaPeriodCount = cpuPeriodCounter - _previousCgroupCpuPeriodCounter;

/// <summary>
/// Calculates CPU utilization relative to the CPU limit.
/// </summary>
/// <param name="cpuLimit">The CPU limit to use for the calculation.</param>
/// <returns>CPU usage as a ratio of the limit.</returns>
public double CpuUtilizationLimit(float cpuLimit)
{
double utilization = CpuUtilizationWithoutHostDelta() / cpuLimit;
if (deltaCgroup <= 0 || deltaPeriodCount <= 0)
{
return _lastCpuCoresUsed;
}

// Increment counter if utilization exceeds 1 (100%)
if (utilization > 1.0)
{
_cpuUtilizationLimit100PercentExceededCounter?.Add(1);
_cpuUtilizationLimit100PercentExceeded++;
_logger.CounterMessage100(_cpuUtilizationLimit100PercentExceeded);
}
long deltaCpuPeriodInNanoseconds = deltaPeriodCount * _cpuPeriodsInterval * 1000;
double coresUsed = deltaCgroup / (double)deltaCpuPeriodInNanoseconds;

// Increment counter if utilization exceeds 110%
if (utilization > CpuLimitThreshold110Percent)
{
_cpuUtilizationLimit110PercentExceededCounter?.Add(1);
_cpuUtilizationLimit110PercentExceeded++;
_logger.CounterMessage110(_cpuUtilizationLimit110PercentExceeded);
_logger.CpuUsageDataV2(cpuUsageTime, _previousCgroupCpuTime, deltaCpuPeriodInNanoseconds, coresUsed);

_lastCpuCoresUsed = coresUsed;
_refreshAfterCpu = now.Add(_cpuRefreshInterval);
_previousCgroupCpuTime = cpuUsageTime;
_previousCgroupCpuPeriodCounter = cpuPeriodCounter;
}

return utilization;
return _lastCpuCoresUsed;
}

public double CpuUtilization()
Expand All @@ -252,23 +185,27 @@ public double CpuUtilization()

lock (_cpuLocker)
{
if (now >= _refreshAfterCpu)
if (now < _refreshAfterCpu)
{
long deltaHost = hostCpuTime - _previousHostCpuTime;
long deltaCgroup = cgroupCpuTime - _previousCgroupCpuTime;

if (deltaHost > 0 && deltaCgroup > 0)
{
double percentage = Math.Min(One, (double)deltaCgroup / deltaHost);
return _cpuPercentage;
}

_logger.CpuUsageData(cgroupCpuTime, hostCpuTime, _previousCgroupCpuTime, _previousHostCpuTime, percentage);
long deltaHost = hostCpuTime - _previousHostCpuTime;
long deltaCgroup = cgroupCpuTime - _previousCgroupCpuTime;

_cpuPercentage = percentage;
_refreshAfterCpu = now.Add(_cpuRefreshInterval);
_previousCgroupCpuTime = cgroupCpuTime;
_previousHostCpuTime = hostCpuTime;
}
if (deltaHost <= 0 || deltaCgroup <= 0)
{
return _cpuPercentage;
}

double percentage = Math.Min(One, (double)deltaCgroup / deltaHost);

_logger.CpuUsageData(cgroupCpuTime, hostCpuTime, _previousCgroupCpuTime, _previousHostCpuTime, percentage);

_cpuPercentage = percentage;
_refreshAfterCpu = now.Add(_cpuRefreshInterval);
_previousCgroupCpuTime = cgroupCpuTime;
_previousHostCpuTime = hostCpuTime;
}

return _cpuPercentage;
Expand Down Expand Up @@ -351,4 +288,9 @@ ex is System.IO.DirectoryNotFoundException ||
return Enumerable.Empty<Measurement<double>>();
}
}

// Math.Min() is used below to mitigate margin errors and various kinds of precision loss,
// because the calculation itself is not an atomic operation:
/// <summary>
/// Divides the value returned by <see cref="CpuUtilizationV2"/> by the CPU request and
/// never reports more than <see cref="One"/>.
/// </summary>
/// <param name="cpuRequest">Divisor applied to the value returned by <see cref="CpuUtilizationV2"/>.</param>
/// <returns>The result of <c>Math.Min</c> applied to <see cref="One"/> and the computed quotient.</returns>
private double CpuUtilizationRequest(double cpuRequest)
{
    double ratioToRequest = CpuUtilizationV2() / cpuRequest;
    return Math.Min(One, ratioToRequest);
}
/// <summary>
/// Divides the value returned by <see cref="CpuUtilizationV2"/> by the CPU limit and
/// never reports more than <see cref="One"/>.
/// </summary>
/// <param name="cpuLimit">Divisor applied to the value returned by <see cref="CpuUtilizationV2"/>.</param>
/// <returns>The result of <c>Math.Min</c> applied to <see cref="One"/> and the computed quotient.</returns>
private double CpuUtilizationLimit(double cpuLimit)
{
    double ratioToLimit = CpuUtilizationV2() / cpuLimit;
    return Math.Min(One, ratioToLimit);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -50,19 +50,7 @@ public static partial void CpuUsageDataV2(
double actualElapsedNanoseconds,
double cpuCores);

[LoggerMessage(5, LogLevel.Debug,
"CPU utilization exceeded 100%: Counter = {counterValue}")]
public static partial void CounterMessage100(
this ILogger logger,
long counterValue);

[LoggerMessage(6, LogLevel.Debug,
"CPU utilization exceeded 110%: Counter = {counterValue}")]
public static partial void CounterMessage110(
this ILogger logger,
long counterValue);

[LoggerMessage(7, LogLevel.Warning,
[LoggerMessage(5, LogLevel.Warning,
"Error while getting disk stats: Error={errorMessage}")]
public static partial void HandleDiskStatsException(
this ILogger logger,
Expand Down
Loading
Loading