diff --git a/plugins/inputs/intel_powerstat/README.md b/plugins/inputs/intel_powerstat/README.md index db84ef73f6fae..1688fc9677231 100644 --- a/plugins/inputs/intel_powerstat/README.md +++ b/plugins/inputs/intel_powerstat/README.md @@ -1,10 +1,13 @@ # Intel PowerStat Input Plugin -This input plugin monitors power statistics on Intel-based platforms and assumes presence of Linux based OS. +This input plugin monitors power statistics on Intel-based platforms and assumes +presence of Linux based OS. -Main use cases are power saving and workload migration. Telemetry frameworks allow users to monitor critical platform level metrics. -Key source of platform telemetry is power domain that is beneficial for MANO/Monitoring&Analytics systems -to take preventive/corrective actions based on platform busyness, CPU temperature, actual CPU utilization and power statistics. +Main use cases are power saving and workload migration. Telemetry frameworks +allow users to monitor critical platform level metrics. Key source of platform +telemetry is power domain that is beneficial for MANO Monitoring&Analytics +systems to take preventive/corrective actions based on platform busyness, CPU +temperature, actual CPU utilization and power statistics. 
## Configuration @@ -16,7 +19,7 @@ to take preventive/corrective actions based on platform busyness, CPU temperatur ## - Setting this value to an empty array means no package metrics will be collected ## - Finally, a user can specify individual metrics to capture from the supported options list ## Supported options: - ## "current_power_consumption", "current_dram_power_consumption", "thermal_design_power", "max_turbo_frequency" + ## "current_power_consumption", "current_dram_power_consumption", "thermal_design_power", "max_turbo_frequency", "uncore_frequency" # package_metrics = ["current_power_consumption", "current_dram_power_consumption", "thermal_design_power"] ## The user can choose which per-CPU metrics are monitored by the plugin in cpu_metrics array. @@ -29,7 +32,8 @@ to take preventive/corrective actions based on platform busyness, CPU temperatur ## Example: Configuration with no per-CPU telemetry -This configuration allows getting default processor package specific metrics, no per-CPU metrics are collected: +This configuration allows getting default processor package specific metrics, no +per-CPU metrics are collected: ```toml [[inputs.intel_powerstat]] @@ -38,7 +42,8 @@ This configuration allows getting default processor package specific metrics, no ## Example: Configuration with no per-CPU telemetry - equivalent case -This configuration allows getting default processor package specific metrics, no per-CPU metrics are collected: +This configuration allows getting default processor package specific metrics, no +per-CPU metrics are collected: ```toml [[inputs.intel_powerstat]] @@ -46,7 +51,8 @@ This configuration allows getting default processor package specific metrics, no ## Example: Configuration for CPU Temperature and CPU Frequency -This configuration allows getting default processor package specific metrics, plus subset of per-CPU metrics (CPU Temperature and CPU Frequency): +This configuration allows getting default processor package specific 
metrics, +plus subset of per-CPU metrics (CPU Temperature and CPU Frequency): ```toml [[inputs.intel_powerstat]] @@ -55,7 +61,8 @@ This configuration allows getting default processor package specific metrics, pl ## Example: Configuration for CPU Temperature and CPU Frequency without default package metrics -This configuration allows getting only a subset of per-CPU metrics (CPU Temperature and CPU Frequency): +This configuration allows getting only a subset of per-CPU metrics (CPU +Temperature and CPU Frequency): ```toml [[inputs.intel_powerstat]] @@ -65,27 +72,33 @@ This configuration allows getting only a subset of per-CPU metrics (CPU Temperat ## Example: Configuration with all available metrics -This configuration allows getting all processor package specific metrics and all per-CPU metrics: +This configuration allows getting all processor package specific metrics and all +per-CPU metrics: ```toml [[inputs.intel_powerstat]] - package_metrics = ["current_power_consumption", "current_dram_power_consumption", "thermal_design_power", "max_turbo_frequency"] + package_metrics = ["current_power_consumption", "current_dram_power_consumption", "thermal_design_power", "max_turbo_frequency", "uncore_frequency"] cpu_metrics = ["cpu_frequency", "cpu_busy_frequency", "cpu_temperature", "cpu_c0_state_residency", "cpu_c1_state_residency", "cpu_c6_state_residency"] ``` ## SW Dependencies -Plugin is based on Linux Kernel modules that expose specific metrics over `sysfs` or `devfs` interfaces. -The following dependencies are expected by plugin: +Plugin is based on Linux Kernel modules that expose specific metrics over +`sysfs` or `devfs` interfaces. 
The following dependencies are expected by +plugin: - _intel-rapl_ module which exposes Intel Runtime Power Limiting metrics over `sysfs` (`/sys/devices/virtual/powercap/intel-rapl`), - _msr_ kernel module that provides access to processor model specific registers over `devfs` (`/dev/cpu/cpu%d/msr`), - _cpufreq_ kernel module - which exposes per-CPU Frequency over `sysfs` (`/sys/devices/system/cpu/cpu%d/cpufreq/scaling_cur_freq`). +- _intel-uncore-frequency_ module which exposes Intel uncore frequency metrics over `sysfs` (`/sys/devices/system/cpu/intel_uncore_frequency`). -Minimum kernel version required is 3.13 to satisfy all requirements. +Minimum kernel version required is 3.13 to satisfy most requirements; the +`uncore_frequency` metrics additionally require the `intel-uncore-frequency` +module (available since kernel 5.6). -Please make sure that kernel modules are loaded and running (cpufreq is integrated in kernel). Modules might have to be manually enabled by using `modprobe`. -Depending on the kernel version, run commands: +Please make sure that kernel modules are loaded and running (cpufreq is +integrated in kernel). Modules might have to be manually enabled by using +`modprobe`. 
Depending on the kernel version, run commands: ```sh # kernel 5.x.x: @@ -94,13 +107,17 @@ subo modprobe msr sudo modprobe intel_rapl_common sudo modprobe intel_rapl_msr +# also for kernel >= 5.6.0 +sudo modprobe intel-uncore-frequency + # kernel 4.x.x: sudo modprobe msr sudo modprobe intel_rapl ``` -**Telegraf with Intel PowerStat plugin enabled may require root access to read model specific registers (MSRs)** -to retrieve data for calculation of most critical per-CPU specific metrics: +**Telegraf with Intel PowerStat plugin enabled may require root access to read +model specific registers (MSRs)** to retrieve data for calculation of most +critical per-CPU specific metrics: - `cpu_busy_frequency_mhz` - `cpu_temperature_celsius` @@ -111,17 +128,20 @@ to retrieve data for calculation of most critical per-CPU specific metrics: and to retrieve data for calculation per-package specific metric: - `max_turbo_frequency_mhz` +- `uncore_frequency_mhz_cur` -To expose other Intel PowerStat metrics root access may or may not be required (depending on OS type or configuration). +To expose other Intel PowerStat metrics root access may or may not be required +(depending on OS type or configuration). ## HW Dependencies -Specific metrics require certain processor features to be present, otherwise Intel PowerStat plugin won't be able to -read them. When using Linux Kernel based OS, user can detect supported processor features reading `/proc/cpuinfo` file. +Specific metrics require certain processor features to be present, otherwise +Intel PowerStat plugin won't be able to read them. When using Linux Kernel based +OS, user can detect supported processor features reading `/proc/cpuinfo` file. Plugin assumes crucial properties are the same for all CPU cores in the system. The following processor properties are examined in more detail in this section: -processor _cpu family_, _model_ and _flags_. 
-The following processor properties are required by the plugin: +processor _cpu family_, _model_ and _flags_. The following processor properties +are required by the plugin: - Processor _cpu family_ must be Intel (0x6) - since data used by the plugin assumes Intel specific model specific registers for all features @@ -186,9 +206,11 @@ and _powerstat\_core.cpu\_c6\_state\_residency_ metrics: ## Metrics -All metrics collected by Intel PowerStat plugin are collected in fixed intervals. -Metrics that reports processor C-state residency or power are calculated over elapsed intervals. -When starting to measure metrics, plugin skips first iteration of metrics if they are based on deltas with previous value. +All metrics collected by Intel PowerStat plugin are collected in fixed +intervals. Metrics that reports processor C-state residency or power are +calculated over elapsed intervals. When starting to measure metrics, plugin +skips first iteration of metrics if they are based on deltas with previous +value. **The following measurements are supported by Intel PowerStat plugin:** @@ -225,6 +247,8 @@ When starting to measure metrics, plugin skips first iteration of metrics if the |-----|-------------| | `package_id` | ID of platform package/socket | | `active_cores`| Specific tag for `max_turbo_frequency_mhz` metric. The maximum number of activated cores for reachable turbo frequency + | `die`| Specific tag for all `uncore_frequency` metrics. Id of die + | `type`| Specific tag for all `uncore_frequency` metrics. Type of uncore frequency (current or initial) Measurement powerstat_package metrics are collected per processor package -_package_id_ tag indicates which package metric refers to. 
@@ -232,25 +256,32 @@ When starting to measure metrics, plugin skips first iteration of metrics if the | Metric name (field) | Description | Units | |-----|-------------|-----| - | `thermal_design_power_watts` | Maximum Thermal Design Power (TDP) available for processor package | Watts | + | `thermal_design_power_watts` | Maximum Thermal Design Power (TDP) available for processor package | Watts | | `current_power_consumption_watts` | Current power consumption of processor package | Watts | | `current_dram_power_consumption_watts` | Current power consumption of processor package DRAM subsystem | Watts | | `max_turbo_frequency_mhz`| Maximum reachable turbo frequency for number of cores active | MHz + | `uncore_frequency_limit_mhz_min`| Minimum uncore frequency limit for die in processor package | MHz + | `uncore_frequency_limit_mhz_max`| Maximum uncore frequency limit for die in processor package | MHz + | `uncore_frequency_mhz_cur`| Current uncore frequency for die in processor package. Available only with tag `current`. Since this value is not yet available from `intel-uncore-frequency` module it needs to be accessed via MSR. In case of lack of loaded msr, only `uncore_frequency_limit_mhz_min` and `uncore_frequency_limit_mhz_max` metrics will be collected | MHz ### Known issues -From linux kernel version v5.4.77 with [this kernel change](https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=v5.4.77&id=19f6d91bdad42200aac557a683c17b1f65ee6c94) -resources like `/sys/class/powercap/intel-rapl*/*/energy_uj` are readable only by root for security reasons, so this plugin needs root privileges to work properly. +From linux kernel version v5.4.77 with [this kernel change][19f6d91b] resources +like `/sys/class/powercap/intel-rapl*/*/energy_uj` are readable only by root for +security reasons, so this plugin needs root privileges to work properly. 
-If such strict security restrictions are not relevant, reading permissions to files in `/sys/devices/virtual/powercap/intel-rapl/` -directory can be manually changed for example with `chmod` command with custom parameters. -For example to give all users permission to all files in `intel-rapl` directory: +If such strict security restrictions are not relevant, reading permissions to +files in `/sys/devices/virtual/powercap/intel-rapl/` directory can be manually +changed for example with `chmod` command with custom parameters. For example to +give all users permission to all files in `intel-rapl` directory: ```bash sudo chmod -R a+rx /sys/devices/virtual/powercap/intel-rapl/ ``` -### Example Output +[19f6d91b]: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=v5.4.77&id=19f6d91bdad42200aac557a683c17b1f65ee6c94 + +## Example Output ```shell powerstat_package,host=ubuntu,package_id=0 thermal_design_power_watts=160 1606494744000000000 @@ -258,6 +289,8 @@ powerstat_package,host=ubuntu,package_id=0 current_power_consumption_watts=35 16 powerstat_package,host=ubuntu,package_id=0 current_dram_power_consumption_watts=13.94 1606494744000000000 powerstat_package,host=ubuntu,package_id=0,active_cores=0 max_turbo_frequency_mhz=3000i 1606494744000000000 powerstat_package,host=ubuntu,package_id=0,active_cores=1 max_turbo_frequency_mhz=2800i 1606494744000000000 +powerstat_package,die=0,host=ubuntu,package_id=0,type=initial uncore_frequency_limit_mhz_min=800,uncore_frequency_limit_mhz_max=2400 1606494744000000000 +powerstat_package,die=0,host=ubuntu,package_id=0,type=current uncore_frequency_mhz_cur=800i,uncore_frequency_limit_mhz_min=800,uncore_frequency_limit_mhz_max=2400 1606494744000000000 powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_frequency_mhz=1200.29 1606494744000000000 powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_temperature_celsius=34i 1606494744000000000 
powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_c6_state_residency_percent=92.52 1606494744000000000 diff --git a/plugins/inputs/intel_powerstat/file_mock_test.go b/plugins/inputs/intel_powerstat/file_mock_test.go index ccf67b601e5af..102142c2b3930 100644 --- a/plugins/inputs/intel_powerstat/file_mock_test.go +++ b/plugins/inputs/intel_powerstat/file_mock_test.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.10.0. DO NOT EDIT. +// Code generated by mockery v2.12.3. DO NOT EDIT. package intel_powerstat @@ -130,3 +130,18 @@ func (_m *mockFileService) readFileToFloat64(reader io.Reader) (float64, int64, return r0, r1, r2 } + +type newmockFileServiceT interface { + mock.TestingT + Cleanup(func()) +} + +// newmockFileService creates a new instance of mockFileService. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +func newmockFileService(t newmockFileServiceT) *mockFileService { + mock := &mockFileService{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} diff --git a/plugins/inputs/intel_powerstat/intel_powerstat.go b/plugins/inputs/intel_powerstat/intel_powerstat.go index 9ecbb0682d703..3868356697e7e 100644 --- a/plugins/inputs/intel_powerstat/intel_powerstat.go +++ b/plugins/inputs/intel_powerstat/intel_powerstat.go @@ -6,6 +6,7 @@ package intel_powerstat import ( _ "embed" + "errors" "fmt" "math/big" "strconv" @@ -33,6 +34,7 @@ const ( packageCurrentDramPowerConsumption = "current_dram_power_consumption" packageThermalDesignPower = "thermal_design_power" packageTurboLimit = "max_turbo_frequency" + packageUncoreFrequency = "uncore_frequency" percentageMultiplier = 100 ) @@ -57,6 +59,7 @@ type PowerStat struct { packageCurrentPowerConsumption bool packageCurrentDramPowerConsumption bool packageThermalDesignPower bool + packageUncoreFrequency bool cpuInfo map[string]*cpuInfo skipFirstIteration bool logOnce map[string]error @@ -76,10 +79,10 @@ func (p 
*PowerStat) Init() error { } // Initialize MSR service only when there is at least one metric enabled if p.cpuFrequency || p.cpuBusyFrequency || p.cpuTemperature || p.cpuC0StateResidency || p.cpuC1StateResidency || - p.cpuC6StateResidency || p.cpuBusyCycles || p.packageTurboLimit { + p.cpuC6StateResidency || p.cpuBusyCycles || p.packageTurboLimit || p.packageUncoreFrequency { p.msr = newMsrServiceWithFs(p.Log, p.fs) } - if p.packageCurrentPowerConsumption || p.packageCurrentDramPowerConsumption || p.packageThermalDesignPower || p.packageTurboLimit { + if p.packageCurrentPowerConsumption || p.packageCurrentDramPowerConsumption || p.packageThermalDesignPower || p.packageTurboLimit || p.packageUncoreFrequency { p.rapl = newRaplServiceWithFs(p.Log, p.fs) } @@ -97,7 +100,17 @@ func (p *PowerStat) Gather(acc telegraf.Accumulator) error { } if p.areCoreMetricsEnabled() { - p.addPerCoreMetrics(acc) + if p.msr.isMsrLoaded() { + p.logOnce["msr"] = nil + p.addPerCoreMetrics(acc) + } else { + err := errors.New("error while trying to read MSR (probably msr module was not loaded)") + if val := p.logOnce["msr"]; val == nil || val.Error() != err.Error() { + p.Log.Errorf("%v", err) + // Remember that specific error occurs to omit logging next time + p.logOnce["msr"] = err + } + } } // Gathering the first iteration of metrics was skipped for most of them because they are based on delta calculations @@ -109,25 +122,31 @@ func (p *PowerStat) Gather(acc telegraf.Accumulator) error { func (p *PowerStat) addGlobalMetrics(acc telegraf.Accumulator) { // Prepare RAPL data each gather because there is a possibility to disable rapl kernel module p.rapl.initializeRaplData() - for socketID := range p.rapl.getRaplData() { if p.packageTurboLimit { p.addTurboRatioLimit(socketID, acc) } + if p.packageUncoreFrequency { + die := maxDiePerSocket(socketID) + for actualDie := 0; actualDie < die; actualDie++ { + p.addUncoreFreq(socketID, strconv.Itoa(actualDie), acc) + } + } + err := 
p.rapl.retrieveAndCalculateData(socketID) if err != nil { // In case of an error skip calculating metrics for this socket - if val := p.logOnce[socketID]; val == nil || val.Error() != err.Error() { + if val := p.logOnce[socketID+"rapl"]; val == nil || val.Error() != err.Error() { p.Log.Errorf("error fetching rapl data for socket %s, err: %v", socketID, err) // Remember that specific error occurs for socketID to omit logging next time - p.logOnce[socketID] = err + p.logOnce[socketID+"rapl"] = err } continue } // If error stops occurring, clear logOnce indicator - p.logOnce[socketID] = nil + p.logOnce[socketID+"rapl"] = nil if p.packageThermalDesignPower { p.addThermalDesignPowerMetric(socketID, acc) } @@ -143,6 +162,84 @@ func (p *PowerStat) addGlobalMetrics(acc telegraf.Accumulator) { } } } +func maxDiePerSocket(_ string) int { + /* + TODO: + At the moment, linux does not distinguish between more dies per socket. + This piece of code will need to be upgraded in the future. + https://github.com/torvalds/linux/blob/v5.17/arch/x86/include/asm/topology.h#L153 + */ + return 1 +} + +func (p *PowerStat) addUncoreFreq(socketID string, die string, acc telegraf.Accumulator) { + err := checkFile("/sys/devices/system/cpu/intel_uncore_frequency") + if err != nil { + err := fmt.Errorf("error while checking existing intel_uncore_frequency (probably intel-uncore-frequency module was not loaded)") + if val := p.logOnce["intel_uncore_frequency"]; val == nil || val.Error() != err.Error() { + p.Log.Errorf("%v", err) + // Remember that specific error occurs to omit logging next time + p.logOnce["intel_uncore_frequency"] = err + } + return + } + p.logOnce["intel_uncore_frequency"] = nil + p.readUncoreFreq("initial", socketID, die, acc) + p.readUncoreFreq("current", socketID, die, acc) +} + +func (p *PowerStat) readUncoreFreq(typeFreq string, socketID string, die string, acc telegraf.Accumulator) { + fields := map[string]interface{}{} + cpuID := "" + if typeFreq == "current" { + if 
p.areCoreMetricsEnabled() && p.msr.isMsrLoaded() { + p.logOnce[socketID+"msr"] = nil + for _, v := range p.cpuInfo { + if v.physicalID == socketID { + cpuID = v.cpuID + } + } + if cpuID == "" { + p.Log.Debugf("error while reading socket ID") + return + } + actualUncoreFreq, err := p.msr.readSingleMsr(cpuID, "MSR_UNCORE_PERF_STATUS") + if err != nil { + p.Log.Debugf("error while reading MSR_UNCORE_PERF_STATUS: %v", err) + return + } + actualUncoreFreq = (actualUncoreFreq & 0x3F) * 100 + fields["uncore_frequency_mhz_cur"] = actualUncoreFreq + } else { + err := errors.New("error while trying to read MSR (probably msr module was not loaded), uncore_frequency_mhz_cur metric will not be collected") + if val := p.logOnce[socketID+"msr"]; val == nil || val.Error() != err.Error() { + p.Log.Errorf("%v", err) + // Remember that specific error occurs for socketID to omit logging next time + p.logOnce[socketID+"msr"] = err + } + } + } + initMinFreq, err := p.msr.retrieveUncoreFrequency(socketID, typeFreq, "min", die) + if err != nil { + p.Log.Errorf("error while retrieving minimum uncore frequency of the socket %s, err: %v", socketID, err) + return + } + initMaxFreq, err := p.msr.retrieveUncoreFrequency(socketID, typeFreq, "max", die) + if err != nil { + p.Log.Errorf("error while retrieving maximum uncore frequency of the socket %s, err: %v", socketID, err) + return + } + + tags := map[string]string{ + "package_id": socketID, + "type": typeFreq, + "die": die, + } + fields["uncore_frequency_limit_mhz_min"] = initMinFreq + fields["uncore_frequency_limit_mhz_max"] = initMaxFreq + + acc.AddGauge("powerstat_package", fields, tags) +} func (p *PowerStat) addThermalDesignPowerMetric(socketID string, acc telegraf.Accumulator) { maxPower, err := p.rapl.getConstraintMaxPowerWatts(socketID) @@ -579,6 +676,9 @@ func (p *PowerStat) parsePackageMetricsConfig() { if contains(p.PackageMetrics, packageThermalDesignPower) { p.packageThermalDesignPower = true } + if contains(p.PackageMetrics, 
packageUncoreFrequency) { + p.packageUncoreFrequency = true + } } func (p *PowerStat) parseCPUMetricsConfig() { @@ -693,6 +793,7 @@ func newPowerStat(fs fileService) *PowerStat { cpuTemperature: false, cpuBusyFrequency: false, packageTurboLimit: false, + packageUncoreFrequency: false, packageCurrentPowerConsumption: false, packageCurrentDramPowerConsumption: false, packageThermalDesignPower: false, diff --git a/plugins/inputs/intel_powerstat/intel_powerstat_test.go b/plugins/inputs/intel_powerstat/intel_powerstat_test.go index 5dd24087a754d..14a7f59b1b2bf 100644 --- a/plugins/inputs/intel_powerstat/intel_powerstat_test.go +++ b/plugins/inputs/intel_powerstat/intel_powerstat_test.go @@ -119,6 +119,7 @@ func TestGather(t *testing.T) { On("retrieveAndCalculateData", mock.Anything).Return(nil).Times(len(raplDataMap)). On("getConstraintMaxPowerWatts", mock.Anything).Return(546783852.3, nil) mockServices.msr.On("getCPUCoresData").Return(preparedCPUData). + On("isMsrLoaded", mock.Anything).Return(true). On("openAndReadMsr", mock.Anything).Return(nil). On("retrieveCPUFrequencyForCore", mock.Anything).Return(1200000.2, nil) @@ -227,6 +228,43 @@ func TestAddCPUFrequencyMetric(t *testing.T) { acc.AssertContainsTaggedFields(t, "powerstat_core", expectedMetric.fields, expectedMetric.tags) } +func TestReadUncoreFreq(t *testing.T) { + var acc testutil.Accumulator + cpuID := "0" + coreID := "0" + packageID := "0" + die := "0" + power, mockServices := getPowerWithMockedServices() + prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID) + preparedData := getPreparedCPUData([]string{cpuID}) + + mockServices.msr.On("getCPUCoresData").Return(preparedData) + + mockServices.msr.On("isMsrLoaded").Return(true) + + mockServices.msr.On("readSingleMsr", "0", "MSR_UNCORE_PERF_STATUS").Return(uint64(10), nil) + + mockServices.msr.On("retrieveUncoreFrequency", "0", "initial", "min", "0"). 
// getPowerUncoreFreqMetric builds the fields/tags pair a test expects for an
// uncore frequency metric.
//
// The tags always carry package_id, die and type. The fields always carry the
// min and max limits; the current frequency field is included only when
// typeFreq is "current".
func getPowerUncoreFreqMetric(typeFreq string, limitMin interface{}, limitMax interface{}, current interface{}, socketID string, die string) struct {
	fields map[string]interface{}
	tags   map[string]string
} {
	expected := struct {
		fields map[string]interface{}
		tags   map[string]string
	}{
		fields: map[string]interface{}{
			"uncore_frequency_limit_mhz_min": limitMin,
			"uncore_frequency_limit_mhz_max": limitMax,
		},
		tags: map[string]string{
			"package_id": socketID,
			"die":        die,
			"type":       typeFreq,
		},
	}

	// Only the "current" variant reports the live frequency.
	if typeFreq == "current" {
		expected.fields["uncore_frequency_mhz_cur"] = current
	}
	return expected
}
map[string]interface{} tags map[string]string diff --git a/plugins/inputs/intel_powerstat/msr.go b/plugins/inputs/intel_powerstat/msr.go index 46eacdc08366e..4a2ee1be611d5 100644 --- a/plugins/inputs/intel_powerstat/msr.go +++ b/plugins/inputs/intel_powerstat/msr.go @@ -20,6 +20,7 @@ const ( systemCPUPath = "/sys/devices/system/cpu/" cpuCurrentFreqPartialPath = "/sys/devices/system/cpu/cpu%s/cpufreq/scaling_cur_freq" msrPartialPath = "/dev/cpu/%s/msr" + uncoreFreqPath = "/sys/devices/system/cpu/intel_uncore_frequency/package_%s_die_%s/%s%s_freq_khz" c3StateResidencyLocation = 0x3FC c6StateResidencyLocation = 0x3FD c7StateResidencyLocation = 0x3FE @@ -32,14 +33,17 @@ const ( turboRatioLimit1Location = 0x1AE turboRatioLimit2Location = 0x1AF atomCoreTurboRatiosLocation = 0x66C + uncorePerfStatusLocation = 0x621 ) // msrService is responsible for interactions with MSR. type msrService interface { getCPUCoresData() map[string]*msrData retrieveCPUFrequencyForCore(core string) (float64, error) + retrieveUncoreFrequency(socketID string, typeFreq string, kind string, die string) (float64, error) openAndReadMsr(core string) error readSingleMsr(core string, msr string) (uint64, error) + isMsrLoaded() bool } type msrServiceImpl struct { @@ -53,6 +57,15 @@ func (m *msrServiceImpl) getCPUCoresData() map[string]*msrData { return m.cpuCoresData } +func (m *msrServiceImpl) isMsrLoaded() bool { + for cpuID := range m.getCPUCoresData() { + err := m.openAndReadMsr(cpuID) + if err == nil { + return true + } + } + return false +} func (m *msrServiceImpl) retrieveCPUFrequencyForCore(core string) (float64, error) { cpuFreqPath := fmt.Sprintf(cpuCurrentFreqPartialPath, core) err := checkFile(cpuFreqPath) @@ -69,6 +82,49 @@ func (m *msrServiceImpl) retrieveCPUFrequencyForCore(core string) (float64, erro return convertKiloHertzToMegaHertz(cpuFreq), err } +func (m *msrServiceImpl) retrieveUncoreFrequency(socketID string, typeFreq string, kind string, die string) (float64, error) { + 
uncoreFreqPath, err := createUncoreFreqPath(socketID, typeFreq, kind, die) + if err != nil { + return 0, fmt.Errorf("unable to create uncore freq read path for socketID %s, and frequency type %s err: %v", socketID, typeFreq, err) + } + err = checkFile(uncoreFreqPath) + if err != nil { + return 0, err + } + uncoreFreqFile, err := os.Open(uncoreFreqPath) + if err != nil { + return 0, fmt.Errorf("error opening uncore frequncy file on %s, err: %v", uncoreFreqPath, err) + } + defer uncoreFreqFile.Close() + + uncoreFreq, _, err := m.fs.readFileToFloat64(uncoreFreqFile) + return convertKiloHertzToMegaHertz(uncoreFreq), err +} + +func createUncoreFreqPath(socketID string, typeFreq string, kind string, die string) (string, error) { + if socketID >= "0" && socketID <= "9" { + socketID = fmt.Sprintf("0%s", socketID) + } + if die >= "0" && die <= "9" { + die = fmt.Sprintf("0%s", die) + } + var prefix string + + switch typeFreq { + case "initial": + prefix = "initial_" + case "current": + prefix = "" + default: + return "", fmt.Errorf("unknown frequency type %s, only 'initial' and 'current' are supported", typeFreq) + } + + if kind != "min" && kind != "max" { + return "", fmt.Errorf("unknown frequency type %s, only 'min' and 'max' are supported", kind) + } + return fmt.Sprintf(uncoreFreqPath, socketID, die, prefix, kind), nil +} + func (m *msrServiceImpl) openAndReadMsr(core string) error { path := fmt.Sprintf(msrPartialPath, core) err := checkFile(path) @@ -110,6 +166,8 @@ func (m *msrServiceImpl) readSingleMsr(core string, msr string) (uint64, error) msrAddress = turboRatioLimit2Location case "MSR_ATOM_CORE_TURBO_RATIOS": msrAddress = atomCoreTurboRatiosLocation + case "MSR_UNCORE_PERF_STATUS": + msrAddress = uncorePerfStatusLocation default: return 0, fmt.Errorf("incorect name of MSR %s", msr) } @@ -250,6 +308,5 @@ func newMsrServiceWithFs(logger telegraf.Logger, fs fileService) *msrServiceImpl msrService.msrOffsets = []int64{c3StateResidencyLocation, 
c6StateResidencyLocation, c7StateResidencyLocation, maximumFrequencyClockCountLocation, actualFrequencyClockCountLocation, timestampCounterLocation, throttleTemperatureLocation, temperatureLocation} - return msrService } diff --git a/plugins/inputs/intel_powerstat/msr_mock_test.go b/plugins/inputs/intel_powerstat/msr_mock_test.go index 67aebf2309fb8..3381132fa9fb9 100644 --- a/plugins/inputs/intel_powerstat/msr_mock_test.go +++ b/plugins/inputs/intel_powerstat/msr_mock_test.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.10.0. DO NOT EDIT. +// Code generated by mockery v2.12.3. DO NOT EDIT. package intel_powerstat @@ -9,6 +9,20 @@ type mockMsrService struct { mock.Mock } +// isMsrLoaded provides a mock function with given fields: +func (_m *mockMsrService) isMsrLoaded() bool { + ret := _m.Called() + + var r0 bool + if rf, ok := ret.Get(0).(func() bool); ok { + r0 = rf() + } else { + r0 = ret.Get(0).(bool) + } + + return r0 +} + // getCPUCoresData provides a mock function with given fields: func (_m *mockMsrService) getCPUCoresData() map[string]*msrData { ret := _m.Called() @@ -80,3 +94,39 @@ func (_m *mockMsrService) retrieveCPUFrequencyForCore(core string) (float64, err return r0, r1 } + +// retrieveUncoreFrequency provides a mock function with given fields: socketID, typeFreq, kind, die +func (_m *mockMsrService) retrieveUncoreFrequency(socketID string, typeFreq string, kind string, die string) (float64, error) { + ret := _m.Called(socketID, typeFreq, kind, die) + + var r0 float64 + if rf, ok := ret.Get(0).(func(string, string, string, string) float64); ok { + r0 = rf(socketID, typeFreq, kind, die) + } else { + r0 = ret.Get(0).(float64) + } + + var r1 error + if rf, ok := ret.Get(1).(func(string, string, string, string) error); ok { + r1 = rf(socketID, typeFreq, kind, die) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +type newmockMsrServiceT interface { + mock.TestingT + Cleanup(func()) +} + +// newmockMsrService creates a new instance of 
mockMsrService. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +func newmockMsrService(t newmockMsrServiceT) *mockMsrService { + mock := &mockMsrService{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} diff --git a/plugins/inputs/intel_powerstat/msr_test.go b/plugins/inputs/intel_powerstat/msr_test.go index 0fc59314b9abe..921e7f90e8f12 100644 --- a/plugins/inputs/intel_powerstat/msr_test.go +++ b/plugins/inputs/intel_powerstat/msr_test.go @@ -93,6 +93,60 @@ func TestReadValueFromFileAtOffset(t *testing.T) { require.Equal(t, zero, <-testChannel) } +func TestCreateUncoreFreqPath(t *testing.T) { + path, err := createUncoreFreqPath("0", "initial", "min", "0") + expectedPath := "/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/initial_min_freq_khz" + require.Equal(t, nil, err) + require.Equal(t, expectedPath, path) + + path, err = createUncoreFreqPath("0", "initial", "max", "0") + expectedPath = "/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/initial_max_freq_khz" + require.Equal(t, nil, err) + require.Equal(t, expectedPath, path) + + path, err = createUncoreFreqPath("0", "current", "min", "0") + expectedPath = "/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/min_freq_khz" + require.Equal(t, nil, err) + require.Equal(t, expectedPath, path) + + path, err = createUncoreFreqPath("0", "current", "max", "0") + expectedPath = "/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/max_freq_khz" + require.Equal(t, nil, err) + require.Equal(t, expectedPath, path) + + path, err = createUncoreFreqPath("9", "current", "max", "0") + expectedPath = "/sys/devices/system/cpu/intel_uncore_frequency/package_09_die_00/max_freq_khz" + require.Equal(t, nil, err) + require.Equal(t, expectedPath, path) + + path, err = createUncoreFreqPath("99", "current", "max", "0") + expectedPath = 
"/sys/devices/system/cpu/intel_uncore_frequency/package_99_die_00/max_freq_khz" + require.Equal(t, nil, err) + require.Equal(t, expectedPath, path) + + path, err = createUncoreFreqPath("0", "current", "max", "9") + expectedPath = "/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_09/max_freq_khz" + require.Equal(t, nil, err) + require.Equal(t, expectedPath, path) + + path, err = createUncoreFreqPath("0", "current", "max", "99") + expectedPath = "/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_99/max_freq_khz" + require.Equal(t, nil, err) + require.Equal(t, expectedPath, path) + + path, err = createUncoreFreqPath("0", "foo", "max", "0") + expectedPath = "" + expectedError := errors.New("unknown frequency type foo, only 'initial' and 'current' are supported") + require.Equal(t, expectedError, err) + require.Equal(t, expectedPath, path) + + path, err = createUncoreFreqPath("0", "current", "bar", "0") + expectedPath = "" + expectedError = errors.New("unknown frequency type bar, only 'min' and 'max' are supported") + require.Equal(t, expectedError, err) + require.Equal(t, expectedPath, path) +} + func prepareTestData(fsMock *mockFileService, cores []string, msr *msrServiceImpl, t *testing.T) { // Prepare MSR offsets and CPUCoresData for test. fsMock.On("getStringsMatchingPatternOnPath", mock.Anything). diff --git a/plugins/inputs/intel_powerstat/rapl_mock_test.go b/plugins/inputs/intel_powerstat/rapl_mock_test.go index 0cf86ce138cf4..fde381aa6c49a 100644 --- a/plugins/inputs/intel_powerstat/rapl_mock_test.go +++ b/plugins/inputs/intel_powerstat/rapl_mock_test.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.10.0. DO NOT EDIT. +// Code generated by mockery v2.12.3. DO NOT EDIT. 
package intel_powerstat @@ -64,3 +64,18 @@ func (_m *mockRaplService) retrieveAndCalculateData(socketID string) error { return r0 } + +type newmockRaplServiceT interface { + mock.TestingT + Cleanup(func()) +} + +// newmockRaplService creates a new instance of mockRaplService. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +func newmockRaplService(t newmockRaplServiceT) *mockRaplService { + mock := &mockRaplService{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} diff --git a/plugins/inputs/intel_powerstat/sample.conf b/plugins/inputs/intel_powerstat/sample.conf index 93e63fcf8fe7b..09a5d19cdbf00 100644 --- a/plugins/inputs/intel_powerstat/sample.conf +++ b/plugins/inputs/intel_powerstat/sample.conf @@ -5,7 +5,7 @@ ## - Setting this value to an empty array means no package metrics will be collected ## - Finally, a user can specify individual metrics to capture from the supported options list ## Supported options: - ## "current_power_consumption", "current_dram_power_consumption", "thermal_design_power", "max_turbo_frequency" + ## "current_power_consumption", "current_dram_power_consumption", "thermal_design_power", "max_turbo_frequency", "uncore_frequency" # package_metrics = ["current_power_consumption", "current_dram_power_consumption", "thermal_design_power"] ## The user can choose which per-CPU metrics are monitored by the plugin in cpu_metrics array.