diff --git a/cmd/nic-feature-discovery/app/app.go b/cmd/nic-feature-discovery/app/app.go index 18bd7c8..4921689 100644 --- a/cmd/nic-feature-discovery/app/app.go +++ b/cmd/nic-feature-discovery/app/app.go @@ -19,6 +19,7 @@ package app import ( "context" "fmt" + "path/filepath" "github.com/go-logr/logr" "github.com/spf13/cobra" @@ -30,8 +31,14 @@ import ( _ "k8s.io/component-base/logs/json/register" "github.com/Mellanox/nic-feature-discovery/cmd/nic-feature-discovery/app/options" + "github.com/Mellanox/nic-feature-discovery/pkg/daemon" + "github.com/Mellanox/nic-feature-discovery/pkg/feature" "github.com/Mellanox/nic-feature-discovery/pkg/utils/signals" "github.com/Mellanox/nic-feature-discovery/pkg/utils/version" + "github.com/Mellanox/nic-feature-discovery/pkg/writer" + + // import feature sources so their init() functions register them in feature.Sources + _ "github.com/Mellanox/nic-feature-discovery/pkg/feature/sources" ) // NewNICFeatureDiscoveryCommand creates a new command @@ -81,7 +88,11 @@ func NewNICFeatureDiscoveryCommand() *cobra.Command { func RunNICFeatureDiscovery(ctx context.Context, opts *options.Options) error { logger := logr.FromContextOrDiscard(ctx) logger.Info("start NIC feature discovery", "Options", opts) - <-ctx.Done() + + labelWriter := writer.NewLabelWriter(filepath.Join(opts.NFDFeaturesPath, opts.FeatureFileName), + logger.WithName("label-writer")) + d := daemon.New(opts.FeatureScanInterval, labelWriter, feature.Sources) + d.Run(ctx) return nil } diff --git a/cmd/nic-feature-discovery/app/options/options.go b/cmd/nic-feature-discovery/app/options/options.go index a4c993b..2424c8e 100644 --- a/cmd/nic-feature-discovery/app/options/options.go +++ b/cmd/nic-feature-discovery/app/options/options.go @@ -27,12 +27,17 @@ import ( "github.com/Mellanox/nic-feature-discovery/pkg/utils/filesystem" ) +const ( + defaultNFDFeaturePath = "/etc/node-feature-discovery/features.d/" + defaultFeatureFileName = "nvidia-com-nic-feature-discovery.features" +) + // New creates new Options populated with default values func New() *Options { return 
&Options{ - NFDFeaturesPath: "/etc/node-feature-discovery/features.d/", - FeatureFileName: "nvidia-com-nic-feature-discovery.features", - FeatureScanInterval: 5 * time.Minute, + NFDFeaturesPath: defaultNFDFeaturePath, + FeatureFileName: defaultFeatureFileName, + FeatureScanInterval: 1 * time.Minute, LogConfig: logsapi.NewLoggingConfiguration(), } } @@ -66,6 +71,10 @@ func (o *Options) AddNamedFlagSets(sharedFS *cliflag.NamedFlagSets) { func (o *Options) Validate() error { var err error + if err = logsapi.ValidateAndApply(o.LogConfig, nil); err != nil { + return fmt.Errorf("failed to validate logging flags. %w", err) + } + if err = filesystem.FolderExist(o.NFDFeaturesPath); err != nil { return fmt.Errorf("failed to validate NFD features path. %w", err) } diff --git a/deployment/k8s/daemonset.yaml b/deployment/k8s/daemonset.yaml new file mode 100644 index 0000000..ed12c86 --- /dev/null +++ b/deployment/k8s/daemonset.yaml @@ -0,0 +1,50 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: nic-feature-discovery-ds + namespace: kube-system + labels: + tier: node + app: nic-feature-discovery + name: nic-feature-discovery +spec: + selector: + matchLabels: + name: nic-feature-discovery + updateStrategy: + type: RollingUpdate + template: + metadata: + labels: + tier: node + app: nic-feature-discovery + name: nic-feature-discovery + spec: + tolerations: + - operator: Exists + effect: NoSchedule + containers: + - name: nic-feature-discovery + image: ghcr.io/mellanox/nic-feature-discovery:latest + command: [ "/nic-feature-discovery" ] + args: + - --v=0 + - --logging-format=json + resources: + requests: + cpu: "100m" + memory: "50Mi" + limits: + cpu: "300m" + memory: "300Mi" + securityContext: + privileged: true + volumeMounts: + - name: features-dir + mountPath: /etc/node-feature-discovery/features.d + terminationGracePeriodSeconds: 10 + volumes: + - name: features-dir + hostPath: + path: /etc/node-feature-discovery/features.d + type: DirectoryOrCreate diff --git 
a/go.mod b/go.mod index 61c0961..9e31bcf 100644 --- a/go.mod +++ b/go.mod @@ -4,6 +4,7 @@ go 1.20 require ( github.com/go-logr/logr v1.2.4 + github.com/google/renameio/v2 v2.0.0 github.com/spf13/cobra v1.7.0 k8s.io/component-base v0.27.4 k8s.io/klog/v2 v2.100.1 diff --git a/go.sum b/go.sum index ea4dbb2..f34e91f 100644 --- a/go.sum +++ b/go.sum @@ -140,6 +140,8 @@ github.com/google/pprof v0.0.0-20200229191704-1ebb73c60ed3/go.mod h1:ZgVRPoUq/hf github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= +github.com/google/renameio/v2 v2.0.0 h1:UifI23ZTGY8Tt29JbYFiuyIU3eX+RNFtUwefq9qAhxg= +github.com/google/renameio/v2 v2.0.0/go.mod h1:BtmJXm5YlszgC+TD4HOEEUFgkJP3nLxehU6hfe7jRt4= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go new file mode 100644 index 0000000..20b08ab --- /dev/null +++ b/pkg/daemon/daemon.go @@ -0,0 +1,76 @@ +package daemon + +import ( + "context" + "time" + + "github.com/go-logr/logr" + + "github.com/Mellanox/nic-feature-discovery/pkg/feature" + "github.com/Mellanox/nic-feature-discovery/pkg/label" + "github.com/Mellanox/nic-feature-discovery/pkg/writer" +) + +// New creates a new Daemon +func New(scanInterval time.Duration, labelWriter writer.LabelWriter, sources []feature.Source) *Daemon { + return &Daemon{ + scanInterval: scanInterval, + writer: labelWriter, + sources: sources, + } +} + +// Daemon periodically scans for features and writes labels +type Daemon struct { + scanInterval time.Duration + writer 
writer.LabelWriter + sources []feature.Source +} + +// Run performs an initial discovery, then repeats it every scanInterval until ctx is done +func (d *Daemon) Run(ctx context.Context) { + log := logr.FromContextOrDiscard(ctx) + d.discover(ctx) +OUTER: + for { + select { + case <-ctx.Done(): + log.Info("context closed exiting daemon") + + break OUTER + case <-time.After(d.scanInterval): + d.discover(ctx) + } + } +} + +func (d *Daemon) discover(ctx context.Context) { + log := logr.FromContextOrDiscard(ctx) + log = log.WithName("discovery-daemon") + + log.Info("discovering features") + + features := make([]feature.Feature, 0) + for _, s := range d.sources { + log.V(2).Info("discovering features from source", "name", s.Name()) + sourceFeatures, err := s.Discover(ctx) + if err != nil { + log.Error(err, "failed to discover features from source", "name", s.Name()) + + continue + } + features = append(features, sourceFeatures...) + } + + labels := make([]label.Label, 0) + for _, f := range features { + labels = append(labels, f.Labels()...) + } + + log.Info("conditionally updating features file") + err := d.writer.Write(labels) + if err != nil { + log.Error(err, "failed to write feature labels") + } + log.Info("discovery complete") +} diff --git a/pkg/feature/feature.go b/pkg/feature/feature.go new file mode 100644 index 0000000..ba5d0df --- /dev/null +++ b/pkg/feature/feature.go @@ -0,0 +1,46 @@ +/* + Copyright 2023, NVIDIA CORPORATION & AFFILIATES + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + SPDX-License-Identifier: Apache-2.0 + SPDX-FileCopyrightText: Copyright 2023, NVIDIA CORPORATION & AFFILIATES +*/ + +package feature + +import ( + "context" + + "github.com/Mellanox/nic-feature-discovery/pkg/label" +) + +var Sources []Source + +// Feature exposes a set of key=value labels +type Feature interface { + // Name of Feature + Name() string + // Labels returns the list of Labels for Feature + Labels() []label.Label +} + +// Source is a source of features +type Source interface { + // Name of Source + Name() string + // Discover discovers Features + Discover(ctx context.Context) ([]Feature, error) +} + +// AddToSources appends s to the package-level Sources list +func AddToSources(s Source) { + Sources = append(Sources, s) +} diff --git a/pkg/feature/internal/common/generic_feature.go b/pkg/feature/internal/common/generic_feature.go new file mode 100644 index 0000000..6403d73 --- /dev/null +++ b/pkg/feature/internal/common/generic_feature.go @@ -0,0 +1,51 @@ +/* + Copyright 2023, NVIDIA CORPORATION & AFFILIATES + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + SPDX-License-Identifier: Apache-2.0 + SPDX-FileCopyrightText: Copyright 2023, NVIDIA CORPORATION & AFFILIATES +*/ + +package common + +import ( + "github.com/Mellanox/nic-feature-discovery/pkg/label" +) + +// NewGenericFeature creates a new GenericFeature with the given name and no labels +func NewGenericFeature(name string) *GenericFeature { + return &GenericFeature{ + name: name, + } +} + +// GenericFeature is a generic implementation of Feature +type GenericFeature struct { + name string + labels []label.Label +} + +// Name of Feature +func (gf *GenericFeature) Name() string { + return gf.name +} + +// Labels returns the list of Labels for Feature +func (gf *GenericFeature) Labels() []label.Label { + return gf.labels +} + +// AddLabel appends a key=value label and returns gf so calls can be chained +func (gf *GenericFeature) AddLabel(k, v string) *GenericFeature { + gf.labels = append(gf.labels, label.Label{Key: k, Value: v}) + + return gf +} diff --git a/pkg/feature/internal/common/prefix.go b/pkg/feature/internal/common/prefix.go new file mode 100644 index 0000000..bed011f --- /dev/null +++ b/pkg/feature/internal/common/prefix.go @@ -0,0 +1,31 @@ +/* + Copyright 2023, NVIDIA CORPORATION & AFFILIATES + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + SPDX-License-Identifier: Apache-2.0 + SPDX-FileCopyrightText: Copyright 2023, NVIDIA CORPORATION & AFFILIATES +*/ + +package common + +const ( + defaultPrefix = "nvidia.com" +) + +// DefaultPrefixedKey creates a key prefixed with defaultPrefix ("nvidia.com/<key>") +func DefaultPrefixedKey(key string) string { + return PrefixedKey(defaultPrefix, key) +} + +// PrefixedKey joins prefix and key as "<prefix>/<key>" +func PrefixedKey(prefix, key string) string { + return prefix + "/" + key +} diff --git a/pkg/feature/internal/driver/driver.go b/pkg/feature/internal/driver/driver.go new file mode 100644 index 0000000..49e30ef --- /dev/null +++ b/pkg/feature/internal/driver/driver.go @@ -0,0 +1,115 @@ +/* + Copyright 2023, NVIDIA CORPORATION & AFFILIATES + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + SPDX-License-Identifier: Apache-2.0 + SPDX-FileCopyrightText: Copyright 2023, NVIDIA CORPORATION & AFFILIATES +*/ + +package driver + +import ( + "context" + "errors" + "fmt" + "os" + "strings" + + "github.com/go-logr/logr" + + "github.com/Mellanox/nic-feature-discovery/pkg/feature" + "github.com/Mellanox/nic-feature-discovery/pkg/feature/internal/common" +) + +const ( + mlxverLabelName = "mofed.version" + mlx5ModulePath = "/sys/module/mlx5_core" + mlx5ModuleVersion = "/sys/module/mlx5_core/version" +) + +var ( + errFeatureNotExist = errors.New("feature does not exist") +) + +func init() { + feature.AddToSources(NewDriverSource()) +} + +type driverSource struct{} + +// NewDriverSource creates a new driver source +func NewDriverSource() feature.Source { + return &driverSource{} +} + +// Name of Source +func (ds *driverSource) Name() string { + return "driver" +} + +// Discover returns the Features found by this Source; errFeatureNotExist yields an empty list, not an error +func (ds *driverSource) Discover(ctx context.Context) ([]feature.Feature, error) { + log := logr.FromContextOrDiscard(ctx) + log = log.WithName("driver-source") + + log.Info("discovering features") + + var fs []feature.Feature + if f, err := ds.discoverMlxVerFeature(log); err == nil { + fs = append(fs, f) + } else { + if errors.Is(err, errFeatureNotExist) { + return fs, nil + } + + return nil, fmt.Errorf("failed to discover mlx version. 
%w", err) + } + + return fs, nil +} + +func (ds *driverSource) discoverMlxVerFeature(log logr.Logger) (feature.Feature, error) { + log.V(5).Info("discovering mlx version") + _, err := os.Lstat(mlx5ModulePath) + if err != nil { + if os.IsNotExist(err) { + log.V(3).Info("mlx5 driver is not loaded", "path", mlx5ModulePath) + + return nil, errFeatureNotExist + } + + return nil, err + } + + _, err = os.Lstat(mlx5ModuleVersion) + if err != nil { + if os.IsNotExist(err) { + log.V(3).Info("mlx5 driver has no version file, its most likely inbox", "path", mlx5ModuleVersion) + + return nil, errFeatureNotExist + } + + return nil, err + } + + // read the version file and strip surrounding whitespace + data, err := os.ReadFile(mlx5ModuleVersion) + if err != nil { + return nil, fmt.Errorf("failed to read driver version. %w", err) + } + + ver := strings.TrimSpace(string(data)) + if ver == "" { + return nil, fmt.Errorf("unexpected driver version(%q)", ver) + } + + return common.NewGenericFeature("mofed-version").AddLabel(common.DefaultPrefixedKey(mlxverLabelName), ver), nil +} diff --git a/pkg/feature/sources/sources.go b/pkg/feature/sources/sources.go new file mode 100644 index 0000000..f2d4793 --- /dev/null +++ b/pkg/feature/sources/sources.go @@ -0,0 +1,22 @@ +/* + Copyright 2023, NVIDIA CORPORATION & AFFILIATES + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + SPDX-License-Identifier: Apache-2.0 + SPDX-FileCopyrightText: Copyright 2023, NVIDIA CORPORATION & AFFILIATES +*/ + +package sources + +import ( + // feature sources imports + _ "github.com/Mellanox/nic-feature-discovery/pkg/feature/internal/driver" +) diff --git a/pkg/label/label.go b/pkg/label/label.go new file mode 100644 index 0000000..1922a9b --- /dev/null +++ b/pkg/label/label.go @@ -0,0 +1,88 @@ +/* + Copyright 2023, NVIDIA CORPORATION & AFFILIATES + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + SPDX-License-Identifier: Apache-2.0 + SPDX-FileCopyrightText: Copyright 2023, NVIDIA CORPORATION & AFFILIATES +*/ + +package label + +import ( + "fmt" + "reflect" +) + +// Label is a key=value Label +type Label struct { + Key string + Value string +} + +// String implements fmt.Stringer interface, formatting the Label as "Key=Value" +func (l *Label) String() string { + return fmt.Sprintf("%s=%s", l.Key, l.Value) +} + +// NewLabel creates a new Label +func NewLabel(key, value string) Label { + return Label{Key: key, Value: value} +} + +type Set interface { + Equal(other Set) bool + AddLabel(l Label) + AsLabels() []Label + AsMap() map[string]string +} + +func NewSet(labels ...Label) Set { + lm := make(map[string]string) + + for _, l := range labels { + lm[l.Key] = l.Value + } + + return &labelSet{ + labelMap: lm, + } +} + +type labelSet struct { + labelMap map[string]string +} + +func (ls labelSet) Equal(other Set) bool { + return reflect.DeepEqual(ls.labelMap, other.AsMap()) +} + +func (ls labelSet) AddLabel(l Label) 
{ + ls.labelMap[l.Key] = l.Value +} + +func (ls labelSet) AsLabels() []Label { + labels := make([]Label, 0, len(ls.labelMap)) + for k, v := range ls.labelMap { + labels = append(labels, NewLabel(k, v)) + } + + return labels +} + +func (ls labelSet) AsMap() map[string]string { + m := make(map[string]string) + + for k, v := range ls.labelMap { + m[k] = v + } + + return m +} diff --git a/pkg/writer/writer.go b/pkg/writer/writer.go new file mode 100644 index 0000000..78e2e5e --- /dev/null +++ b/pkg/writer/writer.go @@ -0,0 +1,118 @@ +/* + Copyright 2023, NVIDIA CORPORATION & AFFILIATES + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + SPDX-License-Identifier: Apache-2.0 + SPDX-FileCopyrightText: Copyright 2023, NVIDIA CORPORATION & AFFILIATES +*/ + +package writer + +import ( + "fmt" + "os" + "strings" + + "github.com/go-logr/logr" + "github.com/google/renameio/v2" + + "github.com/Mellanox/nic-feature-discovery/pkg/label" +) + +type LabelWriter interface { + // Write writes labels to the feature file; if they equal the file's current labels, no write is performed + Write(labels []label.Label) error +} + +// labelWriter implements LabelWriter +type labelWriter struct { + featureFilePath string + log logr.Logger +} + +func NewLabelWriter(path string, log logr.Logger) LabelWriter { + return &labelWriter{ + featureFilePath: path, + log: log, + } +} + +// Write labels to feature file. 
if labels are the same, no write is performed +func (lr *labelWriter) Write(labels []label.Label) error { + curLabels, err := lr.getCurrentLabels() + if err != nil { + lr.log.Error(err, "failed to get current labels, will not attempt to determine if labels remained the same") + } + + if err == nil { + // compare as sets: ordering and duplicate keys are ignored + if label.NewSet(labels...).Equal(label.NewSet(curLabels...)) { + // labels equal no need to write + lr.log.V(2).Info("current and new labels are equal, nothing to write.") + + return nil + } + } + + return lr.writeLabels(labels) +} + +// getCurrentLabels returns current labels set in feature file (empty if the file does not exist) +func (lr *labelWriter) getCurrentLabels() ([]label.Label, error) { + curLabels := make([]label.Label, 0) + + _, err := os.Lstat(lr.featureFilePath) + if err != nil { + if os.IsNotExist(err) { + return curLabels, nil + } + + return nil, err + } + + data, err := os.ReadFile(lr.featureFilePath) + if err != nil { + return nil, err + } + lines := strings.Split(string(data), "\n") + for _, line := range lines { + line = strings.TrimSpace(line) + if line == "" { + continue + } + kv := strings.Split(line, "=") + if len(kv) != 2 { + // anything other than exactly one '=' is unexpected, return error + return nil, fmt.Errorf("unexpected line format. %q", line) + } + curLabels = append(curLabels, label.NewLabel(kv[0], kv[1])) + } + + return curLabels, nil +} + +// writeLabels writes given labels to featureFilePath, one key=value per line, via renameio.WriteFile +func (lr *labelWriter) writeLabels(labels []label.Label) error { + lines := make([]string, 0, len(labels)) + + for _, l := range labels { + lines = append(lines, l.String()) + } + data := strings.Join(lines, "\n") + data += "\n" + + err := renameio.WriteFile(lr.featureFilePath, []byte(data), 0644) + if err != nil { + return fmt.Errorf("failed to write labels to file(%s). %w", lr.featureFilePath, err) + } + + return nil +}