Skip to content

Commit

Permalink
feat: Add documentation for plugin-generated metrics and enable CI
Browse files Browse the repository at this point in the history
  • Loading branch information
rahulguptajss committed Sep 24, 2024
1 parent fb389dd commit 0e8107d
Show file tree
Hide file tree
Showing 6 changed files with 2,385 additions and 214 deletions.
32 changes: 11 additions & 21 deletions cmd/tools/generate/counter.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ import (
"gopkg.in/yaml.v3"
"log"
"maps"
"net"
"net/http"
"net/url"
"os"
Expand All @@ -31,8 +30,6 @@ import (
"time"
)

const prometheusIP = "10.193.161.234"

var (
replacer = strings.NewReplacer("\n", "", ":", "")
objectSwaggerMap = map[string]string{
Expand Down Expand Up @@ -147,6 +144,8 @@ var (
"health_",
"aggr_hybrid_disk_count",
"nfs_clients_idle_duration",
"ems_events",
"olume_top_clients",
}

excludeDocumentedZapiMetrics = []string{
Expand Down Expand Up @@ -1156,22 +1155,17 @@ func findAPI(apis []MetricDef, other MetricDef) []int {
return indices
}

func fetchAndCategorizePrometheusMetrics() (map[string]bool, map[string]bool, error) {
hostPort := net.JoinHostPort(prometheusIP, "9090")
baseURL := fmt.Sprintf("http://%s/api/v1/series", hostPort)
func fetchAndCategorizePrometheusMetrics(promURL string) (map[string]bool, map[string]bool, error) {
urlStr := promURL + "/api/v1/series?match[]={datacenter!=\"\"}"

// Construct the URL with query parameters
u, err := url.Parse(baseURL)
u, err := url.Parse(urlStr)
if err != nil {
return nil, nil, fmt.Errorf("failed to parse base URL: %v", err)
return nil, nil, fmt.Errorf("failed to parse URL: %w", err)
}
q := u.Query()
q.Set("match[]", `{datacenter!=""}`)
u.RawQuery = q.Encode()

resp, err := http.Get(u.String())
if err != nil {
return nil, nil, fmt.Errorf("failed to fetch metrics from Prometheus: %v", err)
return nil, nil, fmt.Errorf("failed to fetch metrics from Prometheus: %w", err)
}
defer resp.Body.Close()

Expand All @@ -1183,32 +1177,28 @@ func fetchAndCategorizePrometheusMetrics() (map[string]bool, map[string]bool, er
Status string `json:"status"`
Data []map[string]string `json:"data"`
}

if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
return nil, nil, fmt.Errorf("failed to decode Prometheus response: %w", err)
}

if result.Status != "success" {
return nil, nil, fmt.Errorf("unexpected status from Prometheus: %s", result.Status)
}

// Categorize metrics
restMetrics := make(map[string]bool)
zapiMetrics := make(map[string]bool)

for _, series := range result.Data {
metricName := series["__name__"]
datacenter := series["datacenter"]

if datacenter == "Rest" {
switch series["datacenter"] {
case "REST":
restMetrics[metricName] = true
} else if datacenter == "Zapi" {
case "ZAPI":
zapiMetrics[metricName] = true
}
}

return restMetrics, zapiMetrics, nil
}

func validateMetrics(documentedRest, documentedZapi map[string]Counter, prometheusRest, prometheusZapi map[string]bool) error {
var documentedButMissingRestMetrics []string
var notDocumentedRestMetrics []string
Expand Down
24 changes: 24 additions & 0 deletions cmd/tools/generate/counter.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,22 @@ counters:
Type: average
BaseCounter: ops

- Name: qos_detail_ops
Description: This field is the workload's rate of operations that completed during the measurement interval, measured per second.
APIs:
- API: REST
Endpoint: api/cluster/counter/tables/qos, api/cluster/counter/tables/qos_volume
ONTAPCounter: ops
Template: conf/restperf/9.12.0/workload_detail.yaml
Unit: per_sec
Type: rate
- API: ZAPI
Endpoint: perf-object-get-instances workload, workload_volume
ONTAPCounter: ops
Template: conf/zapiperf/9.12.0/workload_detail.yaml
Unit: per_sec
Type: rate

- Name: qos_detail_service_time_latency
Description: This refers to the average service time for a workload within the subsystems of Data ONTAP. These subsystems are the modules or components within the system that can contribute to delays or latency during data or task processing. This latency is the processing time within the subsystem.
APIs:
Expand Down Expand Up @@ -1921,3 +1937,11 @@ counters:
Endpoint: NA
ONTAPCounter: Harvest generated
Template: NA

- Name: ems_events
Description: Indicates EMS events that have occurred in ONTAP, as configured in ems.yaml.
APIs:
- API: REST
Endpoint: api/support/ems/events
ONTAPCounter: Harvest generated
Template: conf/ems/9.6.0/ems.yaml
9 changes: 4 additions & 5 deletions cmd/tools/generate/generate.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ type options struct {
mounts []string
configPath string
confPath string
isCI bool
promURL string
}

var metricRe = regexp.MustCompile(`(\w+)\{`)
Expand Down Expand Up @@ -599,8 +599,8 @@ func BuildMetrics(dir, configPath, pollerName string) (map[string]Counter, rest.
counters := mergeCounters(restCounters, zapiCounters)
counters = processExternalCounters(dir, counters)

if opts.isCI {
prometheusRest, prometheusZapi, err := fetchAndCategorizePrometheusMetrics()
if opts.promURL != "" {
prometheusRest, prometheusZapi, err := fetchAndCategorizePrometheusMetrics(opts.promURL)
if err != nil {
logErrAndExit(err)
}
Expand Down Expand Up @@ -713,6 +713,5 @@ func init() {
fFlags.IntVar(&opts.promPort, "promPort", 9090, "Prometheus Port")
fFlags.IntVar(&opts.grafanaPort, "grafanaPort", 3000, "Grafana Port")

// Add the new flag for CI validation
Cmd.PersistentFlags().BoolVar(&opts.isCI, "ci", false, "Enable CI validation of documented counters against Prometheus")
metricCmd.PersistentFlags().StringVar(&opts.promURL, "prom-url", "", "Prometheus URL for CI validation")
}
Loading

0 comments on commit 0e8107d

Please sign in to comment.