Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Automate Reconfiguration Performance Test #2313

Merged
merged 42 commits into from
Aug 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
57a4073
Add code implementation of resource deploying scripts
bjee19 Jul 8, 2024
5e12b36
Add functions to check for resources and clean them up
bjee19 Jul 10, 2024
8a04efc
Add results template
bjee19 Jul 10, 2024
d233c92
Add basic prometheus metrics
bjee19 Jul 12, 2024
d2ad309
Add queries
bjee19 Jul 12, 2024
9358660
Add queries and results
bjee19 Jul 17, 2024
4da88d5
Add time to ready total and average
bjee19 Jul 29, 2024
946a547
Add logic documentation for time to ready average
bjee19 Jul 29, 2024
4c7aad1
Add test run on gke
bjee19 Jul 30, 2024
6f94138
Add test description
bjee19 Jul 30, 2024
25e5502
Remove completed TODO
bjee19 Jul 30, 2024
898b94c
Sort imports
bjee19 Aug 1, 2024
b99121f
Change timeouts for creating resources
bjee19 Aug 1, 2024
0d0f29b
Remove debugging print lines
bjee19 Aug 1, 2024
107898b
Add test results from 150 numResource run
bjee19 Aug 1, 2024
7f38b8f
Remove certificate namespace
bjee19 Aug 2, 2024
20a1f17
Delete entire namespace instead of specific manifest resources
bjee19 Aug 3, 2024
43d2b85
Change comparison on HTTPRoutes to be equal operator
bjee19 Aug 5, 2024
377778e
Add units to results
bjee19 Aug 5, 2024
bfe2f13
Refactor reconfig namespace to be declared in beforeeach
bjee19 Aug 5, 2024
e7709d7
Refactor cleanupResources and cleanup after each test
bjee19 Aug 5, 2024
714be9f
Add reconfiguration test to nfr checks in system suite test
bjee19 Aug 5, 2024
bc4862d
Close out file and redeploy NGF for future tests
bjee19 Aug 5, 2024
0ee057f
Add constant maxResourceCount
bjee19 Aug 5, 2024
97b819a
Refactor setup steps
bjee19 Aug 6, 2024
531bc4e
Refactor ginkgo test specs
bjee19 Aug 6, 2024
509cbdd
Refactor scale test to include framework queries
bjee19 Aug 7, 2024
dc8125f
Refactor blind sleep to instead wait for pods to be ready
bjee19 Aug 7, 2024
410d68d
Add and use ApplyFromBuffer to create unique resources
bjee19 Aug 8, 2024
0d3c604
Delete old reconfiguration files
bjee19 Aug 8, 2024
85b24ef
Revert "Delete old reconfiguration files"
bjee19 Aug 8, 2024
03ed0bc
Revert full delete of reconfiguration files and keep results files
bjee19 Aug 8, 2024
3bf27e2
Elaborate on test description
bjee19 Aug 8, 2024
35159e9
Add note on node size
bjee19 Aug 8, 2024
fe1f414
Change package to main
bjee19 Aug 8, 2024
9842b1e
Refactor queries.go function layout to be more readable
bjee19 Aug 9, 2024
2d753a4
Remove unnecessary return and correct placement of creation of resour…
bjee19 Aug 9, 2024
cdf4387
Use time constant layout when parsing logs
bjee19 Aug 9, 2024
c67265c
Move queries to different file and add descriptions to functions
bjee19 Aug 9, 2024
e1720b6
Add FIXME on issues
bjee19 Aug 12, 2024
685295d
Add latest results
bjee19 Aug 12, 2024
683cd36
Remove latest results
bjee19 Aug 12, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
325 changes: 325 additions & 0 deletions tests/framework/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -293,3 +293,328 @@ func WritePrometheusMatrixToCSVFile(fileName string, value model.Value) error {

return nil
}

// Bucket represents a data point of a Histogram Bucket.
// getBuckets builds one Bucket for every sample returned by a bucket query.
type Bucket struct {
// Le is the interval Less than or Equal which represents the Bucket's bin, e.g. "500ms".
// It is kept in string form, as read from the sample's "le" label.
Le string
// Val is the value for how many instances fall in the Bucket.
// It is the sample's value converted to an int, so any fractional part is truncated.
Val int
}

// GetReloadCount returns the current value of the nginx reloads counter for the NGF Pod named ngfPodName.
// The value is the first sample of the vector that Prometheus returns for the query.
func GetReloadCount(promInstance PrometheusInstance, ngfPodName string) (float64, error) {
	const queryFormat = `nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"}`

	query := fmt.Sprintf(queryFormat, ngfPodName)

	return getFirstValueOfVector(query, promInstance)
}

// GetReloadCountWithStartTime returns how many nginx reloads happened between startTime and the current time
// for the NGF Pod named ngfPodName.
func GetReloadCountWithStartTime(
	promInstance PrometheusInstance,
	ngfPodName string,
	startTime time.Time,
) (float64, error) {
	// Current counter value minus the counter value pinned to startTime with the `@` modifier.
	const queryFormat = `nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"}` +
		` - ` +
		`nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"} @ %d`

	query := fmt.Sprintf(queryFormat, ngfPodName, startTime.Unix())

	return getFirstValueOfVector(query, promInstance)
}

// GetReloadErrsCountWithStartTime returns how many nginx reload errors happened between startTime and the
// current time for the NGF Pod named ngfPodName.
func GetReloadErrsCountWithStartTime(
	promInstance PrometheusInstance,
	ngfPodName string,
	startTime time.Time,
) (float64, error) {
	// Current counter value minus the counter value pinned to startTime with the `@` modifier.
	const queryFormat = `nginx_gateway_fabric_nginx_reload_errors_total{pod="%[1]s"}` +
		` - ` +
		`nginx_gateway_fabric_nginx_reload_errors_total{pod="%[1]s"} @ %d`

	query := fmt.Sprintf(queryFormat, ngfPodName, startTime.Unix())

	return getFirstValueOfVector(query, promInstance)
}

// GetReloadAvgTime returns the average time in milliseconds of an nginx reload for the NGF Pod named ngfPodName.
// It is computed as the reload duration sum divided by the total number of reloads.
func GetReloadAvgTime(promInstance PrometheusInstance, ngfPodName string) (float64, error) {
	const queryFormat = `nginx_gateway_fabric_nginx_reloads_milliseconds_sum{pod="%[1]s"}` +
		` / ` +
		`nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"}`

	query := fmt.Sprintf(queryFormat, ngfPodName)

	return getFirstValueOfVector(query, promInstance)
}

// GetReloadAvgTimeWithStartTime returns the average time in milliseconds of an nginx reload for the NGF Pod
// named ngfPodName, taking into account only the reloads between startTime and the current time.
func GetReloadAvgTimeWithStartTime(
	promInstance PrometheusInstance,
	ngfPodName string,
	startTime time.Time,
) (float64, error) {
	// (duration sum accumulated since startTime) / (reloads counted since startTime).
	const queryFormat = `(nginx_gateway_fabric_nginx_reloads_milliseconds_sum{pod="%[1]s"}` +
		` - ` +
		`nginx_gateway_fabric_nginx_reloads_milliseconds_sum{pod="%[1]s"} @ %[2]d)` +
		` / ` +
		`(nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"}` +
		` - ` +
		`nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"} @ %[2]d)`

	query := fmt.Sprintf(queryFormat, ngfPodName, startTime.Unix())

	return getFirstValueOfVector(query, promInstance)
}

// GetReloadBuckets returns the nginx reload histogram Buckets (millisecond intervals) for the NGF Pod
// named ngfPodName.
func GetReloadBuckets(promInstance PrometheusInstance, ngfPodName string) ([]Bucket, error) {
	const queryFormat = `nginx_gateway_fabric_nginx_reloads_milliseconds_bucket{pod="%[1]s"}`

	query := fmt.Sprintf(queryFormat, ngfPodName)

	return getBuckets(query, promInstance)
}

// GetReloadBucketsWithStartTime returns the nginx reload histogram Buckets (millisecond intervals) for the
// NGF Pod named ngfPodName, counting only what was recorded between startTime and the current time.
func GetReloadBucketsWithStartTime(
	promInstance PrometheusInstance,
	ngfPodName string,
	startTime time.Time,
) ([]Bucket, error) {
	// Current bucket values minus the bucket values pinned to startTime with the `@` modifier.
	const queryFormat = `nginx_gateway_fabric_nginx_reloads_milliseconds_bucket{pod="%[1]s"}` +
		` - ` +
		`nginx_gateway_fabric_nginx_reloads_milliseconds_bucket{pod="%[1]s"} @ %d`

	query := fmt.Sprintf(queryFormat, ngfPodName, startTime.Unix())

	return getBuckets(query, promInstance)
}

// GetEventsCount returns the NGF event batch processing count for the NGF Pod named ngfPodName.
// The value is the first sample of the vector that Prometheus returns for the query.
func GetEventsCount(promInstance PrometheusInstance, ngfPodName string) (float64, error) {
	const queryFormat = `nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"}`

	query := fmt.Sprintf(queryFormat, ngfPodName)

	return getFirstValueOfVector(query, promInstance)
}

// GetEventsCountWithStartTime returns how many event batches NGF processed between startTime and the
// current time for the NGF Pod named ngfPodName.
func GetEventsCountWithStartTime(
	promInstance PrometheusInstance,
	ngfPodName string,
	startTime time.Time,
) (float64, error) {
	// Current count minus the count pinned to startTime with the `@` modifier.
	const queryFormat = `nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"}` +
		` - ` +
		`nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"} @ %d`

	query := fmt.Sprintf(queryFormat, ngfPodName, startTime.Unix())

	return getFirstValueOfVector(query, promInstance)
}

// GetEventsAvgTime returns the average time in milliseconds NGF takes to process a single event batch for
// the NGF Pod named ngfPodName. It is computed as the processing duration sum divided by the batch count.
func GetEventsAvgTime(promInstance PrometheusInstance, ngfPodName string) (float64, error) {
	const queryFormat = `nginx_gateway_fabric_event_batch_processing_milliseconds_sum{pod="%[1]s"}` +
		` / ` +
		`nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"}`

	query := fmt.Sprintf(queryFormat, ngfPodName)

	return getFirstValueOfVector(query, promInstance)
}

// GetEventsAvgTimeWithStartTime returns the average time in milliseconds NGF takes to process a single event
// batch for the NGF Pod named ngfPodName, taking into account only the batches processed between startTime
// and the current time.
func GetEventsAvgTimeWithStartTime(
	promInstance PrometheusInstance,
	ngfPodName string,
	startTime time.Time,
) (float64, error) {
	// (duration sum accumulated since startTime) / (batches counted since startTime).
	const queryFormat = `(nginx_gateway_fabric_event_batch_processing_milliseconds_sum{pod="%[1]s"}` +
		` - ` +
		`nginx_gateway_fabric_event_batch_processing_milliseconds_sum{pod="%[1]s"} @ %[2]d)` +
		` / ` +
		`(nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"}` +
		` - ` +
		`nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"} @ %[2]d)`

	query := fmt.Sprintf(queryFormat, ngfPodName, startTime.Unix())

	return getFirstValueOfVector(query, promInstance)
}

// GetEventsBuckets returns the NGF event batch processing histogram Buckets (millisecond intervals) for the
// NGF Pod named ngfPodName.
func GetEventsBuckets(promInstance PrometheusInstance, ngfPodName string) ([]Bucket, error) {
	const queryFormat = `nginx_gateway_fabric_event_batch_processing_milliseconds_bucket{pod="%[1]s"}`

	query := fmt.Sprintf(queryFormat, ngfPodName)

	return getBuckets(query, promInstance)
}

// GetEventsBucketsWithStartTime returns the NGF event batch processing histogram Buckets (millisecond
// intervals) for the NGF Pod named ngfPodName, counting only what was recorded between startTime and the
// current time.
func GetEventsBucketsWithStartTime(
	promInstance PrometheusInstance,
	ngfPodName string,
	startTime time.Time,
) ([]Bucket, error) {
	// Current bucket values minus the bucket values pinned to startTime with the `@` modifier.
	const queryFormat = `nginx_gateway_fabric_event_batch_processing_milliseconds_bucket{pod="%[1]s"}` +
		` - ` +
		`nginx_gateway_fabric_event_batch_processing_milliseconds_bucket{pod="%[1]s"} @ %d`

	query := fmt.Sprintf(queryFormat, ngfPodName, startTime.Unix())

	return getBuckets(query, promInstance)
}

// CreateMetricExistChecker builds a checker function that queries Prometheus for query evaluated at the
// timestamp supplied by getTime.
//
// Each call of the returned function:
//   - appends "@ <unix seconds of getTime()>" to query and runs it against promInstance;
//   - returns a wrapped error if the query itself fails (modifyTime is not called in that case);
//   - calls modifyTime and returns an "empty result" error if the result renders as an empty string,
//     so that the caller can retry with an adjusted timestamp;
//   - returns nil otherwise.
func CreateMetricExistChecker(
	promInstance PrometheusInstance,
	query string,
	getTime func() time.Time,
	modifyTime func(),
) func() error {
	checkMetricExists := func() error {
		timestamp := getTime().Unix()

		result, err := promInstance.Query(fmt.Sprintf("%s @ %d", query, timestamp))
		if err != nil {
			return fmt.Errorf("failed to query Prometheus: %w", err)
		}

		if result.String() != "" {
			return nil
		}

		// Nothing was found at this timestamp: let the caller's hook move it before the next attempt.
		modifyTime()

		return errors.New("empty result")
	}

	return checkMetricExists
}

// CreateEndTimeFinder builds a function that range queries Prometheus for query between startTime and
// *endTime using queryRangeStep as the step.
//
// Each call of the returned function reads the current value of *endTime. If the query fails, a wrapped
// error is returned and *endTime is left untouched. If the result renders as an empty string, *endTime is
// overwritten with time.Now() and an "empty result" error is returned so that the caller can retry.
// Otherwise nil is returned.
func CreateEndTimeFinder(
	promInstance PrometheusInstance,
	query string,
	startTime time.Time,
	endTime *time.Time,
	queryRangeStep time.Duration,
) func() error {
	findEndTime := func() error {
		queryRange := v1.Range{
			Start: startTime,
			End:   *endTime,
			Step:  queryRangeStep,
		}

		result, err := promInstance.QueryRange(query, queryRange)
		if err != nil {
			return fmt.Errorf("failed to query Prometheus: %w", err)
		}

		if result.String() != "" {
			return nil
		}

		// No data in the queried range yet: extend the range up to now for the next attempt.
		*endTime = time.Now()

		return errors.New("empty result")
	}

	return findEndTime
}

// CreateResponseChecker builds a function that sends a request with Get(url, address, requestTimeout) and
// reports whether it succeeded. The returned function yields a wrapped "bad response" error when Get fails,
// an "unexpected status code" error when the status is anything other than 200, and nil otherwise.
func CreateResponseChecker(url, address string, requestTimeout time.Duration) func() error {
	return func() error {
		status, _, err := Get(url, address, requestTimeout)

		switch {
		case err != nil:
			return fmt.Errorf("bad response: %w", err)
		case status != 200:
			return fmt.Errorf("unexpected status code: %d", status)
		default:
			return nil
		}
	}
}

// getFirstValueOfVector runs query against promInstance and extracts the first value of the resulting
// vector with GetFirstValueOfPrometheusVector. It returns 0 together with the error if either the query or
// the extraction fails.
func getFirstValueOfVector(query string, promInstance PrometheusInstance) (float64, error) {
	result, queryErr := promInstance.Query(query)
	if queryErr != nil {
		return 0, queryErr
	}

	first, extractErr := GetFirstValueOfPrometheusVector(result)
	if extractErr != nil {
		// Always report a zero value on failure, whatever the extraction helper returned.
		return 0, extractErr
	}

	return first, nil
}

// getBuckets runs query against promInstance and converts every sample of the resulting vector into a
// Bucket: Le is taken from the sample's "le" label and Val is the sample's value converted to an int.
// It returns an error if the query fails or if the result is not a model.Vector.
func getBuckets(query string, promInstance PrometheusInstance) ([]Bucket, error) {
	result, err := promInstance.Query(query)
	if err != nil {
		return nil, err
	}

	vector, isVector := result.(model.Vector)
	if !isVector {
		return nil, errors.New("could not convert result to vector")
	}

	// One Bucket per sample, in the order Prometheus returned them.
	buckets := make([]Bucket, len(vector))
	for i, sample := range vector {
		buckets[i] = Bucket{
			Le:  string(sample.Metric["le"]),
			Val: int(float64(sample.Value)),
		}
	}

	return buckets, nil
}
Loading
Loading