From f2f423597a9ebfa7c081bfcb66a917c0120b4593 Mon Sep 17 00:00:00 2001 From: Lindsay Hanks Date: Tue, 29 Aug 2023 20:44:15 +0000 Subject: [PATCH 1/2] Add metrics, dashboard, and test suite --- config/grafana/grafana_dashboard.json | 2404 +++++++++++++++++ pkg/ipamd/datastore/data_store.go | 21 + pkg/ipamd/ipamd.go | 2 + test/integration/warm-pool/clear_warm_env.go | 26 + test/integration/warm-pool/set_warm_env.go | 26 + test/integration/warm-pool/use_case_1_test.go | 98 + test/integration/warm-pool/use_case_2_test.go | 104 + test/integration/warm-pool/use_case_3_test.go | 112 + test/integration/warm-pool/use_case_4_test.go | 102 + test/integration/warm-pool/use_case_5_test.go | 113 + test/integration/warm-pool/use_case_6_test.go | 117 + test/integration/warm-pool/use_case_7_test.go | 114 + test/integration/warm-pool/use_case_8_test.go | 122 + test/integration/warm-pool/use_case_9_test.go | 115 + .../warm-pool/warm_pool_suite_test.go | 369 +++ 15 files changed, 3845 insertions(+) create mode 100644 config/grafana/grafana_dashboard.json create mode 100644 test/integration/warm-pool/clear_warm_env.go create mode 100644 test/integration/warm-pool/set_warm_env.go create mode 100644 test/integration/warm-pool/use_case_1_test.go create mode 100644 test/integration/warm-pool/use_case_2_test.go create mode 100644 test/integration/warm-pool/use_case_3_test.go create mode 100644 test/integration/warm-pool/use_case_4_test.go create mode 100644 test/integration/warm-pool/use_case_5_test.go create mode 100644 test/integration/warm-pool/use_case_6_test.go create mode 100644 test/integration/warm-pool/use_case_7_test.go create mode 100644 test/integration/warm-pool/use_case_8_test.go create mode 100644 test/integration/warm-pool/use_case_9_test.go create mode 100644 test/integration/warm-pool/warm_pool_suite_test.go diff --git a/config/grafana/grafana_dashboard.json b/config/grafana/grafana_dashboard.json new file mode 100644 index 0000000000..553e60ec1a --- /dev/null +++ 
b/config/grafana/grafana_dashboard.json @@ -0,0 +1,2404 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": "", + "editable": true, + "fiscalYearStartMonth": 0, + "gnetId": 16032, + "graphTooltip": 0, + "id": 3, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Warm Pool (total-assigned)" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Assigned IPs" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total IPs Remaining" + }, + "properties": [ + { + "id": 
"color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 94, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "awscni_total_ip_addresses-awscni_assigned_ip_addresses", + "instant": false, + "interval": "", + "legendFormat": "{{node}}", + "refId": "B" + } + ], + "title": "Warm Pool per Node", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 12, + "y": 0 + }, + "id": 51, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "sum(awscni_assigned_ip_addresses -2)", + "format": "time_series", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "# of Busybox Pods", + "transformations": [ + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "sum(awscni_assigned_ip_addresses -2)" + ] + } + } + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "\n\nnormal pods + preemptible pods should match\nthe assigned ip addresses count,\nmodulo 
pods using the host network", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 16, + "y": 0 + }, + "id": 83, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "editorMode": "builder", + "expr": "count(awscni_eni_max{k8s_app=\"aws-node\"})", + "hide": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "AWS Nodes", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "NdWXluq4z" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 20, + "y": 0 + }, + "id": 76, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "NdWXluq4z" + }, + "editorMode": "code", + "expr": "max(max_over_time(awscni_total_ip_addresses[30m])-(max_over_time(awscni_assigned_ip_addresses[30m])))", + "hide": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Max Net IP Requests[30m]", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "NdWXluq4z" + }, + 
"fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 4 + }, + "id": 103, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "NdWXluq4z" + }, + "editorMode": "code", + "expr": "delta(awscni_assigned_ip_addresses[24h])", + "hide": false, + "legendFormat": "{{node}}", + "range": true, + "refId": "A" + } + ], + "title": "Net IP Requests[24h]", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Warm Pool (total-assigned)" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + 
"matcher": { + "id": "byName", + "options": "Assigned IPs" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total IPs Remaining" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 4, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 80, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "awscni_total_ip_addresses-awscni_assigned_ip_addresses", + "instant": false, + "interval": "", + "legendFormat": "{{node}}", + "refId": "B" + } + ], + "title": "Warm Pool per Node", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "NdWXluq4z" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 12 + }, + "id": 97, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "NdWXluq4z" + }, + "editorMode": "code", + "expr": "max_over_time(awscni_total_ip_addresses[30m])-max_over_time(awscni_assigned_ip_addresses[30m])", + "hide": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" 
+ } + ], + "title": "Max Net IP Requests[30m]", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "NdWXluq4z" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 12 + }, + "id": 102, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "NdWXluq4z" + }, + "editorMode": "code", + "expr": "delta(awscni_assigned_ip_addresses[24h])", + "hide": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Net IP Requests[24h]", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "NdWXluq4z" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 12 + }, + "id": 101, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "NdWXluq4z" + }, + "editorMode": "code", + "expr": "stddev_over_time(awscni_assigned_ip_addresses[24h])", + "hide": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": 
"StdDev Net IP Requests[24h]", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "Warm: total - assigned, already allocated and available\n\nCold: max - total, could be allocated", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [], + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Warm Pool (total-assigned)" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Assigned IPs" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total IPs Remaining per Node" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 6, + "x": 12, + "y": 12 + }, + "id": 59, + "options": { + "displayLabels": [], + "legend": { + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "values": [ + "value", + "percent" + ] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "avg(awscni_total_ip_addresses-awscni_assigned_ip_addresses)", + "instant": false, + "interval": "", + "legendFormat": "Warm Pool (total-assigned)", + "refId": "B" + }, + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "avg(awscni_ip_max-awscni_total_ip_addresses)", + "instant": false, + "interval": "", + "legendFormat": "Total IPs Remaining per Node", + 
"refId": "A" + }, + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "avg(awscni_assigned_ip_addresses)", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Assigned IPs", + "refId": "C" + } + ], + "title": "Average IP Allocation per Node", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "Warm: total - assigned, already allocated and available\n\nCold: max - total, could be allocated", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [], + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Warm Pool (total-assigned)" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Assigned IPs" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total IPs Remaining" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 10, + "w": 6, + "x": 18, + "y": 12 + }, + "id": 95, + "options": { + "displayLabels": [], + "legend": { + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "values": [ + "value", + "percent" + ] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "sum(awscni_total_ip_addresses-awscni_assigned_ip_addresses)", + "instant": false, + "interval": "", + "legendFormat": "Warm Pool 
(total-assigned)", + "refId": "B" + }, + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "sum(awscni_ip_max-awscni_total_ip_addresses)", + "instant": false, + "interval": "", + "legendFormat": "Total IPs Remaining", + "refId": "A" + }, + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "sum(awscni_assigned_ip_addresses)", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Assigned IPs", + "refId": "C" + } + ], + "title": "Cluster IP Allocation", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "NdWXluq4z" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 17 + }, + "id": 61, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "NdWXluq4z" + }, + "editorMode": "builder", + "expr": "delta(awscni_err_no_avail_addrs{k8s_app=\"aws-node\"}[5m])", + "legendFormat": "{{node}}", + "range": true, + "refId": "A" + } + ], + "title": "No Available 
Addresses Error", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Assigned IPs" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Warm Pool (total-assigned)" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "IP Max per Cluster" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Total IPs" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "dark-orange", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 22 + }, + "id": 6, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + 
}, + "pluginVersion": "8.3.2", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "sum(awscni_assigned_ip_addresses)", + "interval": "", + "legendFormat": "Assigned IPs", + "range": true, + "refId": "C" + }, + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "sum(awscni_ip_max)", + "interval": "", + "legendFormat": "IP Max per Cluster", + "range": true, + "refId": "B" + }, + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "sum(awscni_total_ip_addresses-awscni_assigned_ip_addresses)", + "hide": false, + "interval": "", + "legendFormat": "Warm Pool (total-assigned)", + "range": true, + "refId": "D" + }, + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "sum(awscni_total_ip_addresses)", + "hide": false, + "interval": "", + "legendFormat": "Total IPs", + "range": true, + "refId": "A" + } + ], + "title": "IP Addresses in the Cluster", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "prometheus-kube-state-metrics, grafana, prometheus-server, prometheus-pushgateway, and coredns are all not host-network", + "fieldConfig": { + "defaults": { + "custom": { + "align": "auto", + "displayMode": "auto", + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 0, + "y": 25 + }, + "id": 40, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "awscni_assigned_ip_addresses\n", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "All Assigned IP Addresses", 
+ "transformations": [ + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "instance", + "pod", + "Value" + ] + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "Value": "Assigned IP Addresses" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "\n\nCold: max - total, could be allocated", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Warm" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "avg(awscni_add_warm_ip_req_count{k8s_app=\"aws-node\"})" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 25 + }, + "id": 67, + "maxDataPoints": 1, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "editorMode": "builder", + "expr": "awscni_eni_allocated", + "hide": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "ENIs Allocated per Node", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "NdWXluq4z" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": 
false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 29 + }, + "id": 55, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "NdWXluq4z" + }, + "editorMode": "builder", + "expr": "rate(awscni_add_ip_req_count{k8s_app=\"aws-node\"}[5m])", + "hide": false, + "legendFormat": "{{node}}", + "range": true, + "refId": "B" + } + ], + "title": "IP Allocation Request per Node [5m]", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "NdWXluq4z" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": 
"off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 29 + }, + "id": 98, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "NdWXluq4z" + }, + "editorMode": "builder", + "expr": "delta(awscni_del_ip_req_count{k8s_app=\"aws-node\"}[5m])", + "hide": false, + "legendFormat": "{{node}}", + "range": true, + "refId": "B" + } + ], + "title": "IP Deallocation Request per Node [5m]", + "type": "timeseries" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "points", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 0, + "y": 30 + }, + "id": 14, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + 
"pluginVersion": "8.3.2", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum by (fn) (awscni_ipamd_action_inprogress)", + "interval": "", + "legendFormat": "{{fn}}", + "refId": "A" + } + ], + "title": "IPAMD Actions in Progress", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "Warm: total - assigned, already allocated and available\n\nCold: max - total, could be allocated", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Warm" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "avg(awscni_add_warm_ip_req_count{k8s_app=\"aws-node\"})" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 8, + "y": 30 + }, + "id": 82, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "count(1 > awscni_eni_util{k8s_app=\"aws-node\"})", + "format": "table", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "__auto", + "refId": "C" + } + ], + "title": "Empty ENIs per Cluster", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "Warm: total - assigned, already allocated and available\n\nCold: max - total, could be 
allocated", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Warm" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "avg(awscni_add_warm_ip_req_count{k8s_app=\"aws-node\"})" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 8, + "y": 33 + }, + "id": 74, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "awscni_eni_max", + "format": "table", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "__auto", + "refId": "C" + } + ], + "title": "ENI Max per Node", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "description": "Warm: total - assigned, already allocated and available\n\nCold: max - total, could be allocated", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Warm" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": 
"avg(awscni_add_warm_ip_req_count{k8s_app=\"aws-node\"})" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 8, + "y": 36 + }, + "id": 66, + "maxDataPoints": 1, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "avg(awscni_ip_max / awscni_eni_max)", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "__auto", + "refId": "C" + } + ], + "title": "Max IPs per ENI", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "displayName": "${__field.labels.node}", + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 36 + }, + "id": 65, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "editorMode": "builder", + "expr": "awscni_eni_util", + "hide": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "# of IPs per ENI (Labeled by Node IP) - Correlate with Allocated ENIs for Delay", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + 
"defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "points", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 39 + }, + "id": 4, + "links": [], + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.3.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "editorMode": "code", + "exemplar": true, + "expr": "avg by (api)(rate(awscni_aws_api_latency_ms_sum[5m])/rate(awscni_aws_api_latency_ms_count[5m]))", + "interval": "", + "legendFormat": "{{api}}", + "range": true, + "refId": "A" + } + ], + "title": "Average AWS API Latency [5m]", + "type": "timeseries" + } + ], + "refresh": false, + "schemaVersion": 37, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "includeAll": false, + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + 
"regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": { + "isNone": true, + "selected": false, + "text": "None", + "value": "" + }, + "datasource": { + "uid": "$datasource" + }, + "definition": "label_values(awscni_assigned_ip_addresses{job=\"aws-cni-metrics\"}, instance)", + "hide": 2, + "includeAll": false, + "label": "", + "multi": false, + "name": "instance", + "options": [], + "query": { + "query": "label_values(awscni_assigned_ip_addresses{job=\"aws-cni-metrics\"}, instance)", + "refId": "Prometheus-instance-Variable-Query" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "isNone": true, + "selected": false, + "text": "None", + "value": "" + }, + "datasource": { + "uid": "$datasource" + }, + "definition": "label_values(awscni_del_ip_req_count{job=\"aws-cni-metrics\"}, reason)", + "hide": 2, + "includeAll": false, + "multi": false, + "name": "ip_deletion_reason", + "options": [], + "query": { + "query": "label_values(awscni_del_ip_req_count{job=\"aws-cni-metrics\"}, reason)", + "refId": "Prometheus-ip_deletion_reason-Variable-Query" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "filters": [], + "hide": 0, + "name": "Filters", + "skipUrlSync": false, + "type": "adhoc" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "VPC CNI IPAMD Warm Pool Testing", + "uid": "p7lNf06Vk", + "version": 115, + "weekStart": "" +} \ No newline at end of file diff --git a/pkg/ipamd/datastore/data_store.go 
b/pkg/ipamd/datastore/data_store.go index 8adc2965ab..f224dce21f 100644 --- a/pkg/ipamd/datastore/data_store.go +++ b/pkg/ipamd/datastore/data_store.go @@ -538,6 +538,7 @@ func (ds *DataStore) AddENI(eniID string, deviceNumber int, isPrimary, isTrunk, DeviceNumber: deviceNumber, AvailableIPv4Cidrs: make(map[string]*CidrInfo)} + ds.GetENIUtilization() enis.Set(float64(len(ds.eniPool))) // Initialize ENI IPs In Use to 0 when an ENI is created eniIPsInUse.WithLabelValues(eniID).Set(0) @@ -822,6 +823,7 @@ func (ds *DataStore) assignPodIPAddressUnsafe(addr *AddressInfo, ipamKey IPAMKey addr.IPAMMetadata = ipamMetadata addr.AssignedTime = assignedTime + ds.log.Debugf("IP allocation request") ds.assigned++ // Prometheus gauge assignedIPs.Set(float64(ds.assigned)) @@ -838,6 +840,7 @@ func (ds *DataStore) unassignPodIPAddressUnsafe(addr *AddressInfo) { addr.IPAMKey = IPAMKey{} // unassign the addr addr.IPAMMetadata = IPAMMetadata{} ds.assigned-- + ds.log.Debugf("IP deallocation request") // Prometheus gauge assignedIPs.Set(float64(ds.assigned)) } @@ -891,6 +894,24 @@ func (ds *DataStore) GetIPStats(addressFamily string) *DataStoreStats { return stats } +// GetENIUtilization refreshes the eniUtilization Prometheus gauge vector: one series per ENI in the pool, labeled with +// the ENI ID and set to the number of addresses on that ENI that are currently assigned. +func (ds *DataStore) GetENIUtilization() { + // NOTE(review): series of ENIs that are no longer in the pool are not reset here - confirm they are removed elsewhere. + // NOTE(review): ds.lock is not taken in this function; presumably callers handle synchronization - verify. + for _, eni := range ds.eniPool { + assignedCount := 0 + for _, cidr := range eni.AvailableIPv4Cidrs { + for _, ip := range cidr.IPAddresses { + if ip.Assigned() { + assignedCount++ + } + } + } + eniUtilization.WithLabelValues(eni.ID).Set(float64(assignedCount)) + } +} + // GetTrunkENI returns the trunk ENI ID or an empty string func (ds *DataStore) GetTrunkENI() string { ds.lock.Lock() diff --git a/pkg/ipamd/ipamd.go b/pkg/ipamd/ipamd.go index 626e8ef9ee..e6dc667901 100644 --- a/pkg/ipamd/ipamd.go +++ b/pkg/ipamd/ipamd.go @@ -679,6 +679,8 @@ func (c *IPAMContext) updateIPPoolIfRequired(ctx 
context.Context) { if c.shouldRemoveExtraENIs() { c.tryFreeENI() } + // Refresh the per-ENI utilization Prometheus gauge at the end of every pool update + c.dataStore.GetENIUtilization() } // decreaseDatastorePool runs every `interval` and attempts to return unused ENIs and IPs diff --git a/test/integration/warm-pool/clear_warm_env.go b/test/integration/warm-pool/clear_warm_env.go new file mode 100644 index 0000000000..d2774bea26 --- /dev/null +++ b/test/integration/warm-pool/clear_warm_env.go @@ -0,0 +1,26 @@ +package warm_pool + +import ( + k8sUtils "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/utils" + . "github.com/onsi/ginkgo/v2" +) + +// Warm pool environment variables are deliberately not reset before or after each test, so that several tests can be +// run back to back against the same settings. Run this spec to unset the warm pool variables listed below on the +// aws-node daemonset. To test with some of those variables set instead, alter them in set_warm_env.go and run that +// spec once before you run the desired tests. +var _ = Describe("clear warm env", func() { + Context("Clear out environment variables for warm pool for testing", func() { + + It("Unsetting env variables", func() { + k8sUtils.UpdateEnvVarOnDaemonSetAndWaitUntilReady(f, "aws-node", "kube-system", + "aws-node", map[string]string{}, + map[string]struct{}{ + "WARM_ENI_TARGET": {}, + "WARM_IP_TARGET": {}, + "MINIMUM_IP_TARGET": {}, + "WARM_PREFIX_TARGET": {}, + }) + }) + }) +}) diff --git a/test/integration/warm-pool/set_warm_env.go b/test/integration/warm-pool/set_warm_env.go new file mode 100644 index 0000000000..e46be8ecdf --- /dev/null +++ b/test/integration/warm-pool/set_warm_env.go @@ -0,0 +1,26 @@ +package warm_pool + +import ( + k8sUtils "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/utils" + "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" + . "github.com/onsi/ginkgo/v2" + "strconv" +) + +// Environment variables are not reset before and after each test so that way multiple tests can be run to +// evaluate behavior. 
You can run this test to set the warm pool environment variables listed below on the aws-node daemonset. If you +// want to test the behavior with different values, alter them in this file and run it once before +// you run the desired tests. +var _ = Describe("set warm env", func() { + Context("Sets env variables", func() { + + It("Sets env variables", func() { + k8sUtils.AddEnvVarToDaemonSetAndWaitTillUpdated(f, + utils.AwsNodeName, utils.AwsNodeNamespace, utils.AwsNodeName, + map[string]string{ + "WARM_IP_TARGET": strconv.Itoa(0), + "ENABLE_DYNAMIC_WARM_POOL": strconv.FormatBool(true), + }) + }) + }) +}) diff --git a/test/integration/warm-pool/use_case_1_test.go b/test/integration/warm-pool/use_case_1_test.go new file mode 100644 index 0000000000..5c985745ad --- /dev/null +++ b/test/integration/warm-pool/use_case_1_test.go @@ -0,0 +1,98 @@ +package warm_pool + +import ( + "fmt" + "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest" + "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + v1 "k8s.io/api/core/v1" + "time" +) + +var primaryNode v1.Node // node the test pods are pinned to via NodeName(primaryNode.Name) + +// This test scales up the cluster to maxPods, then scales it back down to minPods. +var _ = Describe("use case 1", func() { + Context("Quick Scale Up and Down", func() { + + BeforeEach(func() { + By("Getting Warm Pool Environment Variables Before Test") + getWarmPoolEnvVars() + }) + + It("Scales the cluster and checks warm pool before and after", func() { + fmt.Fprintf(GinkgoWriter, "Deploying %v minimum pods\n", minPods) + + start := time.Now().Unix() + + // The deployment starts at minPods replicas; the scale-up to maxPods is logged separately later in this test. + deploymentSpec := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry). + Namespace("default"). // NOTE(review): presumably overridden by the Namespace call below - confirm + Name("busybox"). + NodeName(primaryNode.Name). + Namespace(utils.DefaultTestNamespace). + Replicas(minPods). + Build() + + _, err := f.K8sResourceManagers. + DeploymentManager(). 
+ CreateAndWaitTillDeploymentIsReady(deploymentSpec, utils.DefaultDeploymentReadyTimeout*5) + Expect(err).ToNot(HaveOccurred()) + + if minPods != 0 { + time.Sleep(sleep) + } + + fmt.Fprintf(GinkgoWriter, "Scaling cluster up to %v pods\n", maxPods) + quickScale(maxPods) + + Expect(maxPods).To(Equal(busyboxPodCnt())) + + fmt.Fprintf(GinkgoWriter, "Scaling cluster down to %v pods\n", minPods) + quickScale(minPods) + + end := time.Now().Unix() + + fmt.Fprintf(GinkgoWriter, "Start Time: %v\n", start) // constant format string; no nested Sprintf + fmt.Fprintf(GinkgoWriter, "End Time: %v\n", end) + + By("Starting Curl Container") + curlContainer := manifest.NewCurlContainer(). + Command([]string{"sleep", "1000"}).Build() + + getCurlPod := manifest.NewDefaultPodBuilder(). + Name("curl-pod"). + Namespace(utils.DefaultTestNamespace). + NodeName(primaryNode.Name). + HostNetwork(true). + Container(curlContainer). + Build() + + testPod, err := f.K8sResourceManagers.PodManager(). + CreateAndWaitTillPodCompleted(getCurlPod) // NOTE(review): err is not checked before testPod is used - confirm intent + + logs, errLogs := f.K8sResourceManagers.PodManager(). 
+ PodLogs(testPod.Namespace, testPod.Name) + Expect(errLogs).ToNot(HaveOccurred()) + fmt.Fprintln(GinkgoWriter, logs) + + By("Fetching metrics via Curl Container") + getMetrics(start, end) // start/end are the Unix timestamps captured around the scaling phase + + By("Deleting the deployment") + err = f.K8sResourceManagers.DeploymentManager().DeleteAndWaitTillDeploymentIsDeleted(deploymentSpec) + Expect(err).NotTo(HaveOccurred()) + + By("Deleting Curl Container") + err = f.K8sResourceManagers.PodManager().DeleteAndWaitTillPodDeleted(getCurlPod) + Expect(err).NotTo(HaveOccurred()) + }) + + AfterEach(func() { + By("Getting Warm Pool Environment Variables After Test") + getWarmPoolEnvVars() + + }) + }) +}) diff --git a/test/integration/warm-pool/use_case_2_test.go b/test/integration/warm-pool/use_case_2_test.go new file mode 100644 index 0000000000..de26bd1938 --- /dev/null +++ b/test/integration/warm-pool/use_case_2_test.go @@ -0,0 +1,104 @@ +package warm_pool + +import ( + "fmt" + "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest" + "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" + "strconv" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +// This test reproduces a sawtooth load: every iteration scales the busybox deployment up by iterPods and then back +// down by iterPods, for a preset number of iterations (both targets are passed through checkInRange). +var _ = Describe("use case 2", func() { + Context("Sawtooth Fixed Add and Subtract", func() { + + BeforeEach(func() { + By("Getting Warm Pool Environment Variables Before Test") + getWarmPoolEnvVars() + }) + + It("Scales the cluster and checks warm pool before and after", func() { + replicas := minPods // current target replica count of the busybox deployment + + start := time.Now().Unix() + + fmt.Fprintf(GinkgoWriter, "Deploying %v minimum pods\n", minPods) + deploymentSpec := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry). + Namespace("default"). // NOTE(review): presumably overridden by the Namespace call below - confirm + Name("busybox"). + NodeName(primaryNode.Name). + Namespace(utils.DefaultTestNamespace). + Replicas(replicas). + Build() + + _, err := f.K8sResourceManagers. 
+ DeploymentManager(). + CreateAndWaitTillDeploymentIsReady(deploymentSpec, utils.DefaultDeploymentReadyTimeout*5) + Expect(err).ToNot(HaveOccurred()) + + if minPods != 0 { + time.Sleep(sleep) + } + + for i := 0; i < iterations; i++ { + By("Loop " + strconv.Itoa(i)) + replicas = checkInRange(replicas + iterPods) + fmt.Fprintf(GinkgoWriter, "Scaling cluster up to %v pods\n", replicas) + quickScale(replicas) + Expect(replicas).To(Equal(busyboxPodCnt())) + + replicas = checkInRange(replicas - iterPods) + fmt.Fprintf(GinkgoWriter, "Scaling cluster down to %v pods\n", replicas) + quickScale(replicas) + Expect(replicas).To(Equal(busyboxPodCnt())) + } + + Expect(minPods).To(Equal(busyboxPodCnt())) + + end := time.Now().Unix() + + fmt.Fprintf(GinkgoWriter, "Start Time: %v\n", start) // constant format string; no nested Sprintf + fmt.Fprintf(GinkgoWriter, "End Time: %v\n", end) + + By("Starting Curl Container") + curlContainer := manifest.NewCurlContainer(). + Command([]string{"sleep", "3600"}).Build() + + getCurlPod := manifest.NewDefaultPodBuilder(). + Name("curl-pod"). + Namespace(utils.DefaultTestNamespace). + NodeName(primaryNode.Name). + HostNetwork(true). + Container(curlContainer). + Build() + + testPod, err := f.K8sResourceManagers.PodManager(). + CreateAndWaitTillPodCompleted(getCurlPod) // NOTE(review): err is not checked before testPod is used - confirm intent + + logs, errLogs := f.K8sResourceManagers.PodManager(). 
+ PodLogs(testPod.Namespace, testPod.Name) + Expect(errLogs).ToNot(HaveOccurred()) + fmt.Fprintln(GinkgoWriter, logs) + + By("Fetching metrics via Curl Container") + getMetrics(start, end) // start/end are the Unix timestamps captured around the scaling phase + + By("Deleting the deployment") + err = f.K8sResourceManagers.DeploymentManager().DeleteAndWaitTillDeploymentIsDeleted(deploymentSpec) + Expect(err).NotTo(HaveOccurred()) + + By("Deleting Curl Container") + err = f.K8sResourceManagers.PodManager().DeleteAndWaitTillPodDeleted(getCurlPod) + Expect(err).NotTo(HaveOccurred()) + }) + + AfterEach(func() { + By("Getting Warm Pool Environment Variables After Test") + getWarmPoolEnvVars() + }) + }) +}) diff --git a/test/integration/warm-pool/use_case_3_test.go b/test/integration/warm-pool/use_case_3_test.go new file mode 100644 index 0000000000..7a365c7ddc --- /dev/null +++ b/test/integration/warm-pool/use_case_3_test.go @@ -0,0 +1,112 @@ +package warm_pool + +import ( + "fmt" + "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest" + "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" + "math/rand" + "strconv" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +// On every iteration this test adds a random number of pods (capped at maxPods) and then removes a random number of +// pods (never dropping below minPods), for a preset number of iterations. +var _ = Describe("use case 3", func() { + Context("Random Scale Fixed Add and Subtract", func() { + + BeforeEach(func() { + By("Getting Warm Pool Environment Variables Before Test") + getWarmPoolEnvVars() + }) + + It("Scales the cluster and checks warm pool before and after", func() { + replicas := minPods // current target replica count of the busybox deployment + + start := time.Now().Unix() + + fmt.Fprintf(GinkgoWriter, "Deploying %v minimum pods\n", minPods) + deploymentSpec := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry). + Namespace("default"). // NOTE(review): presumably overridden by the Namespace call below - confirm + Name("busybox"). + NodeName(primaryNode.Name). + Namespace(utils.DefaultTestNamespace). + Replicas(replicas). 
+ Build() + + _, err := f.K8sResourceManagers. + DeploymentManager(). + CreateAndWaitTillDeploymentIsReady(deploymentSpec, utils.DefaultDeploymentReadyTimeout*5) + Expect(err).ToNot(HaveOccurred()) + + if minPods != 0 { + time.Sleep(sleep) + } + + for i := 0; i < iterations; i++ { + By("Loop " + strconv.Itoa(i)) + randPods := incIf(rand.Intn(randDigits)) + // Will scale to a maximum of maxPods + replicas = min(replicas+randPods, maxPods) + fmt.Fprintf(GinkgoWriter, "Scaling cluster up to %v pods by adding %v pods\n", replicas, randPods) + quickScale(replicas) + Expect(replicas).To(Equal(busyboxPodCnt())) + + randPods = incIf(rand.Intn(randDigits)) + // Will scale to a minimum of minPods pods + replicas = max(replicas-randPods, minPods) + fmt.Fprintf(GinkgoWriter, "Scaling cluster down to %v pods by subtracting %v pods\n", replicas, + randPods) + quickScale(replicas) + Expect(replicas).To(Equal(busyboxPodCnt())) + + if replicas == maxPods { + break + } + } + + end := time.Now().Unix() + + fmt.Fprintf(GinkgoWriter, "Start Time: %v\n", start) // constant format string; no nested Sprintf + fmt.Fprintf(GinkgoWriter, "End Time: %v\n", end) + + By("Starting Curl Container") + curlContainer := manifest.NewCurlContainer(). + Command([]string{"sleep", "3600"}).Build() + + getCurlPod := manifest.NewDefaultPodBuilder(). + Name("curl-pod"). + Namespace(utils.DefaultTestNamespace). + NodeName(primaryNode.Name). + HostNetwork(true). + Container(curlContainer). + Build() + + testPod, err := f.K8sResourceManagers.PodManager(). + CreateAndWaitTillPodCompleted(getCurlPod) // NOTE(review): err is not checked before testPod is used - confirm intent + + logs, errLogs := f.K8sResourceManagers.PodManager(). 
+ PodLogs(testPod.Namespace, testPod.Name) + Expect(errLogs).ToNot(HaveOccurred()) + fmt.Fprintln(GinkgoWriter, logs) + + By("Fetching metrics via Curl Container") + getMetrics(start, end) // start/end are the Unix timestamps captured around the scaling phase + + By("Deleting the deployment") + err = f.K8sResourceManagers.DeploymentManager().DeleteAndWaitTillDeploymentIsDeleted(deploymentSpec) + Expect(err).NotTo(HaveOccurred()) + + By("Deleting Curl Container") + err = f.K8sResourceManagers.PodManager().DeleteAndWaitTillPodDeleted(getCurlPod) + Expect(err).NotTo(HaveOccurred()) + }) + + AfterEach(func() { + By("Getting Warm Pool Environment Variables After Test") + getWarmPoolEnvVars() + }) + }) +}) diff --git a/test/integration/warm-pool/use_case_4_test.go b/test/integration/warm-pool/use_case_4_test.go new file mode 100644 index 0000000000..7a8487b4ff --- /dev/null +++ b/test/integration/warm-pool/use_case_4_test.go @@ -0,0 +1,102 @@ +package warm_pool + +import ( + "fmt" + "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest" + "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" + "strconv" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +// This test applies the operation and pod count returned by randOpLoop on each of a preset number of iterations. +var _ = Describe("use case 4", func() { + Context("Random Scale Random Add and Subtract Operations", func() { + + BeforeEach(func() { + By("Getting Warm Pool Environment Variables Before Test") + getWarmPoolEnvVars() + }) + + It("Scales the cluster and checks warm pool before and after", func() { + replicas := minPods // current target replica count of the busybox deployment + + fmt.Fprintf(GinkgoWriter, "Deploying %v minimum pods\n", minPods) + + start := time.Now().Unix() + + deploymentSpec := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry). + Namespace("default"). // NOTE(review): presumably overridden by the Namespace call below - confirm + Name("busybox"). + NodeName(primaryNode.Name). + Namespace(utils.DefaultTestNamespace). + Replicas(replicas). + Build() + + _, err := f.K8sResourceManagers. + DeploymentManager(). 
+ CreateAndWaitTillDeploymentIsReady(deploymentSpec, utils.DefaultDeploymentReadyTimeout*5) + Expect(err).ToNot(HaveOccurred()) + + if minPods != 0 { + time.Sleep(sleep) + } + + for i := 0; i < iterations; i++ { + By("Loop " + strconv.Itoa(i)) + result, op, randPods := randOpLoop(replicas) + replicas = checkInRange(result) + if op == "no change" { + fmt.Fprintf(GinkgoWriter, "No change to cluster, %v pods\n", replicas) // newline added so log lines do not run together + } else { + fmt.Fprintf(GinkgoWriter, "Scaling cluster to %v pods by %v %v pods\n", replicas, op, randPods) + } + quickScale(replicas) + Expect(replicas).To(Equal(busyboxPodCnt())) + } + + end := time.Now().Unix() + + fmt.Fprintf(GinkgoWriter, "Start Time: %v\n", start) // constant format string; no nested Sprintf + fmt.Fprintf(GinkgoWriter, "End Time: %v\n", end) + + By("Starting Curl Container") + curlContainer := manifest.NewCurlContainer(). + Command([]string{"sleep", "3600"}).Build() + + getCurlPod := manifest.NewDefaultPodBuilder(). + Name("curl-pod"). + Namespace(utils.DefaultTestNamespace). + NodeName(primaryNode.Name). + HostNetwork(true). + Container(curlContainer). + Build() + + testPod, err := f.K8sResourceManagers.PodManager(). + CreateAndWaitTillPodCompleted(getCurlPod) // NOTE(review): err is not checked before testPod is used - confirm intent + + logs, errLogs := f.K8sResourceManagers.PodManager(). 
+ PodLogs(testPod.Namespace, testPod.Name) + Expect(errLogs).ToNot(HaveOccurred()) + fmt.Fprintln(GinkgoWriter, logs) + + By("Fetching metrics via Curl Container") + getMetrics(start, end) // start/end are the Unix timestamps captured around the scaling phase + + By("Deleting the deployment") + err = f.K8sResourceManagers.DeploymentManager().DeleteAndWaitTillDeploymentIsDeleted(deploymentSpec) + Expect(err).NotTo(HaveOccurred()) + + By("Deleting Curl Container") + err = f.K8sResourceManagers.PodManager().DeleteAndWaitTillPodDeleted(getCurlPod) + Expect(err).NotTo(HaveOccurred()) + }) + + AfterEach(func() { + By("Getting Warm Pool Environment Variables After Test") + getWarmPoolEnvVars() + }) + }) +}) diff --git a/test/integration/warm-pool/use_case_5_test.go b/test/integration/warm-pool/use_case_5_test.go new file mode 100644 index 0000000000..4b9dbf0c74 --- /dev/null +++ b/test/integration/warm-pool/use_case_5_test.go @@ -0,0 +1,113 @@ +package warm_pool + +import ( + "fmt" + "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest" + "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" + "strconv" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +// This test scales the cluster up to maxPods in steps of maxPods * scale and back down to minPods in the same steps. +var _ = Describe("use case 5", func() { + Context("Proportionate Scaling", func() { + + BeforeEach(func() { + By("Getting Warm Pool Environment Variables Before Test") + getWarmPoolEnvVars() + }) + + It("Scales the cluster and checks warm pool before and after", func() { + replicas := minPods // current target replica count of the busybox deployment + + start := time.Now().Unix() + + fmt.Fprintf(GinkgoWriter, "Deploying %v minimum pods\n", minPods) + deploymentSpec := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry). + Namespace("default"). // NOTE(review): presumably overridden by the Namespace call below - confirm + Name("busybox"). + NodeName(primaryNode.Name). + Namespace(utils.DefaultTestNamespace). + Replicas(replicas). + Build() + + _, err := f.K8sResourceManagers. + DeploymentManager(). 
+ CreateAndWaitTillDeploymentIsReady(deploymentSpec, utils.DefaultDeploymentReadyTimeout*5) + Expect(err).ToNot(HaveOccurred()) + + if minPods != 0 { + time.Sleep(sleep) + } + + scaleAmt := maxPods * scale // NOTE(review): if int(scaleAmt) is 0 the loops below make no progress - confirm inputs + + i := 0 // declared outside the loop so the "Loop N" label actually advances + for replicas < maxPods { + By("Loop " + strconv.Itoa(i)) + // Will scale to a maximum of maxPods + replicas = min(replicas+int(scaleAmt), maxPods) + fmt.Fprintf(GinkgoWriter, "Scaling cluster up to %v pods by adding %v pods\n", replicas, int(scaleAmt)) + quickScale(replicas) + Expect(replicas).To(Equal(busyboxPodCnt())) + i++ + } + + i = 0 // restart the loop label for the scale-down phase + for replicas > minPods { + By("Loop " + strconv.Itoa(i)) + // Will scale to a minimum of minPods + replicas = max(replicas-int(scaleAmt), minPods) + fmt.Fprintf(GinkgoWriter, "Scaling cluster down to %v pods by subtracting %v pods\n", replicas, + int(scaleAmt)) + quickScale(replicas) + Expect(replicas).To(Equal(busyboxPodCnt())) + i++ + } + + end := time.Now().Unix() + + fmt.Fprintf(GinkgoWriter, "Start Time: %v\n", start) // constant format string; no nested Sprintf + fmt.Fprintf(GinkgoWriter, "End Time: %v\n", end) + + By("Starting Curl Container") + curlContainer := manifest.NewCurlContainer(). + Command([]string{"sleep", "3600"}).Build() + + getCurlPod := manifest.NewDefaultPodBuilder(). + Name("curl-pod"). + Namespace(utils.DefaultTestNamespace). + NodeName(primaryNode.Name). + HostNetwork(true). + Container(curlContainer). + Build() + + testPod, err := f.K8sResourceManagers.PodManager(). + CreateAndWaitTillPodCompleted(getCurlPod) // NOTE(review): err is not checked before testPod is used - confirm intent + + logs, errLogs := f.K8sResourceManagers.PodManager(). 
+ PodLogs(testPod.Namespace, testPod.Name) + Expect(errLogs).ToNot(HaveOccurred()) + fmt.Fprintln(GinkgoWriter, logs) + + By("Fetching metrics via Curl Container") + getMetrics(start, end) // start/end are the Unix timestamps captured around the scaling phase + + By("Deleting the deployment") + err = f.K8sResourceManagers.DeploymentManager().DeleteAndWaitTillDeploymentIsDeleted(deploymentSpec) + Expect(err).NotTo(HaveOccurred()) + + By("Deleting Curl Container") + err = f.K8sResourceManagers.PodManager().DeleteAndWaitTillPodDeleted(getCurlPod) + Expect(err).NotTo(HaveOccurred()) + }) + + AfterEach(func() { + By("Getting Warm Pool Environment Variables After Test") + getWarmPoolEnvVars() + }) + }) +}) diff --git a/test/integration/warm-pool/use_case_6_test.go b/test/integration/warm-pool/use_case_6_test.go new file mode 100644 index 0000000000..84c85b4392 --- /dev/null +++ b/test/integration/warm-pool/use_case_6_test.go @@ -0,0 +1,117 @@ +package warm_pool + +import ( + "fmt" + "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest" + "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" + "math/rand" + "strconv" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +// This test scales the cluster up to maxPods in steps whose size is recomputed randomly on every loop from maxPods, +// and then scales the cluster back down to minPods in the same way. +var _ = Describe("use case 6", func() { + Context("Random Scaling", func() { + + BeforeEach(func() { + By("Getting Warm Pool Environment Variables Before Test") + getWarmPoolEnvVars() + }) + + It("Scales the cluster and checks warm pool before and after", func() { + replicas := minPods // current target replica count of the busybox deployment + + start := time.Now().Unix() + + fmt.Fprintf(GinkgoWriter, "Deploying %v minimum pods\n", minPods) + deploymentSpec := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry). + Namespace("default"). // NOTE(review): presumably overridden by the Namespace call below - confirm + Name("busybox"). + NodeName(primaryNode.Name). + Namespace(utils.DefaultTestNamespace). + Replicas(replicas). + Build() + + _, err := f.K8sResourceManagers. 
+ DeploymentManager(). + CreateAndWaitTillDeploymentIsReady(deploymentSpec, utils.DefaultDeploymentReadyTimeout*5) + Expect(err).ToNot(HaveOccurred()) + + if minPods != 0 { + time.Sleep(sleep) + } + + i := 0 // declared outside the loop so the "Loop N" label actually advances + for replicas < maxPods { + randScale := rand.Intn(randDigits) + 1 // prevent divide by 0 + scaleAmt := int(maxPods / randScale) + By("Loop " + strconv.Itoa(i)) + // Will scale to a maximum of maxPods + replicas = min(replicas+scaleAmt, maxPods) + fmt.Fprintf(GinkgoWriter, "Scaling cluster up to %v pods by adding %v pods\n", replicas, scaleAmt) + quickScale(replicas) + Expect(replicas).To(Equal(busyboxPodCnt())) + i++ + } + + i = 0 // restart the loop label for the scale-down phase + for replicas > minPods { + randScale := rand.Intn(randDigits) + 1 // rand.Intn can return 0; +1 prevents divide by 0 + scaleAmt := int(maxPods/randScale) + 1 // always remove at least one pod so the loop makes progress + By("Loop " + strconv.Itoa(i)) + // Will scale to a minimum of minPods + replicas = max(replicas-scaleAmt, minPods) + fmt.Fprintf(GinkgoWriter, "Scaling cluster down to %v pods by subtracting %v pods\n", replicas, + scaleAmt) + quickScale(replicas) + Expect(replicas).To(Equal(busyboxPodCnt())) + i++ + } + + end := time.Now().Unix() + + fmt.Fprintf(GinkgoWriter, "Start Time: %v\n", start) // constant format string; no nested Sprintf + fmt.Fprintf(GinkgoWriter, "End Time: %v\n", end) + + By("Starting Curl Container") + curlContainer := manifest.NewCurlContainer(). + Command([]string{"sleep", "3600"}).Build() + + getCurlPod := manifest.NewDefaultPodBuilder(). + Name("curl-pod"). + Namespace(utils.DefaultTestNamespace). + NodeName(primaryNode.Name). + HostNetwork(true). + Container(curlContainer). + Build() + + testPod, err := f.K8sResourceManagers.PodManager(). + CreateAndWaitTillPodCompleted(getCurlPod) // NOTE(review): err is not checked before testPod is used - confirm intent + + logs, errLogs := f.K8sResourceManagers.PodManager(). 
+ PodLogs(testPod.Namespace, testPod.Name) + Expect(errLogs).ToNot(HaveOccurred()) + fmt.Fprintln(GinkgoWriter, logs) + + By("Fetching metrics via Curl Container") + getMetrics(start, end) // start/end are the Unix timestamps captured around the scaling phase + + By("Deleting the deployment") + err = f.K8sResourceManagers.DeploymentManager().DeleteAndWaitTillDeploymentIsDeleted(deploymentSpec) + Expect(err).NotTo(HaveOccurred()) + + By("Deleting Curl Container") + err = f.K8sResourceManagers.PodManager().DeleteAndWaitTillPodDeleted(getCurlPod) + Expect(err).NotTo(HaveOccurred()) + }) + + AfterEach(func() { + By("Getting Warm Pool Environment Variables After Test") + getWarmPoolEnvVars() + }) + }) +}) diff --git a/test/integration/warm-pool/use_case_7_test.go b/test/integration/warm-pool/use_case_7_test.go new file mode 100644 index 0000000000..bc8b43ce44 --- /dev/null +++ b/test/integration/warm-pool/use_case_7_test.go @@ -0,0 +1,114 @@ +package warm_pool + +import ( + "fmt" + "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest" + "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" + "math/rand" + "strconv" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +// This test makes a single burst to maxPods at a randomly chosen iteration out of the preset number of iterations. +var _ = Describe("use case 7", func() { + Context("Single Burst Behavior", func() { + + BeforeEach(func() { + By("Getting Warm Pool Environment Variables Before Test") + getWarmPoolEnvVars() + }) + + It("Scales the cluster and checks warm pool before and after", func() { + replicas := minPods // current target replica count of the busybox deployment + + start := time.Now().Unix() + + fmt.Fprintf(GinkgoWriter, "Deploying %v minimum pods\n", minPods) + deploymentSpec := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry). + Namespace("default"). // NOTE(review): presumably overridden by the Namespace call below - confirm + Name("busybox"). + NodeName(primaryNode.Name). + Namespace(utils.DefaultTestNamespace). + Replicas(replicas). + Build() + + _, err := f.K8sResourceManagers. + DeploymentManager(). 
+ CreateAndWaitTillDeploymentIsReady(deploymentSpec, utils.DefaultDeploymentReadyTimeout*5) + Expect(err).ToNot(HaveOccurred()) + + if minPods != 0 { + time.Sleep(sleep) + } + + randBurst := rand.Intn(iterations) + + for i := 0; i < iterations; i++ { + By("Loop " + strconv.Itoa(i)) + + if i == randBurst { + fmt.Fprintf(GinkgoWriter, "Burst behavior from %v to %v pods\n", replicas, maxPods) + quickScale(maxPods) + continue + } + + if i == randBurst+1 { + fmt.Fprintf(GinkgoWriter, "Burst behavior over, scaling down from %v to %v pods\n", maxPods, + replicas) + quickScale(replicas) + continue + } + + result, op := randOp(replicas, iterPods) + replicas = checkInRange(result) + fmt.Fprintf(GinkgoWriter, "%v %v pod to cluster to equal %v pods\n", op, iterPods, replicas) + quickScale(replicas) + Expect(replicas).To(Equal(busyboxPodCnt())) + } + + end := time.Now().Unix() + + fmt.Fprintf(GinkgoWriter, fmt.Sprintf("Start Time: %v\n", start)) + fmt.Fprintf(GinkgoWriter, fmt.Sprintf("End Time: %v\n", end)) + + By("Starting Curl Container") + curlContainer := manifest.NewCurlContainer(). + Command([]string{"sleep", "3600"}).Build() + + getCurlPod := manifest.NewDefaultPodBuilder(). + Name("curl-pod"). + Namespace(utils.DefaultTestNamespace). + NodeName(primaryNode.Name). + HostNetwork(true). + Container(curlContainer). + Build() + + testPod, err := f.K8sResourceManagers.PodManager(). + CreateAndWaitTillPodCompleted(getCurlPod) + + logs, errLogs := f.K8sResourceManagers.PodManager(). 
+ PodLogs(testPod.Namespace, testPod.Name) + Expect(errLogs).ToNot(HaveOccurred()) + fmt.Fprintln(GinkgoWriter, logs) + + By("Fetching metrics via Curl Container") + getMetrics(start, end) + + By("Deleting the deployment") + err = f.K8sResourceManagers.DeploymentManager().DeleteAndWaitTillDeploymentIsDeleted(deploymentSpec) + Expect(err).NotTo(HaveOccurred()) + + By("Deleting Curl Container") + err = f.K8sResourceManagers.PodManager().DeleteAndWaitTillPodDeleted(getCurlPod) + Expect(err).NotTo(HaveOccurred()) + }) + + AfterEach(func() { + By("Getting Warm Pool Environment Variables After Test") + getWarmPoolEnvVars() + }) + }) +}) diff --git a/test/integration/warm-pool/use_case_8_test.go b/test/integration/warm-pool/use_case_8_test.go new file mode 100644 index 0000000000..9ec6f10f2f --- /dev/null +++ b/test/integration/warm-pool/use_case_8_test.go @@ -0,0 +1,122 @@ +package warm_pool + +import ( + "fmt" + "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest" + "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" + "math/rand" + "strconv" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +// This test will simulate a preset number of bursts of different sizes relative to maxPods that occur at random +// intervals over a preset number of iterations +var _ = Describe("use case 8", func() { + Context("Multiple Burst Behavior", func() { + + BeforeEach(func() { + By("Getting Warm Pool Environment Variables Before Test") + getWarmPoolEnvVars() + }) + + It("Scales the cluster and checks warm pool before and after", func() { + replicas := minPods + + start := time.Now().Unix() + + fmt.Fprintf(GinkgoWriter, "Deploying %v minimum pods\n", minPods) + deploymentSpec := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry). + Namespace("default"). + Name("busybox"). + NodeName(primaryNode.Name). + Namespace(utils.DefaultTestNamespace). + Replicas(replicas). + Build() + + _, err := f.K8sResourceManagers. 
+ DeploymentManager(). + CreateAndWaitTillDeploymentIsReady(deploymentSpec, utils.DefaultDeploymentReadyTimeout*5) + Expect(err).ToNot(HaveOccurred()) + + if minPods != 0 { + time.Sleep(sleep) + } + + // Creates some bursts of different sizes at random iterations. + burstIdx := rand.Perm(iterations)[:numBursts] + burstMap := make(map[int]int) + for i := 0; i < len(burstIdx); i++ { + key := burstIdx[i] + //value := incIf(rand.Intn(maxPods + 1)) + //value := int(maxPods / (rand.Intn(4) + 1)) + value := int(maxPods) + burstMap[key] = value + } + + for i := 0; i < iterations; i++ { + By("Loop " + strconv.Itoa(i)) + + val, present := burstMap[i] + if present { + fmt.Fprintf(GinkgoWriter, "Burst behavior from %v to %v pods\n", replicas, val) + quickScale(val) + + fmt.Fprintf(GinkgoWriter, "Burst behavior over, scaling down from %v to %v pods\n", val, + replicas) + quickScale(replicas) + continue + } + + result, op := randOp(replicas, iterPods) + replicas = checkInRange(result) + fmt.Fprintf(GinkgoWriter, "%v %v pod from cluster to equal %v pods\n", op, iterPods, replicas) + quickScale(replicas) + Expect(replicas).To(Equal(busyboxPodCnt())) + } + + end := time.Now().Unix() + + fmt.Fprintf(GinkgoWriter, fmt.Sprintf("Start Time: %v\n", start)) + fmt.Fprintf(GinkgoWriter, fmt.Sprintf("End Time: %v\n", end)) + + By("Starting Curl Container") + curlContainer := manifest.NewCurlContainer(). + Command([]string{"sleep", "3600"}).Build() + + getCurlPod := manifest.NewDefaultPodBuilder(). + Name("curl-pod"). + Namespace(utils.DefaultTestNamespace). + NodeName(primaryNode.Name). + HostNetwork(true). + Container(curlContainer). + Build() + + testPod, err := f.K8sResourceManagers.PodManager(). + CreateAndWaitTillPodCompleted(getCurlPod) + + logs, errLogs := f.K8sResourceManagers.PodManager(). 
+ PodLogs(testPod.Namespace, testPod.Name) + Expect(errLogs).ToNot(HaveOccurred()) + fmt.Fprintln(GinkgoWriter, logs) + + By("Fetching metrics via Curl Container") + getMetrics(start, end) + + By("Deleting the deployment") + err = f.K8sResourceManagers.DeploymentManager().DeleteAndWaitTillDeploymentIsDeleted(deploymentSpec) + Expect(err).NotTo(HaveOccurred()) + + By("Deleting Curl Container") + err = f.K8sResourceManagers.PodManager().DeleteAndWaitTillPodDeleted(getCurlPod) + Expect(err).NotTo(HaveOccurred()) + }) + + AfterEach(func() { + By("Getting Warm Pool Environment Variables After Test") + getWarmPoolEnvVars() + }) + }) +}) diff --git a/test/integration/warm-pool/use_case_9_test.go b/test/integration/warm-pool/use_case_9_test.go new file mode 100644 index 0000000000..ce372e1c10 --- /dev/null +++ b/test/integration/warm-pool/use_case_9_test.go @@ -0,0 +1,115 @@ +package warm_pool + +import ( + "fmt" + "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest" + "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" + "math/rand" + "strconv" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +// This test will add a random amount of pods until it gets to maxPods, then subtract a random amount of pods until it +// gets to minPods +var _ = Describe("use case 9", func() { + Context("Random Add to Max, Random Sub to Min", func() { + + BeforeEach(func() { + By("Getting Warm Pool Environment Variables Before Test") + getWarmPoolEnvVars() + }) + + It("Scales the cluster and checks warm pool before and after", func() { + replicas := minPods + + start := time.Now().Unix() + + fmt.Fprintf(GinkgoWriter, "Deploying %v minimum pods\n", minPods) + deploymentSpec := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry). + Namespace("default"). + Name("busybox"). + NodeName(primaryNode.Name). + Namespace(utils.DefaultTestNamespace). + Replicas(replicas). + Build() + + _, err := f.K8sResourceManagers. 
+ DeploymentManager(). + CreateAndWaitTillDeploymentIsReady(deploymentSpec, utils.DefaultDeploymentReadyTimeout*5) + Expect(err).ToNot(HaveOccurred()) + + if minPods != 0 { + time.Sleep(sleep) + } + + for replicas < maxPods { + i := 0 + randPods := incIf(rand.Intn(randDigits)) + By("Loop " + strconv.Itoa(i)) + // Will scale to a maximum of maxPods + replicas = min(replicas+randPods, maxPods) + fmt.Fprintf(GinkgoWriter, "Scaling cluster up to %v pods by adding %v pods\n", replicas, randPods) + quickScale(replicas) + Expect(replicas).To(Equal(busyboxPodCnt())) + i++ + } + + for replicas > minPods { + i := 0 + randPods := incIf(rand.Intn(randDigits)) + By("Loop " + strconv.Itoa(i)) + // Will scale to a minimum of minPods + replicas = max(replicas-randPods, minPods) + fmt.Fprintf(GinkgoWriter, "Scaling cluster down to %v pods by subtracting %v pods\n", replicas, + randPods) + quickScale(replicas) + Expect(replicas).To(Equal(busyboxPodCnt())) + i++ + } + + end := time.Now().Unix() + + fmt.Fprintf(GinkgoWriter, fmt.Sprintf("Start Time: %v\n", start)) + fmt.Fprintf(GinkgoWriter, fmt.Sprintf("End Time: %v\n", end)) + + By("Starting Curl Container") + curlContainer := manifest.NewCurlContainer(). + Command([]string{"sleep", "3600"}).Build() + + getCurlPod := manifest.NewDefaultPodBuilder(). + Name("curl-pod"). + Namespace(utils.DefaultTestNamespace). + NodeName(primaryNode.Name). + HostNetwork(true). + Container(curlContainer). + Build() + + testPod, err := f.K8sResourceManagers.PodManager(). + CreateAndWaitTillPodCompleted(getCurlPod) + + logs, errLogs := f.K8sResourceManagers.PodManager(). 
+ PodLogs(testPod.Namespace, testPod.Name) + Expect(errLogs).ToNot(HaveOccurred()) + fmt.Fprintln(GinkgoWriter, logs) + + By("Fetching metrics via Curl Container") + getMetrics(start, end) + + By("Deleting the deployment") + err = f.K8sResourceManagers.DeploymentManager().DeleteAndWaitTillDeploymentIsDeleted(deploymentSpec) + Expect(err).NotTo(HaveOccurred()) + + By("Deleting Curl Container") + err = f.K8sResourceManagers.PodManager().DeleteAndWaitTillPodDeleted(getCurlPod) + Expect(err).NotTo(HaveOccurred()) + }) + + AfterEach(func() { + By("Getting Warm Pool Environment Variables After Test") + getWarmPoolEnvVars() + }) + }) +}) diff --git a/test/integration/warm-pool/warm_pool_suite_test.go b/test/integration/warm-pool/warm_pool_suite_test.go new file mode 100644 index 0000000000..0f8d6dadfa --- /dev/null +++ b/test/integration/warm-pool/warm_pool_suite_test.go @@ -0,0 +1,369 @@ +// VPC Warm Pool Test Suite +// This test suite is a foundation for evaluating a dynamic warm pool, or ip consumption in general. Pair with grafana +//metrics dashboard to look at ip allocation and consumption. Each test displays the warm pool environment variables +//before and after to evaluate the changes made to the warm pool. Environment variables are not reset before and after +//each test so that way multiple tests can be run to evaluate behavior. You can run the test "clear warm env" which will +//unset all warm pool environment variables. Or, if you want to test the behavior with some of those environment +//variables set, alter them in that test and run it once before you run the desired tests. 
+// Use Case Test 1: Quick Scale Up and Down +// Use Case Test 2: Sawtooth Fixed Add and Subtract +// Use Case Test 3: Random Scale Fixed Add and Subtract +// Use Case Test 4: Random Scale Random Add and Subtract Operations +// Use Case Test 5: Proportionate Scaling +// Use Case Test 6: Random Scaling +// Use Case Test 7: Single Burst Behavior +// Use Case Test 8: Multiple Burst Behavior +// Use Case Test 9: Random Add to Max, Random Sub to Min + +package warm_pool + +import ( + "encoding/json" + "fmt" + "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest" + "github.com/aws/aws-sdk-go/service/ec2" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + v1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "math/rand" + "slices" + "strconv" + "testing" + "time" + + "github.com/aws/amazon-vpc-cni-k8s/test/framework" + k8sUtils "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/utils" + "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" +) + +// Warm Pool Test Suite Constants +// Run all tests with these constants or change individual tests to get desired outcome +// Environment variables are used in the tests listed in the (...) 
+const ( + randDigits = 10 // exclusive, used in rand.Intn to change scale amount, <= maxPods, (3,6,9) + scale = 0.25 // used in set proportional scaling, iterate with a fixed percentage (5) + iterations = 2 // run test over a set number of iterations (2,3,4,7,8) + iterPods = 1 // iterate with a fixed number of pods (2,7,8) + numBursts = 2 // Use Case Test 8, set number of bursts (8) + preventNoChange = 1 // retries x amount of times if randInt/randOp is out of range, if out of range no cluster + // scaling occurs, if set above 0 will increment some areas of no cluster scaling (3, 4, 6, 8, 9) + maxPods = 60 // max pods you want to work with for your cluster (all) + minPods = 0 // tests can be run with a base amount of pods at start (all) + sleep = 1 * time.Minute // sleep interval (all) +) + +var clusterIP = "10.100.140.129" // Get the cluster ip of the prometheus-server service +var primaryInstance *ec2.Instance +var f *framework.Framework +var err error +var coreDNSDeploymentCopy *v1.Deployment + +const CoreDNSDeploymentName = "coredns" +const KubeSystemNamespace = "kube-system" + +type Result struct { + Status string `json:"status"` + Data struct { + ResultType string `json:"resultType"` + Result []struct { + Metric struct { + Name string `json:"__name__"` + AppKubernetesIoInstance string `json:"app_kubernetes_io_instance"` + AppKubernetesIoName string `json:"app_kubernetes_io_name"` + ControllerRevisionHash string `json:"controller_revision_hash"` + Instance string `json:"instance"` + Job string `json:"job"` + K8SApp string `json:"k8s_app"` + Namespace string `json:"namespace"` + Node string `json:"node"` + Pod string `json:"pod"` + PodTemplateGeneration string `json:"pod_template_generation"` + } + Values [][2]interface{} `json:"values"` + } + } +} + +func TestWarmPool(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "VPC Warm Pool Test Suite") +} + +var _ = BeforeSuite(func() { + f = framework.New(framework.GlobalOptions) + + By("creating test 
namespace") + f.K8sResourceManagers.NamespaceManager(). + CreateNamespace(utils.DefaultTestNamespace) + + nodeList, err := f.K8sResourceManagers.NodeManager().GetNodes(f.Options.NgNameLabelKey, + f.Options.NgNameLabelVal) + Expect(err).ToNot(HaveOccurred()) + + numOfNodes := len(nodeList.Items) + Expect(numOfNodes).Should(BeNumerically(">", 1)) + + // Nominate the first untainted node as the one to run coredns deployment against + By("adding nodeSelector in coredns deployment to be scheduled on single node") + var primaryNode *corev1.Node + for _, n := range nodeList.Items { + if len(n.Spec.Taints) == 0 { + primaryNode = &n + break + } + } + Expect(primaryNode).To(Not(BeNil()), "expected to find a non-tainted node") + instanceID := k8sUtils.GetInstanceIDFromNode(*primaryNode) + primaryInstance, err = f.CloudServices.EC2().DescribeInstance(instanceID) + Expect(err).ToNot(HaveOccurred()) + + By("getting node with no pods scheduled to run tests") + coreDNSDeployment, err := f.K8sResourceManagers.DeploymentManager().GetDeployment(CoreDNSDeploymentName, + KubeSystemNamespace) + Expect(err).ToNot(HaveOccurred()) + + // Copy the deployment to restore later + coreDNSDeploymentCopy = coreDNSDeployment.DeepCopy() + + // Add nodeSelector label to coredns deployment so coredns pods are scheduled on 'primary' node + coreDNSDeployment.Spec.Template.Spec.NodeSelector = map[string]string{ + "kubernetes.io/hostname": primaryNode.Labels["kubernetes.io/hostname"], + } + err = f.K8sResourceManagers.DeploymentManager().UpdateAndWaitTillDeploymentIsReady(coreDNSDeployment, + utils.DefaultDeploymentReadyTimeout) + Expect(err).ToNot(HaveOccurred()) + + // Redefine primary node as node without coredns pods. Note that this node may have previously had coredns pods. 
+ for _, n := range nodeList.Items { + if len(n.Spec.Taints) == 0 && n.Name != primaryNode.Name { + primaryNode = &n + break + } + } + fmt.Fprintf(GinkgoWriter, "primary node is %s\n", primaryNode.Name) + instanceID = k8sUtils.GetInstanceIDFromNode(*primaryNode) + primaryInstance, err = f.CloudServices.EC2().DescribeInstance(instanceID) + Expect(err).ToNot(HaveOccurred()) +}) + +var _ = AfterSuite(func() { + // Restore coredns deployment + By("restoring coredns deployment") + err = f.K8sResourceManagers.DeploymentManager().UpdateAndWaitTillDeploymentIsReady(coreDNSDeploymentCopy, + utils.DefaultDeploymentReadyTimeout) + + By("deleting test namespace") + f.K8sResourceManagers.NamespaceManager(). + DeleteAndWaitTillNamespaceDeleted(utils.DefaultTestNamespace) +}) + +// Helper Functions // +func getWarmPoolEnvVars() { + daemonset, _ := f.K8sResourceManagers.DaemonSetManager().GetDaemonSet("kube-system", "aws-node") + warmPoolKeys := [5]string{"WARM_ENI_TARGET", "MINIMUM_IP_TARGET", "WARM_IP_TARGET", "WARM_PREFIX_TARGET", + "ENABLE_DYNAMIC_WARM_POOL"} + print("----\n") + for _, key := range warmPoolKeys { + val := utils.GetEnvValueForKeyFromDaemonSet(key, daemonset) + if val != "" { + print(" -", key, " : ", val, "\n") + } else { + print(" -", key, " : not set", "\n") + } + } + print("----\n") +} + +// Basic Prometheus api call +func callPrometheus(url string) Result { + command := []string{"curl", "--silent", "-g", url} + stdout, _, err := f.K8sResourceManagers.PodManager().PodExec(utils.DefaultTestNamespace, "curl-pod", + command) + Expect(err).ToNot(HaveOccurred()) + Expect(stdout).ShouldNot(BeEmpty()) + var result Result + marshallErr := json.Unmarshal([]byte(stdout), &result) + if marshallErr != nil { + fmt.Printf("Cannot unmarshall json: %s", marshallErr) + } + return result +} + +// Gets Prometheus metrics over the duration of the test and displays them +func getMetrics(start int64, end int64) { + warmMetric := 
"awscni_total_ip_addresses-awscni_assigned_ip_addresses" + noAddrsMetric := "awscni_err_no_avail_addrs" + netMetric := "awscni_assigned_ip_addresses" + duration := strDurationMin(start, end) + step := "30s" + + // warmMetric + netWarmUrl := fmt.Sprintf("http://%s/api/v1/query_range?query=%s&start=%v&end=%v&step=%s", + clusterIP, warmMetric, start, end, step) + resultNetWarm := callPrometheus(netWarmUrl) + fmt.Printf("\n %s", warmMetric) + netMap := make(map[string]int) + fmt.Printf("\nMAX Warm Pool (%v) over test duration: \n", warmMetric) + for i := 0; i < len(resultNetWarm.Data.Result); i++ { + node := resultNetWarm.Data.Result[i].Metric.Node + var maxArr []int + for j := 0; j < len(resultNetWarm.Data.Result[i].Values); j++ { + val, _ := strconv.Atoi(resultNetWarm.Data.Result[i].Values[j][1].(string)) + maxArr = append(maxArr, val) + if j == len(resultNetWarm.Data.Result[i].Values)-1 { + netMap[node] = val + } + } + fmt.Printf("%v : %v \n", node, slices.Max(maxArr)) + } + fmt.Printf("\nNET Warm Pool (%s) over test duration: \n", warmMetric) + for k, v := range netMap { + fmt.Printf("%v : %v \n", k, v) + } + + // noAddrsMetric + fmt.Printf("\n %s", noAddrsMetric) + noAddrUrl := fmt.Sprintf("http://%s/api/v1/query_range?query=delta(%s[%sm])&start=%v&end=%v&step=%s", + clusterIP, noAddrsMetric, duration, start, end, step) + resultNoAddrs := callPrometheus(noAddrUrl) + fmt.Printf("\nMAX DELTA %s over test duration: \n", noAddrsMetric) + for i := 0; i < len(resultNoAddrs.Data.Result); i++ { + node := resultNoAddrs.Data.Result[i].Metric.Node + var maxArr []int + for j := 0; j < len(resultNoAddrs.Data.Result[i].Values); j++ { + val := resultNoAddrs.Data.Result[i].Values[j][1].(string) + floatVal, err := strconv.ParseFloat(val, 64) + if err != nil { + Expect(err).ToNot(HaveOccurred()) + } + maxArr = append(maxArr, int(floatVal)) + } + fmt.Printf("%v : %v \n", node, slices.Max(maxArr)) + } + + // netMetric + fmt.Printf("\n %s", netMetric) + netUrl := 
fmt.Sprintf("http://%s/api/v1/query_range?query=delta(%s[%sm])&start=%v&end=%v&step=%s", + clusterIP, netMetric, duration, start, end, step) + resultNet := callPrometheus(netUrl) + fmt.Printf("\nMAX DELTA %s over test duration: \n", netMetric) + for i := 0; i < len(resultNet.Data.Result); i++ { + node := resultNet.Data.Result[i].Metric.Node + var maxArr []int + for j := 0; j < len(resultNet.Data.Result[i].Values); j++ { + val := resultNet.Data.Result[i].Values[j][1].(string) + floatVal, err := strconv.ParseFloat(val, 64) + if err != nil { + Expect(err).ToNot(HaveOccurred()) + } + maxArr = append(maxArr, int(floatVal)) + } + fmt.Printf("%v : %v \n", node, slices.Max(maxArr)) + } +} + +// Gets the duration in minutes for Prometheus queries +func strDurationMin(start int64, end int64) string { + duration := (end - start) / 60 + durationMin := strconv.FormatInt(duration, 10) + print("TEST DURATION: ", duration) + return durationMin +} + +// Random operation, if preventNoChange is 0 this includes no change being a result, otherwise it will add or subtract +func randOp(replicas int, pods int) (int, string) { + if preventNoChange == 0 { + op := rand.Intn(3) + if op == 0 { + return replicas + pods, "adding" + } + if op == 1 { + return replicas - pods, "subtracting" + } else { + return replicas, "no change" + } + } else { + op := rand.Intn(2) + if op == 0 { + return replicas + pods, "adding" + } else { + return replicas - pods, "subtracting" + } + } +} + +// Tries to get a random op/number combo that actually changes the cluster. If preventNoChange is above 0, will +// attempt to get another random integer to add/subtract that is within range. This is not always possible depending on +// what iterations and randDigits is set to, so it is best to set preventNoChange to a low number if it is set at all. +// If you want to see periods of no change, set this to 0. 
+func randOpLoop(replicas int) (int, string, int) {
+	result, op, randPods := 0, "", 0
+	// One initial attempt plus up to preventNoChange retries.
+	for attempt := 0; attempt <= preventNoChange; attempt++ {
+		randPods = rand.Intn(randDigits)
+		result, op = randOp(replicas, randPods)
+		// Accept only a non-zero delta whose result lies strictly between minPods and maxPods.
+		// NOTE(review): the bounds are exclusive, so landing exactly on minPods or maxPods is rejected and
+		// retried - confirm that is intended.
+		if randPods != 0 && result > minPods && result < maxPods {
+			return result, op, randPods
+		}
+	}
+	// No attempt qualified: hand back the last one unchanged, which may be out of range or a no-op.
+	return result, op, randPods
+}
+
+// quickScale rebuilds the busybox deployment spec with the requested replica count, applies it through
+// UpdateAndWaitTillDeploymentIsReady, fails the spec on error, and then sleeps for the suite-wide interval.
+func quickScale(pods int) {
+	// NOTE(review): Namespace is set twice on the builder; presumably the later call wins - confirm and drop
+	// the "default" one.
+	deploymentSpec := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry).
+		Namespace("default").
+		Name("busybox").
+		NodeName(primaryNode.Name).
+		Namespace(utils.DefaultTestNamespace).
+		Replicas(pods).
+		Build()
+
+	err := f.K8sResourceManagers.
+		DeploymentManager().
+		UpdateAndWaitTillDeploymentIsReady(deploymentSpec, utils.DefaultDeploymentReadyTimeout*5)
+	Expect(err).ToNot(HaveOccurred())
+
+	// Pause after every scale operation before the caller continues.
+	time.Sleep(sleep)
+}
+
+// busyboxPodCnt returns how many pods GetPodsWithLabelSelector("role", "test") reports, i.e. a pod count
+// taken from the pod listing rather than from the deployment spec. A listing error fails the running spec
+// instead of being silently reported as a (possibly wrong) count.
+func busyboxPodCnt() int {
+	podList, err := f.K8sResourceManagers.PodManager().GetPodsWithLabelSelector("role", "test")
+	Expect(err).ToNot(HaveOccurred())
+	return len(podList.Items)
+}
+
+// checkInRange clamps result to the inclusive range [minPods, maxPods].
+func checkInRange(result int) int {
+	return min(max(result, minPods), maxPods)
+}
+
+// Tries to prevent no scaling in the cluster as rand.Intn is inclusive with 0, so just scale 1 instead.
+func incIf(pods int) int { + if pods == 0 && preventNoChange > 0 { + return 1 + } else { + return pods + } +} + +func max(x, y int) int { + if x < y { + return y + } + return x +} + +func min(x, y int) int { + if y < x { + return y + } + return x +} From 32d71ad64894ce28f7cd3445a64d94e46d31076e Mon Sep 17 00:00:00 2001 From: Lindsay Hanks Date: Mon, 11 Sep 2023 21:44:36 +0000 Subject: [PATCH 2/2] just metrics --- config/grafana/grafana_dashboard.json | 2404 ----------------- pkg/ipamd/datastore/data_store.go | 25 +- pkg/ipamd/ipamd.go | 2 - test/integration/warm-pool/clear_warm_env.go | 26 - test/integration/warm-pool/set_warm_env.go | 26 - test/integration/warm-pool/use_case_1_test.go | 98 - test/integration/warm-pool/use_case_2_test.go | 104 - test/integration/warm-pool/use_case_3_test.go | 112 - test/integration/warm-pool/use_case_4_test.go | 102 - test/integration/warm-pool/use_case_5_test.go | 113 - test/integration/warm-pool/use_case_6_test.go | 117 - test/integration/warm-pool/use_case_7_test.go | 114 - test/integration/warm-pool/use_case_8_test.go | 122 - test/integration/warm-pool/use_case_9_test.go | 115 - .../warm-pool/warm_pool_suite_test.go | 369 --- 15 files changed, 2 insertions(+), 3847 deletions(-) delete mode 100644 config/grafana/grafana_dashboard.json delete mode 100644 test/integration/warm-pool/clear_warm_env.go delete mode 100644 test/integration/warm-pool/set_warm_env.go delete mode 100644 test/integration/warm-pool/use_case_1_test.go delete mode 100644 test/integration/warm-pool/use_case_2_test.go delete mode 100644 test/integration/warm-pool/use_case_3_test.go delete mode 100644 test/integration/warm-pool/use_case_4_test.go delete mode 100644 test/integration/warm-pool/use_case_5_test.go delete mode 100644 test/integration/warm-pool/use_case_6_test.go delete mode 100644 test/integration/warm-pool/use_case_7_test.go delete mode 100644 test/integration/warm-pool/use_case_8_test.go delete mode 100644 
test/integration/warm-pool/use_case_9_test.go delete mode 100644 test/integration/warm-pool/warm_pool_suite_test.go diff --git a/config/grafana/grafana_dashboard.json b/config/grafana/grafana_dashboard.json deleted file mode 100644 index 553e60ec1a..0000000000 --- a/config/grafana/grafana_dashboard.json +++ /dev/null @@ -1,2404 +0,0 @@ -{ - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] - }, - "description": "", - "editable": true, - "fiscalYearStartMonth": 0, - "gnetId": 16032, - "graphTooltip": 0, - "id": 3, - "links": [], - "liveNow": false, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Warm Pool (total-assigned)" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - }, - { - 
"matcher": { - "id": "byName", - "options": "Assigned IPs" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "yellow", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total IPs Remaining" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 0 - }, - "id": 94, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "awscni_total_ip_addresses-awscni_assigned_ip_addresses", - "instant": false, - "interval": "", - "legendFormat": "{{node}}", - "refId": "B" - } - ], - "title": "Warm Pool per Node", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 12, - "y": 0 - }, - "id": 51, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "sum(awscni_assigned_ip_addresses -2)", - "format": "time_series", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "# of Busybox Pods", - "transformations": [ - { - "id": "filterFieldsByName", - "options": { - 
"include": { - "names": [ - "sum(awscni_assigned_ip_addresses -2)" - ] - } - } - } - ], - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "description": "\n\nnormal pods + preemptible pods should match\nthe assigned ip addresses count,\nmodulo pods using the host network", - "fieldConfig": { - "defaults": { - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 16, - "y": 0 - }, - "id": 83, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "editorMode": "builder", - "expr": "count(awscni_eni_max{k8s_app=\"aws-node\"})", - "hide": false, - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "AWS Nodes", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 20, - "y": 0 - }, - "id": 76, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "editorMode": "code", - "expr": 
"max(max_over_time(awscni_total_ip_addresses[30m])-(max_over_time(awscni_assigned_ip_addresses[30m])))", - "hide": false, - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Max Net IP Requests[30m]", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 4 - }, - "id": 103, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "editorMode": "code", - "expr": "delta(awscni_assigned_ip_addresses[24h])", - "hide": false, - "legendFormat": "{{node}}", - "range": true, - "refId": "A" - } - ], - "title": "Net IP Requests[24h]", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": 
"green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Warm Pool (total-assigned)" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Assigned IPs" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "yellow", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total IPs Remaining" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 4, - "w": 12, - "x": 0, - "y": 8 - }, - "id": 80, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "awscni_total_ip_addresses-awscni_assigned_ip_addresses", - "instant": false, - "interval": "", - "legendFormat": "{{node}}", - "refId": "B" - } - ], - "title": "Warm Pool per Node", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 0, - "y": 12 - }, - "id": 97, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - 
"pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "editorMode": "code", - "expr": "max_over_time(awscni_total_ip_addresses[30m])-max_over_time(awscni_assigned_ip_addresses[30m])", - "hide": false, - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Max Net IP Requests[30m]", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 4, - "y": 12 - }, - "id": 102, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "editorMode": "code", - "expr": "delta(awscni_assigned_ip_addresses[24h])", - "hide": false, - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Net IP Requests[24h]", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 8, - "y": 12 - }, - "id": 101, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - 
"textMode": "auto" - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "editorMode": "code", - "expr": "stddev_over_time(awscni_assigned_ip_addresses[24h])", - "hide": false, - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "StdDev Net IP Requests[24h]", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "description": "Warm: total - assigned, already allocated and available\n\nCold: max - total, could be allocated", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - } - }, - "mappings": [], - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Warm Pool (total-assigned)" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Assigned IPs" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "yellow", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total IPs Remaining per Node" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 6, - "x": 12, - "y": 12 - }, - "id": 59, - "options": { - "displayLabels": [], - "legend": { - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "values": [ - "value", - "percent" - ] - }, - "pieType": "pie", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "avg(awscni_total_ip_addresses-awscni_assigned_ip_addresses)", - 
"instant": false, - "interval": "", - "legendFormat": "Warm Pool (total-assigned)", - "refId": "B" - }, - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "avg(awscni_ip_max-awscni_total_ip_addresses)", - "instant": false, - "interval": "", - "legendFormat": "Total IPs Remaining per Node", - "refId": "A" - }, - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "avg(awscni_assigned_ip_addresses)", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "Assigned IPs", - "refId": "C" - } - ], - "title": "Average IP Allocation per Node", - "type": "piechart" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "description": "Warm: total - assigned, already allocated and available\n\nCold: max - total, could be allocated", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - } - }, - "mappings": [], - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Warm Pool (total-assigned)" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Assigned IPs" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "yellow", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total IPs Remaining" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 10, - "w": 6, - "x": 18, - "y": 12 - }, - "id": 95, - "options": { - "displayLabels": [], - "legend": { - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "values": [ - "value", - "percent" - ] - }, - "pieType": "pie", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": 
false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "sum(awscni_total_ip_addresses-awscni_assigned_ip_addresses)", - "instant": false, - "interval": "", - "legendFormat": "Warm Pool (total-assigned)", - "refId": "B" - }, - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "sum(awscni_ip_max-awscni_total_ip_addresses)", - "instant": false, - "interval": "", - "legendFormat": "Total IPs Remaining", - "refId": "A" - }, - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "sum(awscni_assigned_ip_addresses)", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "Assigned IPs", - "refId": "C" - } - ], - "title": "Cluster IP Allocation", - "type": "piechart" - }, - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 17 - }, - "id": 61, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - 
"mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "editorMode": "builder", - "expr": "delta(awscni_err_no_avail_addrs{k8s_app=\"aws-node\"}[5m])", - "legendFormat": "{{node}}", - "range": true, - "refId": "A" - } - ], - "title": "No Available Addresses Error", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Assigned IPs" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "yellow", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Warm Pool (total-assigned)" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "IP Max per Cluster" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Total IPs" - }, - "properties": [ - { - "id": "color", - "value": { - 
"fixedColor": "dark-orange", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 22 - }, - "id": 6, - "links": [], - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "8.3.2", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "sum(awscni_assigned_ip_addresses)", - "interval": "", - "legendFormat": "Assigned IPs", - "range": true, - "refId": "C" - }, - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "sum(awscni_ip_max)", - "interval": "", - "legendFormat": "IP Max per Cluster", - "range": true, - "refId": "B" - }, - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "sum(awscni_total_ip_addresses-awscni_assigned_ip_addresses)", - "hide": false, - "interval": "", - "legendFormat": "Warm Pool (total-assigned)", - "range": true, - "refId": "D" - }, - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "sum(awscni_total_ip_addresses)", - "hide": false, - "interval": "", - "legendFormat": "Total IPs", - "range": true, - "refId": "A" - } - ], - "title": "IP Addresses in the Cluster", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "description": "prometheus-kube-state-metrics, grafana, prometheus-server, prometheus-pushgateway, and coredns are all not host-network", - "fieldConfig": { - "defaults": { - "custom": { - "align": "auto", - "displayMode": "auto", - "inspect": false - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 8, - "x": 0, - "y": 25 - }, - "id": 40, - "options": { - "footer": { - "fields": "", - "reducer": [ - "sum" - ], - "show": 
false - }, - "showHeader": true - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "awscni_assigned_ip_addresses\n", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "All Assigned IP Addresses", - "transformations": [ - { - "id": "filterFieldsByName", - "options": { - "include": { - "names": [ - "instance", - "pod", - "Value" - ] - } - } - }, - { - "id": "organize", - "options": { - "excludeByName": {}, - "indexByName": {}, - "renameByName": { - "Value": "Assigned IP Addresses" - } - } - } - ], - "type": "table" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "description": "\n\nCold: max - total, could be allocated", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Warm" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "yellow", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "avg(awscni_add_warm_ip_req_count{k8s_app=\"aws-node\"})" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "blue", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 8, - "y": 25 - }, - "id": 67, - "maxDataPoints": 1, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "value" - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "editorMode": "builder", - "expr": "awscni_eni_allocated", - "hide": false, 
- "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "ENIs Allocated per Node", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 12, - "y": 29 - }, - "id": 55, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "editorMode": "builder", - "expr": "rate(awscni_add_ip_req_count{k8s_app=\"aws-node\"}[5m])", - "hide": false, - "legendFormat": "{{node}}", - "range": true, - "refId": "B" - } - ], - "title": "IP Allocation Request per Node [5m]", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": 
"none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 18, - "y": 29 - }, - "id": 98, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "editorMode": "builder", - "expr": "delta(awscni_del_ip_req_count{k8s_app=\"aws-node\"}[5m])", - "hide": false, - "legendFormat": "{{node}}", - "range": true, - "refId": "B" - } - ], - "title": "IP Deallocation Request per Node [5m]", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "points", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - 
] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 8, - "x": 0, - "y": 30 - }, - "id": 14, - "links": [], - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "8.3.2", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum by (fn) (awscni_ipamd_action_inprogress)", - "interval": "", - "legendFormat": "{{fn}}", - "refId": "A" - } - ], - "title": "IPAMD Actions in Progress", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "description": "Warm: total - assigned, already allocated and available\n\nCold: max - total, could be allocated", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Warm" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "yellow", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "avg(awscni_add_warm_ip_req_count{k8s_app=\"aws-node\"})" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "blue", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 3, - "w": 4, - "x": 8, - "y": 30 - }, - "id": 82, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "count(1 > awscni_eni_util{k8s_app=\"aws-node\"})", - "format": "table", - "hide": false, - 
"instant": false, - "interval": "", - "legendFormat": "__auto", - "refId": "C" - } - ], - "title": "Empty ENIs per Cluster", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "description": "Warm: total - assigned, already allocated and available\n\nCold: max - total, could be allocated", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Warm" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "yellow", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "avg(awscni_add_warm_ip_req_count{k8s_app=\"aws-node\"})" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "blue", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 3, - "w": 4, - "x": 8, - "y": 33 - }, - "id": 74, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "awscni_eni_max", - "format": "table", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "__auto", - "refId": "C" - } - ], - "title": "ENI Max per Node", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "description": "Warm: total - assigned, already allocated and available\n\nCold: max - total, could be allocated", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" 
- }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Warm" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "yellow", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "avg(awscni_add_warm_ip_req_count{k8s_app=\"aws-node\"})" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "blue", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 3, - "w": 4, - "x": 8, - "y": 36 - }, - "id": 66, - "maxDataPoints": 1, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "value" - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "avg(awscni_ip_max / awscni_eni_max)", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "__auto", - "refId": "C" - } - ], - "title": "Max IPs per ENI", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "displayName": "${__field.labels.node}", - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 36 - }, - "id": 65, - "options": { - "displayMode": "gradient", - "minVizHeight": 10, - "minVizWidth": 0, - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showUnfilled": true - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "editorMode": "builder", - "expr": 
"awscni_eni_util", - "hide": false, - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "# of IPs per ENI (Labeled by Node IP) - Correlate with Allocated ENIs for Delay", - "type": "bargauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "points", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 39 - }, - "id": 4, - "links": [], - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true, - "sortBy": "Max", - "sortDesc": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "8.3.2", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "editorMode": "code", - "exemplar": true, - "expr": "avg by (api)(rate(awscni_aws_api_latency_ms_sum[5m])/rate(awscni_aws_api_latency_ms_count[5m]))", - "interval": "", - "legendFormat": "{{api}}", - "range": true, - "refId": "A" - } - ], - "title": "Average AWS API Latency [5m]", - "type": "timeseries" - } - ], - "refresh": false, - "schemaVersion": 37, 
- "style": "dark", - "tags": [], - "templating": { - "list": [ - { - "current": { - "selected": false, - "text": "Prometheus", - "value": "Prometheus" - }, - "hide": 0, - "includeAll": false, - "multi": false, - "name": "datasource", - "options": [], - "query": "prometheus", - "queryValue": "", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" - }, - { - "current": { - "isNone": true, - "selected": false, - "text": "None", - "value": "" - }, - "datasource": { - "uid": "$datasource" - }, - "definition": "label_values(awscni_assigned_ip_addresses{job=\"aws-cni-metrics\"}, instance)", - "hide": 2, - "includeAll": false, - "label": "", - "multi": false, - "name": "instance", - "options": [], - "query": { - "query": "label_values(awscni_assigned_ip_addresses{job=\"aws-cni-metrics\"}, instance)", - "refId": "Prometheus-instance-Variable-Query" - }, - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "current": { - "isNone": true, - "selected": false, - "text": "None", - "value": "" - }, - "datasource": { - "uid": "$datasource" - }, - "definition": "label_values(awscni_del_ip_req_count{job=\"aws-cni-metrics\"}, reason)", - "hide": 2, - "includeAll": false, - "multi": false, - "name": "ip_deletion_reason", - "options": [], - "query": { - "query": "label_values(awscni_del_ip_req_count{job=\"aws-cni-metrics\"}, reason)", - "refId": "Prometheus-ip_deletion_reason-Variable-Query" - }, - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "NdWXluq4z" - }, - "filters": [], - "hide": 0, - "name": "Filters", - "skipUrlSync": false, - "type": "adhoc" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "hidden": false, - 
"refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ] - }, - "timezone": "", - "title": "VPC CNI IPAMD Warm Pool Testing", - "uid": "p7lNf06Vk", - "version": 115, - "weekStart": "" -} \ No newline at end of file diff --git a/pkg/ipamd/datastore/data_store.go b/pkg/ipamd/datastore/data_store.go index f224dce21f..d170647e49 100644 --- a/pkg/ipamd/datastore/data_store.go +++ b/pkg/ipamd/datastore/data_store.go @@ -124,13 +124,13 @@ var ( ) noAvailableIPAddrs = prometheus.NewCounter( prometheus.CounterOpts{ - Name: "awscni_err_no_avail_addrs", + Name: "awscni_no_available_ip_addresses", Help: "The number of pod IP assignments that fail due to no available IP addresses", }, ) eniIPsInUse = prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Name: "awscni_eni_util", + Name: "awscni_assigned_ip_per_eni", Help: "The number of allocated ips partitioned by eni", }, []string{"eni"}, @@ -538,7 +538,6 @@ func (ds *DataStore) AddENI(eniID string, deviceNumber int, isPrimary, isTrunk, DeviceNumber: deviceNumber, AvailableIPv4Cidrs: make(map[string]*CidrInfo)} - ds.GetENIUtilization() enis.Set(float64(len(ds.eniPool))) // Initialize ENI IPs In Use to 0 when an ENI is created eniIPsInUse.WithLabelValues(eniID).Set(0) @@ -823,7 +822,6 @@ func (ds *DataStore) assignPodIPAddressUnsafe(addr *AddressInfo, ipamKey IPAMKey addr.IPAMMetadata = ipamMetadata addr.AssignedTime = assignedTime - ds.log.Debugf("IP allocation request") ds.assigned++ // Prometheus gauge assignedIPs.Set(float64(ds.assigned)) @@ -840,7 +838,6 @@ func (ds *DataStore) unassignPodIPAddressUnsafe(addr *AddressInfo) { addr.IPAMKey = IPAMKey{} // unassign the addr addr.IPAMMetadata = IPAMMetadata{} ds.assigned-- - ds.log.Debugf("IP deallocation request") // Prometheus gauge assignedIPs.Set(float64(ds.assigned)) } @@ -894,24 +891,6 @@ func (ds *DataStore) GetIPStats(addressFamily string) *DataStoreStats { return stats } -// GetENIUtilization updates a Prometheus gauge 
vector with each ENIs id and how many ip addresses are assigned on it -func (ds *DataStore) GetENIUtilization() { - //eniUtilization.Reset() - for _, eni := range ds.eniPool { - count := 0 - for _, assignedAddr := range eni.AvailableIPv4Cidrs { - for _, addr := range assignedAddr.IPAddresses { - if addr.Assigned() { - count += 1 - } - } - } - utilization := count - eniID := eni.ID - eniUtilization.WithLabelValues(eniID).Set(float64(utilization)) - } -} - // GetTrunkENI returns the trunk ENI ID or an empty string func (ds *DataStore) GetTrunkENI() string { ds.lock.Lock() diff --git a/pkg/ipamd/ipamd.go b/pkg/ipamd/ipamd.go index e6dc667901..626e8ef9ee 100644 --- a/pkg/ipamd/ipamd.go +++ b/pkg/ipamd/ipamd.go @@ -679,8 +679,6 @@ func (c *IPAMContext) updateIPPoolIfRequired(ctx context.Context) { if c.shouldRemoveExtraENIs() { c.tryFreeENI() } - // Prometheus Metric - c.dataStore.GetENIUtilization() } // decreaseDatastorePool runs every `interval` and attempts to return unused ENIs and IPs diff --git a/test/integration/warm-pool/clear_warm_env.go b/test/integration/warm-pool/clear_warm_env.go deleted file mode 100644 index d2774bea26..0000000000 --- a/test/integration/warm-pool/clear_warm_env.go +++ /dev/null @@ -1,26 +0,0 @@ -package warm_pool - -import ( - k8sUtils "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/utils" - . "github.com/onsi/ginkgo/v2" -) - -// Environment variables are not reset before and after each test so that way multiple tests can be run to -// evaluate behavior. You can run this test which will unset all warm pool environment variables. Or, if you -// want to test the behavior with some of those environment variables set, alter them in that file and run it once before -// you run the desired tests. 
-var _ = Describe("clear warm env", func() { - Context("Clear out environment variables for warm pool for testing", func() { - - It("Unsetting env variables", func() { - k8sUtils.UpdateEnvVarOnDaemonSetAndWaitUntilReady(f, "aws-node", "kube-system", - "aws-node", map[string]string{}, - map[string]struct{}{ - "WARM_ENI_TARGET": {}, - "WARM_IP_TARGET": {}, - "MINIMUM_IP_TARGET": {}, - "WARM_PREFIX_TARGET": {}, - }) - }) - }) -}) diff --git a/test/integration/warm-pool/set_warm_env.go b/test/integration/warm-pool/set_warm_env.go deleted file mode 100644 index e46be8ecdf..0000000000 --- a/test/integration/warm-pool/set_warm_env.go +++ /dev/null @@ -1,26 +0,0 @@ -package warm_pool - -import ( - k8sUtils "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/utils" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" - . "github.com/onsi/ginkgo/v2" - "strconv" -) - -// Environment variables are not reset before and after each test so that way multiple tests can be run to -// evaluate behavior. You can run this test which will unset all warm pool environment variables. Or, if you -// want to test the behavior with some of those environment variables set, alter them in that file and run it once before -// you run the desired tests. 
-var _ = Describe("set warm env", func() { - Context("Sets env variables", func() { - - It("Sets env variables", func() { - k8sUtils.AddEnvVarToDaemonSetAndWaitTillUpdated(f, - utils.AwsNodeName, utils.AwsNodeNamespace, utils.AwsNodeName, - map[string]string{ - "WARM_IP_TARGET": strconv.Itoa(0), - "ENABLE_DYNAMIC_WARM_POOL": strconv.FormatBool(true), - }) - }) - }) -}) diff --git a/test/integration/warm-pool/use_case_1_test.go b/test/integration/warm-pool/use_case_1_test.go deleted file mode 100644 index 5c985745ad..0000000000 --- a/test/integration/warm-pool/use_case_1_test.go +++ /dev/null @@ -1,98 +0,0 @@ -package warm_pool - -import ( - "fmt" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" - v1 "k8s.io/api/core/v1" - "time" -) - -var primaryNode v1.Node - -// This test scales up the cluster to maxPods, then scales it back down to minPods. -var _ = Describe("use case 1", func() { - Context("Quick Scale Up and Down", func() { - - BeforeEach(func() { - By("Getting Warm Pool Environment Variables Before Test") - getWarmPoolEnvVars() - }) - - It("Scales the cluster and checks warm pool before and after", func() { - fmt.Fprintf(GinkgoWriter, "Deploying %v minimum pods\n", minPods) - - start := time.Now().Unix() - - fmt.Fprintf(GinkgoWriter, "Scaling cluster up to %v pods\n", minPods) - deploymentSpec := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry). - Namespace("default"). - Name("busybox"). - NodeName(primaryNode.Name). - Namespace(utils.DefaultTestNamespace). - Replicas(minPods). - Build() - - _, err := f.K8sResourceManagers. - DeploymentManager(). 
- CreateAndWaitTillDeploymentIsReady(deploymentSpec, utils.DefaultDeploymentReadyTimeout*5) - Expect(err).ToNot(HaveOccurred()) - - if minPods != 0 { - time.Sleep(sleep) - } - - fmt.Fprintf(GinkgoWriter, "Scaling cluster up to %v pods\n", maxPods) - quickScale(maxPods) - - Expect(maxPods).To(Equal(busyboxPodCnt())) - - fmt.Fprintf(GinkgoWriter, "Scaling cluster down to %v pods\n", minPods) - quickScale(minPods) - - end := time.Now().Unix() - - fmt.Fprintf(GinkgoWriter, fmt.Sprintf("Start Time: %v\n", start)) - fmt.Fprintf(GinkgoWriter, fmt.Sprintf("End Time: %v\n", end)) - - By("Starting Curl Container") - curlContainer := manifest.NewCurlContainer(). - Command([]string{"sleep", "1000"}).Build() - - getCurlPod := manifest.NewDefaultPodBuilder(). - Name("curl-pod"). - Namespace(utils.DefaultTestNamespace). - NodeName(primaryNode.Name). - HostNetwork(true). - Container(curlContainer). - Build() - - testPod, err := f.K8sResourceManagers.PodManager(). - CreateAndWaitTillPodCompleted(getCurlPod) - - logs, errLogs := f.K8sResourceManagers.PodManager(). 
- PodLogs(testPod.Namespace, testPod.Name) - Expect(errLogs).ToNot(HaveOccurred()) - fmt.Fprintln(GinkgoWriter, logs) - - By("Fetching metrics via Curl Container") - getMetrics(start, end) - - By("Deleting the deployment") - err = f.K8sResourceManagers.DeploymentManager().DeleteAndWaitTillDeploymentIsDeleted(deploymentSpec) - Expect(err).NotTo(HaveOccurred()) - - By("Deleting Curl Container") - err = f.K8sResourceManagers.PodManager().DeleteAndWaitTillPodDeleted(getCurlPod) - Expect(err).NotTo(HaveOccurred()) - }) - - AfterEach(func() { - By("Getting Warm Pool Environment Variables After Test") - getWarmPoolEnvVars() - - }) - }) -}) diff --git a/test/integration/warm-pool/use_case_2_test.go b/test/integration/warm-pool/use_case_2_test.go deleted file mode 100644 index de26bd1938..0000000000 --- a/test/integration/warm-pool/use_case_2_test.go +++ /dev/null @@ -1,104 +0,0 @@ -package warm_pool - -import ( - "fmt" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" - "strconv" - "time" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" -) - -// This test replicates sawtooth behavior by adding a fixed amount of pods and removing the same fixed amount of pods -// over a preset number of iterations. -var _ = Describe("use case 2", func() { - Context("Sawtooth Fixed Add and Subtract", func() { - - BeforeEach(func() { - By("Getting Warm Pool Environment Variables Before Test") - getWarmPoolEnvVars() - }) - - It("Scales the cluster and checks warm pool before and after", func() { - replicas := minPods - - start := time.Now().Unix() - - fmt.Fprintf(GinkgoWriter, "Deploying %v minimum pods\n", minPods) - deploymentSpec := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry). - Namespace("default"). - Name("busybox"). - NodeName(primaryNode.Name). - Namespace(utils.DefaultTestNamespace). - Replicas(replicas). - Build() - - _, err := f.K8sResourceManagers. 
- DeploymentManager(). - CreateAndWaitTillDeploymentIsReady(deploymentSpec, utils.DefaultDeploymentReadyTimeout*5) - Expect(err).ToNot(HaveOccurred()) - - if minPods != 0 { - time.Sleep(sleep) - } - - for i := 0; i < iterations; i++ { - By("Loop " + strconv.Itoa(i)) - replicas = checkInRange(replicas + iterPods) - fmt.Fprintf(GinkgoWriter, "Scaling cluster up to %v pods\n", replicas) - quickScale(replicas) - Expect(replicas).To(Equal(busyboxPodCnt())) - - replicas = checkInRange(replicas - iterPods) - fmt.Fprintf(GinkgoWriter, "Scaling cluster down to %v pods\n", replicas) - quickScale(replicas) - Expect(replicas).To(Equal(busyboxPodCnt())) - } - - Expect(minPods).To(Equal(busyboxPodCnt())) - - end := time.Now().Unix() - - fmt.Fprintf(GinkgoWriter, fmt.Sprintf("Start Time: %v\n", start)) - fmt.Fprintf(GinkgoWriter, fmt.Sprintf("End Time: %v\n", end)) - - By("Starting Curl Container") - curlContainer := manifest.NewCurlContainer(). - Command([]string{"sleep", "3600"}).Build() - - getCurlPod := manifest.NewDefaultPodBuilder(). - Name("curl-pod"). - Namespace(utils.DefaultTestNamespace). - NodeName(primaryNode.Name). - HostNetwork(true). - Container(curlContainer). - Build() - - testPod, err := f.K8sResourceManagers.PodManager(). - CreateAndWaitTillPodCompleted(getCurlPod) - - logs, errLogs := f.K8sResourceManagers.PodManager(). 
- PodLogs(testPod.Namespace, testPod.Name) - Expect(errLogs).ToNot(HaveOccurred()) - fmt.Fprintln(GinkgoWriter, logs) - - By("Fetching metrics via Curl Container") - getMetrics(start, end) - - By("Deleting the deployment") - err = f.K8sResourceManagers.DeploymentManager().DeleteAndWaitTillDeploymentIsDeleted(deploymentSpec) - Expect(err).NotTo(HaveOccurred()) - - By("Deleting Curl Container") - err = f.K8sResourceManagers.PodManager().DeleteAndWaitTillPodDeleted(getCurlPod) - Expect(err).NotTo(HaveOccurred()) - }) - - AfterEach(func() { - By("Getting Warm Pool Environment Variables After Test") - getWarmPoolEnvVars() - }) - }) -}) diff --git a/test/integration/warm-pool/use_case_3_test.go b/test/integration/warm-pool/use_case_3_test.go deleted file mode 100644 index 7a365c7ddc..0000000000 --- a/test/integration/warm-pool/use_case_3_test.go +++ /dev/null @@ -1,112 +0,0 @@ -package warm_pool - -import ( - "fmt" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" - "math/rand" - "strconv" - "time" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" -) - -// This test adds a random amount of pods and then subtracts a random amount of pods, limited to the number of preset -// iterations. The number of pods should not drop below minPods. -var _ = Describe("use case 3", func() { - Context("Random Scale Fixed Add and Subtract", func() { - - BeforeEach(func() { - By("Getting Warm Pool Environment Variables Before Test") - getWarmPoolEnvVars() - }) - - It("Scales the cluster and checks warm pool before and after", func() { - replicas := minPods - - start := time.Now().Unix() - - fmt.Fprintf(GinkgoWriter, "Deploying %v minimum pods\n", minPods) - deploymentSpec := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry). - Namespace("default"). - Name("busybox"). - NodeName(primaryNode.Name). - Namespace(utils.DefaultTestNamespace). - Replicas(replicas). 
- Build() - - _, err := f.K8sResourceManagers. - DeploymentManager(). - CreateAndWaitTillDeploymentIsReady(deploymentSpec, utils.DefaultDeploymentReadyTimeout*5) - Expect(err).ToNot(HaveOccurred()) - - if minPods != 0 { - time.Sleep(sleep) - } - - for i := 0; i < iterations; i++ { - By("Loop " + strconv.Itoa(i)) - randPods := incIf(rand.Intn(randDigits)) - // Will scale to a maximum of maxPods - replicas = min(replicas+randPods, maxPods) - fmt.Fprintf(GinkgoWriter, "Scaling cluster up to %v pods by adding %v pods\n", replicas, randPods) - quickScale(replicas) - Expect(replicas).To(Equal(busyboxPodCnt())) - - randPods = incIf(rand.Intn(randDigits)) - // Will scale to a minimum of minPods pods - replicas = max(replicas-randPods, minPods) - fmt.Fprintf(GinkgoWriter, "Scaling cluster down to %v pods by subtracting %v pods\n", replicas, - randPods) - quickScale(replicas) - Expect(replicas).To(Equal(busyboxPodCnt())) - - if replicas == maxPods { - break - } - } - - end := time.Now().Unix() - - fmt.Fprintf(GinkgoWriter, fmt.Sprintf("Start Time: %v\n", start)) - fmt.Fprintf(GinkgoWriter, fmt.Sprintf("End Time: %v\n", end)) - - By("Starting Curl Container") - curlContainer := manifest.NewCurlContainer(). - Command([]string{"sleep", "3600"}).Build() - - getCurlPod := manifest.NewDefaultPodBuilder(). - Name("curl-pod"). - Namespace(utils.DefaultTestNamespace). - NodeName(primaryNode.Name). - HostNetwork(true). - Container(curlContainer). - Build() - - testPod, err := f.K8sResourceManagers.PodManager(). - CreateAndWaitTillPodCompleted(getCurlPod) - - logs, errLogs := f.K8sResourceManagers.PodManager(). 
- PodLogs(testPod.Namespace, testPod.Name) - Expect(errLogs).ToNot(HaveOccurred()) - fmt.Fprintln(GinkgoWriter, logs) - - By("Fetching metrics via Curl Container") - getMetrics(start, end) - - By("Deleting the deployment") - err = f.K8sResourceManagers.DeploymentManager().DeleteAndWaitTillDeploymentIsDeleted(deploymentSpec) - Expect(err).NotTo(HaveOccurred()) - - By("Deleting Curl Container") - err = f.K8sResourceManagers.PodManager().DeleteAndWaitTillPodDeleted(getCurlPod) - Expect(err).NotTo(HaveOccurred()) - }) - - AfterEach(func() { - By("Getting Warm Pool Environment Variables After Test") - getWarmPoolEnvVars() - }) - }) -}) diff --git a/test/integration/warm-pool/use_case_4_test.go b/test/integration/warm-pool/use_case_4_test.go deleted file mode 100644 index 7a8487b4ff..0000000000 --- a/test/integration/warm-pool/use_case_4_test.go +++ /dev/null @@ -1,102 +0,0 @@ -package warm_pool - -import ( - "fmt" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" - "strconv" - "time" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" -) - -// This test does a random operation with a random number of pods over a preset number of iterations. -var _ = Describe("use case 4", func() { - Context("Random Scale Random Add and Subtract Operations", func() { - - BeforeEach(func() { - By("Getting Warm Pool Environment Variables Before Test") - getWarmPoolEnvVars() - }) - - It("Scales the cluster and checks warm pool before and after", func() { - replicas := minPods - - fmt.Fprintf(GinkgoWriter, "Deploying %v minimum pods\n", minPods) - - start := time.Now().Unix() - - deploymentSpec := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry). - Namespace("default"). - Name("busybox"). - NodeName(primaryNode.Name). - Namespace(utils.DefaultTestNamespace). - Replicas(replicas). - Build() - - _, err := f.K8sResourceManagers. - DeploymentManager(). 
- CreateAndWaitTillDeploymentIsReady(deploymentSpec, utils.DefaultDeploymentReadyTimeout*5) - Expect(err).ToNot(HaveOccurred()) - - if minPods != 0 { - time.Sleep(sleep) - } - - for i := 0; i < iterations; i++ { - By("Loop " + strconv.Itoa(i)) - result, op, randPods := randOpLoop(replicas) - replicas = checkInRange(result) - if op == "no change" { - fmt.Fprintf(GinkgoWriter, "No change to cluster, %v pods", replicas) - } else { - fmt.Fprintf(GinkgoWriter, "Scaling cluster to %v pods by %v %v pods\n", replicas, op, randPods) - } - quickScale(replicas) - Expect(replicas).To(Equal(busyboxPodCnt())) - } - - end := time.Now().Unix() - - fmt.Fprintf(GinkgoWriter, fmt.Sprintf("Start Time: %v\n", start)) - fmt.Fprintf(GinkgoWriter, fmt.Sprintf("End Time: %v\n", end)) - - By("Starting Curl Container") - curlContainer := manifest.NewCurlContainer(). - Command([]string{"sleep", "3600"}).Build() - - getCurlPod := manifest.NewDefaultPodBuilder(). - Name("curl-pod"). - Namespace(utils.DefaultTestNamespace). - NodeName(primaryNode.Name). - HostNetwork(true). - Container(curlContainer). - Build() - - testPod, err := f.K8sResourceManagers.PodManager(). - CreateAndWaitTillPodCompleted(getCurlPod) - - logs, errLogs := f.K8sResourceManagers.PodManager(). 
- PodLogs(testPod.Namespace, testPod.Name) - Expect(errLogs).ToNot(HaveOccurred()) - fmt.Fprintln(GinkgoWriter, logs) - - By("Fetching metrics via Curl Container") - getMetrics(start, end) - - By("Deleting the deployment") - err = f.K8sResourceManagers.DeploymentManager().DeleteAndWaitTillDeploymentIsDeleted(deploymentSpec) - Expect(err).NotTo(HaveOccurred()) - - By("Deleting Curl Container") - err = f.K8sResourceManagers.PodManager().DeleteAndWaitTillPodDeleted(getCurlPod) - Expect(err).NotTo(HaveOccurred()) - }) - - AfterEach(func() { - By("Getting Warm Pool Environment Variables After Test") - getWarmPoolEnvVars() - }) - }) -}) diff --git a/test/integration/warm-pool/use_case_5_test.go b/test/integration/warm-pool/use_case_5_test.go deleted file mode 100644 index 4b9dbf0c74..0000000000 --- a/test/integration/warm-pool/use_case_5_test.go +++ /dev/null @@ -1,113 +0,0 @@ -package warm_pool - -import ( - "fmt" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" - "strconv" - "time" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" -) - -// This test scales the cluster up to maxPods by a preset scale and back down again by that same scale -var _ = Describe("use case 5", func() { - Context("Proportionate Scaling", func() { - - BeforeEach(func() { - By("Getting Warm Pool Environment Variables Before Test") - getWarmPoolEnvVars() - }) - - It("Scales the cluster and checks warm pool before and after", func() { - replicas := minPods - - start := time.Now().Unix() - - fmt.Fprintf(GinkgoWriter, "Deploying %v minimum pods\n", minPods) - deploymentSpec := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry). - Namespace("default"). - Name("busybox"). - NodeName(primaryNode.Name). - Namespace(utils.DefaultTestNamespace). - Replicas(replicas). - Build() - - _, err := f.K8sResourceManagers. - DeploymentManager(). 
- CreateAndWaitTillDeploymentIsReady(deploymentSpec, utils.DefaultDeploymentReadyTimeout*5) - Expect(err).ToNot(HaveOccurred()) - - if minPods != 0 { - time.Sleep(sleep) - } - - scaleAmt := maxPods * scale - - for replicas < maxPods { - i := 0 - By("Loop " + strconv.Itoa(i)) - // Will scale to a maximum of maxPods - replicas = min(replicas+int(scaleAmt), maxPods) - fmt.Fprintf(GinkgoWriter, "Scaling cluster up to %v pods by adding %v pods\n", replicas, int(scaleAmt)) - quickScale(replicas) - Expect(replicas).To(Equal(busyboxPodCnt())) - i++ - } - - for replicas > minPods { - i := 0 - By("Loop " + strconv.Itoa(i)) - // Will scale to a minimum of minPods - replicas = max(replicas-int(scaleAmt), minPods) - fmt.Fprintf(GinkgoWriter, "Scaling cluster down to %v pods by subtracting %v pods\n", replicas, - int(scaleAmt)) - quickScale(replicas) - Expect(replicas).To(Equal(busyboxPodCnt())) - i++ - } - - end := time.Now().Unix() - - fmt.Fprintf(GinkgoWriter, fmt.Sprintf("Start Time: %v\n", start)) - fmt.Fprintf(GinkgoWriter, fmt.Sprintf("End Time: %v\n", end)) - - By("Starting Curl Container") - curlContainer := manifest.NewCurlContainer(). - Command([]string{"sleep", "3600"}).Build() - - getCurlPod := manifest.NewDefaultPodBuilder(). - Name("curl-pod"). - Namespace(utils.DefaultTestNamespace). - NodeName(primaryNode.Name). - HostNetwork(true). - Container(curlContainer). - Build() - - testPod, err := f.K8sResourceManagers.PodManager(). - CreateAndWaitTillPodCompleted(getCurlPod) - - logs, errLogs := f.K8sResourceManagers.PodManager(). 
- PodLogs(testPod.Namespace, testPod.Name) - Expect(errLogs).ToNot(HaveOccurred()) - fmt.Fprintln(GinkgoWriter, logs) - - By("Fetching metrics via Curl Container") - getMetrics(start, end) - - By("Deleting the deployment") - err = f.K8sResourceManagers.DeploymentManager().DeleteAndWaitTillDeploymentIsDeleted(deploymentSpec) - Expect(err).NotTo(HaveOccurred()) - - By("Deleting Curl Container") - err = f.K8sResourceManagers.PodManager().DeleteAndWaitTillPodDeleted(getCurlPod) - Expect(err).NotTo(HaveOccurred()) - }) - - AfterEach(func() { - By("Getting Warm Pool Environment Variables After Test") - getWarmPoolEnvVars() - }) - }) -}) diff --git a/test/integration/warm-pool/use_case_6_test.go b/test/integration/warm-pool/use_case_6_test.go deleted file mode 100644 index 84c85b4392..0000000000 --- a/test/integration/warm-pool/use_case_6_test.go +++ /dev/null @@ -1,117 +0,0 @@ -package warm_pool - -import ( - "fmt" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" - "math/rand" - "strconv" - "time" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" -) - -// This test will scale the cluster up to maxPods with a scale amount that is randomly calculated each loop based on -// maxPods and scales the cluster back down with the same way -var _ = Describe("use case 6", func() { - Context("Random Scaling", func() { - - BeforeEach(func() { - By("Getting Warm Pool Environment Variables Before Test") - getWarmPoolEnvVars() - }) - - It("Scales the cluster and checks warm pool before and after", func() { - replicas := minPods - - start := time.Now().Unix() - - fmt.Fprintf(GinkgoWriter, "Deploying %v minimum pods\n", minPods) - deploymentSpec := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry). - Namespace("default"). - Name("busybox"). - NodeName(primaryNode.Name). - Namespace(utils.DefaultTestNamespace). - Replicas(replicas). 
- Build() - - _, err := f.K8sResourceManagers. - DeploymentManager(). - CreateAndWaitTillDeploymentIsReady(deploymentSpec, utils.DefaultDeploymentReadyTimeout*5) - Expect(err).ToNot(HaveOccurred()) - - if minPods != 0 { - time.Sleep(sleep) - } - - for replicas < maxPods { - i := 0 - randScale := rand.Intn(randDigits) + 1 // prevent divide by 0 - scaleAmt := int(maxPods / randScale) - By("Loop " + strconv.Itoa(i)) - // Will scale to a maximum of maxPods - replicas = min(replicas+scaleAmt, maxPods) - fmt.Fprintf(GinkgoWriter, "Scaling cluster up to %v pods by adding %v pods\n", replicas, scaleAmt) - quickScale(replicas) - Expect(replicas).To(Equal(busyboxPodCnt())) - i++ - } - - for replicas > minPods { - i := 0 - randScale := rand.Intn(randDigits) - scaleAmt := int(maxPods/randScale) + 1 // prevent divide by 0 - By("Loop " + strconv.Itoa(i)) - // Will scale to a minimum of minPods - replicas = max(replicas-scaleAmt, minPods) - fmt.Fprintf(GinkgoWriter, "Scaling cluster down to %v pods by subtracting %v pods\n", replicas, - scaleAmt) - quickScale(replicas) - Expect(replicas).To(Equal(busyboxPodCnt())) - i++ - } - - end := time.Now().Unix() - - fmt.Fprintf(GinkgoWriter, fmt.Sprintf("Start Time: %v\n", start)) - fmt.Fprintf(GinkgoWriter, fmt.Sprintf("End Time: %v\n", end)) - - By("Starting Curl Container") - curlContainer := manifest.NewCurlContainer(). - Command([]string{"sleep", "3600"}).Build() - - getCurlPod := manifest.NewDefaultPodBuilder(). - Name("curl-pod"). - Namespace(utils.DefaultTestNamespace). - NodeName(primaryNode.Name). - HostNetwork(true). - Container(curlContainer). - Build() - - testPod, err := f.K8sResourceManagers.PodManager(). - CreateAndWaitTillPodCompleted(getCurlPod) - - logs, errLogs := f.K8sResourceManagers.PodManager(). 
- PodLogs(testPod.Namespace, testPod.Name) - Expect(errLogs).ToNot(HaveOccurred()) - fmt.Fprintln(GinkgoWriter, logs) - - By("Fetching metrics via Curl Container") - getMetrics(start, end) - - By("Deleting the deployment") - err = f.K8sResourceManagers.DeploymentManager().DeleteAndWaitTillDeploymentIsDeleted(deploymentSpec) - Expect(err).NotTo(HaveOccurred()) - - By("Deleting Curl Container") - err = f.K8sResourceManagers.PodManager().DeleteAndWaitTillPodDeleted(getCurlPod) - Expect(err).NotTo(HaveOccurred()) - }) - - AfterEach(func() { - By("Getting Warm Pool Environment Variables After Test") - getWarmPoolEnvVars() - }) - }) -}) diff --git a/test/integration/warm-pool/use_case_7_test.go b/test/integration/warm-pool/use_case_7_test.go deleted file mode 100644 index bc8b43ce44..0000000000 --- a/test/integration/warm-pool/use_case_7_test.go +++ /dev/null @@ -1,114 +0,0 @@ -package warm_pool - -import ( - "fmt" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" - "math/rand" - "strconv" - "time" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" -) - -// This test will create a single burst to maxPods at a random interval depending on the preset number of iterations -var _ = Describe("use case 7", func() { - Context("Single Burst Behavior", func() { - - BeforeEach(func() { - By("Getting Warm Pool Environment Variables Before Test") - getWarmPoolEnvVars() - }) - - It("Scales the cluster and checks warm pool before and after", func() { - replicas := minPods - - start := time.Now().Unix() - - fmt.Fprintf(GinkgoWriter, "Deploying %v minimum pods\n", minPods) - deploymentSpec := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry). - Namespace("default"). - Name("busybox"). - NodeName(primaryNode.Name). - Namespace(utils.DefaultTestNamespace). - Replicas(replicas). - Build() - - _, err := f.K8sResourceManagers. - DeploymentManager(). 
- CreateAndWaitTillDeploymentIsReady(deploymentSpec, utils.DefaultDeploymentReadyTimeout*5) - Expect(err).ToNot(HaveOccurred()) - - if minPods != 0 { - time.Sleep(sleep) - } - - randBurst := rand.Intn(iterations) - - for i := 0; i < iterations; i++ { - By("Loop " + strconv.Itoa(i)) - - if i == randBurst { - fmt.Fprintf(GinkgoWriter, "Burst behavior from %v to %v pods\n", replicas, maxPods) - quickScale(maxPods) - continue - } - - if i == randBurst+1 { - fmt.Fprintf(GinkgoWriter, "Burst behavior over, scaling down from %v to %v pods\n", maxPods, - replicas) - quickScale(replicas) - continue - } - - result, op := randOp(replicas, iterPods) - replicas = checkInRange(result) - fmt.Fprintf(GinkgoWriter, "%v %v pod to cluster to equal %v pods\n", op, iterPods, replicas) - quickScale(replicas) - Expect(replicas).To(Equal(busyboxPodCnt())) - } - - end := time.Now().Unix() - - fmt.Fprintf(GinkgoWriter, fmt.Sprintf("Start Time: %v\n", start)) - fmt.Fprintf(GinkgoWriter, fmt.Sprintf("End Time: %v\n", end)) - - By("Starting Curl Container") - curlContainer := manifest.NewCurlContainer(). - Command([]string{"sleep", "3600"}).Build() - - getCurlPod := manifest.NewDefaultPodBuilder(). - Name("curl-pod"). - Namespace(utils.DefaultTestNamespace). - NodeName(primaryNode.Name). - HostNetwork(true). - Container(curlContainer). - Build() - - testPod, err := f.K8sResourceManagers.PodManager(). - CreateAndWaitTillPodCompleted(getCurlPod) - - logs, errLogs := f.K8sResourceManagers.PodManager(). 
- PodLogs(testPod.Namespace, testPod.Name) - Expect(errLogs).ToNot(HaveOccurred()) - fmt.Fprintln(GinkgoWriter, logs) - - By("Fetching metrics via Curl Container") - getMetrics(start, end) - - By("Deleting the deployment") - err = f.K8sResourceManagers.DeploymentManager().DeleteAndWaitTillDeploymentIsDeleted(deploymentSpec) - Expect(err).NotTo(HaveOccurred()) - - By("Deleting Curl Container") - err = f.K8sResourceManagers.PodManager().DeleteAndWaitTillPodDeleted(getCurlPod) - Expect(err).NotTo(HaveOccurred()) - }) - - AfterEach(func() { - By("Getting Warm Pool Environment Variables After Test") - getWarmPoolEnvVars() - }) - }) -}) diff --git a/test/integration/warm-pool/use_case_8_test.go b/test/integration/warm-pool/use_case_8_test.go deleted file mode 100644 index 9ec6f10f2f..0000000000 --- a/test/integration/warm-pool/use_case_8_test.go +++ /dev/null @@ -1,122 +0,0 @@ -package warm_pool - -import ( - "fmt" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" - "math/rand" - "strconv" - "time" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" -) - -// This test will simulate a preset number of bursts of different sizes relative to maxPods that occur at random -// intervals over a preset number of iterations -var _ = Describe("use case 8", func() { - Context("Multiple Burst Behavior", func() { - - BeforeEach(func() { - By("Getting Warm Pool Environment Variables Before Test") - getWarmPoolEnvVars() - }) - - It("Scales the cluster and checks warm pool before and after", func() { - replicas := minPods - - start := time.Now().Unix() - - fmt.Fprintf(GinkgoWriter, "Deploying %v minimum pods\n", minPods) - deploymentSpec := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry). - Namespace("default"). - Name("busybox"). - NodeName(primaryNode.Name). - Namespace(utils.DefaultTestNamespace). - Replicas(replicas). - Build() - - _, err := f.K8sResourceManagers. 
- DeploymentManager(). - CreateAndWaitTillDeploymentIsReady(deploymentSpec, utils.DefaultDeploymentReadyTimeout*5) - Expect(err).ToNot(HaveOccurred()) - - if minPods != 0 { - time.Sleep(sleep) - } - - // Creates some bursts of different sizes at random iterations. - burstIdx := rand.Perm(iterations)[:numBursts] - burstMap := make(map[int]int) - for i := 0; i < len(burstIdx); i++ { - key := burstIdx[i] - //value := incIf(rand.Intn(maxPods + 1)) - //value := int(maxPods / (rand.Intn(4) + 1)) - value := int(maxPods) - burstMap[key] = value - } - - for i := 0; i < iterations; i++ { - By("Loop " + strconv.Itoa(i)) - - val, present := burstMap[i] - if present { - fmt.Fprintf(GinkgoWriter, "Burst behavior from %v to %v pods\n", replicas, val) - quickScale(val) - - fmt.Fprintf(GinkgoWriter, "Burst behavior over, scaling down from %v to %v pods\n", val, - replicas) - quickScale(replicas) - continue - } - - result, op := randOp(replicas, iterPods) - replicas = checkInRange(result) - fmt.Fprintf(GinkgoWriter, "%v %v pod from cluster to equal %v pods\n", op, iterPods, replicas) - quickScale(replicas) - Expect(replicas).To(Equal(busyboxPodCnt())) - } - - end := time.Now().Unix() - - fmt.Fprintf(GinkgoWriter, fmt.Sprintf("Start Time: %v\n", start)) - fmt.Fprintf(GinkgoWriter, fmt.Sprintf("End Time: %v\n", end)) - - By("Starting Curl Container") - curlContainer := manifest.NewCurlContainer(). - Command([]string{"sleep", "3600"}).Build() - - getCurlPod := manifest.NewDefaultPodBuilder(). - Name("curl-pod"). - Namespace(utils.DefaultTestNamespace). - NodeName(primaryNode.Name). - HostNetwork(true). - Container(curlContainer). - Build() - - testPod, err := f.K8sResourceManagers.PodManager(). - CreateAndWaitTillPodCompleted(getCurlPod) - - logs, errLogs := f.K8sResourceManagers.PodManager(). 
- PodLogs(testPod.Namespace, testPod.Name) - Expect(errLogs).ToNot(HaveOccurred()) - fmt.Fprintln(GinkgoWriter, logs) - - By("Fetching metrics via Curl Container") - getMetrics(start, end) - - By("Deleting the deployment") - err = f.K8sResourceManagers.DeploymentManager().DeleteAndWaitTillDeploymentIsDeleted(deploymentSpec) - Expect(err).NotTo(HaveOccurred()) - - By("Deleting Curl Container") - err = f.K8sResourceManagers.PodManager().DeleteAndWaitTillPodDeleted(getCurlPod) - Expect(err).NotTo(HaveOccurred()) - }) - - AfterEach(func() { - By("Getting Warm Pool Environment Variables After Test") - getWarmPoolEnvVars() - }) - }) -}) diff --git a/test/integration/warm-pool/use_case_9_test.go b/test/integration/warm-pool/use_case_9_test.go deleted file mode 100644 index ce372e1c10..0000000000 --- a/test/integration/warm-pool/use_case_9_test.go +++ /dev/null @@ -1,115 +0,0 @@ -package warm_pool - -import ( - "fmt" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" - "math/rand" - "strconv" - "time" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" -) - -// This test will add a random amount of pods until it gets to maxPods, then subtract a random amount of pods until it -// gets to minPods -var _ = Describe("use case 9", func() { - Context("Random Add to Max, Random Sub to Min", func() { - - BeforeEach(func() { - By("Getting Warm Pool Environment Variables Before Test") - getWarmPoolEnvVars() - }) - - It("Scales the cluster and checks warm pool before and after", func() { - replicas := minPods - - start := time.Now().Unix() - - fmt.Fprintf(GinkgoWriter, "Deploying %v minimum pods\n", minPods) - deploymentSpec := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry). - Namespace("default"). - Name("busybox"). - NodeName(primaryNode.Name). - Namespace(utils.DefaultTestNamespace). - Replicas(replicas). - Build() - - _, err := f.K8sResourceManagers. 
- DeploymentManager(). - CreateAndWaitTillDeploymentIsReady(deploymentSpec, utils.DefaultDeploymentReadyTimeout*5) - Expect(err).ToNot(HaveOccurred()) - - if minPods != 0 { - time.Sleep(sleep) - } - - for replicas < maxPods { - i := 0 - randPods := incIf(rand.Intn(randDigits)) - By("Loop " + strconv.Itoa(i)) - // Will scale to a maximum of maxPods - replicas = min(replicas+randPods, maxPods) - fmt.Fprintf(GinkgoWriter, "Scaling cluster up to %v pods by adding %v pods\n", replicas, randPods) - quickScale(replicas) - Expect(replicas).To(Equal(busyboxPodCnt())) - i++ - } - - for replicas > minPods { - i := 0 - randPods := incIf(rand.Intn(randDigits)) - By("Loop " + strconv.Itoa(i)) - // Will scale to a minimum of minPods - replicas = max(replicas-randPods, minPods) - fmt.Fprintf(GinkgoWriter, "Scaling cluster down to %v pods by subtracting %v pods\n", replicas, - randPods) - quickScale(replicas) - Expect(replicas).To(Equal(busyboxPodCnt())) - i++ - } - - end := time.Now().Unix() - - fmt.Fprintf(GinkgoWriter, fmt.Sprintf("Start Time: %v\n", start)) - fmt.Fprintf(GinkgoWriter, fmt.Sprintf("End Time: %v\n", end)) - - By("Starting Curl Container") - curlContainer := manifest.NewCurlContainer(). - Command([]string{"sleep", "3600"}).Build() - - getCurlPod := manifest.NewDefaultPodBuilder(). - Name("curl-pod"). - Namespace(utils.DefaultTestNamespace). - NodeName(primaryNode.Name). - HostNetwork(true). - Container(curlContainer). - Build() - - testPod, err := f.K8sResourceManagers.PodManager(). - CreateAndWaitTillPodCompleted(getCurlPod) - - logs, errLogs := f.K8sResourceManagers.PodManager(). 
- PodLogs(testPod.Namespace, testPod.Name) - Expect(errLogs).ToNot(HaveOccurred()) - fmt.Fprintln(GinkgoWriter, logs) - - By("Fetching metrics via Curl Container") - getMetrics(start, end) - - By("Deleting the deployment") - err = f.K8sResourceManagers.DeploymentManager().DeleteAndWaitTillDeploymentIsDeleted(deploymentSpec) - Expect(err).NotTo(HaveOccurred()) - - By("Deleting Curl Container") - err = f.K8sResourceManagers.PodManager().DeleteAndWaitTillPodDeleted(getCurlPod) - Expect(err).NotTo(HaveOccurred()) - }) - - AfterEach(func() { - By("Getting Warm Pool Environment Variables After Test") - getWarmPoolEnvVars() - }) - }) -}) diff --git a/test/integration/warm-pool/warm_pool_suite_test.go b/test/integration/warm-pool/warm_pool_suite_test.go deleted file mode 100644 index 0f8d6dadfa..0000000000 --- a/test/integration/warm-pool/warm_pool_suite_test.go +++ /dev/null @@ -1,369 +0,0 @@ -// VPC Warm Pool Test Suite -// This test suite is a foundation for evaluating a dynamic warm pool, or ip consumption in general. Pair with grafana -//metrics dashboard to look at ip allocation and consumption. Each test displays the warm pool environment variables -//before and after to evaluate the changes made to the warm pool. Environment variables are not reset before and after -//each test so that way multiple tests can be run to evaluate behavior. You can run the test "clear warm env" which will -//unset all warm pool environment variables. Or, if you want to test the behavior with some of those environment -//variables set, alter them in that test and run it once before you run the desired tests. 
-// Use Case Test 1: Quick Scale Up and Down -// Use Case Test 2: Sawtooth Fixed Add and Subtract -// Use Case Test 3: Random Scale Fixed Add and Subtract -// Use Case Test 4: Random Scale Random Add and Subtract Operations -// Use Case Test 5: Proportionate Scaling -// Use Case Test 6: Random Scaling -// Use Case Test 7: Single Burst Behavior -// Use Case Test 8: Multiple Burst Behavior -// Use Case Test 9: Random Add to Max, Random Sub to Min - -package warm_pool - -import ( - "encoding/json" - "fmt" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest" - "github.com/aws/aws-sdk-go/service/ec2" - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" - v1 "k8s.io/api/apps/v1" - corev1 "k8s.io/api/core/v1" - "math/rand" - "slices" - "strconv" - "testing" - "time" - - "github.com/aws/amazon-vpc-cni-k8s/test/framework" - k8sUtils "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/utils" - "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" -) - -// Warm Pool Test Suite Constants -// Run all tests with these constants or change individual tests to get desired outcome -// Environment variables are used in the tests listed in the (...) 
-const ( - randDigits = 10 // exclusive, used in rand.Intn to change scale amount, <= maxPods, (3,6,9) - scale = 0.25 // used in set proportional scaling, iterate with a fixed percentage (5) - iterations = 2 // run test over a set number of iterations (2,3,4,7,8) - iterPods = 1 // iterate with a fixed number of pods (2,7,8) - numBursts = 2 // Use Case Test 8, set number of bursts (8) - preventNoChange = 1 // retries x amount of times if randInt/randOp is out of range, if out of range no cluster - // scaling occurs, if set above 0 will increment some areas of no cluster scaling (3, 4, 6, 8, 9) - maxPods = 60 // max pods you want to work with for your cluster (all) - minPods = 0 // tests can be run with a base amount of pods at start (all) - sleep = 1 * time.Minute // sleep interval (all) -) - -var clusterIP = "10.100.140.129" // Get the cluster ip of the prometheus-server service -var primaryInstance *ec2.Instance -var f *framework.Framework -var err error -var coreDNSDeploymentCopy *v1.Deployment - -const CoreDNSDeploymentName = "coredns" -const KubeSystemNamespace = "kube-system" - -type Result struct { - Status string `json:"status"` - Data struct { - ResultType string `json:"resultType"` - Result []struct { - Metric struct { - Name string `json:"__name__"` - AppKubernetesIoInstance string `json:"app_kubernetes_io_instance"` - AppKubernetesIoName string `json:"app_kubernetes_io_name"` - ControllerRevisionHash string `json:"controller_revision_hash"` - Instance string `json:"instance"` - Job string `json:"job"` - K8SApp string `json:"k8s_app"` - Namespace string `json:"namespace"` - Node string `json:"node"` - Pod string `json:"pod"` - PodTemplateGeneration string `json:"pod_template_generation"` - } - Values [][2]interface{} `json:"values"` - } - } -} - -func TestWarmPool(t *testing.T) { - RegisterFailHandler(Fail) - RunSpecs(t, "VPC Warm Pool Test Suite") -} - -var _ = BeforeSuite(func() { - f = framework.New(framework.GlobalOptions) - - By("creating test 
namespace") - f.K8sResourceManagers.NamespaceManager(). - CreateNamespace(utils.DefaultTestNamespace) - - nodeList, err := f.K8sResourceManagers.NodeManager().GetNodes(f.Options.NgNameLabelKey, - f.Options.NgNameLabelVal) - Expect(err).ToNot(HaveOccurred()) - - numOfNodes := len(nodeList.Items) - Expect(numOfNodes).Should(BeNumerically(">", 1)) - - // Nominate the first untainted node as the one to run coredns deployment against - By("adding nodeSelector in coredns deployment to be scheduled on single node") - var primaryNode *corev1.Node - for _, n := range nodeList.Items { - if len(n.Spec.Taints) == 0 { - primaryNode = &n - break - } - } - Expect(primaryNode).To(Not(BeNil()), "expected to find a non-tainted node") - instanceID := k8sUtils.GetInstanceIDFromNode(*primaryNode) - primaryInstance, err = f.CloudServices.EC2().DescribeInstance(instanceID) - Expect(err).ToNot(HaveOccurred()) - - By("getting node with no pods scheduled to run tests") - coreDNSDeployment, err := f.K8sResourceManagers.DeploymentManager().GetDeployment(CoreDNSDeploymentName, - KubeSystemNamespace) - Expect(err).ToNot(HaveOccurred()) - - // Copy the deployment to restore later - coreDNSDeploymentCopy = coreDNSDeployment.DeepCopy() - - // Add nodeSelector label to coredns deployment so coredns pods are scheduled on 'primary' node - coreDNSDeployment.Spec.Template.Spec.NodeSelector = map[string]string{ - "kubernetes.io/hostname": primaryNode.Labels["kubernetes.io/hostname"], - } - err = f.K8sResourceManagers.DeploymentManager().UpdateAndWaitTillDeploymentIsReady(coreDNSDeployment, - utils.DefaultDeploymentReadyTimeout) - Expect(err).ToNot(HaveOccurred()) - - // Redefine primary node as node without coredns pods. Note that this node may have previously had coredns pods. 
- for _, n := range nodeList.Items { - if len(n.Spec.Taints) == 0 && n.Name != primaryNode.Name { - primaryNode = &n - break - } - } - fmt.Fprintf(GinkgoWriter, "primary node is %s\n", primaryNode.Name) - instanceID = k8sUtils.GetInstanceIDFromNode(*primaryNode) - primaryInstance, err = f.CloudServices.EC2().DescribeInstance(instanceID) - Expect(err).ToNot(HaveOccurred()) -}) - -var _ = AfterSuite(func() { - // Restore coredns deployment - By("restoring coredns deployment") - err = f.K8sResourceManagers.DeploymentManager().UpdateAndWaitTillDeploymentIsReady(coreDNSDeploymentCopy, - utils.DefaultDeploymentReadyTimeout) - - By("deleting test namespace") - f.K8sResourceManagers.NamespaceManager(). - DeleteAndWaitTillNamespaceDeleted(utils.DefaultTestNamespace) -}) - -// Helper Functions // -func getWarmPoolEnvVars() { - daemonset, _ := f.K8sResourceManagers.DaemonSetManager().GetDaemonSet("kube-system", "aws-node") - warmPoolKeys := [5]string{"WARM_ENI_TARGET", "MINIMUM_IP_TARGET", "WARM_IP_TARGET", "WARM_PREFIX_TARGET", - "ENABLE_DYNAMIC_WARM_POOL"} - print("----\n") - for _, key := range warmPoolKeys { - val := utils.GetEnvValueForKeyFromDaemonSet(key, daemonset) - if val != "" { - print(" -", key, " : ", val, "\n") - } else { - print(" -", key, " : not set", "\n") - } - } - print("----\n") -} - -// Basic Prometheus api call -func callPrometheus(url string) Result { - command := []string{"curl", "--silent", "-g", url} - stdout, _, err := f.K8sResourceManagers.PodManager().PodExec(utils.DefaultTestNamespace, "curl-pod", - command) - Expect(err).ToNot(HaveOccurred()) - Expect(stdout).ShouldNot(BeEmpty()) - var result Result - marshallErr := json.Unmarshal([]byte(stdout), &result) - if marshallErr != nil { - fmt.Printf("Cannot unmarshall json: %s", marshallErr) - } - return result -} - -// Gets Prometheus metrics over the duration of the test and displays them -func getMetrics(start int64, end int64) { - warmMetric := 
"awscni_total_ip_addresses-awscni_assigned_ip_addresses" - noAddrsMetric := "awscni_err_no_avail_addrs" - netMetric := "awscni_assigned_ip_addresses" - duration := strDurationMin(start, end) - step := "30s" - - // warmMetric - netWarmUrl := fmt.Sprintf("http://%s/api/v1/query_range?query=%s&start=%v&end=%v&step=%s", - clusterIP, warmMetric, start, end, step) - resultNetWarm := callPrometheus(netWarmUrl) - fmt.Printf("\n %s", warmMetric) - netMap := make(map[string]int) - fmt.Printf("\nMAX Warm Pool (%v) over test duration: \n", warmMetric) - for i := 0; i < len(resultNetWarm.Data.Result); i++ { - node := resultNetWarm.Data.Result[i].Metric.Node - var maxArr []int - for j := 0; j < len(resultNetWarm.Data.Result[i].Values); j++ { - val, _ := strconv.Atoi(resultNetWarm.Data.Result[i].Values[j][1].(string)) - maxArr = append(maxArr, val) - if j == len(resultNetWarm.Data.Result[i].Values)-1 { - netMap[node] = val - } - } - fmt.Printf("%v : %v \n", node, slices.Max(maxArr)) - } - fmt.Printf("\nNET Warm Pool (%s) over test duration: \n", warmMetric) - for k, v := range netMap { - fmt.Printf("%v : %v \n", k, v) - } - - // noAddrsMetric - fmt.Printf("\n %s", noAddrsMetric) - noAddrUrl := fmt.Sprintf("http://%s/api/v1/query_range?query=delta(%s[%sm])&start=%v&end=%v&step=%s", - clusterIP, noAddrsMetric, duration, start, end, step) - resultNoAddrs := callPrometheus(noAddrUrl) - fmt.Printf("\nMAX DELTA %s over test duration: \n", noAddrsMetric) - for i := 0; i < len(resultNoAddrs.Data.Result); i++ { - node := resultNoAddrs.Data.Result[i].Metric.Node - var maxArr []int - for j := 0; j < len(resultNoAddrs.Data.Result[i].Values); j++ { - val := resultNoAddrs.Data.Result[i].Values[j][1].(string) - floatVal, err := strconv.ParseFloat(val, 64) - if err != nil { - Expect(err).ToNot(HaveOccurred()) - } - maxArr = append(maxArr, int(floatVal)) - } - fmt.Printf("%v : %v \n", node, slices.Max(maxArr)) - } - - // netMetric - fmt.Printf("\n %s", netMetric) - netUrl := 
fmt.Sprintf("http://%s/api/v1/query_range?query=delta(%s[%sm])&start=%v&end=%v&step=%s", - clusterIP, netMetric, duration, start, end, step) - resultNet := callPrometheus(netUrl) - fmt.Printf("\nMAX DELTA %s over test duration: \n", netMetric) - for i := 0; i < len(resultNet.Data.Result); i++ { - node := resultNet.Data.Result[i].Metric.Node - var maxArr []int - for j := 0; j < len(resultNet.Data.Result[i].Values); j++ { - val := resultNet.Data.Result[i].Values[j][1].(string) - floatVal, err := strconv.ParseFloat(val, 64) - if err != nil { - Expect(err).ToNot(HaveOccurred()) - } - maxArr = append(maxArr, int(floatVal)) - } - fmt.Printf("%v : %v \n", node, slices.Max(maxArr)) - } -} - -// Gets the duration in minutes for Prometheus queries -func strDurationMin(start int64, end int64) string { - duration := (end - start) / 60 - durationMin := strconv.FormatInt(duration, 10) - print("TEST DURATION: ", duration) - return durationMin -} - -// Random operation, if preventNoChange is 0 this includes no change being a result, otherwise it will add or subtract -func randOp(replicas int, pods int) (int, string) { - if preventNoChange == 0 { - op := rand.Intn(3) - if op == 0 { - return replicas + pods, "adding" - } - if op == 1 { - return replicas - pods, "subtracting" - } else { - return replicas, "no change" - } - } else { - op := rand.Intn(2) - if op == 0 { - return replicas + pods, "adding" - } else { - return replicas - pods, "subtracting" - } - } -} - -// Tries to get a random op/number combo that actually changes the cluster. If preventNoChange is above 0, will -// attempt to get another random integer to add/subtract that is within range. This is not always possible depending on -// what iterations and randDigits is set to, so it is best to set preventNoChange to a low number if it is set at all. -// If you want to see periods of no change, set this to 0. 
-func randOpLoop(replicas int) (int, string, int) { - result := 0 - op := "" - randPods := 0 - for i := 0; i < preventNoChange+1; i++ { - randPods = rand.Intn(randDigits) - result, op = randOp(replicas, randPods) - if result > minPods && result < maxPods && randPods != 0 { - return result, op, randPods - } - } - return result, op, randPods -} - -func quickScale(pods int) { - deploymentSpec := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry). - Namespace("default"). - Name("busybox"). - NodeName(primaryNode.Name). - Namespace(utils.DefaultTestNamespace). - Replicas(pods). - Build() - - err := f.K8sResourceManagers. - DeploymentManager(). - UpdateAndWaitTillDeploymentIsReady(deploymentSpec, utils.DefaultDeploymentReadyTimeout*5) - Expect(err).ToNot(HaveOccurred()) - - time.Sleep(sleep) -} - -// Check on pod count outside deployment -func busyboxPodCnt() int { - podCount := 0 - podList, _ := f.K8sResourceManagers.PodManager().GetPodsWithLabelSelector("role", "test") - for _, _ = range podList.Items { - podCount += 1 - } - return podCount -} - -func checkInRange(result int) int { - replicas := result - replicas = max(replicas, minPods) - replicas = min(replicas, maxPods) - return replicas -} - -// Tries to prevent no scaling in the cluster as rand.Intn is inclusive with 0, so just scale 1 instead. -func incIf(pods int) int { - if pods == 0 && preventNoChange > 0 { - return 1 - } else { - return pods - } -} - -func max(x, y int) int { - if x < y { - return y - } - return x -} - -func min(x, y int) int { - if y < x { - return y - } - return x -}