From 3c0615dca2ad50c22e1a554da85ee66a6cb6c981 Mon Sep 17 00:00:00 2001 From: Avi-Robusta <97387909+Avi-Robusta@users.noreply.github.com> Date: Tue, 8 Aug 2023 17:07:51 +0300 Subject: [PATCH] Unified prometheus support (#121) * initial refactoring initial changes working version aws test added eks managed prom support coralogix prom support and fix prom bug fixed bug added all aws config changes added docs and fixed requirements copying connect changes from robusta fixing bad typing reformatting added prometrix updated prometrix version * rebase fixes * black formatting * refactored check connection * Fixing victoria metrics check * updating poetry file after rebase --- README.md | 32 ++++++ poetry.lock | 100 ++++++++++++++++-- pyproject.toml | 4 +- requirements.txt | 68 ++++++------ .../core/integrations/prometheus/__init__.py | 2 +- .../core/integrations/prometheus/loader.py | 6 +- .../prometheus/metrics/__init__.py | 4 +- .../integrations/prometheus/metrics/base.py | 2 +- .../integrations/prometheus/metrics/cpu.py | 2 +- .../integrations/prometheus/metrics/memory.py | 2 +- .../metrics_service/base_metric_service.py | 8 -- .../prometheus_metrics_service.py | 37 ++----- .../metrics_service/thanos_metrics_service.py | 11 +- .../victoria_metrics_service.py | 11 +- .../prometheus/prometheus_client.py | 91 ---------------- .../prometheus/prometheus_utils.py | 50 +++++++++ robusta_krr/core/models/config.py | 11 +- robusta_krr/core/runner.py | 4 +- robusta_krr/main.py | 49 +++++++++ 19 files changed, 294 insertions(+), 200 deletions(-) delete mode 100644 robusta_krr/core/integrations/prometheus/prometheus_client.py create mode 100644 robusta_krr/core/integrations/prometheus/prometheus_utils.py diff --git a/README.md b/README.md index 677354cb..e91856ba 100644 --- a/README.md +++ b/README.md @@ -377,6 +377,38 @@ Than run the following command with PROMETHEUS_URL substituted for your Azure Ma ```sh python krr.py simple --namespace default -p PROMETHEUS_URL 
--prometheus-auth-header "Bearer $AZURE_BEARER" ``` +
See here about configuring labels for centralized prometheus
+ + + +## EKS managed Prometheus + +For EKS managed Prometheus, pass your Prometheus URL together with the --eks-managed-prom flag, and krr will automatically use your AWS credentials + +```sh +python krr.py simple -p "https://aps-workspaces.REGION.amazonaws.com/workspaces/..." --eks-managed-prom +``` +Additional optional parameters are: +```sh +--eks-profile-name PROFILE_NAME_HERE # to specify the profile to use from your config +--eks-access-key ACCESS_KEY # to specify your access key +--eks-secret-key SECRET_KEY # to specify your secret key +--eks-service-name SERVICE_NAME # to use a specific service name in the signature +--eks-managed-prom-region REGION_NAME # to specify the region the prometheus is in +``` +See here about configuring labels for centralized prometheus
+ + + +## Coralogix managed Prometheus + +For Coralogix managed Prometheus you need to specify your prometheus link and add the flag --coralogix-token with your Logs Query Key + +```sh +python krr.py simple -p "https://prom-api.coralogix..." --coralogix-token YOUR_LOGS_QUERY_KEY +``` + +See here about configuring labels for centralized prometheus
diff --git a/poetry.lock b/poetry.lock index 0e1b4132..5dbdfd46 100644 --- a/poetry.lock +++ b/poetry.lock @@ -94,6 +94,44 @@ d = ["aiohttp (>=3.7.4)"] jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] uvloop = ["uvloop (>=0.15.2)"] +[[package]] +name = "boto3" +version = "1.28.15" +description = "The AWS SDK for Python" +optional = false +python-versions = ">= 3.7" +files = [ + {file = "boto3-1.28.15-py3-none-any.whl", hash = "sha256:84b7952858e9319968b0348d9894a91a6bb5f31e81a45c68044d040a12362abe"}, + {file = "boto3-1.28.15.tar.gz", hash = "sha256:a6e711e0b6960c3a5b789bd30c5a18eea7263f2a59fc07f85efa5e04804e49d2"}, +] + +[package.dependencies] +botocore = ">=1.31.15,<1.32.0" +jmespath = ">=0.7.1,<2.0.0" +s3transfer = ">=0.6.0,<0.7.0" + +[package.extras] +crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] + +[[package]] +name = "botocore" +version = "1.31.15" +description = "Low-level, data-driven core of boto 3." +optional = false +python-versions = ">= 3.7" +files = [ + {file = "botocore-1.31.15-py3-none-any.whl", hash = "sha256:b3a0f787f275711875476cbe12a0123b2e6570b2f505e2fa509dcec3c5410b57"}, + {file = "botocore-1.31.15.tar.gz", hash = "sha256:b46d1ce4e0cf42d28fdf61ce0c999904645d38b51cb809817a361c0cec16d487"}, +] + +[package.dependencies] +jmespath = ">=0.7.1,<2.0.0" +python-dateutil = ">=2.1,<3.0.0" +urllib3 = ">=1.25.4,<1.27" + +[package.extras] +crt = ["awscrt (==0.16.26)"] + [[package]] name = "cachetools" version = "5.3.1" @@ -525,6 +563,17 @@ pipfile-deprecated-finder = ["pip-shims (>=0.5.2)", "pipreqs", "requirementslib" plugins = ["setuptools"] requirements-deprecated-finder = ["pip-api", "pipreqs"] +[[package]] +name = "jmespath" +version = "1.0.1" +description = "JSON Matching Expressions" +optional = false +python-versions = ">=3.7" +files = [ + {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, + {file = "jmespath-1.0.1.tar.gz", hash = 
"sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, +] + [[package]] name = "kiwisolver" version = "1.4.4" @@ -1040,6 +1089,23 @@ numpy = "*" pandas = ">=1.4.0" requests = "*" +[[package]] +name = "prometrix" +version = "0.1.10" +description = "" +optional = false +python-versions = ">=3.8,<4.0" +files = [ + {file = "prometrix-0.1.10-py3-none-any.whl", hash = "sha256:5caa0ee06d49d7ad1f881614edb81fe4b2f47f730bcd4286209627fdc41d550d"}, + {file = "prometrix-0.1.10.tar.gz", hash = "sha256:9ed61c0b77b503d38ce9c66d70742ad81a84b7afc6fbf63e8dbba9316b41a4df"}, +] + +[package.dependencies] +boto3 = ">=1.28.15,<2.0.0" +botocore = ">=1.31.15,<2.0.0" +prometheus-api-client = ">=0.5.3,<0.6.0" +pydantic = ">=1.8.1,<2.0.0" + [[package]] name = "pyasn1" version = "0.5.0" @@ -1486,6 +1552,23 @@ files = [ [package.dependencies] pyasn1 = ">=0.1.3" +[[package]] +name = "s3transfer" +version = "0.6.1" +description = "An Amazon S3 Transfer Manager" +optional = false +python-versions = ">= 3.7" +files = [ + {file = "s3transfer-0.6.1-py3-none-any.whl", hash = "sha256:3c0da2d074bf35d6870ef157158641178a4204a6e689e82546083e31e0311346"}, + {file = "s3transfer-0.6.1.tar.gz", hash = "sha256:640bb492711f4c0c0905e1f62b6aaeb771881935ad27884852411f8e9cacbca9"}, +] + +[package.dependencies] +botocore = ">=1.12.36,<2.0a.0" + +[package.extras] +crt = ["botocore[crt] (>=1.20.29,<2.0a.0)"] + [[package]] name = "setuptools" version = "68.0.0" @@ -1661,20 +1744,19 @@ devenv = ["black", "check-manifest", "flake8", "pyroma", "pytest (>=4.3)", "pyte [[package]] name = "urllib3" -version = "2.0.4" +version = "1.26.16" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false -python-versions = ">=3.7" +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ - {file = "urllib3-2.0.4-py3-none-any.whl", hash = "sha256:de7df1803967d2c2a98e4b11bb7d6bd9210474c46e8a0401514e3a42a75ebde4"}, - {file = "urllib3-2.0.4.tar.gz", hash = "sha256:8d22f86aae8ef5e410d4f539fde9ce6b2113a001bb4d189e0aed70642d602b11"}, + {file = "urllib3-1.26.16-py2.py3-none-any.whl", hash = "sha256:8d36afa7616d8ab714608411b4a3b13e58f463aee519024578e062e141dce20f"}, + {file = "urllib3-1.26.16.tar.gz", hash = "sha256:8f135f6502756bde6b2a9b28989df5fbe87c9970cecaa69041edcce7f0589b14"}, ] [package.extras] -brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] -secure = ["certifi", "cryptography (>=1.9)", "idna (>=2.0.0)", "pyopenssl (>=17.1.0)", "urllib3-secure-extra"] -socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] -zstd = ["zstandard (>=0.18.0)"] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] +secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] +socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] [[package]] name = "websocket-client" @@ -1710,4 +1792,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.12" -content-hash = "936ab8c06cf4a6e5cc8c3a8cbe6bb50ff8edf019e11cc9a1e20847505a8b62a5" +content-hash = "bcef0de696e4fbf7bd3140cc79c6d72fdbf3d39c20b7101c55c9613a1cd56a40" diff --git a/pyproject.toml b/pyproject.toml index ef99a74f..a4f0cca7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,9 +27,11 @@ python = ">=3.9,<3.12" typer = {extras = ["all"], version = "^0.7.0"} pydantic = "1.10.7" kubernetes = "^26.1.0" -prometheus-api-client = "^0.5.3" numpy = "^1.24.2" alive-progress = "^3.1.2" +botocore = "^1.31.10" +boto3 = "^1.28.10" +prometrix = "^0.1.10" [tool.poetry.group.dev.dependencies] diff --git a/requirements.txt b/requirements.txt index 
907cfede..f887a5e9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,52 +1,54 @@ about-time==4.2.1 ; python_version >= "3.9" and python_version < "3.12" -aiostream==0.4.5 ; python_version >= "3.9" and python_version < "3.12" -alive-progress==3.1.2 ; python_version >= "3.9" and python_version < "3.12" -cachetools==5.3.0 ; python_version >= "3.9" and python_version < "3.12" -certifi==2022.12.7 ; python_version >= "3.9" and python_version < "3.12" -charset-normalizer==3.0.1 ; python_version >= "3.9" and python_version < "3.12" -click==8.1.3 ; python_version >= "3.9" and python_version < "3.12" -colorama==0.4.6 ; python_version >= "3.9" and python_version < "3.12" +alive-progress==3.1.4 ; python_version >= "3.9" and python_version < "3.12" +boto3==1.28.15 ; python_version >= "3.9" and python_version < "3.12" +botocore==1.31.15 ; python_version >= "3.9" and python_version < "3.12" +cachetools==5.3.1 ; python_version >= "3.9" and python_version < "3.12" +certifi==2023.7.22 ; python_version >= "3.9" and python_version < "3.12" +charset-normalizer==3.2.0 ; python_version >= "3.9" and python_version < "3.12" +click==8.1.6 ; python_version >= "3.9" and python_version < "3.12" +colorama==0.4.6 ; python_version >= "3.9" and python_version < "3.12" and platform_system == "Windows" commonmark==0.9.1 ; python_version >= "3.9" and python_version < "3.12" -contourpy==1.0.7 ; python_version >= "3.9" and python_version < "3.12" +contourpy==1.1.0 ; python_version >= "3.9" and python_version < "3.12" cycler==0.11.0 ; python_version >= "3.9" and python_version < "3.12" -dateparser==1.1.7 ; python_version >= "3.9" and python_version < "3.12" -fonttools==4.39.0 ; python_version >= "3.9" and python_version < "3.12" -google-auth==2.16.2 ; python_version >= "3.9" and python_version < "3.12" +dateparser==1.1.8 ; python_version >= "3.9" and python_version < "3.12" +fonttools==4.42.0 ; python_version >= "3.9" and python_version < "3.12" +google-auth==2.17.3 ; python_version >= "3.9" 
and python_version < "3.12" grapheme==0.6.0 ; python_version >= "3.9" and python_version < "3.12" httmock==1.4.0 ; python_version >= "3.9" and python_version < "3.12" idna==3.4 ; python_version >= "3.9" and python_version < "3.12" -importlib-resources==5.12.0 ; python_version >= "3.9" and python_version < "3.10" +importlib-resources==6.0.0 ; python_version >= "3.9" and python_version < "3.10" +jmespath==1.0.1 ; python_version >= "3.9" and python_version < "3.12" kiwisolver==1.4.4 ; python_version >= "3.9" and python_version < "3.12" kubernetes==26.1.0 ; python_version >= "3.9" and python_version < "3.12" -matplotlib==3.7.1 ; python_version >= "3.9" and python_version < "3.12" -numpy==1.24.2 ; python_version >= "3.9" and python_version < "3.12" +matplotlib==3.7.2 ; python_version >= "3.9" and python_version < "3.12" +numpy==1.25.2 ; python_version >= "3.9" and python_version < "3.12" oauthlib==3.2.2 ; python_version >= "3.9" and python_version < "3.12" -packaging==23.0 ; python_version >= "3.9" and python_version < "3.12" -pandas==1.5.3 ; python_version >= "3.9" and python_version < "3.12" -pillow==9.4.0 ; python_version >= "3.9" and python_version < "3.12" +packaging==23.1 ; python_version >= "3.9" and python_version < "3.12" +pandas==2.0.3 ; python_version >= "3.9" and python_version < "3.12" +pillow==10.0.0 ; python_version >= "3.9" and python_version < "3.12" prometheus-api-client==0.5.3 ; python_version >= "3.9" and python_version < "3.12" -pyasn1-modules==0.2.8 ; python_version >= "3.9" and python_version < "3.12" -pyasn1==0.4.8 ; python_version >= "3.9" and python_version < "3.12" +prometrix==0.1.10 ; python_version >= "3.9" and python_version < "3.12" +pyasn1-modules==0.3.0 ; python_version >= "3.9" and python_version < "3.12" +pyasn1==0.5.0 ; python_version >= "3.9" and python_version < "3.12" pydantic==1.10.7 ; python_version >= "3.9" and python_version < "3.12" -pygments==2.14.0 ; python_version >= "3.9" and python_version < "3.12" +pygments==2.15.1 ; 
python_version >= "3.9" and python_version < "3.12" pyparsing==3.0.9 ; python_version >= "3.9" and python_version < "3.12" python-dateutil==2.8.2 ; python_version >= "3.9" and python_version < "3.12" -pytz-deprecation-shim==0.1.0.post0 ; python_version >= "3.9" and python_version < "3.12" -pytz==2022.7.1 ; python_version >= "3.9" and python_version < "3.12" -pyyaml==6.0 ; python_version >= "3.9" and python_version < "3.12" -regex==2022.10.31 ; python_version >= "3.9" and python_version < "3.12" +pytz==2023.3 ; python_version >= "3.9" and python_version < "3.12" +pyyaml==6.0.1 ; python_version >= "3.9" and python_version < "3.12" +regex==2023.6.3 ; python_version >= "3.9" and python_version < "3.12" requests-oauthlib==1.3.1 ; python_version >= "3.9" and python_version < "3.12" -requests==2.28.2 ; python_version >= "3.9" and python_version < "3.12" +requests==2.31.0 ; python_version >= "3.9" and python_version < "3.12" rich==12.6.0 ; python_version >= "3.9" and python_version < "3.12" rsa==4.9 ; python_version >= "3.9" and python_version < "3.12" -setuptools==67.4.0 ; python_version >= "3.9" and python_version < "3.12" +s3transfer==0.6.1 ; python_version >= "3.9" and python_version < "3.12" +setuptools==68.0.0 ; python_version >= "3.9" and python_version < "3.12" shellingham==1.5.0.post1 ; python_version >= "3.9" and python_version < "3.12" six==1.16.0 ; python_version >= "3.9" and python_version < "3.12" typer[all]==0.7.0 ; python_version >= "3.9" and python_version < "3.12" -typing-extensions==4.5.0 ; python_version >= "3.9" and python_version < "3.12" -tzdata==2022.7 ; python_version >= "3.9" and python_version < "3.12" -tzlocal==4.2 ; python_version >= "3.9" and python_version < "3.12" -urllib3==1.26.14 ; python_version >= "3.9" and python_version < "3.12" -websocket-client==1.5.1 ; python_version >= "3.9" and python_version < "3.12" -zipp==3.15.0 ; python_version >= "3.9" and python_version < "3.10" -slack-sdk==3.21.3 ; python_version >= "3.9" and python_version 
< "3.12" +typing-extensions==4.7.1 ; python_version >= "3.9" and python_version < "3.12" +tzdata==2023.3 ; python_version >= "3.9" and python_version < "3.12" +tzlocal==5.0.1 ; python_version >= "3.9" and python_version < "3.12" +urllib3==1.26.16 ; python_version >= "3.9" and python_version < "3.12" +websocket-client==1.6.1 ; python_version >= "3.9" and python_version < "3.12" +zipp==3.16.2 ; python_version >= "3.9" and python_version < "3.10" diff --git a/robusta_krr/core/integrations/prometheus/__init__.py b/robusta_krr/core/integrations/prometheus/__init__.py index e7e545b2..cedf1c0b 100644 --- a/robusta_krr/core/integrations/prometheus/__init__.py +++ b/robusta_krr/core/integrations/prometheus/__init__.py @@ -1,3 +1,3 @@ from .loader import PrometheusMetricsLoader from .metrics_service.prometheus_metrics_service import PrometheusDiscovery, PrometheusNotFound -from .prometheus_client import CustomPrometheusConnect, ClusterNotSpecifiedException +from .prometheus_utils import ClusterNotSpecifiedException diff --git a/robusta_krr/core/integrations/prometheus/loader.py b/robusta_krr/core/integrations/prometheus/loader.py index ca0d6f1e..70d839ad 100644 --- a/robusta_krr/core/integrations/prometheus/loader.py +++ b/robusta_krr/core/integrations/prometheus/loader.py @@ -2,21 +2,21 @@ import datetime from concurrent.futures import ThreadPoolExecutor -from typing import Optional, TYPE_CHECKING +from typing import TYPE_CHECKING, Optional from kubernetes import config as k8s_config from kubernetes.client.api_client import ApiClient +from prometrix import MetricsNotFound, PrometheusNotFound from robusta_krr.core.models.objects import K8sObjectData from robusta_krr.utils.configurable import Configurable -from .metrics_service.base_metric_service import MetricsNotFound from .metrics_service.prometheus_metrics_service import PrometheusMetricsService, PrometheusNotFound from .metrics_service.thanos_metrics_service import ThanosMetricsService from 
.metrics_service.victoria_metrics_service import VictoriaMetricsService if TYPE_CHECKING: - from robusta_krr.core.abstract.strategies import MetricsPodData, BaseStrategy + from robusta_krr.core.abstract.strategies import BaseStrategy, MetricsPodData from robusta_krr.core.models.config import Config METRICS_SERVICES = { diff --git a/robusta_krr/core/integrations/prometheus/metrics/__init__.py b/robusta_krr/core/integrations/prometheus/metrics/__init__.py index daee396a..6212a35c 100644 --- a/robusta_krr/core/integrations/prometheus/metrics/__init__.py +++ b/robusta_krr/core/integrations/prometheus/metrics/__init__.py @@ -1,3 +1,3 @@ -from .cpu import CPULoader, MaxCPULoader, PercentileCPULoader -from .memory import MemoryLoader, MaxMemoryLoader, PercentileMemoryLoader from .base import PrometheusMetric +from .cpu import CPULoader, MaxCPULoader, PercentileCPULoader +from .memory import MaxMemoryLoader, MemoryLoader, PercentileMemoryLoader diff --git a/robusta_krr/core/integrations/prometheus/metrics/base.py b/robusta_krr/core/integrations/prometheus/metrics/base.py index d076ff47..709be92f 100644 --- a/robusta_krr/core/integrations/prometheus/metrics/base.py +++ b/robusta_krr/core/integrations/prometheus/metrics/base.py @@ -5,7 +5,7 @@ import datetime import enum from concurrent.futures import ThreadPoolExecutor -from typing import Any, TYPE_CHECKING, Optional +from typing import TYPE_CHECKING, Any, Optional import numpy as np import pydantic as pd diff --git a/robusta_krr/core/integrations/prometheus/metrics/cpu.py b/robusta_krr/core/integrations/prometheus/metrics/cpu.py index 3aab7b48..f4b9058f 100644 --- a/robusta_krr/core/integrations/prometheus/metrics/cpu.py +++ b/robusta_krr/core/integrations/prometheus/metrics/cpu.py @@ -1,6 +1,6 @@ from robusta_krr.core.models.objects import K8sObjectData -from .base import QueryMetric, QueryRangeMetric, FilterMetric +from .base import FilterMetric, QueryMetric, QueryRangeMetric class CPULoader(QueryRangeMetric, 
FilterMetric): diff --git a/robusta_krr/core/integrations/prometheus/metrics/memory.py b/robusta_krr/core/integrations/prometheus/metrics/memory.py index 5fc6f732..d8779f25 100644 --- a/robusta_krr/core/integrations/prometheus/metrics/memory.py +++ b/robusta_krr/core/integrations/prometheus/metrics/memory.py @@ -1,6 +1,6 @@ from robusta_krr.core.models.objects import K8sObjectData -from .base import QueryMetric, QueryRangeMetric, FilterMetric +from .base import FilterMetric, QueryMetric, QueryRangeMetric class MemoryLoader(QueryRangeMetric, FilterMetric): diff --git a/robusta_krr/core/integrations/prometheus/metrics_service/base_metric_service.py b/robusta_krr/core/integrations/prometheus/metrics_service/base_metric_service.py index b0adfb74..8b4beef5 100644 --- a/robusta_krr/core/integrations/prometheus/metrics_service/base_metric_service.py +++ b/robusta_krr/core/integrations/prometheus/metrics_service/base_metric_service.py @@ -13,14 +13,6 @@ from ..metrics import PrometheusMetric -class MetricsNotFound(Exception): - """ - An exception raised when Metrics service is not found. 
- """ - - pass - - class MetricsService(Configurable, abc.ABC): def __init__( self, diff --git a/robusta_krr/core/integrations/prometheus/metrics_service/prometheus_metrics_service.py b/robusta_krr/core/integrations/prometheus/metrics_service/prometheus_metrics_service.py index b2cc1357..54805124 100644 --- a/robusta_krr/core/integrations/prometheus/metrics_service/prometheus_metrics_service.py +++ b/robusta_krr/core/integrations/prometheus/metrics_service/prometheus_metrics_service.py @@ -1,11 +1,12 @@ import asyncio import datetime import time -from typing import List, Optional from concurrent.futures import ThreadPoolExecutor +from typing import List, Optional from kubernetes.client import ApiClient from prometheus_api_client import PrometheusApiClientException +from prometrix import PrometheusNotFound, get_custom_prometheus_connect from requests.exceptions import ConnectionError, HTTPError from robusta_krr.core.abstract.strategies import PodsTimeData @@ -14,8 +15,8 @@ from robusta_krr.utils.service_discovery import MetricsServiceDiscovery from ..metrics import PrometheusMetric -from ..prometheus_client import ClusterNotSpecifiedException, CustomPrometheusConnect -from .base_metric_service import MetricsNotFound, MetricsService +from ..prometheus_utils import ClusterNotSpecifiedException, generate_prometheus_config +from .base_metric_service import MetricsService class PrometheusDiscovery(MetricsServiceDiscovery): @@ -41,14 +42,6 @@ def find_metrics_url(self, *, api_client: Optional[ApiClient] = None) -> Optiona ) -class PrometheusNotFound(MetricsNotFound): - """ - An exception raised when Prometheus is not found. - """ - - pass - - class PrometheusMetricsService(MetricsService): """ A class for fetching metrics from Prometheus. 
@@ -90,8 +83,10 @@ def __init__( headers |= {"Authorization": self.auth_header} elif not self.config.inside_cluster and self.api_client is not None: self.api_client.update_params_for_auth(headers, {}, ["BearerToken"]) - - self.prometheus = CustomPrometheusConnect(url=self.url, disable_ssl=not self.ssl_enabled, headers=headers) + self.prom_config = generate_prometheus_config( + config, url=self.url, headers=headers, metrics_service=self + ) + self.prometheus = get_custom_prometheus_connect(self.prom_config) def check_connection(self): """ @@ -99,20 +94,8 @@ def check_connection(self): Raises: PrometheusNotFound: If the connection to Prometheus cannot be established. """ - try: - response = self.prometheus._session.get( - f"{self.prometheus.url}/api/v1/query", - verify=self.prometheus.ssl_verification, - headers=self.prometheus.headers, - # This query should return empty results, but is correct - params={"query": "example"}, - ) - response.raise_for_status() - except (ConnectionError, HTTPError) as e: - raise PrometheusNotFound( - f"Couldn't connect to Prometheus found under {self.prometheus.url}\nCaused by {e.__class__.__name__}: {e})" - ) from e - + self.prometheus.check_prometheus_connection() + async def query(self, query: str) -> dict: loop = asyncio.get_running_loop() return await loop.run_in_executor(self.executor, lambda: self.prometheus.custom_query(query=query)) diff --git a/robusta_krr/core/integrations/prometheus/metrics_service/thanos_metrics_service.py b/robusta_krr/core/integrations/prometheus/metrics_service/thanos_metrics_service.py index 3b7ef851..eaf16201 100644 --- a/robusta_krr/core/integrations/prometheus/metrics_service/thanos_metrics_service.py +++ b/robusta_krr/core/integrations/prometheus/metrics_service/thanos_metrics_service.py @@ -1,10 +1,11 @@ from typing import Optional from kubernetes.client import ApiClient +from prometrix import MetricsNotFound, ThanosMetricsNotFound from robusta_krr.utils.service_discovery import 
MetricsServiceDiscovery -from .prometheus_metrics_service import MetricsNotFound, PrometheusMetricsService +from .prometheus_metrics_service import PrometheusMetricsService class ThanosMetricsDiscovery(MetricsServiceDiscovery): @@ -27,14 +28,6 @@ def find_metrics_url(self, *, api_client: Optional[ApiClient] = None) -> Optiona ) -class ThanosMetricsNotFound(MetricsNotFound): - """ - An exception raised when Thanos is not found. - """ - - pass - - class ThanosMetricsService(PrometheusMetricsService): """ A class for fetching metrics from Thanos. diff --git a/robusta_krr/core/integrations/prometheus/metrics_service/victoria_metrics_service.py b/robusta_krr/core/integrations/prometheus/metrics_service/victoria_metrics_service.py index 925136a6..a0f10100 100644 --- a/robusta_krr/core/integrations/prometheus/metrics_service/victoria_metrics_service.py +++ b/robusta_krr/core/integrations/prometheus/metrics_service/victoria_metrics_service.py @@ -1,10 +1,11 @@ from typing import Optional from kubernetes.client import ApiClient +from prometrix import MetricsNotFound, VictoriaMetricsNotFound from robusta_krr.utils.service_discovery import MetricsServiceDiscovery -from .prometheus_metrics_service import MetricsNotFound, PrometheusMetricsService +from .prometheus_metrics_service import PrometheusMetricsService class VictoriaMetricsDiscovery(MetricsServiceDiscovery): @@ -26,14 +27,6 @@ def find_metrics_url(self, *, api_client: Optional[ApiClient] = None) -> Optiona ) -class VictoriaMetricsNotFound(MetricsNotFound): - """ - An exception raised when Victoria Metrics is not found. - """ - - pass - - class VictoriaMetricsService(PrometheusMetricsService): """ A class for fetching metrics from Victoria Metrics. 
diff --git a/robusta_krr/core/integrations/prometheus/prometheus_client.py b/robusta_krr/core/integrations/prometheus/prometheus_client.py deleted file mode 100644 index ac93608a..00000000 --- a/robusta_krr/core/integrations/prometheus/prometheus_client.py +++ /dev/null @@ -1,91 +0,0 @@ -from typing import no_type_check - -import requests -from datetime import datetime -from prometheus_api_client import PrometheusConnect, Retry, PrometheusApiClientException -from requests.adapters import HTTPAdapter - - -class ClusterNotSpecifiedException(Exception): - """ - An exception raised when a prometheus requires a cluster label but an invalid one is provided. - """ - - pass - - -class CustomPrometheusConnect(PrometheusConnect): - """ - Custom PrometheusConnect class to handle retries. - """ - - @no_type_check - def __init__( - self, - url: str = "http://127.0.0.1:9090", - headers: dict = None, - disable_ssl: bool = False, - retry: Retry = None, - auth: tuple = None, - ): - super().__init__(url, headers, disable_ssl, retry, auth) - self._session = requests.Session() - self._session.mount(self.url, HTTPAdapter(max_retries=retry, pool_maxsize=10, pool_block=True)) - - @no_type_check - def custom_query(self, query: str, params: dict = None): - params = params or {} - data = None - query = str(query) - # using the query API to get raw data - response = self._session.post( - "{0}/api/v1/query".format(self.url), - data={"query": query, **params}, - verify=self.ssl_verification, - headers=self.headers, - auth=self.auth, - ) - if response.status_code == 200: - data = response.json()["data"]["result"] - else: - raise PrometheusApiClientException( - "HTTP Status Code {} ({!r})".format(response.status_code, response.content) - ) - - return data - - @no_type_check - def custom_query_range( - self, - query: str, - start_time: datetime, - end_time: datetime, - step: str, - params: dict = None, - ): - start = round(start_time.timestamp()) - end = round(end_time.timestamp()) - params = 
params or {} - data = None - query = str(query) - # using the query_range API to get raw data - response = self._session.post( - "{0}/api/v1/query_range".format(self.url), - data={ - "query": query, - "start": start, - "end": end, - "step": step, - **params, - }, - verify=self.ssl_verification, - headers=self.headers, - auth=self.auth, - ) - if response.status_code == 200: - data = response.json()["data"]["result"] - else: - raise PrometheusApiClientException( - "HTTP Status Code {} ({!r})".format(response.status_code, response.content) - ) - return data diff --git a/robusta_krr/core/integrations/prometheus/prometheus_utils.py b/robusta_krr/core/integrations/prometheus/prometheus_utils.py new file mode 100644 index 00000000..d4c6fd40 --- /dev/null +++ b/robusta_krr/core/integrations/prometheus/prometheus_utils.py @@ -0,0 +1,50 @@ +import boto3 +from prometrix import AWSPrometheusConfig, CoralogixPrometheusConfig, PrometheusConfig, VictoriaMetricsPrometheusConfig + +from robusta_krr.core.models.config import Config + + +class ClusterNotSpecifiedException(Exception): + """ + An exception raised when a prometheus requires a cluster label but an invalid one is provided. 
+ """ + + pass + + +def generate_prometheus_config( + config: Config, url: str, headers: dict[str, str], metrics_service: "PrometheusMetricsService" +) -> PrometheusConfig: + from .metrics_service.victoria_metrics_service import VictoriaMetricsService + + baseconfig = { + "url": url, + "disable_ssl": not config.prometheus_ssl_enabled, + "headers": headers, + } + + # aws config + if config.eks_managed_prom: + session = boto3.Session(profile_name=config.eks_managed_prom_profile_name) + credentials = session.get_credentials() + credentials = credentials.get_frozen_credentials() + region = config.eks_managed_prom_region if config.eks_managed_prom_region else session.region_name + access_key = config.eks_access_key if config.eks_access_key else credentials.access_key + secret_key = config.eks_secret_key if config.eks_secret_key else credentials.secret_key + service_name = config.eks_service_name if config.eks_secret_key else "aps" + if not region: + raise Exception("No eks region specified") + + return AWSPrometheusConfig( + access_key=access_key, + secret_access_key=secret_key, + aws_region=region, + service_name=service_name, + **baseconfig, + ) + # coralogix config + if config.coralogix_token: + return CoralogixPrometheusConfig(**baseconfig, prometheus_token=config.coralogix_token) + if isinstance(metrics_service, VictoriaMetricsService): + return VictoriaMetricsPrometheusConfig(**baseconfig) + return PrometheusConfig(**baseconfig) diff --git a/robusta_krr/core/models/config.py b/robusta_krr/core/models/config.py index 7354a3da..70ae7a9f 100644 --- a/robusta_krr/core/models/config.py +++ b/robusta_krr/core/models/config.py @@ -19,8 +19,8 @@ class Config(pd.BaseSettings): selector: Optional[str] = None # Value settings - cpu_min_value: int = pd.Field(5, ge=0) # in millicores - memory_min_value: int = pd.Field(10, ge=0) # in megabytes + cpu_min_value: int = pd.Field(100, ge=0) # in millicores + memory_min_value: int = pd.Field(100, ge=0) # in megabytes # Prometheus 
Settings prometheus_url: Optional[str] = pd.Field(None) @@ -29,6 +29,13 @@ class Config(pd.BaseSettings): prometheus_ssl_enabled: bool = pd.Field(False) prometheus_cluster_label: Optional[str] = pd.Field(None) prometheus_label: Optional[str] = pd.Field(None) + eks_managed_prom: bool = pd.Field(False) + eks_managed_prom_profile_name: Optional[str] = pd.Field(None) + eks_access_key: Optional[str] = pd.Field(None) + eks_secret_key: Optional[str] = pd.Field(None) + eks_service_name: Optional[str] = pd.Field(None) + eks_managed_prom_region: Optional[str] = pd.Field(None) + coralogix_token: Optional[str] = pd.Field(None) # Threading settings max_workers: int = pd.Field(6, ge=1) diff --git a/robusta_krr/core/runner.py b/robusta_krr/core/runner.py index 26ac3b15..f419c7cb 100644 --- a/robusta_krr/core/runner.py +++ b/robusta_krr/core/runner.py @@ -9,10 +9,10 @@ from robusta_krr.core.abstract.strategies import ResourceRecommendation, RunResult from robusta_krr.core.integrations.kubernetes import KubernetesLoader from robusta_krr.core.integrations.prometheus import ( - ClusterNotSpecifiedException, PrometheusMetricsLoader, - PrometheusNotFound, + ClusterNotSpecifiedException, ) +from prometrix import PrometheusNotFound from robusta_krr.core.models.config import Config from robusta_krr.core.models.objects import K8sObjectData from robusta_krr.core.models.result import ( diff --git a/robusta_krr/main.py b/robusta_krr/main.py index 0b460d6c..5fbe4566 100644 --- a/robusta_krr/main.py +++ b/robusta_krr/main.py @@ -118,6 +118,48 @@ def {func_name}( help="The label in prometheus used to differentiate clusters. 
(Only relevant for centralized prometheus)", rich_help_panel="Prometheus Settings", ), + eks_managed_prom: bool = typer.Option( + False, + "--eks-managed-prom", + help="Adds additional signitures for eks prometheus connection.", + rich_help_panel="Prometheus EKS Settings", + ), + eks_managed_prom_profile_name: Optional[str] = typer.Option( + None, + "--eks-profile-name", + help="Sets the profile name for eks prometheus connection.", + rich_help_panel="Prometheus EKS Settings", + ), + eks_access_key: Optional[str] = typer.Option( + None, + "--eks-access-key", + help="Sets the access key for eks prometheus connection.", + rich_help_panel="Prometheus EKS Settings", + ), + eks_secret_key: Optional[str] = typer.Option( + None, + "--eks-secret-key", + help="Sets the secret key for eks prometheus connection.", + rich_help_panel="Prometheus EKS Settings", + ), + eks_service_name: Optional[str] = typer.Option( + "aps", + "--eks-service-name", + help="Sets the service name for eks prometheus connection.", + rich_help_panel="Prometheus EKS Settings", + ), + eks_managed_prom_region: Optional[str] = typer.Option( + None, + "--eks-managed-prom-region", + help="Sets the region for eks prometheus connection.", + rich_help_panel="Prometheus EKS Settings", + ), + coralogix_token: Optional[str] = typer.Option( + None, + "--coralogix-token", + help="Adds the token needed to query Coralogix managed prometheus.", + rich_help_panel="Prometheus Coralogix Settings", + ), max_workers: int = typer.Option( 10, "--max-workers", @@ -146,6 +188,13 @@ def {func_name}( prometheus_ssl_enabled=prometheus_ssl_enabled, prometheus_cluster_label=prometheus_cluster_label, prometheus_label=prometheus_label, + eks_managed_prom=eks_managed_prom, + eks_managed_prom_region=eks_managed_prom_region, + eks_managed_prom_profile_name=eks_managed_prom_profile_name, + eks_access_key=eks_access_key, + eks_secret_key=eks_secret_key, + eks_service_name=eks_service_name, + coralogix_token=coralogix_token, 
max_workers=max_workers, format=format, verbose=verbose,