Skip to content

Commit

Permalink
Update data_visualizations.py and visualization.py
Browse files Browse the repository at this point in the history
  • Loading branch information
joaquinsgi committed Sep 12, 2024
1 parent d7d0628 commit 189bd23
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def get_script_arguments():
help=f'Path to Json with Columns to Plot. Default {None}.')
parser.add_argument('-u', '--unify', dest='unify', action='store_true',
help='Unify data of the binary processes with their subprocesses to plot.')
parser.add_argument('-x', help="Title of the generated chart, add extra info here.", type=str, dest='plot_title')

return parser.parse_args()

Expand All @@ -67,6 +68,7 @@ def main():
'dataframes_paths': options.csv_list,
'store_path': options.destination,
'base_name': options.name
'plot_title': options.plot_title
}

strategy = target
Expand Down
64 changes: 43 additions & 21 deletions deps/wazuh_testing/wazuh_testing/tools/performance/visualization.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,22 @@ class DataVisualizer(ABC):
dataframe (pandas.Dataframe): dataframe containing the info from all the CSVs.
store_path (str): path to store the CSV images. Defaults to the temp directory.
base_name (str, optional): base name used to store the images.
plot_title (str, optional): Title for the generated plots.
"""

def __init__(self, dataframes_paths, store_path=gettempdir(), base_name=None):
def __init__(self, dataframes_paths, store_path=gettempdir(), base_name=None, plot_title=None):
"""Initializes the DataVisualizer.
Args:
dataframes_paths (list): List of paths to CSV files.
store_path (str, optional): Path to store the CSV images. Defaults to the temp directory.
base_name (str, optional): Base name used to store the images.
plot_title (str, optional): Title for the generated plots.
"""
self.dataframes_paths = dataframes_paths
self.store_path = store_path
self.base_name = base_name
self.plot_title = plot_title
self.dataframe = pd.DataFrame()

self._load_dataframes()
Expand Down Expand Up @@ -198,6 +201,7 @@ class BinaryDatavisualizer(DataVisualizer):
dataframe (pandas.Dataframe): dataframe containing the info from all the CSVs.
store_path (str): path to store the CSV images. Defaults to the temp directory.
base_name (str, optional): base name used to store the images.
plot_title (str, optional): Title for the generated plots.
binary_metrics_fields_to_plot (list): List of binary metrics fields to plot.
binary_metrics_extra_fields (list): List of additional binary metrics fields.
binary_metrics_fields (list): Combined list of binary metrics fields.
Expand All @@ -209,16 +213,17 @@ class BinaryDatavisualizer(DataVisualizer):
binary_metrics_extra_fields = ["Daemon", "Version", "PID"]
binary_metrics_fields = binary_metrics_fields_to_plot + binary_metrics_extra_fields

def __init__(self, dataframes_paths, store_path=gettempdir(), base_name=None, unify_child_daemon_metrics=False):
def __init__(self, dataframes_paths, store_path=gettempdir(), base_name=None, unify_child_daemon_metrics=False, plot_title=None):
"""Initialize the BinaryDatavisualizer.
Args:
dataframes (list): List of dataframes containing binary metrics data.
store_path (str, optional): Path to store visualizations. Defaults to system temp directory.
base_name (str, optional): Base name for saved visualizations. Defaults to None.
plot_title (str, optional): Title for the generated plots.
unify_child_daemon_metrics (bool, optional): Whether to unify child daemon metrics. Defaults to False.
"""
super().__init__(dataframes_paths, store_path, base_name)
super().__init__(dataframes_paths, store_path, base_name, plot_title)
self._validate_dataframe()
if unify_child_daemon_metrics:
self.dataframe = self.dataframe.reset_index(drop=False)
Expand Down Expand Up @@ -302,6 +307,7 @@ def plot(self):
This method creates and saves plots for each binary metric field.
"""
p_title = self.plot_title.replace('<<TAB>>', ' ')
columns_to_plot = self._get_fields_to_plot()
for element in columns_to_plot:
_, ax = plt.subplots()
Expand All @@ -311,7 +317,7 @@ def plot(self):
self._basic_plot(ax, self.dataframe[self.dataframe.Daemon == daemon][element],
label=daemon, color=color)

self._save_custom_plot(ax, element, element)
self._save_custom_plot(ax, element, p_title)


class DaemonStatisticsVisualizer(DataVisualizer):
Expand All @@ -322,6 +328,7 @@ class DaemonStatisticsVisualizer(DataVisualizer):
dataframe (pandas.Dataframe): dataframe containing the info from all the CSVs.
store_path (str): path to store the CSV images. Defaults to the temp directory.
base_name (str, optional): base name used to store the images.
plot_title (str, optional): Title for the generated plots.
daemon (str): Name of the daemon for which statistics are visualized.
plots_data (dict): Data required for plotting statistics.
expected_fields (list): List of expected fields for the daemon statistics.
Expand All @@ -331,17 +338,18 @@ class DaemonStatisticsVisualizer(DataVisualizer):
statistics_plot_data_directory = join(dirname(realpath(__file__)), '..', '..', 'data', 'data_visualizer')
statistics_filename_suffix = '_csv_headers.json'

def __init__(self, dataframes_paths, daemon, store_path=gettempdir(), base_name=None):
def __init__(self, dataframes_paths, daemon, store_path=gettempdir(), base_name=None, plot_title=None):
"""Initialize the DaemonStatisticsVisualizer.
Args:
dataframes (list): List of dataframes containing daemon statistics data.
daemon (str): Name of the daemon for which statistics are visualized.
store_path (str, optional): Path to store visualizations. Defaults to system temp directory.
plot_title (str, optional): Title for the generated plots.
base_name (str, optional): Base name for saved visualizations. Defaults to None.
"""
self.daemon = daemon
super().__init__(dataframes_paths, store_path, base_name)
super().__init__(dataframes_paths, store_path, base_name, plot_title)
self.plots_data = self._load_plot_data()
self.expected_fields = []
for graph in self.plots_data.values():
Expand Down Expand Up @@ -383,6 +391,7 @@ def plot(self):
This method creates and saves plots for each statistic field.
"""
p_title = self.plot_title.replace('<<TAB>>', ' ')
for element in self.plots_data.values():
columns = element['columns']
title = element['title']
Expand All @@ -391,7 +400,7 @@ def plot(self):
_, ax = plt.subplots()
for column, color in zip(columns, colors):
self._basic_plot(ax, self.dataframe[column], label=column, color=color)
self._save_custom_plot(ax, title, title)
self._save_custom_plot(ax, title, p_title)


class LogcollectorStatisticsVisualizer(DaemonStatisticsVisualizer):
Expand All @@ -402,19 +411,21 @@ class LogcollectorStatisticsVisualizer(DaemonStatisticsVisualizer):
dataframe (pandas.Dataframe): dataframe containing the info from all the CSVs.
store_path (str): path to store the CSV images. Defaults to the temp directory.
base_name (str, optional): base name used to store the images.
plot_title (str, optional): Title for the generated plots.
general_fields (list): List of general fields for logcollector statistics.
"""
general_fields = ['Location', 'Target']

def __init__(self, dataframes_paths, store_path=gettempdir(), base_name=None, plot_title=None):
    """Initialize the LogcollectorStatisticsVisualizer.

    Args:
        dataframes_paths (list): List of paths to the CSV files containing logcollector statistics data.
        store_path (str, optional): Path to store visualizations. Defaults to system temp directory.
        base_name (str, optional): Base name for saved visualizations. Defaults to None.
        plot_title (str, optional): Title for the generated plots. Defaults to None.
    """
    # The daemon name is fixed for this visualizer; every other argument is forwarded unchanged
    # to the parent initializer, including the plot title.
    super().__init__(dataframes_paths, 'logcollector', store_path, base_name, plot_title)

def _get_expected_fields(self):
"""Get the list of expected fields for logcollector statistics.
Expand All @@ -437,6 +448,7 @@ def plot(self):
This method creates and saves plots for each logcollector target.
"""
p_title = self.plot_title.replace('<<TAB>>', ' ')
for element in self.plots_data.values():
_, ax = plt.subplots()
targets = self._get_logcollector_location()
Expand All @@ -445,7 +457,7 @@ def plot(self):
self._basic_plot(ax, self.dataframe[self.dataframe.Location == target][element['columns']],
label=target, color=color)

self._save_custom_plot(ax, element['title'], element['title'])
self._save_custom_plot(ax, element['title'], p_title)


class ClusterStatisticsVisualizer(DataVisualizer):
Expand All @@ -456,19 +468,21 @@ class ClusterStatisticsVisualizer(DataVisualizer):
dataframe (pandas.Dataframe): dataframe containing the info from all the CSVs.
store_path (str): path to store the CSV images. Defaults to the temp directory.
base_name (str, optional): base name used to store the images.
plot_title (str, optional): Title for the generated plots.
expected_cluster_fields (list): List of expected fields for cluster statistics.
"""
expected_cluster_fields = ['node_name', 'activity', 'time_spent(s)']

def __init__(self, dataframes_paths, store_path=gettempdir(), base_name=None, plot_title=None):
    """Initialize the ClusterStatisticsVisualizer.

    Args:
        dataframes_paths (list): List of paths to dataframes containing cluster statistics data.
        store_path (str, optional): Path to store visualizations. Defaults to system temp directory.
        base_name (str, optional): Base name for saved visualizations. Defaults to None.
        plot_title (str, optional): Title for the generated plots. Defaults to None.
    """
    # Forward the plot title as well, so the parent stores it on the instance.
    super().__init__(dataframes_paths, store_path, base_name, plot_title)
    # Validation runs after the parent initializer has finished.
    self._validate_dataframe()

def _get_expected_fields(self) -> list:
Expand All @@ -485,7 +499,8 @@ def plot(self):
This method creates and saves plots for each cluster activity.
"""
elements = list(self.dataframe['activity'].unique())

self.base_name = element.replace(' ', '_').lower()
p_title = self.plot_title.replace('<<TAB>>', ' ')
for element in elements:
_, ax = plt.subplots()
nodes = self.dataframe[self.dataframe.activity == element]['node_name'].unique()
Expand All @@ -494,7 +509,7 @@ def plot(self):
for node, color in zip(nodes, self._color_palette(len(nodes))):
self._basic_plot(ax=ax, dataframe=current_df[current_df.node_name == node]['time_spent(s)'],
label=node, color=color)
self._save_custom_plot(ax, 'time_spent(s)', element.replace(' ', '_').lower(), disable_x_labels=True,
self._save_custom_plot(ax, 'time_spent(s)', p_title, disable_x_labels=True,
statistics=DataVisualizer._get_statistics(
current_df['time_spent(s)'], calculate_mean=True, calculate_median=True))

Expand All @@ -506,19 +521,21 @@ class IndexerAlerts(DataVisualizer):
dataframes_paths (list): paths of the CSVs.
dataframe (pandas.Dataframe): dataframe containing the info from all the CSVs.
store_path (str): path to store the CSV images. Defaults to the temp directory.
plot_title (str, optional): Title for the generated plots.
expected_fields (list): List of expected fields for indexer alerts.
"""
expected_fields = ['Total alerts']

def __init__(self, dataframes_paths, store_path=gettempdir(), base_name=None, plot_title=None):
    """Initialize the IndexerAlerts visualizer.

    Args:
        dataframes_paths (list): List of paths to dataframes containing indexer alerts data.
        store_path (str, optional): Path to store visualizations. Defaults to system temp directory.
        base_name (str, optional): Base name for saved visualizations. Defaults to None.
        plot_title (str, optional): Title for the generated plots. Defaults to None.
    """
    # Forward the plot title as well, so the parent stores it on the instance.
    super().__init__(dataframes_paths, store_path, base_name, plot_title)
    # Validation runs after the parent initializer has finished.
    self._validate_dataframe()

def _get_expected_fields(self):
Expand All @@ -543,23 +560,25 @@ def _plot_agregated_alerts(self):
This method creates and saves a plot for the aggregated alerts.
"""
p_title = self.plot_title.replace('<<TAB>>', ' ')
_, ax = plt.subplots()
self.dataframe['Difference'] = self.dataframe['Total alerts'].diff()
self.dataframe['Difference'] = self.dataframe['Difference'] / self._calculate_timestamp_interval()

self._basic_plot(ax=ax, dataframe=self.dataframe['Difference'], label='Alerts per timestamp',
color=self._color_palette(1)[0])
self._save_custom_plot(ax, 'Different alerts', 'Difference alerts')
self._save_custom_plot(ax, 'Different alerts', p_title)

def _plot_plain_alerts(self):
    """Plot the total alerts.

    This method creates and saves a plot for the total alerts.

    The title passed when saving is ``self.plot_title`` with every ``<<TAB>>`` marker replaced by a
    space. When no title was configured (``plot_title`` is None, the constructor default), the
    previous fixed title 'Total alerts' is used instead of failing on ``None.replace``.
    """
    default_title = 'Total alerts'
    # Guard against the None default: str.replace is only valid on an actual string.
    p_title = default_title if self.plot_title is None else self.plot_title.replace('<<TAB>>', ' ')
    _, ax = plt.subplots()
    self._basic_plot(ax=ax, dataframe=self.dataframe['Total alerts'], label='Total alerts',
                     color=self._color_palette(1)[0])
    # Save exactly once, using the resolved title.
    self._save_custom_plot(ax, 'Total alerts', p_title)

def plot(self):
"""Plot the indexer alerts data.
Expand All @@ -577,19 +596,21 @@ class IndexerVulnerabilities(DataVisualizer):
dataframes_paths (list): paths of the CSVs.
dataframe (pandas.Dataframe): dataframe containing the info from all the CSVs.
store_path (str): path to store the CSV images. Defaults to the temp directory.
plot_title (str, optional): Title for the generated plots.
expected_fields (list): List of expected fields for indexer vulnerabilities.
"""
expected_fields = ['Total vulnerabilities']

def __init__(self, dataframes_paths, store_path=gettempdir(), base_name=None, plot_title=None):
    """Initialize the IndexerVulnerabilities visualizer.

    Args:
        dataframes_paths (list): List of paths to dataframes containing indexer vulnerabilities data.
        store_path (str, optional): Path to store visualizations. Defaults to system temp directory.
        base_name (str, optional): Base name for saved visualizations. Defaults to None.
        plot_title (str, optional): Title for the generated plots. Defaults to None.
    """
    # Forward the plot title as well, so the parent stores it on the instance.
    super().__init__(dataframes_paths, store_path, base_name, plot_title)
    # Validation runs after the parent initializer has finished.
    self._validate_dataframe()

def _get_expected_fields(self):
Expand All @@ -605,7 +626,8 @@ def plot(self):
This method creates and saves a plot for the total vulnerabilities.
"""
p_title = self.plot_title.replace('<<TAB>>', ' ')
_, ax = plt.subplots()
self._basic_plot(ax=ax, dataframe=self.dataframe['Total vulnerabilities'], label='Indexed Vulnerabilities',
color=self._color_palette(1)[0])
self._save_custom_plot(ax, 'Total Vulnerabilities', 'Total vulnerabilities')
self._save_custom_plot(ax, 'Total Vulnerabilities', p_title)

0 comments on commit 189bd23

Please sign in to comment.