diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6fc4663c36..b04a3f68ab 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -18,6 +18,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 - The `stop_date` argument to the `modal_location_from_dates` and `meaningful_locations_*` functions in FlowClient has been renamed `end_date` [#470](https://github.com/Flowminder/FlowKit/issues/470)
 - `get_result_by_query_id` now accepts a `poll_interval` argument, which allows polling frequency to be changed
 - `RadiusOfGyration` now returns a `value` column instead of an `rog` column
+- `TotalNetworkObjects` and `AggregateNetworkObjects` now return a `value` column, rather than `statistic_name`
 
 ### Changed
 - All environment variables are now in a single `development_environment` file in the project root, development environment setup has been simplified
@@ -46,6 +47,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 ### Fixed
 - Fixed being unable to add new users or servers when running FlowAuth with a Postgres database [#622](https://github.com/Flowminder/FlowKit/issues/622)
 - Resetting the cache using `reset_cache` will now reset the state of queries in redis as well [#650](https://github.com/Flowminder/FlowKit/issues/650)
+- Fixed `mode` statistic for `AggregateNetworkObjects` [#651](https://github.com/Flowminder/FlowKit/issues/651)
 
 ### Removed
 - Removed `docker-compose-dev.yml`, and docker-compose files in `docs/`, `flowdb/tests/` and `integration_tests/`.
diff --git a/flowmachine/flowmachine/features/network/total_network_objects.py b/flowmachine/flowmachine/features/network/total_network_objects.py
index ecc3f837c7..012cccf096 100644
--- a/flowmachine/flowmachine/features/network/total_network_objects.py
+++ b/flowmachine/flowmachine/features/network/total_network_objects.py
@@ -148,7 +148,7 @@ def __init__(
     @property
     def column_names(self) -> List[str]:
         return get_columns_for_level(self.level, self.joined.column_name) + [
-            "total",
+            "value",
             "datetime",
         ]
 
@@ -158,7 +158,7 @@ def _make_query(self):
             get_columns_for_level(self.level, self.joined.column_name)
         )
         sql = """
-        SELECT {group_cols}, COUNT(*) as total,
+        SELECT {group_cols}, COUNT(*) as value,
               datetime FROM
           (SELECT DISTINCT {group_cols}, {cols}, datetime FROM
             (SELECT {group_cols}, {cols}, date_trunc('{total_by}', x.datetime) AS datetime
@@ -246,7 +246,7 @@ def __init__(self, *, total_network_objects, statistic="avg", aggregate_by=None)
     def column_names(self) -> List[str]:
         return get_columns_for_level(
            self.total_objs.level, self.total_objs.joined.column_name
-        ) + [self.statistic, "datetime"]
+        ) + ["value", "datetime"]
 
     def _make_query(self):
         group_cols = ",".join(
@@ -254,16 +254,15 @@
                self.total_objs.level, self.total_objs.joined.column_name
            )
        )
-        sql = """
-        SELECT {group_cols}, {stat}(z.total) as {stat},
-        date_trunc('{aggregate_by}', z.datetime) as datetime FROM
-        ({totals}) z
-        GROUP BY {group_cols}, date_trunc('{aggregate_by}', z.datetime)
-        ORDER BY {group_cols}, date_trunc('{aggregate_by}', z.datetime)
-        """.format(
-            aggregate_by=self.aggregate_by,
-            stat=self.statistic,
-            totals=self.total_objs.get_query(),
-            group_cols=group_cols,
-        )
+        if self.statistic == "mode":
+            av_call = f"pg_catalog.mode() WITHIN GROUP(ORDER BY z.value)"
+        else:
+            av_call = f"{self.statistic}(z.value)"
+        sql = f"""
+        SELECT {group_cols}, {av_call} as value,
+        date_trunc('{self.aggregate_by}', z.datetime) as datetime FROM
+        ({self.total_objs.get_query()}) z
+        GROUP BY {group_cols}, date_trunc('{self.aggregate_by}', z.datetime)
+        ORDER BY {group_cols}, date_trunc('{self.aggregate_by}', z.datetime)
+        """
         return sql
diff --git a/flowmachine/tests/test_total_network_objects.py b/flowmachine/tests/test_total_network_objects.py
index 19fc9275c2..9d7013774a 100644
--- a/flowmachine/tests/test_total_network_objects.py
+++ b/flowmachine/tests/test_total_network_objects.py
@@ -14,6 +14,39 @@
 from flowmachine.features import TotalNetworkObjects, AggregateNetworkObjects
 
 
+@pytest.mark.parametrize(
+    "stat, expected",
+    [
+        ("avg", 30.541666666666668),
+        ("max", 38),
+        ("min", 21),
+        ("median", 31.0),
+        ("mode", 27),
+        ("stddev", 4.096437122848253),
+        ("variance", 16.780797101449277),
+    ],
+)
+def test_aggregate_returns_correct_values(stat, expected, get_dataframe):
+    """
+    AggregateNetworkObjects returns correct values.
+
+    """
+    instance = network.AggregateNetworkObjects(
+        total_network_objects=network.TotalNetworkObjects(
+            start="2016-01-01", stop="2016-12-30", table="calls", total_by="hour"
+        ),
+        statistic=stat,
+    )
+    df = get_dataframe(instance)
+
+    #
+    # This will compare the very first
+    # value with an independently
+    # computed value.
+    #
+    assert pytest.approx(df.value[0]) == expected
+
+
 def test_count_returns_correct_values(get_dataframe):
     """
     TotalNetworkObjects returns correct values.
@@ -29,7 +62,7 @@
     # value with an independently
     # computed value.
     #
-    assert df.total[34] == 31
+    assert df.value[34] == 31
 
 
 @pytest.mark.parametrize(
@@ -73,7 +106,7 @@ def test_median_returns_correct_values(get_dataframe):
     # value with an independently
     # computed value.
     #
-    assert get_dataframe(instance).head(1)["median"][0] == 25
+    assert get_dataframe(instance).head(1)["value"][0] == 25
 
 
 def test_mean_returns_correct_values(get_dataframe):
@@ -96,7 +129,7 @@
     # value with an independently
     # computed value.
     #
-    assert get_dataframe(instance).head(1)["avg"][0] == pytest.approx(28.7916666666)
+    assert get_dataframe(instance).head(1)["value"][0] == pytest.approx(28.7916666666)
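
Why the patch special-cases `mode`: PostgreSQL's `mode()` is an ordered-set aggregate, so unlike `avg`, `min`, `max`, `stddev`, or `variance` it takes no direct argument and must be written `mode() WITHIN GROUP (ORDER BY ...)`. The old `{stat}(z.total)` template therefore produced invalid SQL for the `mode` statistic ([#651](https://github.com/Flowminder/FlowKit/issues/651)). Below is a minimal sketch of the resulting dispatch; `build_aggregate_call` is a hypothetical helper name for illustration only, not a FlowKit API:

    # Hypothetical helper, mirroring the branch added in _make_query above.
    def build_aggregate_call(statistic: str, column: str = "z.value") -> str:
        """Return the SQL aggregate expression for the requested statistic."""
        if statistic == "mode":
            # mode() is an ordered-set aggregate in PostgreSQL: it takes no
            # direct argument, and its input comes from the WITHIN GROUP clause.
            return f"pg_catalog.mode() WITHIN GROUP(ORDER BY {column})"
        # The remaining statistics are invoked as ordinary aggregates.
        return f"{statistic}({column})"

    assert build_aggregate_call("avg") == "avg(z.value)"
    assert build_aggregate_call("mode") == "pg_catalog.mode() WITHIN GROUP(ORDER BY z.value)"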