Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix mode for AggregateNetworkObjects #663

Merged
merged 4 commits into from
Apr 24, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
- The `stop_date` argument to the `modal_location_from_dates` and `meaningful_locations_*` functions in FlowClient has been renamed `end_date` [#470](https://github.com/Flowminder/FlowKit/issues/470)
- `get_result_by_query_id` now accepts a `poll_interval` argument, which allows polling frequency to be changed
- `RadiusOfGyration` now returns a `value` column instead of an `rog` column
- `TotalNetworkObjects` and `AggregateNetworkObjects` now return a `value` column, rather than a `total` column and a column named after the requested statistic (e.g. `avg`), respectively

### Changed
- All environment variables are now in a single `development_environment` file in the project root, and development environment setup has been simplified
Expand Down Expand Up @@ -46,6 +47,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
### Fixed
- Fixed being unable to add new users or servers when running FlowAuth with a Postgres database [#622](https://github.com/Flowminder/FlowKit/issues/622)
- Resetting the cache using `reset_cache` will now reset the state of queries in redis as well [#650](https://github.com/Flowminder/FlowKit/issues/650)
- Fixed `mode` statistic for `AggregateNetworkObjects` [#651](https://github.com/Flowminder/FlowKit/issues/651)

### Removed
- Removed `docker-compose-dev.yml`, and docker-compose files in `docs/`, `flowdb/tests/` and `integration_tests/`.
Expand Down
29 changes: 14 additions & 15 deletions flowmachine/flowmachine/features/network/total_network_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def __init__(
@property
def column_names(self) -> List[str]:
return get_columns_for_level(self.level, self.joined.column_name) + [
"total",
"value",
"datetime",
]

Expand All @@ -158,7 +158,7 @@ def _make_query(self):
get_columns_for_level(self.level, self.joined.column_name)
)
sql = """
SELECT {group_cols}, COUNT(*) as total,
SELECT {group_cols}, COUNT(*) as value,
datetime FROM
(SELECT DISTINCT {group_cols}, {cols}, datetime FROM
(SELECT {group_cols}, {cols}, date_trunc('{total_by}', x.datetime) AS datetime
Expand Down Expand Up @@ -246,24 +246,23 @@ def __init__(self, *, total_network_objects, statistic="avg", aggregate_by=None)
def column_names(self) -> List[str]:
return get_columns_for_level(
self.total_objs.level, self.total_objs.joined.column_name
) + [self.statistic, "datetime"]
) + ["value", "datetime"]

def _make_query(self):
group_cols = ",".join(
get_columns_for_level(
self.total_objs.level, self.total_objs.joined.column_name
)
)
sql = """
SELECT {group_cols}, {stat}(z.total) as {stat},
date_trunc('{aggregate_by}', z.datetime) as datetime FROM
({totals}) z
GROUP BY {group_cols}, date_trunc('{aggregate_by}', z.datetime)
ORDER BY {group_cols}, date_trunc('{aggregate_by}', z.datetime)
""".format(
aggregate_by=self.aggregate_by,
stat=self.statistic,
totals=self.total_objs.get_query(),
group_cols=group_cols,
)
if self.statistic == "mode":
av_call = f"pg_catalog.mode() WITHIN GROUP(ORDER BY z.value)"
else:
av_call = f"{self.statistic}(z.value)"
sql = f"""
SELECT {group_cols}, {av_call} as value,
date_trunc('{self.aggregate_by}', z.datetime) as datetime FROM
({self.total_objs.get_query()}) z
GROUP BY {group_cols}, date_trunc('{self.aggregate_by}', z.datetime)
ORDER BY {group_cols}, date_trunc('{self.aggregate_by}', z.datetime)
"""
return sql
39 changes: 36 additions & 3 deletions flowmachine/tests/test_total_network_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,39 @@
from flowmachine.features import TotalNetworkObjects, AggregateNetworkObjects


@pytest.mark.parametrize(
    "stat, expected",
    [
        ("avg", 30.541666666666668),
        ("max", 38),
        ("min", 21),
        ("median", 31.0),
        ("mode", 27),
        ("stddev", 4.096437122848253),
        ("variance", 16.780797101449277),
    ],
)
def test_aggregate_returns_correct_values(stat, expected, get_dataframe):
    """
    AggregateNetworkObjects returns correct values for each parametrised statistic.

    Parameters
    ----------
    stat : str
        Statistic name passed as ``statistic`` to AggregateNetworkObjects.
    expected : float
        Independently computed value expected in the first row of the
        ``value`` column.
    get_dataframe : callable
        Fixture called with the query object; the object it returns is
        read through its ``value`` column.
    """
    # Use the classes imported at the top of this module rather than going
    # through a `network` namespace.
    instance = AggregateNetworkObjects(
        total_network_objects=TotalNetworkObjects(
            start="2016-01-01", stop="2016-12-30", table="calls", total_by="hour"
        ),
        statistic=stat,
    )
    df = get_dataframe(instance)

    #
    #  This will compare the very first
    #  value with an independently
    #  computed value.
    #
    # `approx` wraps the expected value so that the tolerance is derived
    # from the reference number, not from the value under test.
    assert df.value[0] == pytest.approx(expected)


def test_count_returns_correct_values(get_dataframe):
"""
TotalNetworkObjects returns correct values.
Expand All @@ -29,7 +62,7 @@ def test_count_returns_correct_values(get_dataframe):
# value with an independently
# computed value.
#
assert df.total[34] == 31
assert df.value[34] == 31


@pytest.mark.parametrize(
Expand Down Expand Up @@ -73,7 +106,7 @@ def test_median_returns_correct_values(get_dataframe):
# value with an independently
# computed value.
#
assert get_dataframe(instance).head(1)["median"][0] == 25
assert get_dataframe(instance).head(1)["value"][0] == 25


def test_mean_returns_correct_values(get_dataframe):
Expand All @@ -96,7 +129,7 @@ def test_mean_returns_correct_values(get_dataframe):
# value with an independently
# computed value.
#
assert get_dataframe(instance).head(1)["avg"][0] == pytest.approx(28.7916666666)
assert get_dataframe(instance).head(1)["value"][0] == pytest.approx(28.7916666666)


@pytest.mark.parametrize(
Expand Down