Skip to content

Commit

Permalink
Merge pull request #1937 from voxel51/feature/quantiles
Browse files Browse the repository at this point in the history
Adding quantiles aggregation
  • Loading branch information
brimoor authored Jul 19, 2022
2 parents 7347ef1 + 68d5bf1 commit 79fe485
Show file tree
Hide file tree
Showing 6 changed files with 345 additions and 3 deletions.
29 changes: 29 additions & 0 deletions docs/source/user_guide/using_aggregations.rst
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,35 @@ collection:
print(dataset.mean("predictions.detections.confidence"))
# 0.34994137249820706
.. _aggregations-quantiles:

Quantiles
_________

You can use the
:meth:`quantiles() <fiftyone.core.collections.SampleCollection.quantiles>`
aggregation to compute the quantile(s) of the (non-``None``) values of a field
in a collection:

.. code-block:: python
:linenos:
import fiftyone.zoo as foz
dataset = foz.load_zoo_dataset("quickstart")
# Compute quantiles of the `uniqueness` field
print(dataset.quantiles("uniqueness", [0.25, 0.5, 0.75, 0.9]))
# [0.22027, 0.33771, 0.62554, 0.69488]
# Compute quantiles of detection confidence in the `predictions` field
quantiles = dataset.quantiles(
"predictions.detections.confidence",
[0.25, 0.5, 0.75, 0.9],
)
print(quantiles)
# [0.09231, 0.20251, 0.56273, 0.94354]
.. _aggregations-std:

Standard deviation
Expand Down
1 change: 1 addition & 0 deletions fiftyone/__public__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
Distinct,
HistogramValues,
Mean,
Quantiles,
Std,
Sum,
Values,
Expand Down
166 changes: 166 additions & 0 deletions fiftyone/core/aggregations.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

import fiftyone.core.expressions as foe
from fiftyone.core.expressions import VALUE
from fiftyone.core.expressions import ViewExpression as E
from fiftyone.core.expressions import ViewField as F
import fiftyone.core.fields as fof
import fiftyone.core.media as fom
Expand Down Expand Up @@ -1332,6 +1333,171 @@ def to_mongo(self, sample_collection):
return pipeline


class Quantiles(Aggregation):
    """Computes the quantile(s) of the field values of a collection.

    ``None``-valued fields are ignored.

    This aggregation is typically applied to *numeric* field types (or lists of
    such types):

    -   :class:`fiftyone.core.fields.IntField`
    -   :class:`fiftyone.core.fields.FloatField`

    Examples::

        import fiftyone as fo
        from fiftyone import ViewField as F

        dataset = fo.Dataset()
        dataset.add_samples(
            [
                fo.Sample(
                    filepath="/path/to/image1.png",
                    numeric_field=1.0,
                    numeric_list_field=[1, 2, 3],
                ),
                fo.Sample(
                    filepath="/path/to/image2.png",
                    numeric_field=4.0,
                    numeric_list_field=[1, 2],
                ),
                fo.Sample(
                    filepath="/path/to/image3.png",
                    numeric_field=None,
                    numeric_list_field=None,
                ),
            ]
        )

        #
        # Compute the quantiles of a numeric field
        #

        aggregation = fo.Quantiles("numeric_field", [0.1, 0.5, 0.9])
        quantiles = dataset.aggregate(aggregation)
        print(quantiles)  # the quantiles

        #
        # Compute the quantiles of a numeric list field
        #

        aggregation = fo.Quantiles("numeric_list_field", [0.1, 0.5, 0.9])
        quantiles = dataset.aggregate(aggregation)
        print(quantiles)  # the quantiles

        #
        # Compute the quantiles of a transformation of a numeric field
        #

        aggregation = fo.Quantiles(2 * (F("numeric_field") + 1), [0.1, 0.5, 0.9])
        quantiles = dataset.aggregate(aggregation)
        print(quantiles)  # the quantiles

    Args:
        field_or_expr: a field name, ``embedded.field.name``,
            :class:`fiftyone.core.expressions.ViewExpression`, or
            `MongoDB expression <https://docs.mongodb.com/manual/meta/aggregation-quick-reference/#aggregation-expressions>`_
            defining the field or expression to aggregate
        quantiles: the quantile or iterable of quantiles to compute. Each
            quantile must be a numeric value in ``[0, 1]``
        expr (None): a :class:`fiftyone.core.expressions.ViewExpression` or
            `MongoDB expression <https://docs.mongodb.com/manual/meta/aggregation-quick-reference/#aggregation-expressions>`_
            to apply to ``field_or_expr`` (which must be a field) before
            aggregating
        safe (False): whether to ignore nan/inf values when dealing with
            floating point values

    Raises:
        ValueError: if any requested quantile is not a number in ``[0, 1]``
    """

    def __init__(self, field_or_expr, quantiles, expr=None, safe=False):
        # Validate before touching the base class so that invalid input fails
        # fast with a clear error
        quantiles_list, is_scalar = self._parse_quantiles(quantiles)

        super().__init__(field_or_expr, expr=expr, safe=safe)

        # The user's original argument is kept verbatim for `_kwargs()`; the
        # normalized list and scalar flag drive the actual computation
        self._quantiles = quantiles
        self._quantiles_list = quantiles_list
        self._is_scalar = is_scalar

    def _kwargs(self):
        return [
            ["field_or_expr", self._field_name],
            ["quantiles", self._quantiles],
            ["expr", self._expr],
            ["safe", self._safe],
        ]

    def default_result(self):
        """Returns the default result for this aggregation.

        Returns:
            ``None`` if a single quantile was requested, or a list containing
            one ``None`` per requested quantile, e.g. ``[None, None, None]``
        """
        if self._is_scalar:
            return None

        return [None] * len(self._quantiles_list)

    def parse_result(self, d):
        """Parses the output of :meth:`to_mongo`.

        Args:
            d: the result dict

        Returns:
            the quantile or list of quantiles
        """
        # The pipeline always emits a list; unwrap it when the caller asked
        # for a single quantile
        if self._is_scalar:
            return d["quantiles"][0]

        return d["quantiles"]

    def to_mongo(self, sample_collection):
        path, pipeline, _, id_to_str, _ = _parse_field_and_expr(
            sample_collection,
            self._field_name,
            expr=self._expr,
            safe=self._safe,
        )

        if id_to_str:
            value = {"$toString": "$" + path}
        else:
            value = "$" + path

        # Compute quantile
        # Note that we don't need to explicitly handle empty `values` here
        # because the `group` stage only outputs a document if there's at least
        # one value to compute on!
        array = F("values").sort(numeric=True)

        # For each requested quantile `q` (the `F()` being mapped over), pick
        # the element at index `ceil(q * n) - 1`, clamped to 0 so that `q = 0`
        # selects the first (smallest) element
        idx = ((F() * array.length()).ceil() - 1).max(0)
        quantile_expr = array.let_in(E(self._quantiles_list).map(array[idx]))

        pipeline.extend(
            [
                # Only numeric values participate, so `None` is dropped here
                {"$match": {"$expr": {"$isNumber": value}}},
                {"$group": {"_id": None, "values": {"$push": value}}},
                {"$project": {"quantiles": quantile_expr.to_mongo()}},
            ]
        )

        return pipeline

    def _parse_quantiles(self, quantiles):
        """Normalizes and validates the user-provided quantile(s).

        Args:
            quantiles: a single quantile or a container of quantiles

        Returns:
            a ``(quantiles_list, is_scalar)`` tuple, where ``quantiles_list``
            is always a list and ``is_scalar`` records whether the input was a
            single value rather than a container

        Raises:
            ValueError: if any quantile is not a number in ``[0, 1]``
        """
        is_scalar = not etau.is_container(quantiles)

        if is_scalar:
            quantiles = [quantiles]
        else:
            quantiles = list(quantiles)

        # `not (0 <= q <= 1)` is used rather than `q < 0 or q > 1` so that NaN
        # is rejected too; every ordering comparison against NaN is False, so
        # the latter form would silently accept it
        if any(
            not etau.is_numeric(q) or not (0 <= q <= 1) for q in quantiles
        ):
            raise ValueError(
                "Quantiles must be numbers in [0, 1]; found %s" % quantiles
            )

        return quantiles, is_scalar


class Std(Aggregation):
"""Computes the standard deviation of the field values of a collection.
Expand Down
81 changes: 81 additions & 0 deletions fiftyone/core/collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -5786,6 +5786,87 @@ def mean(self, field_or_expr, expr=None, safe=False):
)
return self._make_and_aggregate(make, field_or_expr)

@aggregation
def quantiles(self, field_or_expr, quantiles, expr=None, safe=False):
    """Computes the quantile(s) of the field values of a collection.

    ``None``-valued fields are ignored.

    This aggregation is typically applied to *numeric* field types (or
    lists of such types):

    -   :class:`fiftyone.core.fields.IntField`
    -   :class:`fiftyone.core.fields.FloatField`

    Examples::

        import fiftyone as fo
        from fiftyone import ViewField as F

        dataset = fo.Dataset()
        dataset.add_samples(
            [
                fo.Sample(
                    filepath="/path/to/image1.png",
                    numeric_field=1.0,
                    numeric_list_field=[1, 2, 3],
                ),
                fo.Sample(
                    filepath="/path/to/image2.png",
                    numeric_field=4.0,
                    numeric_list_field=[1, 2],
                ),
                fo.Sample(
                    filepath="/path/to/image3.png",
                    numeric_field=None,
                    numeric_list_field=None,
                ),
            ]
        )

        #
        # Compute the quantiles of a numeric field
        #

        quantiles = dataset.quantiles("numeric_field", [0.1, 0.5, 0.9])
        print(quantiles)  # the quantiles

        #
        # Compute the quantiles of a numeric list field
        #

        quantiles = dataset.quantiles("numeric_list_field", [0.1, 0.5, 0.9])
        print(quantiles)  # the quantiles

        #
        # Compute the quantiles of a transformation of a numeric field
        #

        quantiles = dataset.quantiles(2 * (F("numeric_field") + 1), [0.1, 0.5, 0.9])
        print(quantiles)  # the quantiles

    Args:
        field_or_expr: a field name, ``embedded.field.name``,
            :class:`fiftyone.core.expressions.ViewExpression`, or
            `MongoDB expression <https://docs.mongodb.com/manual/meta/aggregation-quick-reference/#aggregation-expressions>`_
            defining the field or expression to aggregate
        quantiles: the quantile or iterable of quantiles to compute. Each
            quantile must be a numeric value in ``[0, 1]``
        expr (None): a :class:`fiftyone.core.expressions.ViewExpression` or
            `MongoDB expression <https://docs.mongodb.com/manual/meta/aggregation-quick-reference/#aggregation-expressions>`_
            to apply to ``field_or_expr`` (which must be a field) before
            aggregating
        safe (False): whether to ignore nan/inf values when dealing with
            floating point values

    Returns:
        the quantile or list of quantiles
    """

    # Factory that builds the aggregation for a given field or expression;
    # the remaining arguments are captured from the enclosing call
    def make(field_or_expr):
        return foa.Quantiles(
            field_or_expr,
            quantiles,
            expr=expr,
            safe=safe,
        )

    return self._make_and_aggregate(make, field_or_expr)

@aggregation
def std(self, field_or_expr, expr=None, safe=False, sample=False):
"""Computes the standard deviation of the field values of the
Expand Down
16 changes: 13 additions & 3 deletions fiftyone/core/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2584,7 +2584,7 @@ def reverse(self):
"""
return ViewExpression({"$reverseArray": self})

def sort(self, key=None, reverse=False):
def sort(self, key=None, numeric=False, reverse=False):
"""Sorts this expression, which must resolve to an array.
If no ``key`` is provided, this array must contain elements whose
Expand Down Expand Up @@ -2628,7 +2628,7 @@ def sort(self, key=None, reverse=False):
view = dataset.set_field(
"predictions.detections",
F("detections").sort(key="confidence", reverse=True)
F("detections").sort(key="confidence", numeric=True, reverse=True)
)
sample = view.first()
Expand All @@ -2637,13 +2637,23 @@ def sort(self, key=None, reverse=False):
Args:
key (None): an optional field or ``embedded.field.name`` to sort by
numeric (False): whether the array contains numeric values. By
default, the values will be sorted alphabetically by their
string representations
reverse (False): whether to sort in descending order
Returns:
a :class:`ViewExpression`
"""
if key is not None:
comp = "(a, b) => a.{key} - b.{key}".format(key=key)
if numeric:
comp = "(a, b) => a.{key} - b.{key}"
else:
comp = "(a, b) => ('' + a.{key}).localeCompare(b.{key})"

comp = comp.format(key=key)
elif numeric:
comp = "(a, b) => a - b"
else:
comp = ""

Expand Down
Loading

0 comments on commit 79fe485

Please sign in to comment.