From fd2d570e15d5621651c2b2e46af77647b19da739 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 9 Oct 2017 03:32:18 +0100 Subject: [PATCH 01/40] Added initial structure for stats elements, operations and plots --- holoviews/element/__init__.py | 1 + holoviews/element/comparison.py | 15 -- holoviews/element/stats.py | 50 ++++ holoviews/interface/__init__.py | 12 - holoviews/interface/pandas.py | 155 ------------ holoviews/interface/seaborn.py | 140 ----------- holoviews/operation/stats.py | 122 ++++++++++ holoviews/plotting/bokeh/__init__.py | 9 +- holoviews/plotting/bokeh/element.py | 1 + holoviews/plotting/bokeh/stats.py | 63 +++++ holoviews/plotting/mpl/__init__.py | 9 +- holoviews/plotting/mpl/pandas.py | 152 ------------ holoviews/plotting/mpl/seaborn.py | 330 -------------------------- holoviews/plotting/mpl/stats.py | 49 ++++ holoviews/plotting/plotly/__init__.py | 1 - 15 files changed, 299 insertions(+), 810 deletions(-) create mode 100644 holoviews/element/stats.py delete mode 100644 holoviews/interface/pandas.py delete mode 100644 holoviews/interface/seaborn.py create mode 100644 holoviews/operation/stats.py create mode 100644 holoviews/plotting/bokeh/stats.py delete mode 100644 holoviews/plotting/mpl/pandas.py delete mode 100644 holoviews/plotting/mpl/seaborn.py create mode 100644 holoviews/plotting/mpl/stats.py diff --git a/holoviews/element/__init__.py b/holoviews/element/__init__.py index 7630c88b2d..d07f5a4281 100644 --- a/holoviews/element/__init__.py +++ b/holoviews/element/__init__.py @@ -6,6 +6,7 @@ from .graphs import * # noqa (API import) from .path import * # noqa (API import) from .raster import * # noqa (API import) +from .stats import * # noqa (API import) from .tabular import * # noqa (API import) diff --git a/holoviews/element/comparison.py b/holoviews/element/comparison.py index f090b1d73a..4ed798ed40 100644 --- a/holoviews/element/comparison.py +++ b/holoviews/element/comparison.py @@ -28,10 +28,6 @@ GridSpace, DynamicMap, 
GridMatrix, OrderedDict) from ..core.options import Options, Cycle from ..core.util import pd -from ..interface.pandas import DFrame as PandasDFrame -from ..interface.pandas import DataFrameView -from ..interface.seaborn import DFrame, Bivariate, Distribution, \ - Regression, TimeSeries class ComparisonInterface(object): @@ -181,16 +177,10 @@ def register(cls): cls.equality_type_funcs[Table] = cls.compare_tables cls.equality_type_funcs[Points] = cls.compare_points - # Pandas DFrame objects - cls.equality_type_funcs[DataFrameView] = cls.compare_dframe - cls.equality_type_funcs[PandasDFrame] = cls.compare_dframe - cls.equality_type_funcs[DFrame] = cls.compare_dframe - # Seaborn Views cls.equality_type_funcs[Bivariate] = cls.compare_bivariate cls.equality_type_funcs[Distribution] = cls.compare_distribution cls.equality_type_funcs[Regression] = cls.compare_regression - cls.equality_type_funcs[TimeSeries] = cls.compare_timeseries # NdMappings cls.equality_type_funcs[NdLayout] = cls.compare_gridlayout @@ -678,11 +668,6 @@ def compare_dframe(cls, el1, el2, msg='DFrame'): def compare_distribution(cls, el1, el2, msg='Distribution'): cls.compare_dataset(el1, el2, msg) - @classmethod - def compare_timeseries(cls, el1, el2, msg='TimeSeries'): - cls.compare_dimensioned(el1, el2) - cls.compare_arrays(el1.data, el2.data, msg) - @classmethod def compare_bivariate(cls, el1, el2, msg='Bivariate'): cls.compare_dataset(el1, el2, msg) diff --git a/holoviews/element/stats.py b/holoviews/element/stats.py new file mode 100644 index 0000000000..2b55a2a54a --- /dev/null +++ b/holoviews/element/stats.py @@ -0,0 +1,50 @@ +import param + +from ..core.dimension import Dimension +from .chart import Chart, Scatter + + +class Bivariate(Chart): + """ + Bivariate Views are containers for two dimensional data, + which is to be visualized as a kernel density estimate. The + data should be supplied as an Nx2 array, containing the x- + and y-data. 
+ """ + + kdims = param.List(default=[Dimension('x'), Dimension('y')]) + + vdims = param.List(default=[], bounds=(0,1)) + + group = param.String(default="Bivariate", constant=True) + + + +class Distribution(Chart): + """ + Distribution Views provide a container for data to be + visualized as a one-dimensional distribution. The data should + be supplied as a simple one-dimensional array or + list. Internally it uses Seaborn to make all the conversions. + """ + + kdims = param.List(default=[]) + + group = param.String(default='Distribution', constant=True) + + vdims = param.List(default=[Dimension('Value')]) + + _auto_indexable_1d = False + + +class Regression(Scatter): + """ + Regression is identical to a Scatter plot but is visualized + using the Seaborn regplot interface. This allows it to + implement linear regressions, confidence intervals and a lot + more. + """ + + group = param.String(default='Regression', constant=True) + + diff --git a/holoviews/interface/__init__.py b/holoviews/interface/__init__.py index c74fbc2332..8de1316596 100644 --- a/holoviews/interface/__init__.py +++ b/holoviews/interface/__init__.py @@ -1,17 +1,5 @@ from ..core import Dimensioned, AttrTree -try: - import pandas - from .pandas import DFrame # noqa (API import) -except: - pandas = None - -try: - import seaborn - from .seaborn import * # noqa (API import) -except: - seaborn = None - from .collector import * # noqa (API import) def public(obj): diff --git a/holoviews/interface/pandas.py b/holoviews/interface/pandas.py deleted file mode 100644 index fbea155476..0000000000 --- a/holoviews/interface/pandas.py +++ /dev/null @@ -1,155 +0,0 @@ -""" -The interface subpackage provides View and Plot types to wrap external -objects with. Currently only a Pandas compatibility wrapper is -provided, which allows integrating Pandas DataFrames within the -HoloViews compositioning and animation framework. 
Additionally, it -provides methods to apply operations to the underlying data and -convert it to standard HoloViews View types. -""" - -from __future__ import absolute_import - -import numpy as np - -try: - import pandas as pd - from ..core.data import PandasInterface -except: - pd = None - PandasInterface = None - -import param - -from ..core import ViewableElement, NdMapping, Dataset, NdOverlay,\ - NdLayout, GridSpace, HoloMap - - -class DataFrameView(Dataset): - """ - DataFrameView provides a convenient compatibility wrapper around - Pandas DataFrames. It provides several core functions: - - * Allows integrating several Pandas plot types with the - HoloViews plotting system (includes plot, boxplot, histogram - and scatter_matrix). - - * Provides several convenient wrapper methods to apply - DataFrame methods and slice data. This includes: - - 1) The apply method, which takes the DataFrame method to - be applied as the first argument and passes any - supplied args or kwargs along. - - 2) The select and __getitem__ method which allow for - selecting and slicing the data using NdMapping. 
- """ - - plot_type = param.ObjectSelector(default=None, - objects=['plot', 'boxplot', - 'hist', 'scatter_matrix', - 'autocorrelation_plot', - None], - doc="""Selects which Pandas plot type to use, - when visualizing the ViewableElement.""") - - x = param.String(doc="""Dimension to visualize along the x-axis.""") - - x2 = param.String(doc="""Dimension to visualize along a second - dependent axis.""") - - y = param.String(doc="""Dimension to visualize along the y-axis.""") - - group = param.String(default='DFrame', constant=True) - - vdims = param.List(doc="DataFrameView has no value dimension.") - - def __init__(self, data, dimensions={}, kdims=None, clone_override=False, - index=None, columns=None, dtype=None, copy=True, **params): - if pd is None: - raise Exception("Pandas is required for the Pandas interface.") - if not isinstance(data, pd.DataFrame): - data = pd.DataFrame(data, index=index, columns=columns, dtype=dtype) - elif copy: - data = pd.DataFrame(data, copy=True) - if clone_override: - dim_dict = {d.name: d for d in kdims} - dims = [dim_dict.get(k, k) for k in data.columns] - elif kdims: - if len(kdims) != len(data.columns): - raise ValueError("Supplied key dimensions do not match data columns") - dims = kdims - else: - dims = list(data.columns) - for name, dim in dimensions.items(): - if name in data.columns: - dims[list(data.columns).index(name)] = dim - - ViewableElement.__init__(self, data, kdims=dims, **params) - self.interface = PandasInterface - self.data.columns = self.dimensions('key', True) - - - def groupby(self, dimensions, container_type=NdMapping): - invalid_dims = [d for d in dimensions if d not in self.dimensions()] - if invalid_dims: - raise Exception('Following dimensions could not be found %s.' 
- % invalid_dims) - - index_dims = [self.get_dimension(d) for d in dimensions] - mapping_data = [] - for k, v in self.data.groupby([self.get_dimension(d).name for d in dimensions]): - data = v.drop(dimensions, axis=1) - mapping_data.append((k, self.clone(data, kdims=[self.get_dimension(d) - for d in data.columns]))) - return container_type(mapping_data, kdims=index_dims) - - - def apply(self, name, *args, **kwargs): - """ - Applies the Pandas dframe method corresponding to the supplied - name with the supplied args and kwargs. - """ - return self.clone(getattr(self.data, name)(*args, **kwargs), - clone_override=True) - - def overlay(self, dimensions): - return self.groupby(dimensions, NdOverlay) - - - def layout(self, dimensions=[], cols=4): - return self.groupby(dimensions, NdLayout).cols(4) - - - def grid(self, dimensions): - """ - Splits the supplied the dimensions out into a GridSpace. - """ - if len(dimensions) > 2: - raise Exception('Grids hold a maximum of two dimensions.') - return self.groupby(dimensions, GridSpace) - - - def holomap(self, kdims=[]): - """ - Splits the supplied dimensions out into a HoloMap. - """ - return self.groupby(kdims, HoloMap) - - -def is_type(df, baseType): - test = [issubclass(np.dtype(d).type, baseType) for d in df.dtypes] - return pd.DataFrame(data=test, index=df.columns, columns=["numeric"]) - - -def is_number(df): - try: - return is_type(df, np.number) - except: - return False - - -class DFrame(DataFrameView): - """ - DFrame is a specialized Dataset type useful as an interface for - pandas plots. - """ diff --git a/holoviews/interface/seaborn.py b/holoviews/interface/seaborn.py deleted file mode 100644 index 7d86761f2a..0000000000 --- a/holoviews/interface/seaborn.py +++ /dev/null @@ -1,140 +0,0 @@ -""" -The HoloViews Seaborn interface wraps around a wide range -of Seaborn plot types including time series, kernel density -estimates, distributions and regression plots. 
-""" - -from __future__ import absolute_import - -import numpy as np - -import param - -from ..core import Dimension, NdMapping, Element2D -from ..element import Chart, Scatter -from .pandas import DFrame as PandasDFrame - - -class TimeSeries(Element2D): - """ - TimeSeries is a container for any set of curves, which the - Seaborn interface combines into a confidence interval, error - bar or overlaid plot. - - The curves should be supplied as an NxM dimensional array, - x-values may also be supplied and must be of length N or M. - - Alternatively a UniformNdMapping or NdOverlay of Curve objects may be - supplied. - """ - - kdims = param.List(default=[Dimension('x'), Dimension('n')], - bounds=(2, 2)) - - group = param.String(default='TimeSeries', constant=True) - - vdims = param.List(default=[Dimension('z')], - bounds=(1, 1)) - - def __init__(self, data, xdata=None, **params): - if isinstance(data, NdMapping): - self.xdata = data.values()[0].data[:, 0] - params = dict(data.values()[0].get_param_values(onlychanged=True), **params) - data = np.array([dv.data[:, 1] for dv in data]) - else: - self.xdata = np.array(range(len(data[0, :]))) if xdata is None\ - else xdata - super(TimeSeries, self).__init__(data, **params) - - - def dimension_values(self, dimension): - dim_idx = self.get_dimension_index(dimension) - if dim_idx == 0: - return self.xdata - elif dim_idx == 1: - return self.data.flatten() - elif dim_idx == 2: - return range(self.data.shape[1]) - else: - return super(TimeSeries, self).dimension_values(dimension) - - - def sample(self, samples=[], **sample_values): - raise NotImplementedError('Cannot sample a TimeSeries.') - - - def reduce(self, dimensions=[], function=None, **reduce_map): - raise NotImplementedError('Reduction of TimeSeries not ' - 'implemented.') - - - -class Bivariate(Chart): - """ - Bivariate Views are containers for two dimensional data, - which is to be visualized as a kernel density estimate. 
The - data should be supplied as an Nx2 array, containing the x- - and y-data. - """ - - kdims = param.List(default=[Dimension('x'), Dimension('y')]) - - vdims = param.List(default=[], bounds=(0,1)) - - group = param.String(default="Bivariate", constant=True) - - - -class Distribution(Chart): - """ - Distribution Views provide a container for data to be - visualized as a one-dimensional distribution. The data should - be supplied as a simple one-dimensional array or - list. Internally it uses Seaborn to make all the conversions. - """ - - kdims = param.List(default=[]) - - group = param.String(default='Distribution', constant=True) - - vdims = param.List(default=[Dimension('Value')]) - - _auto_indexable_1d = False - - -class Regression(Scatter): - """ - Regression is identical to a Scatter plot but is visualized - using the Seaborn regplot interface. This allows it to - implement linear regressions, confidence intervals and a lot - more. - """ - - group = param.String(default='Regression', constant=True) - - -class DFrame(PandasDFrame): - """ - The SNSFrame is largely the same as a DFrame but can only be - visualized via Seaborn plotting functions. Since most Seaborn - plots are two dimensional, the x and y dimensions can be set - directly on this class to visualize a particular relationship - in a multi-dimensional Pandas dframe. 
- """ - - plot_type = param.ObjectSelector(default=None, - objects=['interact', 'regplot', - 'lmplot', 'corrplot', - 'plot', 'boxplot', - 'hist', 'scatter_matrix', - 'autocorrelation_plot', - 'pairgrid', 'facetgrid', - 'pairplot', 'violinplot', - 'factorplot', - None], - doc="""Selects which Pandas or Seaborn plot - type to use, when visualizing the plot.""") - - -__all__ = ['DFrame', 'Bivariate', 'Distribution', - 'TimeSeries', 'Regression'] diff --git a/holoviews/operation/stats.py b/holoviews/operation/stats.py new file mode 100644 index 0000000000..36e92d3e51 --- /dev/null +++ b/holoviews/operation/stats.py @@ -0,0 +1,122 @@ +import param +import numpy as np + +from ..core import Dimension, Dataset, NdOverlay +from ..core.dimension import Dimension +from ..core.operation import Operation +from ..core.util import basestring, find_minmax, cartesian_product +from ..element import Curve, Area, Image + +from .element import contours + + +class univariate_kde(Operation): + + bw_method = param.ObjectSelector(default='scott', objects=['scott', 'silverman'], doc=""" + Method of automatically determining KDE bandwidth""") + + bandwidth = param.Number(default=None, doc=""" + Allows supplying explicit bandwidth value rather than relying on scott or silverman method.""") + + bin_range = param.NumericTuple(default=None, length=2, doc=""" + Specifies the range within which to compute the KDE.""") + + dimension = param.String(default=None, doc=""" + Along which dimension of the Element to compute the KDE.""") + + filled = param.Boolean(default=False, doc=""" + Controls whether to return filled or unfilled KDE.""") + + n_samples = param.Integer(default=100, doc=""" + Number of samples to compute the KDE over.""") + + groupby = param.ClassSelector(default=None, class_=(basestring, Dimension), doc=""" + Defines a dimension to group the Histogram returning an NdOverlay of Histograms.""") + + def _process(self, element, key=None): + if self.p.groupby: + if not isinstance(element, 
Dataset): + raise ValueError('Cannot use histogram groupby on non-Dataset Element') + grouped = element.groupby(self.p.groupby, group_type=Dataset, container_type=NdOverlay) + self.p.groupby = None + return grouped.map(self._process, Dataset) + + try: + from scipy import stats + except ImportError: + raise ImportError('%s operation requires SciPy to be installed.' % type(self).__name__) + + if self.p.dimension: + selected_dim = self.p.dimension + else: + selected_dim = [d.name for d in element.vdims + element.kdims][0] + data = element.dimension_values(selected_dim) + bin_range = find_minmax((np.nanmin(data), np.nanmax(data)), (0, -float('inf')))\ + if self.p.bin_range is None else self.p.bin_range + + xs = np.linspace(bin_range[0], bin_range[1], self.p.n_samples) + kde = stats.gaussian_kde(data) + if self.p.bandwidth: + kde.set_bandwidth(self.p.bandwidth) + ys = kde.evaluate(xs) + + vdims = [Dimension('{}_density'.format(selected_dim), + label='{} Density'.format(selected_dim))] + + element_type = Area if self.p.filled else Curve + return Area((xs, ys), kdims=[selected_dim], vdims=vdims) + + + +class bivariate_kde(Operation): + + contours = param.Boolean(default=False) + + bw_method = param.ObjectSelector(default='scott', objects=['scott', 'silverman'], doc=""" + Method of automatically determining KDE bandwidth""") + + bandwidth = param.Number(default=None, doc=""" + Allows supplying explicit bandwidth value rather than relying on scott or silverman method.""") + + bin_range = param.NumericTuple(default=None, length=2, doc=""" + Specifies the range within which to compute the KDE.""") + + filled = param.Boolean(default=False, doc=""" + Controls whether to return filled or unfilled contours.""") + + n_samples = param.Integer(default=100, doc=""" + Number of samples to compute the KDE over.""") + + x_range = param.NumericTuple(default=None, length=2, doc=""" + The x_range as a tuple of min and max x-value. 
Auto-ranges + if set to None.""") + + y_range = param.NumericTuple(default=None, length=2, doc=""" + The x_range as a tuple of min and max y-value. Auto-ranges + if set to None.""") + + def _process(self, element, key=None): + try: + from scipy import stats + except ImportError: + raise ImportError('%s operation requires SciPy to be installed.' % type(self).__name__) + + data = element.array([0, 1]).T + bin_range = find_minmax((np.nanmin(data), np.nanmax(data)), (0, -float('inf')))\ + if self.p.bin_range is None else self.p.bin_range + + xmin, xmax = self.p.x_range or element.range(0) + ymin, ymax = self.p.y_range or element.range(1) + kde = stats.gaussian_kde(data) + if self.p.bandwidth: + kde.set_bandwidth(self.p.bandwidth) + xs = np.linspace(xmin, xmax, self.p.n_samples) + ys = np.linspace(ymin, ymax, self.p.n_samples) + xx, yy = cartesian_product([xs, ys], False) + positions = np.vstack([xx.ravel(), yy.ravel()]) + f = np.reshape(kde(positions).T, xx.shape) + + img = Image((xs, ys, f.T), kdims=element.dimensions()[:2], vdims=['Density']) + if self.p.contours: + return contours(img, filled=self.p.filled, ) + return img diff --git a/holoviews/plotting/bokeh/__init__.py b/holoviews/plotting/bokeh/__init__.py index 35d520fc0a..bb1e738377 100644 --- a/holoviews/plotting/bokeh/__init__.py +++ b/holoviews/plotting/bokeh/__init__.py @@ -13,7 +13,7 @@ Box, Bounds, Ellipse, Polygons, BoxWhisker, Arrow, ErrorBars, Text, HLine, VLine, Spline, Spikes, Table, ItemTable, Area, HSV, QuadMesh, VectorField, - Graph, Nodes, EdgePaths) + Graph, Nodes, EdgePaths, Distribution, Bivariate) from ...core.options import Options, Cycle, Palette from ...core.util import VersionError @@ -38,6 +38,7 @@ from .plot import GridPlot, LayoutPlot, AdjointLayoutPlot from .raster import RasterPlot, RGBPlot, HeatMapPlot, HSVPlot, QuadMeshPlot from .renderer import BokehRenderer +from .stats import DistributionPlot, BivariatePlot from .tabular import TablePlot from .util import bokeh_version @@ -99,7 
+100,11 @@ # Tabular Table: TablePlot, - ItemTable: TablePlot} + ItemTable: TablePlot, + + # Statistics + Distribution: DistributionPlot, + Bivariate: BivariatePlot} if DFrame is not None: associations[DFrame] = TablePlot diff --git a/holoviews/plotting/bokeh/element.py b/holoviews/plotting/bokeh/element.py index 3bfadc135c..c55e8a4943 100644 --- a/holoviews/plotting/bokeh/element.py +++ b/holoviews/plotting/bokeh/element.py @@ -755,6 +755,7 @@ def initialize_plot(self, ranges=None, plot=None, plots=None, source=None): self.current_ranges = ranges self.current_frame = element self.current_key = key + style_element = element.last if self.batched else element ranges = util.match_spec(style_element, ranges) diff --git a/holoviews/plotting/bokeh/stats.py b/holoviews/plotting/bokeh/stats.py new file mode 100644 index 0000000000..bfbf76e466 --- /dev/null +++ b/holoviews/plotting/bokeh/stats.py @@ -0,0 +1,63 @@ +from functools import partial + +import param +import numpy as np + +from bokeh.models.ranges import DataRange1d + +from ...element import Polygons, Contours, Distribution, Bivariate +from ...operation.stats import univariate_kde, bivariate_kde + +from .chart import AreaPlot +from .path import PolygonPlot + + +class DistributionPlot(AreaPlot): + """ + DistributionPlot visualizes a distribution of values as a KDE. + """ + + bw = param.Number(default=None) + + def __init__(self, element, plot=None, **params): + element = element.map(self._convert_element, Distribution) + super(DistributionPlot, self).__init__(element, plot, **params) + + def _convert_element(self, element): + plot_opts = self.lookup_options(element, 'plot').options + style_opts = self.lookup_options(element, 'style').kwargs + return univariate_kde(element, bandwidth=plot_opts.get('bw')).opts(plot=plot_opts, style=style_opts) + + + +class BivariatePlot(PolygonPlot): + """ + Bivariate plot visualizes two-dimensional kernel density + estimates. 
Additionally, by enabling the joint option, the + marginals distributions can be plotted alongside each axis (does + not animate or compose). + """ + + bw = param.Number(default=None) + + filled = param.Boolean(default=False) + + def __init__(self, element, plot=None, **params): + element = element.map(self._convert_element, Bivariate) + super(BivariatePlot, self).__init__(element, plot, batched=True, **params) + + def _convert_element(self, element): + plot_opts = self.lookup_options(element, 'plot').options + style_opts = self.lookup_options(element, 'style').kwargs + return bivariate_kde(element, contours=True, filled=plot_opts.get('filled', self.filled), + bandwidth=plot_opts.get('bw')).opts(plot=plot_opts, style=style_opts) + + def get_data(self, element, ranges, style): + data, mapping, style = super(BivariatePlot, self).get_data(element, ranges, style) + if not self.filled and 'fill_color' in mapping: + mapping['line_color'] = mapping.pop('fill_color') + if self.filled: + style['line_color'] = 'black' + else: + style['fill_alpha'] = 0 + return data, mapping, style diff --git a/holoviews/plotting/mpl/__init__.py b/holoviews/plotting/mpl/__init__.py index fe547e480f..8337b4fdcc 100644 --- a/holoviews/plotting/mpl/__init__.py +++ b/holoviews/plotting/mpl/__init__.py @@ -16,9 +16,8 @@ from .path import * # noqa (API import) from .plot import * # noqa (API import) from .raster import * # noqa (API import) +from .stats import * # noqa (API import) from .tabular import * # noqa (API import) -from . import pandas # noqa (API import) -from . 
import seaborn # noqa (API import) from .renderer import MPLRenderer @@ -165,7 +164,11 @@ def grid_selector(grid): Box: PathPlot, Bounds: PathPlot, Ellipse: PathPlot, - Polygons: PolygonPlot}, 'matplotlib', style_aliases=style_aliases) + Polygons: PolygonPlot, + + # Statistics elements + Distribution: DistributionPlot, + Bivariate: BivariatePlot}, 'matplotlib', style_aliases=style_aliases) MPLPlot.sideplots.update({Histogram: SideHistogramPlot, diff --git a/holoviews/plotting/mpl/pandas.py b/holoviews/plotting/mpl/pandas.py deleted file mode 100644 index 4ab96e7ef2..0000000000 --- a/holoviews/plotting/mpl/pandas.py +++ /dev/null @@ -1,152 +0,0 @@ -from __future__ import absolute_import - -import numpy as np -from matplotlib import pyplot as plt - -import param - -from ...core.options import Store -from ...interface.pandas import DFrame, DataFrameView, pd -from .element import ElementPlot -from .plot import mpl_rc_context - - -class DFrameViewPlot(ElementPlot): - """ - DFramePlot provides a wrapper around Pandas dataframe plots. It - takes a single DataFrameView or DFrameMap as input and plots it - using the plotting command selected via the plot_type. - - The plot_options specifies the valid options to be supplied to the - selected plot_type via options.style_opts. 
- """ - - aspect = param.Parameter(default='square', doc=""" - Aspect ratio defaults to square, 'equal' or numeric values - are also supported.""") - - show_grid = param.Boolean(default=False, doc=""" - Whether to show a Cartesian grid on the plot.""") - - plot_type = param.ObjectSelector(default='scatter_matrix', - objects=['plot', 'boxplot', - 'hist', 'scatter_matrix', - 'autocorrelation_plot'], doc=""" - Selects which Pandas plot type to use, valid options include: 'plot', - 'boxplot', 'hist', 'scatter_matrix' and 'autocorrelation_plot'.""") - - dframe_options = {'plot': ['kind', 'stacked', 'xerr', - 'yerr', 'share_x', 'share_y', - 'table', 'style', 'x', 'y', - 'secondary_y', 'legend', - 'logx', 'logy', 'position', - 'colormap', 'mark_right'], - 'hist': ['column', 'by', 'grid', - 'xlabelsize', 'xrot', - 'ylabelsize', 'yrot', - 'sharex', 'sharey', 'hist', - 'layout', 'bins'], - 'boxplot': ['column', 'by', 'fontsize', - 'layout', 'grid', 'rot'], - 'scatter_matrix': ['alpha', 'grid', 'diagonal', - 'marker', 'range_padding', - 'hist_kwds', 'density_kwds'], - 'autocorrelation': ['kwds']} - - xticks = param.Number(default=None, doc=""" - By default we don't mess with Pandas based tickmarks""") - - yticks = param.Number(default=None, doc=""" - By default we don't mess with Pandas based tickmarks""") - - style_opts = list({opt for opts in dframe_options.values() for opt in opts}) - - def __init__(self, view, **params): - super(DFrameViewPlot, self).__init__(view, **params) - if self.hmap.last.plot_type and 'plot_type' not in params: - self.plot_type = self.hmap.last.plot_type - - @mpl_rc_context - def initialize_plot(self, ranges=None): - element = self.hmap.last - self._validate(element) - - style = self._process_style(self.style[self.cyclic_index]) - axis = self.handles['axis'] - self._update_plot(axis, element, style) - if 'fig' in self.handles and self.handles['fig'] != plt.gcf(): - self.handles['fig'] = plt.gcf() - - return self._finalize_axis(self.keys[-1], 
element=element, - **self.get_axis_kwargs(element)) - - - def _process_style(self, style): - style_keys = style.keys() - for k in style_keys: - if k not in self.dframe_options[self.plot_type]: - self.warning('Plot option %s does not apply to %s plot type.' % (k, self.plot_type)) - style.pop(k) - if self.plot_type not in ['autocorrelation_plot']: - style['figsize'] = self.fig_size - - # Legacy fix for Pandas, can be removed for Pandas >0.14 - if self.plot_type == 'boxplot': - style['return_type'] = 'axes' - return style - - - def get_extents(self, view, ranges): - x0, y0, x1, y1 = (np.NaN,) * 4 - if ranges: - if view.x: - x0, x1 = ranges[view.x] - if view.x2: - y0, y1 = ranges[view.x2] - elif view.y: - y0, y1 = ranges[view.y] - return (x0, y0, x1, y1) - - - def get_axis_kwargs(self, element): - if element.x: - xlabel = element.get_dimension(element.x).pprint_label - if element.x2: - ylabel = element.get_dimension(element.x2).pprint_label - elif element.y: - ylabel = element.get_dimension(element.y).pprint_label - return dict(xlabel=xlabel, ylabel=ylabel) - - - def _validate(self, dfview): - composed = self.handles['axis'] is not None - - if composed and dfview.ndims > 1 and self.plot_type in ['hist']: - raise Exception("Multiple %s plots cannot be composed." % self.plot_type) - - - def _update_plot(self, axis, view, style): - if self.plot_type == 'scatter_matrix': - pd.scatter_matrix(view.data, ax=axis, **style) - elif self.plot_type == 'autocorrelation_plot': - pd.tools.plotting.autocorrelation_plot(view.data, ax=axis, **style) - elif self.plot_type == 'plot': - opts = dict({'x': view.x, 'y': view.y}, **style) - view.data.plot(ax=self.handles['axis'], **opts) - else: - getattr(view.data, self.plot_type)(ax=axis, **style) - - - def update_handles(self, key, axis, view, ranges, style): - """ - Update the plot for an animation. 
- """ - if not self.plot_type in ['hist', 'scatter_matrix']: - if self.zorder == 0 and axis: - axis.cla() - self._update_plot(axis, view, style) - return self.get_axis_kwargs(view) - - -Store.register({DataFrameView: DFrameViewPlot, - DFrame: DFrameViewPlot}, 'matplotlib') diff --git a/holoviews/plotting/mpl/seaborn.py b/holoviews/plotting/mpl/seaborn.py deleted file mode 100644 index 4fc2e9d300..0000000000 --- a/holoviews/plotting/mpl/seaborn.py +++ /dev/null @@ -1,330 +0,0 @@ -from __future__ import absolute_import - -import matplotlib.pyplot as plt - -try: - import seaborn as sns -except: - sns = None - -import param - -from ...interface.pandas import DFrame, DataFrameView -from ...interface.seaborn import Regression, TimeSeries, Bivariate, Distribution -from ...interface.seaborn import DFrame as SNSFrame -from ...core.options import Store -from ...core import config -from .element import ElementPlot -from .pandas import DFrameViewPlot -from .plot import MPLPlot, AdjoinedPlot, mpl_rc_context - - -class SeabornPlot(ElementPlot): - """ - SeabornPlot provides an abstract baseclass, defining an - update_frame method, which completely wipes the axis and - redraws the plot. - """ - - aspect = param.Parameter(default='square', doc=""" - Aspect ratio defaults to square, 'equal' or numeric values - are also supported.""") - - show_grid = param.Boolean(default=True, doc=""" - Enables the axis grid.""") - - _abstract = True - - def teardown_handles(self): - if self.zorder == 0: - self.handles['axis'].cla() - - -class RegressionPlot(SeabornPlot): - """ - RegressionPlot visualizes Regression Views using the Seaborn - regplot interface, allowing the user to perform and plot - linear regressions on a set of scatter points. Parameters - to the replot function can be supplied via the opts magic. 
- """ - - style_opts = ['x_estimator', 'x_bins', 'x_ci', 'scatter', - 'fit_reg', 'color', 'n_boot', 'order', - 'logistic', 'lowess', 'robust', 'truncate', - 'scatter_kws', 'line_kws', 'ci', 'dropna', - 'x_jitter', 'y_jitter', 'x_partial', 'y_partial'] - - def init_artists(self, ax, plot_data, plot_kwargs): - plot_kwargs.pop('zorder') - return {'axis': sns.regplot(*plot_data, ax=ax, **plot_kwargs)} - - def get_data(self, element, ranges, style): - xs, ys = (element[d] for d in element.dimensions()[:2]) - return (xs, ys), style, {} - - -class BivariatePlot(SeabornPlot): - """ - Bivariate plot visualizes two-dimensional kernel density - estimates using the Seaborn kdeplot function. Additionally, - by enabling the joint option, the marginals distributions - can be plotted alongside each axis (does not animate or - compose). - """ - - joint = param.Boolean(default=False, doc=""" - Whether to visualize the kernel density estimate with marginal - distributions along each axis. Does not animate or compose - when enabled.""") - - style_opts = ['color', 'alpha', 'err_style', 'interpolate', - 'ci', 'kind', 'bw', 'kernel', 'cumulative', - 'shade', 'vertical', 'cmap'] - - def init_artists(self, ax, plot_data, plot_kwargs): - if self.joint: - if self.joint and self.subplot: - raise Exception("Joint plots can't be animated or laid out in a grid.") - return {'fig': sns.jointplot(*plot_data, **plot_kwargs).fig} - else: - return {'axis': sns.kdeplot(*plot_data, ax=ax, **plot_kwargs)} - - def get_data(self, element, ranges, style): - xs, ys = (element[d] for d in element.dimensions()[:2]) - if self.joint: - style.pop('cmap', None) - style.pop('zorder', None) - return (xs, ys), style, {} - - - -class TimeSeriesPlot(SeabornPlot): - """ - TimeSeries visualizes sets of curves using the Seaborn - tsplot function. This provides functionality to plot - error bars with various styles alongside the averaged - curve. 
- """ - - show_legend = param.Boolean(default=True, doc=""" - Whether to show legend for the plot.""") - - style_opts = ['color', 'alpha', 'err_style', 'interpolate', - 'ci', 'n_boot', 'err_kws', 'err_palette', - 'estimator', 'kwargs'] - - def get_data(self, element, ranges, style): - style.pop('zorder', None) - if 'label' in style: - style['condition'] = style.pop('label') - axis_kwargs = {'xlabel': element.kdims[0].pprint_label, - 'ylabel': element.vdims[0].pprint_label} - return (element.data, element.xdata), style, axis_kwargs - - def init_artists(self, ax, plot_data, plot_kwargs): - return {'axis': sns.tsplot(*plot_data, ax=ax, **plot_kwargs)} - - - -class DistributionPlot(SeabornPlot): - """ - DistributionPlot visualizes Distribution Views using the - Seaborn distplot function. This allows visualizing a 1D - array as a histogram, kernel density estimate, or rugplot. - """ - - apply_ranges = param.Boolean(default=False, doc=""" - Whether to compute the plot bounds from the data itself.""") - - style_opts = ['bins', 'hist', 'kde', 'rug', 'fit', 'hist_kws', - 'kde_kws', 'rug_kws', 'fit_kws', 'color'] - - def get_data(self, element, ranges, style): - style.pop('zorder', None) - if self.invert_axes: - style['vertical'] = True - vdim = element.vdims[0] - axis_kwargs = dict(dimensions=[vdim]) - return (element.dimension_values(vdim),), style, axis_kwargs - - def init_artists(self, ax, plot_data, plot_kwargs): - return {'axis': sns.distplot(*plot_data, ax=ax, **plot_kwargs)} - - - -class SideDistributionPlot(AdjoinedPlot, DistributionPlot): - - border_size = param.Number(default=0.2, doc=""" - The size of the border expressed as a fraction of the main plot.""") - - -class SNSFramePlot(DFrameViewPlot): - """ - SNSFramePlot takes an SNSFrame as input and plots the - contained data using the set plot_type. 
This largely mirrors - the way DFramePlot works, however, since most Seaborn plot - types plot one dimension against another it uses the x and y - parameters, which can be set on the SNSFrame. - """ - - plot_type = param.ObjectSelector(default='scatter_matrix', - objects=['interact', 'regplot', - 'lmplot', 'corrplot', - 'plot', 'boxplot', - 'hist', 'scatter_matrix', - 'autocorrelation_plot', - 'pairgrid', 'facetgrid', - 'pairplot', 'violinplot', - 'factorplot' - ], - doc=""" - Selects which Seaborn plot type to use, when visualizing the - SNSFrame. The options that can be passed to the plot_type are - defined in dframe_options. Valid options are 'interact', 'regplot', - 'lmplot', 'corrplot', 'plot', 'boxplot', 'hist', 'scatter_matrix', - 'autocorrelation_plot', 'pairgrid', 'facetgrid', 'pairplot', - 'violinplot' and 'factorplot'""") - - dframe_options = dict(DFrameViewPlot.dframe_options, - **{'regplot': RegressionPlot.style_opts, - 'factorplot': ['kind', 'col', 'aspect', 'row', - 'col_wrap', 'ci', 'linestyles', - 'markers', 'palette', 'dodge', - 'join', 'size', 'legend', - 'sharex', 'sharey', 'hue', 'estimator'], - 'boxplot': ['order', 'hue_order', 'orient', 'color', - 'palette', 'saturation', 'width', 'fliersize', - 'linewidth', 'whis', 'notch'], - 'violinplot':['groupby', 'positions', - 'inner', 'join_rm', 'bw', 'cut', 'split'], - 'lmplot': ['hue', 'col', 'row', 'palette', - 'sharex', 'dropna', 'legend'], - 'interact': ['filled', 'cmap', 'colorbar', - 'levels', 'logistic', 'contour_kws', - 'scatter_kws'], - 'pairgrid': ['hue', 'hue_order', 'palette', - 'hue_kws', 'vars', 'x_vars', 'y_vars' - 'size', 'aspect', 'despine', 'map', - 'map_diag', 'map_offdiag', - 'map_upper', 'map_lower'], - 'pairplot': ['hue', 'hue_order', 'palette', - 'vars', 'x_vars', 'y_vars', 'diag_kind', - 'kind', 'plot_kws', 'diag_kws', 'grid_kws'], - 'facetgrid': ['hue', 'row', 'col', 'col_wrap', - 'map', 'sharex', 'sharey', 'size', - 'aspect', 'palette', 'row_order', - 'col_order', 
'hue_order', 'legend', - 'legend_out', 'xlim', 'ylim', 'despine'], - }) - - style_opts = list({opt for opts in dframe_options.values() for opt in opts}) - - def __init__(self, view, **params): - if self.plot_type in ['pairgrid', 'pairplot', 'facetgrid']: - self._create_fig = False - super(SNSFramePlot, self).__init__(view, **params) - - - @mpl_rc_context - def initialize_plot(self, ranges=None): - dfview = self.hmap.last - axis = self.handles['axis'] - self._validate(dfview) - - style = self._process_style(self.style[self.cyclic_index]) - - self._update_plot(axis, dfview, style) - if 'fig' in self.handles and self.handles['fig'] != plt.gcf(): - self.handles['fig'] = plt.gcf() - - return self._finalize_axis(self.keys[-1], element=dfview) - - - def _process_style(self, styles): - styles = super(SNSFramePlot, self)._process_style(styles) - if self.plot_type not in DFrameViewPlot.params()['plot_type'].objects: - styles.pop('figsize', None) - return styles - - - def _validate(self, dfview): - super(SNSFramePlot, self)._validate(dfview) - multi_dim = dfview.ndims > 1 - if self.subplot and multi_dim and self.plot_type == 'lmplot': - raise Exception("Multiple %s plots cannot be composed." 
- % self.plot_type) - - @mpl_rc_context - def update_frame(self, key, ranges=None): - element = self.hmap.get(key, None) - axis = self.handles['axis'] - if axis: - axis.set_visible(element is not None) - - style = dict(zorder=self.zorder, **self.style[self.cyclic_index]) - if self.show_legend: - style['label'] = element.label - - axis_kwargs = self.update_handles(key, axis, element, ranges, style) - if axis: - self._finalize_axis(key, element=element, **(axis_kwargs if axis_kwargs else {})) - - - def _update_plot(self, axis, view, style): - style.pop('zorder', None) - if self.plot_type == 'factorplot': - opts = dict(style, **({'hue': view.x2} if view.x2 else {})) - sns.factorplot(x=view.x, y=view.y, data=view.data, **opts) - elif self.plot_type == 'regplot': - sns.regplot(x=view.x, y=view.y, data=view.data, - ax=axis, **style) - elif self.plot_type == 'boxplot': - style.pop('return_type', None) - style.pop('figsize', None) - sns.boxplot(view.data[view.y], view.data[view.x], ax=axis, - **style) - elif self.plot_type == 'violinplot': - if view.x: - sns.violinplot(view.data[view.y], view.data[view.x], ax=axis, - **style) - else: - sns.violinplot(view.data, ax=axis, **style) - elif self.plot_type == 'interact': - sns.interactplot(view.x, view.x2, view.y, - data=view.data, ax=axis, **style) - elif self.plot_type == 'lmplot': - sns.lmplot(x=view.x, y=view.y, data=view.data, - ax=axis, **style) - elif self.plot_type in ['pairplot', 'pairgrid', 'facetgrid']: - style_keys = list(style.keys()) - map_opts = [(k, style.pop(k)) for k in style_keys if 'map' in k] - if self.plot_type == 'pairplot': - g = sns.pairplot(view.data, **style) - elif self.plot_type == 'pairgrid': - g = sns.PairGrid(view.data, **style) - elif self.plot_type == 'facetgrid': - g = sns.FacetGrid(view.data, **style) - for opt, args in map_opts: - plot_fn = getattr(sns, args[0]) if hasattr(sns, args[0]) else getattr(plt, args[0]) - getattr(g, opt)(plot_fn, *args[1:]) - if self._close_figures: - 
plt.close(self.handles['fig']) - self.handles['fig'] = plt.gcf() - else: - super(SNSFramePlot, self)._update_plot(axis, view, style) - - -Store.register({TimeSeries: TimeSeriesPlot, - Bivariate: BivariatePlot, - Distribution: DistributionPlot, - Regression: RegressionPlot, - SNSFrame: SNSFramePlot, - DFrame: SNSFramePlot, - DataFrameView: SNSFramePlot}, 'matplotlib') - -MPLPlot.sideplots.update({Distribution: SideDistributionPlot}) - - -if config.style_17: - for framelesscls in [TimeSeriesPlot, DistributionPlot]: - framelesscls.show_frame = False diff --git a/holoviews/plotting/mpl/stats.py b/holoviews/plotting/mpl/stats.py new file mode 100644 index 0000000000..d3d3fb7592 --- /dev/null +++ b/holoviews/plotting/mpl/stats.py @@ -0,0 +1,49 @@ +import param +import numpy as np + +from ...element import Polygons, Contours, Distribution, Bivariate +from ...operation.stats import univariate_kde, bivariate_kde + +from .chart import AreaPlot +from .path import PolygonPlot + + +class DistributionPlot(AreaPlot): + """ + DistributionPlot visualizes a distribution of values as a KDE. + """ + + bw = param.Number(default=None) + + def __init__(self, element, **params): + element = element.map(self._convert_element, Distribution) + super(DistributionPlot, self).__init__(element, **params) + + def _convert_element(self, element): + plot_opts = self.lookup_options(element, 'plot').options + style_opts = self.lookup_options(element, 'style').options + return univariate_kde(element, bandwidth=plot_opts.get('bw')).opts(plot=plot_opts, style=style_opts) + + + +class BivariatePlot(PolygonPlot): + """ + Bivariate plot visualizes two-dimensional kernel density + estimates. Additionally, by enabling the joint option, the + marginals distributions can be plotted alongside each axis (does + not animate or compose). 
+ """ + + bw = param.Number(default=None) + + filled = param.Boolean(default=False) + + def __init__(self, element, **params): + element = element.map(self._convert_element, Bivariate) + super(BivariatePlot, self).__init__(element, **params) + + def _convert_element(self, element): + plot_opts = self.lookup_options(element, 'plot').options + style_opts = self.lookup_options(element, 'style').options + return bivariate_kde(element, contours=True, filled=plot_opts.get('filled', self.filled), + bandwidth=plot_opts.get('bw')).opts(plot=plot_opts, style=style_opts) diff --git a/holoviews/plotting/plotly/__init__.py b/holoviews/plotting/plotly/__init__.py index 0d3ee9be18..d0bd93376c 100644 --- a/holoviews/plotting/plotly/__init__.py +++ b/holoviews/plotting/plotly/__init__.py @@ -1,7 +1,6 @@ from ...core.options import Store, Cycle, Options from ...core import (Overlay, NdOverlay, Layout, NdLayout, GridSpace, GridMatrix, config) -from ...interface.seaborn import * # noqa (Element import for registration) from ...element import * # noqa (Element import for registration) from .renderer import PlotlyRenderer From db0152bfc54adafb470bbc7536d2d003ca67b177 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 30 Oct 2017 19:31:05 +0000 Subject: [PATCH 02/40] Added support for virtual_vdims --- holoviews/core/data/__init__.py | 4 ++++ holoviews/core/data/array.py | 2 +- holoviews/core/data/dictionary.py | 5 ++++- holoviews/core/data/interface.py | 7 +++++-- holoviews/core/data/pandas.py | 7 +++++-- 5 files changed, 19 insertions(+), 6 deletions(-) diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index 3a0b9b4476..baa62a6e56 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -164,6 +164,10 @@ class Dataset(Element): # to supplied data _auto_indexable_1d = True + # Determines whether value dimensions are in data or should be emulated + # Useful for elements which compute statistics from the data + 
_virtual_vdims = False + # Define a class used to transform Datasets into other Element types _conversion_interface = DataConversion diff --git a/holoviews/core/data/array.py b/holoviews/core/data/array.py index 3194130962..d3a05155ae 100644 --- a/holoviews/core/data/array.py +++ b/holoviews/core/data/array.py @@ -72,7 +72,7 @@ def init(cls, eltype, data, kdims, vdims): @classmethod def validate(cls, dataset): - ndims = len(dataset.dimensions()) + ndims = dataset.ndims if dataset._virtual_vdims else len(dataset.dimensions()) ncols = dataset.data.shape[1] if dataset.data.ndim > 1 else 1 if ncols < ndims: raise DataError("Supplied data does not match specified " diff --git a/holoviews/core/data/dictionary.py b/holoviews/core/data/dictionary.py index 8cd231b520..0a44ee5dc1 100644 --- a/holoviews/core/data/dictionary.py +++ b/holoviews/core/data/dictionary.py @@ -101,7 +101,10 @@ def init(cls, eltype, data, kdims, vdims): @classmethod def validate(cls, dataset): - dimensions = dataset.dimensions(label='name') + if dataset._virtual_vdims: + dimensions = dataset.dimensions('key', label='name') + else: + dimensions = dataset.dimensions(label='name') not_found = [d for d in dimensions if d not in dataset.data] if not_found: raise DataError('Following columns specified as dimensions ' diff --git a/holoviews/core/data/interface.py b/holoviews/core/data/interface.py index dfe15c3727..4ffa8a91ce 100644 --- a/holoviews/core/data/interface.py +++ b/holoviews/core/data/interface.py @@ -202,8 +202,11 @@ def initialize(cls, eltype, data, kdims, vdims, datatype=None): @classmethod def validate(cls, dataset): - not_found = [d for d in dataset.dimensions(label='name') - if d not in dataset.data] + if dataset._virtual_vdims: + dimensions = dataset.dimensions('key', label='name') + else: + dimensions = dataset.dimensions(label='name') + not_found = [d for d in dimensions if d not in dataset.data] if not_found: raise DataError("Supplied data does not contain specified " "dimensions, the 
following dimensions were " diff --git a/holoviews/core/data/pandas.py b/holoviews/core/data/pandas.py index b53a3a3027..fcf6e064ae 100644 --- a/holoviews/core/data/pandas.py +++ b/holoviews/core/data/pandas.py @@ -92,8 +92,11 @@ def isscalar(cls, dataset, dim): @classmethod def validate(cls, dataset): - not_found = [d for d in dataset.dimensions(label='name') - if d not in dataset.data.columns] + if dataset._virtual_vdims: + dimensions = dataset.dimensions('key', label='name') + else: + dimensions = dataset.dimensions(label='name') + not_found = [d for d in dimensions if d not in dataset.data.columns] if not_found: raise DataError("Supplied data does not contain specified " "dimensions, the following dimensions were " From b479c3446e0e312f77a39ec76aaf283aba33314a Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 30 Oct 2017 19:32:17 +0000 Subject: [PATCH 03/40] Added StatisticalElement baseclass for Distribution and Bivariate --- holoviews/element/stats.py | 85 +++++++++++++++++++++++++++++++++++--- 1 file changed, 79 insertions(+), 6 deletions(-) diff --git a/holoviews/element/stats.py b/holoviews/element/stats.py index 2b55a2a54a..e0a082869f 100644 --- a/holoviews/element/stats.py +++ b/holoviews/element/stats.py @@ -1,10 +1,83 @@ import param +import numpy as np -from ..core.dimension import Dimension +from ..core.dimension import Dimension, Dimensioned from .chart import Chart, Scatter -class Bivariate(Chart): +class _StatisticsElement(Chart): + """ + StatisticsElement provides a baseclass for Element types that + compute statistics based on the input data. The baseclass + overrides standard Dataset methods emulating the existence + of the value dimensions. 
+ """ + + def __init__(self, data, kdims=None, vdims=None, **params): + super(_StatisticsElement, self).__init__(data, kdims, vdims, **params) + if not self.vdims: + self.vdims = [Dimension('Density')] + + + def range(self, dim, data_range=True): + dim = self.get_dimension(dim) + if dim in self.vdims: + return Dimensioned.range(self, dim, data_range=True) + return super(_StatisticsElement, self).range(dim, data_range) + + + def dimension_values(self, dim, expanded=True, flat=True): + """ + Returns the values along a particular dimension. If unique + values are requested will return only unique values. + """ + dim = self.get_dimension(dim, strict=True) + if dim in self.vdims: + return np.full(len(self), np.NaN) + return self.interface.values(self, dim, expanded, flat) + + + def get_dimension_type(self, dim): + """ + Returns the specified Dimension type if specified or + if the dimension_values types are consistent otherwise + None is returned. + """ + dim = self.get_dimension(dim) + if dim is None: + return None + elif dim.type is not None: + return dim.type + elif dim in self.vdims: + return np.float64 + return self.interface.dimension_type(self, dim) + + + def dframe(self, dimensions=None): + """ + Returns the data in the form of a DataFrame. Supplying a list + of dimensions filters the dataframe. If the data is already + a DataFrame a copy is returned. 
+ """ + if dimensions: + dimensions = [self.get_dimension(d, strict=True) for d in dimensions] + else: + dimensions = dimensions.kdims + dim = [dim.name for dim in dims if dim in dimensions.kdims] + return self.interface.dframe(self, dimensions) + + + def columns(self, dimensions=None): + if dimensions is None: + dimensions = self.kdims + else: + dimensions = [self.get_dimension(d, strict=True) for d in dimensions] + return OrderedDict([(d.name, self.dimension_values(d)) + for d in dimensions if d in self.kdims]) + + + +class Bivariate(_StatisticsElement): """ Bivariate Views are containers for two dimensional data, which is to be visualized as a kernel density estimate. The @@ -14,13 +87,13 @@ class Bivariate(Chart): kdims = param.List(default=[Dimension('x'), Dimension('y')]) - vdims = param.List(default=[], bounds=(0,1)) + vdims = param.List(default=[Dimension('Density')], bounds=(1,1)) group = param.String(default="Bivariate", constant=True) -class Distribution(Chart): +class Distribution(_StatisticsElement): """ Distribution Views provide a container for data to be visualized as a one-dimensional distribution. The data should @@ -28,11 +101,11 @@ class Distribution(Chart): list. Internally it uses Seaborn to make all the conversions. 
""" - kdims = param.List(default=[]) + kdims = param.List(default=[Dimension('Value')]) group = param.String(default='Distribution', constant=True) - vdims = param.List(default=[Dimension('Value')]) + vdims = param.List(default=[Dimension('Density')]) _auto_indexable_1d = False From 381dae8974f73846fbc3e78b8afa4a0e0552fd08 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 30 Oct 2017 20:05:52 +0000 Subject: [PATCH 04/40] Improved bokeh statistical plot implementations --- holoviews/plotting/bokeh/__init__.py | 3 +++ holoviews/plotting/bokeh/stats.py | 12 ++++++------ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/holoviews/plotting/bokeh/__init__.py b/holoviews/plotting/bokeh/__init__.py index bb1e738377..a67547e2cf 100644 --- a/holoviews/plotting/bokeh/__init__.py +++ b/holoviews/plotting/bokeh/__init__.py @@ -220,3 +220,6 @@ def colormap_generator(palette): options.Points = Options('style', muted_alpha=0.2) options.Polygons = Options('style', muted_alpha=0.2) +# Statistics +options.Distribution = Options('style', fill_color=Cycle(), line_color='black', + fill_alpha=0.5) diff --git a/holoviews/plotting/bokeh/stats.py b/holoviews/plotting/bokeh/stats.py index bfbf76e466..716940767f 100644 --- a/holoviews/plotting/bokeh/stats.py +++ b/holoviews/plotting/bokeh/stats.py @@ -3,8 +3,6 @@ import param import numpy as np -from bokeh.models.ranges import DataRange1d - from ...element import Polygons, Contours, Distribution, Bivariate from ...operation.stats import univariate_kde, bivariate_kde @@ -26,7 +24,8 @@ def __init__(self, element, plot=None, **params): def _convert_element(self, element): plot_opts = self.lookup_options(element, 'plot').options style_opts = self.lookup_options(element, 'style').kwargs - return univariate_kde(element, bandwidth=plot_opts.get('bw')).opts(plot=plot_opts, style=style_opts) + bw = plot_opts.pop('bw', univariate_kde.bandwidth) + return univariate_kde(element, bandwidth=bw).opts(plot=plot_opts, style=style_opts) @@ 
-44,13 +43,14 @@ class BivariatePlot(PolygonPlot): def __init__(self, element, plot=None, **params): element = element.map(self._convert_element, Bivariate) - super(BivariatePlot, self).__init__(element, plot, batched=True, **params) + super(BivariatePlot, self).__init__(element, plot, **params) def _convert_element(self, element): plot_opts = self.lookup_options(element, 'plot').options style_opts = self.lookup_options(element, 'style').kwargs - return bivariate_kde(element, contours=True, filled=plot_opts.get('filled', self.filled), - bandwidth=plot_opts.get('bw')).opts(plot=plot_opts, style=style_opts) + bw = plot_opts.pop('bw', univariate_kde.bandwidth) + return bivariate_kde(element, contours=True, filled=True, + bandwidth=bw).opts(plot=plot_opts, style=style_opts) def get_data(self, element, ranges, style): data, mapping, style = super(BivariatePlot, self).get_data(element, ranges, style) From c4f99f7daccfe92c96c9e07d30e8aee66d75b5d9 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 30 Oct 2017 20:07:32 +0000 Subject: [PATCH 05/40] Allowed defining which backend to set options on --- holoviews/core/options.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/holoviews/core/options.py b/holoviews/core/options.py index 654c78862b..e5e2053e18 100644 --- a/holoviews/core/options.py +++ b/holoviews/core/options.py @@ -1511,7 +1511,7 @@ def id_offset(cls): @classmethod - def update_backends(cls, id_mapping, custom_trees): + def update_backends(cls, id_mapping, custom_trees, backend=None): """ Given the id_mapping from previous ids to new ids and the new custom tree dictionary, update the current backend with the @@ -1519,7 +1519,7 @@ def update_backends(cls, id_mapping, custom_trees): stay linked with the current object. 
""" # Update the custom option entries for the current backend - Store.custom_options().update(custom_trees) + Store.custom_options(backend=backend).update(custom_trees) # Update the entries in other backends so the ids match correctly for backend in [k for k in Store.renderers.keys() if k != Store.current_backend]: for (old_id, new_id) in id_mapping: @@ -1529,7 +1529,7 @@ def update_backends(cls, id_mapping, custom_trees): @classmethod - def set_options(cls, obj, options=None, **kwargs): + def set_options(cls, obj, options=None, backend=None, **kwargs): """ Pure Python function for customize HoloViews objects in terms of their style, plot and normalization options. @@ -1572,7 +1572,7 @@ def set_options(cls, obj, options=None, **kwargs): # {'Image.Channel:{'plot': Options(size=50), # 'style': Options('style', cmap='Blues')]} - options = cls.merge_options(Store.options().groups.keys(), options, **kwargs) + options = cls.merge_options(Store.options(backend=backend).groups.keys(), options, **kwargs) spec, compositor_applied = cls.expand_compositor_keys(options) custom_trees, id_mapping = cls.create_custom_trees(obj, spec) cls.update_backends(id_mapping, custom_trees) From dcf49bf213c63afaacc4d2ccdbfd6883e3d93234 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 30 Oct 2017 20:08:14 +0000 Subject: [PATCH 06/40] Improved kernel density operations --- holoviews/operation/stats.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/holoviews/operation/stats.py b/holoviews/operation/stats.py index 36e92d3e51..3d90be9a02 100644 --- a/holoviews/operation/stats.py +++ b/holoviews/operation/stats.py @@ -48,17 +48,27 @@ def _process(self, element, key=None): if self.p.dimension: selected_dim = self.p.dimension + elif element._virtual_vdims: + selected_dim = element.kdims[0] else: selected_dim = [d.name for d in element.vdims + element.kdims][0] + dim_template = element.vdims[0] if element._virtual_vdims else Dimension + vdims = 
[dim_template('{}_density'.format(selected_dim), + label='{} Density'.format(selected_dim))] + data = element.dimension_values(selected_dim) bin_range = find_minmax((np.nanmin(data), np.nanmax(data)), (0, -float('inf')))\ if self.p.bin_range is None else self.p.bin_range xs = np.linspace(bin_range[0], bin_range[1], self.p.n_samples) - kde = stats.gaussian_kde(data) - if self.p.bandwidth: - kde.set_bandwidth(self.p.bandwidth) - ys = kde.evaluate(xs) + data = data[np.isfinite(data)] + if len(data): + kde = stats.gaussian_kde(data) + if self.p.bandwidth: + kde.set_bandwidth(self.p.bandwidth) + ys = kde.evaluate(xs) + else: + ys = np.full_like(xs, 0) vdims = [Dimension('{}_density'.format(selected_dim), label='{} Density'.format(selected_dim))] @@ -70,7 +80,7 @@ def _process(self, element, key=None): class bivariate_kde(Operation): - contours = param.Boolean(default=False) + contours = param.Boolean(default=True) bw_method = param.ObjectSelector(default='scott', objects=['scott', 'silverman'], doc=""" Method of automatically determining KDE bandwidth""") @@ -116,7 +126,9 @@ def _process(self, element, key=None): positions = np.vstack([xx.ravel(), yy.ravel()]) f = np.reshape(kde(positions).T, xx.shape) - img = Image((xs, ys, f.T), kdims=element.dimensions()[:2], vdims=['Density']) + vdim = element.vdims[0] if element._virtual_vdims else 'Density' + img = Image((xs, ys, f.T), kdims=element.dimensions()[:2], vdims=[vdim]) if self.p.contours: - return contours(img, filled=self.p.filled, ) + cntr = contours(img, filled=self.p.filled) + return cntr.clone(cntr.data[1:]) return img From 6b544cc3f6b850798ff8bc109c0edd00e3f129fc Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 30 Oct 2017 20:08:42 +0000 Subject: [PATCH 07/40] Enabled virtual_vdims on StatisticalElements --- holoviews/element/stats.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/holoviews/element/stats.py b/holoviews/element/stats.py index e0a082869f..6102cb4a7e 100644 --- 
a/holoviews/element/stats.py +++ b/holoviews/element/stats.py @@ -13,6 +13,8 @@ class _StatisticsElement(Chart): of the value dimensions. """ + _virtual_vdims = True + def __init__(self, data, kdims=None, vdims=None, **params): super(_StatisticsElement, self).__init__(data, kdims, vdims, **params) if not self.vdims: From a5b00413d6cf618448f6da3476111638af3a8d2a Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 30 Oct 2017 20:09:08 +0000 Subject: [PATCH 08/40] Made input_ranges optional on Operation --- holoviews/core/operation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/holoviews/core/operation.py b/holoviews/core/operation.py index 01bd682a08..29f6102970 100644 --- a/holoviews/core/operation.py +++ b/holoviews/core/operation.py @@ -37,7 +37,7 @@ class Operation(param.ParameterizedFunction): 'default' the mode will be determined based on the input type, i.e. if the data is a DynamicMap it will stay dynamic.""") - input_ranges = param.ClassSelector(default={}, + input_ranges = param.ClassSelector(default={}, allow_None=True, class_=(dict, tuple), doc=""" Ranges to be used for input normalization (if applicable) in a format appropriate for the Normalization.ranges parameter. From a116df2cd01eb587b16d676b59c0a1f57092581b Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 30 Oct 2017 20:09:32 +0000 Subject: [PATCH 09/40] Recursively apply Compositor until no more matches found --- holoviews/core/options.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/holoviews/core/options.py b/holoviews/core/options.py index e5e2053e18..fc521977fa 100644 --- a/holoviews/core/options.py +++ b/holoviews/core/options.py @@ -811,16 +811,24 @@ def collapse_element(cls, overlay, key=None, ranges=None, mode='data'): Finds any applicable compositor and applies it. 
""" from .overlay import Overlay - match = cls.strongest_match(overlay, mode) - if match is None: return overlay - (_, applicable_op, (start, stop)) = match - values = overlay.values() - sliced = Overlay.from_values(values[start:stop]) - result = applicable_op.apply(sliced, ranges, key=key) - result = result.relabel(group=applicable_op.group) - output = Overlay.from_values(values[:start]+[result]+values[stop:]) - output.id = overlay.id - return output + while True: + match = cls.strongest_match(overlay, mode) + if match is None: return overlay + (_, applicable_op, (start, stop)) = match + if isinstance(overlay, Overlay): + values = overlay.values() + sliced = Overlay.from_values(values[start:stop]) + result = applicable_op.apply(sliced, ranges, key=key) + result = result.relabel(group=applicable_op.group) + overlay = Overlay.from_values(values[:start]+[result]+values[stop:]) + overlay.id = overlay.id + else: + values = overlay.items() + sliced = overlay.clone(values[start:stop]) + result = applicable_op.apply(sliced, ranges, key=key) + result = result.relabel(group=applicable_op.group) + result = list(zip(sliced.keys(), [result])) + overlay = overlay.clone(values[:start]+result+values[stop:]) @classmethod From c65af984c7e158c8bc9ab31153ccdf85d47a5d26 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 30 Oct 2017 20:09:57 +0000 Subject: [PATCH 10/40] Added Store method to transfer options from one Element to another --- holoviews/core/options.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/holoviews/core/options.py b/holoviews/core/options.py index fc521977fa..144f7d8f05 100644 --- a/holoviews/core/options.py +++ b/holoviews/core/options.py @@ -1113,6 +1113,25 @@ def lookup(cls, backend, obj): return cls._custom_options[backend][list(ids)[0]] + @classmethod + def transfer_options(cls, obj, new_obj, drop=[]): + """ + Transfers options for all backends from one object to another. + Drops any options defined in the supplied drop list. 
+ """ + type_name = type(new_obj).__name__ + group = type_name if obj.group == type(obj).__name__ else obj.group + spec = '.'.join([s for s in (type_name, group, obj.label) if s]) + for backend in cls.renderers: + options = [] + for group in ['plot', 'style', 'norm']: + opts = cls.lookup_options(backend, obj, group).kwargs + opts = {k: v for k, v in opts.items() if k not in drop} + if opts: + options.append(Options(group, **opts)) + StoreOptions.set_options(new_obj, {spec: options}, backend) + + @classmethod def add_style_opts(cls, component, new_options, backend=None): """ From c10af53fe7dea9a84cb46d6cfce85dd86dfefe25 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 30 Oct 2017 20:10:33 +0000 Subject: [PATCH 11/40] Added compositing operations for Distribution and Bivariate --- holoviews/operation/stats.py | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/holoviews/operation/stats.py b/holoviews/operation/stats.py index 3d90be9a02..d06be62d93 100644 --- a/holoviews/operation/stats.py +++ b/holoviews/operation/stats.py @@ -4,8 +4,9 @@ from ..core import Dimension, Dataset, NdOverlay from ..core.dimension import Dimension from ..core.operation import Operation +from ..core.options import Compositor, Store, Options, StoreOptions from ..core.util import basestring, find_minmax, cartesian_product -from ..element import Curve, Area, Image +from ..element import Curve, Area, Image, Polygons from .element import contours @@ -132,3 +133,31 @@ def _process(self, element, key=None): cntr = contours(img, filled=self.p.filled) return cntr.clone(cntr.data[1:]) return img + + +class univariate_composite(Operation): + + output_type = Area + + def _process(self, element, key=None): + plot_opts = Store.lookup_options(Store.current_backend, element, 'plot').kwargs + bw = plot_opts.pop('bw', univariate_kde.bandwidth) + transformed = univariate_kde(element, bandwidth=bw) + Store.transfer_options(element, transformed, ['bw']) + 
return transformed + + +class bivariate_composite(Operation): + + output_type = Polygons + + def _process(self, element, key=None): + plot_opts = Store.lookup_options(Store.current_backend, element, 'plot').kwargs + bw = plot_opts.pop('bw', bivariate_kde.bandwidth) + filled = plot_opts.pop('filled', bivariate_kde.filled) + transformed = bivariate_kde(element, bandwidth=bw, filled=filled) + Store.transfer_options(element, transformed, ['bw', 'filled']) + return transformed + +Compositor.register(Compositor("Distribution", univariate_composite, 'Area', 'data')) +Compositor.register(Compositor("Bivariate", bivariate_composite, 'Polygons', 'data')) From 64ed9cc058393999a8713bc57950768ce55bca7d Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 30 Oct 2017 20:15:13 +0000 Subject: [PATCH 12/40] Moved compositing operations into plotting utilities --- holoviews/operation/stats.py | 28 ---------------------------- holoviews/plotting/__init__.py | 6 +++++- holoviews/plotting/util.py | 27 +++++++++++++++++++++++++++ 3 files changed, 32 insertions(+), 29 deletions(-) diff --git a/holoviews/operation/stats.py b/holoviews/operation/stats.py index d06be62d93..6cc39ce8b3 100644 --- a/holoviews/operation/stats.py +++ b/holoviews/operation/stats.py @@ -133,31 +133,3 @@ def _process(self, element, key=None): cntr = contours(img, filled=self.p.filled) return cntr.clone(cntr.data[1:]) return img - - -class univariate_composite(Operation): - - output_type = Area - - def _process(self, element, key=None): - plot_opts = Store.lookup_options(Store.current_backend, element, 'plot').kwargs - bw = plot_opts.pop('bw', univariate_kde.bandwidth) - transformed = univariate_kde(element, bandwidth=bw) - Store.transfer_options(element, transformed, ['bw']) - return transformed - - -class bivariate_composite(Operation): - - output_type = Polygons - - def _process(self, element, key=None): - plot_opts = Store.lookup_options(Store.current_backend, element, 'plot').kwargs - bw = 
plot_opts.pop('bw', bivariate_kde.bandwidth) - filled = plot_opts.pop('filled', bivariate_kde.filled) - transformed = bivariate_kde(element, bandwidth=bw, filled=filled) - Store.transfer_options(element, transformed, ['bw', 'filled']) - return transformed - -Compositor.register(Compositor("Distribution", univariate_composite, 'Area', 'data')) -Compositor.register(Compositor("Bivariate", bivariate_composite, 'Polygons', 'data')) diff --git a/holoviews/plotting/__init__.py b/holoviews/plotting/__init__.py index 6bb84fd526..72724abf4f 100644 --- a/holoviews/plotting/__init__.py +++ b/holoviews/plotting/__init__.py @@ -6,9 +6,13 @@ display in the IPython Notebook (optional). """ -from ..core.options import Cycle +from ..core.options import Cycle, Compositor from .plot import Plot from .renderer import Renderer, HTML_TAGS # noqa (API import) +from .util import univariate_composite, bivariate_composite + +Compositor.register(Compositor("Distribution", univariate_composite, 'Area', 'data')) +Compositor.register(Compositor("Bivariate", bivariate_composite, 'Polygons', 'data')) def public(obj): if not isinstance(obj, type): return False diff --git a/holoviews/plotting/util.py b/holoviews/plotting/util.py index 8ae3fb7736..5c26eb5a1b 100644 --- a/holoviews/plotting/util.py +++ b/holoviews/plotting/util.py @@ -10,6 +10,8 @@ from ..core.spaces import get_nested_streams from ..core.util import (match_spec, is_number, wrap_tuple, basestring, get_overlay_spec, unique_iterator) +from ..element import Area, Polygons +from ..operation.stats import univariate_kde, bivariate_kde, Operation from ..streams import LinkedStream def displayable(obj): @@ -451,6 +453,31 @@ def get_min_distance(element): return 0 +class univariate_composite(Operation): + + output_type = Area + + def _process(self, element, key=None): + plot_opts = Store.lookup_options(Store.current_backend, element, 'plot').kwargs + bw = plot_opts.pop('bw', univariate_kde.bandwidth) + transformed = univariate_kde(element, 
bandwidth=bw) + Store.transfer_options(element, transformed, ['bw']) + return transformed + + +class bivariate_composite(Operation): + + output_type = Polygons + + def _process(self, element, key=None): + plot_opts = Store.lookup_options(Store.current_backend, element, 'plot').kwargs + bw = plot_opts.pop('bw', bivariate_kde.bandwidth) + filled = plot_opts.pop('filled', bivariate_kde.filled) + transformed = bivariate_kde(element, bandwidth=bw, filled=filled) + Store.transfer_options(element, transformed, ['bw', 'filled']) + return transformed + + def rgb2hex(rgb): """ Convert RGB(A) tuple to hex. From 7425c2f7972f259a812a1e501e7d3b814d107192 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 30 Oct 2017 22:33:08 +0000 Subject: [PATCH 13/40] Removed stats element plots --- holoviews/plotting/bokeh/__init__.py | 7 +--- holoviews/plotting/bokeh/stats.py | 63 ---------------------------- holoviews/plotting/mpl/__init__.py | 7 +--- holoviews/plotting/mpl/stats.py | 49 ---------------------- 4 files changed, 2 insertions(+), 124 deletions(-) delete mode 100644 holoviews/plotting/bokeh/stats.py delete mode 100644 holoviews/plotting/mpl/stats.py diff --git a/holoviews/plotting/bokeh/__init__.py b/holoviews/plotting/bokeh/__init__.py index a67547e2cf..ae8f33fac9 100644 --- a/holoviews/plotting/bokeh/__init__.py +++ b/holoviews/plotting/bokeh/__init__.py @@ -38,7 +38,6 @@ from .plot import GridPlot, LayoutPlot, AdjointLayoutPlot from .raster import RasterPlot, RGBPlot, HeatMapPlot, HSVPlot, QuadMeshPlot from .renderer import BokehRenderer -from .stats import DistributionPlot, BivariatePlot from .tabular import TablePlot from .util import bokeh_version @@ -100,11 +99,7 @@ # Tabular Table: TablePlot, - ItemTable: TablePlot, - - # Statistics - Distribution: DistributionPlot, - Bivariate: BivariatePlot} + ItemTable: TablePlot} if DFrame is not None: associations[DFrame] = TablePlot diff --git a/holoviews/plotting/bokeh/stats.py b/holoviews/plotting/bokeh/stats.py deleted 
file mode 100644 index 716940767f..0000000000 --- a/holoviews/plotting/bokeh/stats.py +++ /dev/null @@ -1,63 +0,0 @@ -from functools import partial - -import param -import numpy as np - -from ...element import Polygons, Contours, Distribution, Bivariate -from ...operation.stats import univariate_kde, bivariate_kde - -from .chart import AreaPlot -from .path import PolygonPlot - - -class DistributionPlot(AreaPlot): - """ - DistributionPlot visualizes a distribution of values as a KDE. - """ - - bw = param.Number(default=None) - - def __init__(self, element, plot=None, **params): - element = element.map(self._convert_element, Distribution) - super(DistributionPlot, self).__init__(element, plot, **params) - - def _convert_element(self, element): - plot_opts = self.lookup_options(element, 'plot').options - style_opts = self.lookup_options(element, 'style').kwargs - bw = plot_opts.pop('bw', univariate_kde.bandwidth) - return univariate_kde(element, bandwidth=bw).opts(plot=plot_opts, style=style_opts) - - - -class BivariatePlot(PolygonPlot): - """ - Bivariate plot visualizes two-dimensional kernel density - estimates. Additionally, by enabling the joint option, the - marginals distributions can be plotted alongside each axis (does - not animate or compose). 
- """ - - bw = param.Number(default=None) - - filled = param.Boolean(default=False) - - def __init__(self, element, plot=None, **params): - element = element.map(self._convert_element, Bivariate) - super(BivariatePlot, self).__init__(element, plot, **params) - - def _convert_element(self, element): - plot_opts = self.lookup_options(element, 'plot').options - style_opts = self.lookup_options(element, 'style').kwargs - bw = plot_opts.pop('bw', univariate_kde.bandwidth) - return bivariate_kde(element, contours=True, filled=True, - bandwidth=bw).opts(plot=plot_opts, style=style_opts) - - def get_data(self, element, ranges, style): - data, mapping, style = super(BivariatePlot, self).get_data(element, ranges, style) - if not self.filled and 'fill_color' in mapping: - mapping['line_color'] = mapping.pop('fill_color') - if self.filled: - style['line_color'] = 'black' - else: - style['fill_alpha'] = 0 - return data, mapping, style diff --git a/holoviews/plotting/mpl/__init__.py b/holoviews/plotting/mpl/__init__.py index 8337b4fdcc..a9e715b5e1 100644 --- a/holoviews/plotting/mpl/__init__.py +++ b/holoviews/plotting/mpl/__init__.py @@ -16,7 +16,6 @@ from .path import * # noqa (API import) from .plot import * # noqa (API import) from .raster import * # noqa (API import) -from .stats import * # noqa (API import) from .tabular import * # noqa (API import) from .renderer import MPLRenderer @@ -164,11 +163,7 @@ def grid_selector(grid): Box: PathPlot, Bounds: PathPlot, Ellipse: PathPlot, - Polygons: PolygonPlot, - - # Statistics elements - Distribution: DistributionPlot, - Bivariate: BivariatePlot}, 'matplotlib', style_aliases=style_aliases) + Polygons: PolygonPlot}, 'matplotlib', style_aliases=style_aliases) MPLPlot.sideplots.update({Histogram: SideHistogramPlot, diff --git a/holoviews/plotting/mpl/stats.py b/holoviews/plotting/mpl/stats.py deleted file mode 100644 index d3d3fb7592..0000000000 --- a/holoviews/plotting/mpl/stats.py +++ /dev/null @@ -1,49 +0,0 @@ -import param 
-import numpy as np - -from ...element import Polygons, Contours, Distribution, Bivariate -from ...operation.stats import univariate_kde, bivariate_kde - -from .chart import AreaPlot -from .path import PolygonPlot - - -class DistributionPlot(AreaPlot): - """ - DistributionPlot visualizes a distribution of values as a KDE. - """ - - bw = param.Number(default=None) - - def __init__(self, element, **params): - element = element.map(self._convert_element, Distribution) - super(DistributionPlot, self).__init__(element, **params) - - def _convert_element(self, element): - plot_opts = self.lookup_options(element, 'plot').options - style_opts = self.lookup_options(element, 'style').options - return univariate_kde(element, bandwidth=plot_opts.get('bw')).opts(plot=plot_opts, style=style_opts) - - - -class BivariatePlot(PolygonPlot): - """ - Bivariate plot visualizes two-dimensional kernel density - estimates. Additionally, by enabling the joint option, the - marginals distributions can be plotted alongside each axis (does - not animate or compose). 
- """ - - bw = param.Number(default=None) - - filled = param.Boolean(default=False) - - def __init__(self, element, **params): - element = element.map(self._convert_element, Bivariate) - super(BivariatePlot, self).__init__(element, **params) - - def _convert_element(self, element): - plot_opts = self.lookup_options(element, 'plot').options - style_opts = self.lookup_options(element, 'style').options - return bivariate_kde(element, contours=True, filled=plot_opts.get('filled', self.filled), - bandwidth=plot_opts.get('bw')).opts(plot=plot_opts, style=style_opts) From 3162c28c33fef578a2f98c24fc7e8517e6e2700c Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 30 Oct 2017 22:35:43 +0000 Subject: [PATCH 14/40] Removed _virtual_vdims --- holoviews/core/data/__init__.py | 7 ++----- holoviews/core/data/array.py | 4 ++-- holoviews/core/data/dictionary.py | 8 +++----- holoviews/core/data/grid.py | 4 ++-- holoviews/core/data/image.py | 2 +- holoviews/core/data/interface.py | 8 +++----- holoviews/core/data/iris.py | 4 ++-- holoviews/core/data/multipath.py | 4 ++-- holoviews/core/data/pandas.py | 8 +++----- holoviews/element/stats.py | 9 +++------ holoviews/operation/stats.py | 28 ++++++++++++++++++++-------- 11 files changed, 43 insertions(+), 43 deletions(-) diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index baa62a6e56..6eb9135d52 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -164,10 +164,6 @@ class Dataset(Element): # to supplied data _auto_indexable_1d = True - # Determines whether value dimensions are in data or should be emulated - # Useful for elements which compute statistics from the data - _virtual_vdims = False - # Define a class used to transform Datasets into other Element types _conversion_interface = DataConversion @@ -196,8 +192,9 @@ def __init__(self, data, kdims=None, vdims=None, **kwargs): initialized = Interface.initialize(type(self), data, kdims, vdims, 
datatype=kwargs.get('datatype')) (data, self.interface, dims, extra_kws) = initialized + validate_vdims = kwargs.pop('_validate_vdims', True) super(Dataset, self).__init__(data, **dict(kwargs, **dict(dims, **extra_kws))) - self.interface.validate(self) + self.interface.validate(self, validate_vdims) self.redim = redim(self, mode='dataset') diff --git a/holoviews/core/data/array.py b/holoviews/core/data/array.py index d3a05155ae..ac3f9fdc2c 100644 --- a/holoviews/core/data/array.py +++ b/holoviews/core/data/array.py @@ -71,8 +71,8 @@ def init(cls, eltype, data, kdims, vdims): return data, {'kdims':kdims, 'vdims':vdims}, {} @classmethod - def validate(cls, dataset): - ndims = dataset.ndims if dataset._virtual_vdims else len(dataset.dimensions()) + def validate(cls, dataset, vdims=True): + ndims = len(dataset.dimensions()) if vdims else dataset.ndims ncols = dataset.data.shape[1] if dataset.data.ndim > 1 else 1 if ncols < ndims: raise DataError("Supplied data does not match specified " diff --git a/holoviews/core/data/dictionary.py b/holoviews/core/data/dictionary.py index 0a44ee5dc1..97d9f41c9d 100644 --- a/holoviews/core/data/dictionary.py +++ b/holoviews/core/data/dictionary.py @@ -100,11 +100,9 @@ def init(cls, eltype, data, kdims, vdims): @classmethod - def validate(cls, dataset): - if dataset._virtual_vdims: - dimensions = dataset.dimensions('key', label='name') - else: - dimensions = dataset.dimensions(label='name') + def validate(cls, dataset, vdims=True): + dim_types = 'key' if vdims else 'all' + dimensions = dataset.dimensions(dim_types, label='name') not_found = [d for d in dimensions if d not in dataset.data] if not_found: raise DataError('Following columns specified as dimensions ' diff --git a/holoviews/core/data/grid.py b/holoviews/core/data/grid.py index c5e7f6ebbd..e57299554d 100644 --- a/holoviews/core/data/grid.py +++ b/holoviews/core/data/grid.py @@ -88,8 +88,8 @@ def isscalar(cls, dataset, dim): @classmethod - def validate(cls, dataset): - 
Interface.validate(dataset) + def validate(cls, dataset, vdims=True): + Interface.validate(dataset, vdims) @classmethod diff --git a/holoviews/core/data/image.py b/holoviews/core/data/image.py index a8a7f7dd16..3bc68922b3 100644 --- a/holoviews/core/data/image.py +++ b/holoviews/core/data/image.py @@ -65,7 +65,7 @@ def length(cls, dataset): @classmethod - def validate(cls, dataset): + def validate(cls, dataset, vdims): pass @classmethod diff --git a/holoviews/core/data/interface.py b/holoviews/core/data/interface.py index 4ffa8a91ce..742cee7683 100644 --- a/holoviews/core/data/interface.py +++ b/holoviews/core/data/interface.py @@ -201,11 +201,9 @@ def initialize(cls, eltype, data, kdims, vdims, datatype=None): @classmethod - def validate(cls, dataset): - if dataset._virtual_vdims: - dimensions = dataset.dimensions('key', label='name') - else: - dimensions = dataset.dimensions(label='name') + def validate(cls, dataset, vdims=True): + dim_types = 'key' if vdims else 'all' + dimensions = dataset.dimensions(dim_types, label='name') not_found = [d for d in dimensions if d not in dataset.data] if not_found: raise DataError("Supplied data does not contain specified " diff --git a/holoviews/core/data/iris.py b/holoviews/core/data/iris.py index d7e1cb7d41..caebdb26b4 100644 --- a/holoviews/core/data/iris.py +++ b/holoviews/core/data/iris.py @@ -127,8 +127,8 @@ def init(cls, eltype, data, kdims, vdims): @classmethod - def validate(cls, dataset): - if len(dataset.vdims) > 1: + def validate(cls, dataset, vdims=True): + if vdims and len(dataset.vdims) > 1: raise DataError("Iris cubes do not support more than one value dimension", cls) diff --git a/holoviews/core/data/multipath.py b/holoviews/core/data/multipath.py index e8f1d317e8..1223dd561e 100644 --- a/holoviews/core/data/multipath.py +++ b/holoviews/core/data/multipath.py @@ -51,13 +51,13 @@ def init(cls, eltype, data, kdims, vdims): return new_data, dims, {} @classmethod - def validate(cls, dataset): + def validate(cls, 
dataset, vdims=True): if not dataset.data: return ds = cls._inner_dataset_template(dataset) for d in dataset.data: ds.data = d - ds.interface.validate(ds) + ds.interface.validate(ds, vdims) @classmethod diff --git a/holoviews/core/data/pandas.py b/holoviews/core/data/pandas.py index fcf6e064ae..7fd9fa0aae 100644 --- a/holoviews/core/data/pandas.py +++ b/holoviews/core/data/pandas.py @@ -91,11 +91,9 @@ def isscalar(cls, dataset, dim): @classmethod - def validate(cls, dataset): - if dataset._virtual_vdims: - dimensions = dataset.dimensions('key', label='name') - else: - dimensions = dataset.dimensions(label='name') + def validate(cls, dataset, vdims=True): + dim_types = 'key' if vdims else 'all' + dimensions = dataset.dimensions(dim_types, label='name') not_found = [d for d in dimensions if d not in dataset.data.columns] if not_found: raise DataError("Supplied data does not contain specified " diff --git a/holoviews/element/stats.py b/holoviews/element/stats.py index 6102cb4a7e..77c8a711e4 100644 --- a/holoviews/element/stats.py +++ b/holoviews/element/stats.py @@ -13,19 +13,16 @@ class _StatisticsElement(Chart): of the value dimensions. 
""" - _virtual_vdims = True - def __init__(self, data, kdims=None, vdims=None, **params): + params['_validate_vdims'] = False super(_StatisticsElement, self).__init__(data, kdims, vdims, **params) if not self.vdims: self.vdims = [Dimension('Density')] def range(self, dim, data_range=True): - dim = self.get_dimension(dim) - if dim in self.vdims: - return Dimensioned.range(self, dim, data_range=True) - return super(_StatisticsElement, self).range(dim, data_range) + iskdim = self.get_dimension(dim) not in self.vdims + return super(_StatisticsElement, self).range(dim, data_range=iskdim) def dimension_values(self, dim, expanded=True, flat=True): diff --git a/holoviews/operation/stats.py b/holoviews/operation/stats.py index 6cc39ce8b3..3c14a07861 100644 --- a/holoviews/operation/stats.py +++ b/holoviews/operation/stats.py @@ -6,7 +6,7 @@ from ..core.operation import Operation from ..core.options import Compositor, Store, Options, StoreOptions from ..core.util import basestring, find_minmax, cartesian_product -from ..element import Curve, Area, Image, Polygons +from ..element import Curve, Area, Image, Polygons, Distribution, Bivariate from .element import contours @@ -47,13 +47,18 @@ def _process(self, element, key=None): except ImportError: raise ImportError('%s operation requires SciPy to be installed.' 
% type(self).__name__) - if self.p.dimension: - selected_dim = self.p.dimension - elif element._virtual_vdims: + params = {} + dim_template = Dimension + if isinstance(element, Distribution): selected_dim = element.kdims[0] + if element.group != type(element).__name__: + params['group'] = element.group + params['label'] = element.label + dim_template = element.vdims[0] + elif self.p.dimension: + selected_dim = self.p.dimension else: selected_dim = [d.name for d in element.vdims + element.kdims][0] - dim_template = element.vdims[0] if element._virtual_vdims else Dimension vdims = [dim_template('{}_density'.format(selected_dim), label='{} Density'.format(selected_dim))] @@ -127,9 +132,16 @@ def _process(self, element, key=None): positions = np.vstack([xx.ravel(), yy.ravel()]) f = np.reshape(kde(positions).T, xx.shape) - vdim = element.vdims[0] if element._virtual_vdims else 'Density' - img = Image((xs, ys, f.T), kdims=element.dimensions()[:2], vdims=[vdim]) + params = {} + if isinstance(element, Bivariate): + if element.group != type(element).__name__: + params['group'] = element.group + params['label'] = element.label + vdim = element.vdims[0] + else: + vdim = 'Density' + img = Image((xs, ys, f.T), kdims=element.dimensions()[:2], vdims=[vdim], **params) if self.p.contours: cntr = contours(img, filled=self.p.filled) - return cntr.clone(cntr.data[1:]) + return cntr.clone(cntr.data[1:], **params) return img From cb0ddb15ac953d0c4338fc50e1d6d63fdfc98e9f Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 30 Oct 2017 22:37:07 +0000 Subject: [PATCH 15/40] Apply compositor in renderer --- holoviews/core/options.py | 4 +++- holoviews/plotting/bokeh/plot.py | 3 --- holoviews/plotting/mpl/plot.py | 2 -- holoviews/plotting/plot.py | 1 - holoviews/plotting/renderer.py | 13 ++++++++----- 5 files changed, 11 insertions(+), 12 deletions(-) diff --git a/holoviews/core/options.py b/holoviews/core/options.py index 144f7d8f05..7279d83eac 100644 --- a/holoviews/core/options.py 
+++ b/holoviews/core/options.py @@ -810,7 +810,9 @@ def collapse_element(cls, overlay, key=None, ranges=None, mode='data'): """ Finds any applicable compositor and applies it. """ - from .overlay import Overlay + from .overlay import Overlay, CompositeOverlay + if not isinstance(overlay, CompositeOverlay): + overlay = Overlay([overlay]) while True: match = cls.strongest_match(overlay, mode) if match is None: return overlay diff --git a/holoviews/plotting/bokeh/plot.py b/holoviews/plotting/bokeh/plot.py index 91e6e21e74..4b0d693e35 100644 --- a/holoviews/plotting/bokeh/plot.py +++ b/holoviews/plotting/bokeh/plot.py @@ -327,9 +327,6 @@ def __init__(self, layout, ranges=None, layout_num=1, keys=None, **params): def _create_subplots(self, layout, ranges): - layout = layout.map(Compositor.collapse_element, [CompositeOverlay], - clone=False) - subplots = OrderedDict() frame_ranges = self.compute_ranges(layout, None, ranges) frame_ranges = OrderedDict([(key, self.compute_ranges(layout, key, frame_ranges)) diff --git a/holoviews/plotting/mpl/plot.py b/holoviews/plotting/mpl/plot.py index bdf9c441c6..d6fdadf99c 100644 --- a/holoviews/plotting/mpl/plot.py +++ b/holoviews/plotting/mpl/plot.py @@ -355,8 +355,6 @@ def _get_size(self): def _create_subplots(self, layout, axis, ranges, create_axes): - layout = layout.map(Compositor.collapse_element, [CompositeOverlay], - clone=False) norm_opts = self._traverse_options(layout, 'norm', ['axiswise'], [Element]) axiswise = all(norm_opts['axiswise']) if not ranges: diff --git a/holoviews/plotting/plot.py b/holoviews/plotting/plot.py index d6e873260d..7794eac7f0 100644 --- a/holoviews/plotting/plot.py +++ b/holoviews/plotting/plot.py @@ -797,7 +797,6 @@ def __init__(self, overlay, ranges=None, batched=True, keys=None, **params): batched=batched, **params) # Apply data collapse - self.hmap = Compositor.collapse(self.hmap, None, mode='data') self.hmap = self._apply_compositor(self.hmap, ranges, self.keys) self.subplots = 
self._create_subplots(ranges) self.traverse(lambda x: setattr(x, 'comm', self.comm)) diff --git a/holoviews/plotting/renderer.py b/holoviews/plotting/renderer.py index c59661117c..66653fbe89 100644 --- a/holoviews/plotting/renderer.py +++ b/holoviews/plotting/renderer.py @@ -10,9 +10,9 @@ import param from ..core.io import Exporter -from ..core.options import Store, StoreOptions, SkipRendering +from ..core.options import Store, StoreOptions, SkipRendering, Compositor from ..core.util import find_file, unicode, unbound_dimensions, basestring -from .. import Layout, HoloMap, AdjointLayout +from .. import Layout, HoloMap, AdjointLayout, Element, CompositeOverlay from .widgets import NdWidget, ScrubberWidget, SelectionWidget from . import Plot @@ -165,9 +165,12 @@ def get_plot(self_or_cls, obj, renderer=None): # Initialize DynamicMaps with first data item initialize_dynamic(obj) - if not isinstance(obj, Plot) and not displayable(obj): - obj = collate(obj) - initialize_dynamic(obj) + if not isinstance(obj, Plot): + if not displayable(obj): + obj = collate(obj) + initialize_dynamic(obj) + obj = obj.map(lambda obj: Compositor.collapse_element(obj, mode='data'), + [Element, CompositeOverlay]) if not renderer: renderer = self_or_cls.instance() if not isinstance(obj, Plot): From 88ae1429d47e300d180f2ad3adbdd33dac95756f Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Tue, 31 Oct 2017 00:16:38 +0000 Subject: [PATCH 16/40] Compositor improvements --- holoviews/core/options.py | 31 ++++++++++++++++++------------ holoviews/ipython/display_hooks.py | 8 +++++--- holoviews/ipython/magics.py | 2 +- holoviews/plotting/__init__.py | 4 ++-- holoviews/plotting/renderer.py | 4 +++- holoviews/util/parser.py | 2 +- 6 files changed, 31 insertions(+), 20 deletions(-) diff --git a/holoviews/core/options.py b/holoviews/core/options.py index 7279d83eac..1b9d6ef13d 100644 --- a/holoviews/core/options.py +++ b/holoviews/core/options.py @@ -778,7 +778,7 @@ class 
Compositor(param.Parameterized): This pattern specification could then be associated with the RGB operation that returns a single RGB matrix for display.""") - group = param.String(doc=""" + group = param.String(allow_None=True, doc=""" The group identifier for the output of this particular compositor""") kwargs = param.Dict(doc=""" @@ -811,26 +811,32 @@ def collapse_element(cls, overlay, key=None, ranges=None, mode='data'): Finds any applicable compositor and applies it. """ from .overlay import Overlay, CompositeOverlay + unpack = False if not isinstance(overlay, CompositeOverlay): overlay = Overlay([overlay]) + unpack = True + while True: match = cls.strongest_match(overlay, mode) - if match is None: return overlay + if match is None: + if unpack and len(overlay) == 1: + return overlay.values()[0] + return overlay (_, applicable_op, (start, stop)) = match if isinstance(overlay, Overlay): values = overlay.values() - sliced = Overlay.from_values(values[start:stop]) - result = applicable_op.apply(sliced, ranges, key=key) - result = result.relabel(group=applicable_op.group) - overlay = Overlay.from_values(values[:start]+[result]+values[stop:]) - overlay.id = overlay.id + sliced = Overlay(values[start:stop]) else: values = overlay.items() sliced = overlay.clone(values[start:stop]) - result = applicable_op.apply(sliced, ranges, key=key) + result = applicable_op.apply(sliced, ranges, key=key) + if applicable_op.group: result = result.relabel(group=applicable_op.group) + if isinstance(overlay, Overlay): + result = [result] + else: result = list(zip(sliced.keys(), [result])) - overlay = overlay.clone(values[:start]+result+values[stop:]) + overlay = overlay.clone(values[:start]+result+values[stop:]) @classmethod @@ -849,11 +855,12 @@ def collapse(cls, holomap, ranges=None, mode='data'): clone[key] = cls.collapse_element(overlay, key, ranges, mode) return clone + @classmethod def register(cls, compositor): - defined_groups = [op.group for op in cls.definitions] - if 
compositor.group in defined_groups: - cls.definitions.pop(defined_groups.index(compositor.group)) + defined_patterns = [op.pattern for op in cls.definitions] + if compositor.group in defined_patterns: + cls.definitions.pop(defined_patterns.index(compositor.pattern)) cls.definitions.append(compositor) if compositor.operation not in cls.operations: cls.operations.append(compositor.operation) diff --git a/holoviews/ipython/display_hooks.py b/holoviews/ipython/display_hooks.py index ea676f798d..62a661009b 100644 --- a/holoviews/ipython/display_hooks.py +++ b/holoviews/ipython/display_hooks.py @@ -11,7 +11,8 @@ import holoviews from holoviews.plotting import Plot -from ..core.options import Store, StoreOptions, SkipRendering, AbbreviatedException +from ..core.options import (Store, StoreOptions, SkipRendering, + AbbreviatedException, Compositor) from ..core import (ViewableElement, UniformNdMapping, HoloMap, AdjointLayout, NdLayout, GridSpace, Layout, CompositeOverlay, DynamicMap) @@ -166,9 +167,10 @@ def element_display(element, max_frames): IPython.display.display(IPython.display.HTML(info)) return - backend = Store.current_backend - if type(element) not in Store.registry[backend]: + eltype = type(element) + if (eltype not in Store.registry[backend] and + all(eltype.__name__ != d.pattern for d in Compositor.definitions)): return None # Drop back to png if pdf selected, notebook PDF rendering is buggy diff --git a/holoviews/ipython/magics.py b/holoviews/ipython/magics.py index 48c712f348..13d58efc87 100644 --- a/holoviews/ipython/magics.py +++ b/holoviews/ipython/magics.py @@ -212,7 +212,7 @@ def option_completer(cls, k,v): line = v.text_until_cursor completions = cls.setup_completer() compositor_defs = {el.group:el.output_type.__name__ - for el in Compositor.definitions} + for el in Compositor.definitions if el.group} return cls.line_completer(line, completions, compositor_defs) @classmethod diff --git a/holoviews/plotting/__init__.py b/holoviews/plotting/__init__.py 
index 72724abf4f..0555d101e9 100644 --- a/holoviews/plotting/__init__.py +++ b/holoviews/plotting/__init__.py @@ -11,8 +11,8 @@ from .renderer import Renderer, HTML_TAGS # noqa (API import) from .util import univariate_composite, bivariate_composite -Compositor.register(Compositor("Distribution", univariate_composite, 'Area', 'data')) -Compositor.register(Compositor("Bivariate", bivariate_composite, 'Polygons', 'data')) +Compositor.register(Compositor("Distribution", univariate_composite, None, 'data')) +Compositor.register(Compositor("Bivariate", bivariate_composite, None, 'data')) def public(obj): if not isinstance(obj, type): return False diff --git a/holoviews/plotting/renderer.py b/holoviews/plotting/renderer.py index 66653fbe89..90d67b0930 100644 --- a/holoviews/plotting/renderer.py +++ b/holoviews/plotting/renderer.py @@ -170,7 +170,9 @@ def get_plot(self_or_cls, obj, renderer=None): obj = collate(obj) initialize_dynamic(obj) obj = obj.map(lambda obj: Compositor.collapse_element(obj, mode='data'), - [Element, CompositeOverlay]) + [CompositeOverlay]) + obj = obj.map(lambda obj: Compositor.collapse_element(obj, mode='data'), + [Element]) if not renderer: renderer = self_or_cls.instance() if not isinstance(obj, Plot): diff --git a/holoviews/util/parser.py b/holoviews/util/parser.py index fdb440fe40..bd2a864643 100644 --- a/holoviews/util/parser.py +++ b/holoviews/util/parser.py @@ -184,7 +184,7 @@ class OptsSpec(Parser): norm_options = (norm_options_short | norm_options_long) compositor_ops = pp.MatchFirst( - [pp.Literal(el.group) for el in Compositor.definitions]) + [pp.Literal(el.group) for el in Compositor.definitions if el.group]) dotted_path = pp.Combine( pp.Word(ascii_uppercase, exact=1) + pp.Word(pp.alphanums+'._')) From 39a35f3636bb13d3083850c831a5299d3d4025c9 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Tue, 31 Oct 2017 00:17:27 +0000 Subject: [PATCH 17/40] Fixes for univariate_kde operation --- holoviews/operation/stats.py | 4 ++-- 1 file 
changed, 2 insertions(+), 2 deletions(-) diff --git a/holoviews/operation/stats.py b/holoviews/operation/stats.py index 3c14a07861..3331f2f441 100644 --- a/holoviews/operation/stats.py +++ b/holoviews/operation/stats.py @@ -63,7 +63,7 @@ def _process(self, element, key=None): label='{} Density'.format(selected_dim))] data = element.dimension_values(selected_dim) - bin_range = find_minmax((np.nanmin(data), np.nanmax(data)), (0, -float('inf')))\ + bin_range = find_minmax(element.range(selected_dim), (0, -float('inf')))\ if self.p.bin_range is None else self.p.bin_range xs = np.linspace(bin_range[0], bin_range[1], self.p.n_samples) @@ -80,7 +80,7 @@ def _process(self, element, key=None): label='{} Density'.format(selected_dim))] element_type = Area if self.p.filled else Curve - return Area((xs, ys), kdims=[selected_dim], vdims=vdims) + return element_type((xs, ys), kdims=[selected_dim], vdims=vdims, **params) From 84b2145a823f6231e3e35056257aa68d12f15457 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Tue, 31 Oct 2017 00:19:51 +0000 Subject: [PATCH 18/40] Readded statistics plot stubs --- holoviews/core/data/image.py | 2 +- holoviews/plotting/bokeh/__init__.py | 7 ++++++- holoviews/plotting/bokeh/element.py | 1 - holoviews/plotting/bokeh/stats.py | 28 ++++++++++++++++++++++++++++ holoviews/plotting/mpl/__init__.py | 7 ++++++- holoviews/plotting/mpl/stats.py | 28 ++++++++++++++++++++++++++++ holoviews/plotting/util.py | 4 ++-- 7 files changed, 71 insertions(+), 6 deletions(-) create mode 100644 holoviews/plotting/bokeh/stats.py create mode 100644 holoviews/plotting/mpl/stats.py diff --git a/holoviews/core/data/image.py b/holoviews/core/data/image.py index 3bc68922b3..a3b90d44c6 100644 --- a/holoviews/core/data/image.py +++ b/holoviews/core/data/image.py @@ -65,7 +65,7 @@ def length(cls, dataset): @classmethod - def validate(cls, dataset, vdims): + def validate(cls, dataset, vdims=True): pass @classmethod diff --git a/holoviews/plotting/bokeh/__init__.py 
b/holoviews/plotting/bokeh/__init__.py index ae8f33fac9..a67547e2cf 100644 --- a/holoviews/plotting/bokeh/__init__.py +++ b/holoviews/plotting/bokeh/__init__.py @@ -38,6 +38,7 @@ from .plot import GridPlot, LayoutPlot, AdjointLayoutPlot from .raster import RasterPlot, RGBPlot, HeatMapPlot, HSVPlot, QuadMeshPlot from .renderer import BokehRenderer +from .stats import DistributionPlot, BivariatePlot from .tabular import TablePlot from .util import bokeh_version @@ -99,7 +100,11 @@ # Tabular Table: TablePlot, - ItemTable: TablePlot} + ItemTable: TablePlot, + + # Statistics + Distribution: DistributionPlot, + Bivariate: BivariatePlot} if DFrame is not None: associations[DFrame] = TablePlot diff --git a/holoviews/plotting/bokeh/element.py b/holoviews/plotting/bokeh/element.py index c55e8a4943..3bfadc135c 100644 --- a/holoviews/plotting/bokeh/element.py +++ b/holoviews/plotting/bokeh/element.py @@ -755,7 +755,6 @@ def initialize_plot(self, ranges=None, plot=None, plots=None, source=None): self.current_ranges = ranges self.current_frame = element self.current_key = key - style_element = element.last if self.batched else element ranges = util.match_spec(style_element, ranges) diff --git a/holoviews/plotting/bokeh/stats.py b/holoviews/plotting/bokeh/stats.py new file mode 100644 index 0000000000..d30ac1a53a --- /dev/null +++ b/holoviews/plotting/bokeh/stats.py @@ -0,0 +1,28 @@ +import param + +from .chart import AreaPlot +from .path import PolygonPlot + + +class DistributionPlot(AreaPlot): + """ + DistributionPlot visualizes a distribution of values as a KDE. + """ + + bandwidth = param.Number(default=None, doc=""" + The bandwidth of the kernel for the density estimate.""") + + +class BivariatePlot(PolygonPlot): + """ + Bivariate plot visualizes two-dimensional kernel density + estimates. Additionally, by enabling the joint option, the + marginals distributions can be plotted alongside each axis (does + not animate or compose). 
+ """ + + bandwidth = param.Number(default=None, doc=""" + The bandwidth of the kernel for the density estimate.""") + + filled = param.Boolean(default=False, doc=""" + Whether the bivariate contours should be filled.""") diff --git a/holoviews/plotting/mpl/__init__.py b/holoviews/plotting/mpl/__init__.py index a9e715b5e1..8337b4fdcc 100644 --- a/holoviews/plotting/mpl/__init__.py +++ b/holoviews/plotting/mpl/__init__.py @@ -16,6 +16,7 @@ from .path import * # noqa (API import) from .plot import * # noqa (API import) from .raster import * # noqa (API import) +from .stats import * # noqa (API import) from .tabular import * # noqa (API import) from .renderer import MPLRenderer @@ -163,7 +164,11 @@ def grid_selector(grid): Box: PathPlot, Bounds: PathPlot, Ellipse: PathPlot, - Polygons: PolygonPlot}, 'matplotlib', style_aliases=style_aliases) + Polygons: PolygonPlot, + + # Statistics elements + Distribution: DistributionPlot, + Bivariate: BivariatePlot}, 'matplotlib', style_aliases=style_aliases) MPLPlot.sideplots.update({Histogram: SideHistogramPlot, diff --git a/holoviews/plotting/mpl/stats.py b/holoviews/plotting/mpl/stats.py new file mode 100644 index 0000000000..d30ac1a53a --- /dev/null +++ b/holoviews/plotting/mpl/stats.py @@ -0,0 +1,28 @@ +import param + +from .chart import AreaPlot +from .path import PolygonPlot + + +class DistributionPlot(AreaPlot): + """ + DistributionPlot visualizes a distribution of values as a KDE. + """ + + bandwidth = param.Number(default=None, doc=""" + The bandwidth of the kernel for the density estimate.""") + + +class BivariatePlot(PolygonPlot): + """ + Bivariate plot visualizes two-dimensional kernel density + estimates. Additionally, by enabling the joint option, the + marginals distributions can be plotted alongside each axis (does + not animate or compose). 
+ """ + + bandwidth = param.Number(default=None, doc=""" + The bandwidth of the kernel for the density estimate.""") + + filled = param.Boolean(default=False, doc=""" + Whether the bivariate contours should be filled.""") diff --git a/holoviews/plotting/util.py b/holoviews/plotting/util.py index 5c26eb5a1b..365c4d55b3 100644 --- a/holoviews/plotting/util.py +++ b/holoviews/plotting/util.py @@ -459,7 +459,7 @@ class univariate_composite(Operation): def _process(self, element, key=None): plot_opts = Store.lookup_options(Store.current_backend, element, 'plot').kwargs - bw = plot_opts.pop('bw', univariate_kde.bandwidth) + bw = plot_opts.pop('bandwidth', univariate_kde.bandwidth) transformed = univariate_kde(element, bandwidth=bw) Store.transfer_options(element, transformed, ['bw']) return transformed @@ -471,7 +471,7 @@ class bivariate_composite(Operation): def _process(self, element, key=None): plot_opts = Store.lookup_options(Store.current_backend, element, 'plot').kwargs - bw = plot_opts.pop('bw', bivariate_kde.bandwidth) + bw = plot_opts.pop('bandwidth', bivariate_kde.bandwidth) filled = plot_opts.pop('filled', bivariate_kde.filled) transformed = bivariate_kde(element, bandwidth=bw, filled=filled) Store.transfer_options(element, transformed, ['bw', 'filled']) From b9546ac11424f98ac5216b483f3d3d5be8163a45 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Tue, 31 Oct 2017 01:13:21 +0000 Subject: [PATCH 19/40] Fix for Compositor tests --- holoviews/core/options.py | 2 +- tests/testmagics.py | 15 +++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/holoviews/core/options.py b/holoviews/core/options.py index 1b9d6ef13d..94636186e9 100644 --- a/holoviews/core/options.py +++ b/holoviews/core/options.py @@ -859,7 +859,7 @@ def collapse(cls, holomap, ranges=None, mode='data'): @classmethod def register(cls, compositor): defined_patterns = [op.pattern for op in cls.definitions] - if compositor.group in defined_patterns: + if compositor.pattern in 
defined_patterns: cls.definitions.pop(defined_patterns.index(compositor.pattern)) cls.definitions.append(compositor) if compositor.operation not in cls.operations: diff --git a/tests/testmagics.py b/tests/testmagics.py index 11d4374733..5d16b479b4 100644 --- a/tests/testmagics.py +++ b/tests/testmagics.py @@ -171,15 +171,18 @@ def test_display_compositor_definition(self): definition = " display factory(Image * Image * Image) RGBTEST" self.line_magic('compositor', definition) - assert len(Compositor.definitions) == 1, "Compositor definition not created" - self.assertEqual(Compositor.definitions[0].group, 'RGBTEST') - self.assertEqual(Compositor.definitions[0].mode, 'display') + compositors = [c for c in Compositor.definitions if c.group=='RGBTEST'] + self.assertEqual(len(compositors), 1) + self.assertEqual(compositors[0].group, 'RGBTEST') + self.assertEqual(compositors[0].mode, 'display') def test_data_compositor_definition(self): definition = " data transform(Image * Image) HCSTEST" self.line_magic('compositor', definition) - assert len(Compositor.definitions) == 1, "Compositor definition not created" - self.assertEqual(Compositor.definitions[0].group, 'HCSTEST') - self.assertEqual(Compositor.definitions[0].mode, 'data') + + compositors = [c for c in Compositor.definitions if c.group=='HCSTEST'] + self.assertEqual(len(compositors), 1) + self.assertEqual(compositors[0].group, 'HCSTEST') + self.assertEqual(compositors[0].mode, 'data') From 1317c72554971657ffd2fc91dd9cf61859923e33 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Tue, 31 Oct 2017 01:13:41 +0000 Subject: [PATCH 20/40] Allow Empty in AdjointLayout --- holoviews/core/layout.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/holoviews/core/layout.py b/holoviews/core/layout.py index ed3d3d388c..a68f3ace94 100644 --- a/holoviews/core/layout.py +++ b/holoviews/core/layout.py @@ -210,7 +210,7 @@ def __getitem__(self, key): def __setitem__(self, key, value): if key in ['main', 'right', 
'top']: - if isinstance(value, (ViewableElement, UniformNdMapping)): + if isinstance(value, (ViewableElement, UniformNdMapping, Empty)): self.data[key] = value else: raise ValueError('AdjointLayout only accepts Element types.') From 2ba2781b6619e12601b830f1aa0cbf1e93c81f2b Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Tue, 31 Oct 2017 02:02:49 +0000 Subject: [PATCH 21/40] Improved Dimension handling for StatisticalElement --- holoviews/core/data/__init__.py | 15 ++----------- holoviews/core/dimension.py | 31 ++++++++++++++++---------- holoviews/element/stats.py | 39 +++++++++++++++++---------------- holoviews/operation/stats.py | 3 --- 4 files changed, 42 insertions(+), 46 deletions(-) diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index 6eb9135d52..82d4760e53 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -53,7 +53,7 @@ except ImportError: pass -from ..dimension import Dimension +from ..dimension import Dimension, process_dimensions from ..element import Element from ..ndmapping import OrderedDict from ..spaces import HoloMap, DynamicMap @@ -175,18 +175,7 @@ def __init__(self, data, kdims=None, vdims=None, **kwargs): pvals = util.get_param_values(data) kwargs.update([(l, pvals[l]) for l in ['group', 'label'] if l in pvals and l not in kwargs]) - - for group, dims in [('kdims', kdims), ('vdims', vdims)]: - if dims is None: - continue - elif isinstance(dims, (tuple, basestring, Dimension)): - dims = [dims] - elif not isinstance(dims, list): - raise ValueError("%s must be a Dimension or list of dimensions, " - "specified as tuples, string or Dimension instances, " - "not %s." 
% (group, dims)) - kwargs[group] = [d if isinstance(d, Dimension) else Dimension(d) - for d in dims] + kwargs.update(process_dimensions(kdims, vdims)) kdims, vdims = kwargs.get('kdims'), kwargs.get('vdims') initialized = Interface.initialize(type(self), data, kdims, vdims, diff --git a/holoviews/core/dimension.py b/holoviews/core/dimension.py index c1e38550d2..b5650681c7 100644 --- a/holoviews/core/dimension.py +++ b/holoviews/core/dimension.py @@ -41,6 +41,25 @@ def param_aliases(d): return d +def process_dimensions(kdims, vdims): + """ + Processes kdims and vdims specifications into a dictionary + of dimensions which can be passed to params. + """ + dimensions = {} + for group, dims in [('kdims', kdims), ('vdims', vdims)]: + if dims is None: + continue + elif isinstance(dims, (tuple, basestring, Dimension)): + dims = [dims] + elif not isinstance(dims, list): + raise ValueError("%s must be a Dimension or list of dimensions, " + "specified as tuples, string or Dimension instances, " + "not %s." % (group, dims)) + dimensions[group] = [d if isinstance(d, Dimension) else Dimension(d) for d in dims] + return dimensions + + class redim(object): """ Utility that supports re-dimensioning any HoloViews object via the @@ -787,17 +806,7 @@ class to be associated with dimensions. The contents associated constant_dimensions='cdims', deep_dimensions='ddims') def __init__(self, data, kdims=None, vdims=None, **params): - for group, dims in [('kdims', kdims), ('vdims', vdims)]: - if dims is None: - continue - elif isinstance(dims, (tuple, basestring, Dimension)): - dims = [dims] - elif not isinstance(dims, list): - raise ValueError("%s must be a Dimension or list of dimensions, " - "specified as tuples, string or Dimension instances, " - "not %s." 
% (group, dims)) - params[group] = [d if isinstance(d, Dimension) else Dimension(d) - for d in dims] + params.update(process_dimensions(kdims, vdims)) if 'cdims' in params: params['cdims'] = {d if isinstance(d, Dimension) else Dimension(d): val for d, val in params['cdims'].items()} diff --git a/holoviews/element/stats.py b/holoviews/element/stats.py index 77c8a711e4..118f5ec859 100644 --- a/holoviews/element/stats.py +++ b/holoviews/element/stats.py @@ -1,7 +1,9 @@ import param import numpy as np -from ..core.dimension import Dimension, Dimensioned +from ..core.dimension import Dimension, process_dimensions +from ..core.element import Element +from ..core.util import get_param_values from .chart import Chart, Scatter @@ -14,10 +16,19 @@ class _StatisticsElement(Chart): """ def __init__(self, data, kdims=None, vdims=None, **params): - params['_validate_vdims'] = False - super(_StatisticsElement, self).__init__(data, kdims, vdims, **params) - if not self.vdims: + if isinstance(data, Element): + params.update(get_param_values(data)) + kdims = kdims or data.dimensions()[:len(self.kdims)] + data = tuple(data.dimension_values(d) for d in kdims) + params.update(dict(kdims=kdims, vdims=[], _validate_vdims=False)) + super(_StatisticsElement, self).__init__(data, **params) + if not vdims: self.vdims = [Dimension('Density')] + elif len(vdims) > 1: + raise ValueError("%s expects at most one vdim." % + type(self).__name__) + else: + self.vdims = process_dimensions(None, vdims)['vdims'] def range(self, dim, data_range=True): @@ -84,7 +95,8 @@ class Bivariate(_StatisticsElement): and y-data. """ - kdims = param.List(default=[Dimension('x'), Dimension('y')]) + kdims = param.List(default=[Dimension('x'), Dimension('y')], + bounds=(2, 2)) vdims = param.List(default=[Dimension('Density')], bounds=(1,1)) @@ -100,23 +112,12 @@ class Distribution(_StatisticsElement): list. Internally it uses Seaborn to make all the conversions. 
""" - kdims = param.List(default=[Dimension('Value')]) + kdims = param.List(default=[Dimension('Value')], bounds=(1, 1)) group = param.String(default='Distribution', constant=True) - vdims = param.List(default=[Dimension('Density')]) + vdims = param.List(default=[Dimension('Density')], bounds=(0, 1)) + # Ensure Interface does not add an index _auto_indexable_1d = False - -class Regression(Scatter): - """ - Regression is identical to a Scatter plot but is visualized - using the Seaborn regplot interface. This allows it to - implement linear regressions, confidence intervals and a lot - more. - """ - - group = param.String(default='Regression', constant=True) - - diff --git a/holoviews/operation/stats.py b/holoviews/operation/stats.py index 3331f2f441..461cb8a065 100644 --- a/holoviews/operation/stats.py +++ b/holoviews/operation/stats.py @@ -76,9 +76,6 @@ def _process(self, element, key=None): else: ys = np.full_like(xs, 0) - vdims = [Dimension('{}_density'.format(selected_dim), - label='{} Density'.format(selected_dim))] - element_type = Area if self.p.filled else Curve return element_type((xs, ys), kdims=[selected_dim], vdims=vdims, **params) From f153c67af569867c37e08e704b0efcdbcaf0ce98 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Tue, 31 Oct 2017 02:03:30 +0000 Subject: [PATCH 22/40] Small stylistic fixes for Distribution --- holoviews/operation/stats.py | 2 +- holoviews/plotting/mpl/__init__.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/holoviews/operation/stats.py b/holoviews/operation/stats.py index 461cb8a065..a33aaf4191 100644 --- a/holoviews/operation/stats.py +++ b/holoviews/operation/stats.py @@ -25,7 +25,7 @@ class univariate_kde(Operation): dimension = param.String(default=None, doc=""" Along which dimension of the Element to compute the KDE.""") - filled = param.Boolean(default=False, doc=""" + filled = param.Boolean(default=True, doc=""" Controls whether to return filled or unfilled KDE.""") n_samples = 
param.Integer(default=100, doc=""" diff --git a/holoviews/plotting/mpl/__init__.py b/holoviews/plotting/mpl/__init__.py index 8337b4fdcc..9de740398a 100644 --- a/holoviews/plotting/mpl/__init__.py +++ b/holoviews/plotting/mpl/__init__.py @@ -265,3 +265,7 @@ def grid_selector(grid): options.Nodes = Options('style', edgecolors='black', facecolors=Cycle(), marker='o', s=20**2) options.EdgePaths = Options('style', color='black') + +# Statistics +options.Distribution = Options('style', facecolor=Cycle(), edgecolor='black', + alpha=0.5) From 97a343783f6661bb520763aa84f2bdf6bd3e837f Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Tue, 31 Oct 2017 02:05:28 +0000 Subject: [PATCH 23/40] Small fix for pandas statistical element handling --- holoviews/core/data/pandas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/holoviews/core/data/pandas.py b/holoviews/core/data/pandas.py index 7fd9fa0aae..f86f3b4019 100644 --- a/holoviews/core/data/pandas.py +++ b/holoviews/core/data/pandas.py @@ -41,7 +41,7 @@ def init(cls, eltype, data, kdims, vdims): vdims = [c for c in data.columns if c not in kdims] elif vdims and kdims is None: kdims = [c for c in data.columns if c not in vdims][:ndim] - elif kdims is None and vdims is None: + elif kdims is None and (vdims is None or vdims == []): kdims = list(data.columns[:ndim]) vdims = [] if ndim is None else list(data.columns[ndim:]) if any(isinstance(d, (np.int64, int)) for d in kdims+vdims): From 105c64cc382a1d3f3484c307b7e2e5816de03234 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Tue, 31 Oct 2017 02:25:03 +0000 Subject: [PATCH 24/40] Removed Regression from comparisons --- holoviews/element/comparison.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/holoviews/element/comparison.py b/holoviews/element/comparison.py index 4ed798ed40..a8c622ac00 100644 --- a/holoviews/element/comparison.py +++ b/holoviews/element/comparison.py @@ -180,7 +180,6 @@ def register(cls): # Seaborn Views 
cls.equality_type_funcs[Bivariate] = cls.compare_bivariate cls.equality_type_funcs[Distribution] = cls.compare_distribution - cls.equality_type_funcs[Regression] = cls.compare_regression # NdMappings cls.equality_type_funcs[NdLayout] = cls.compare_gridlayout @@ -672,10 +671,6 @@ def compare_distribution(cls, el1, el2, msg='Distribution'): def compare_bivariate(cls, el1, el2, msg='Bivariate'): cls.compare_dataset(el1, el2, msg) - @classmethod - def compare_regression(cls, el1, el2, msg='Regression'): - cls.compare_dataset(el1, el2, msg) - #=======# # Grids # #=======# From e8667deaf69aab970eaa77aa7a5bf6e2d3aad14d Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Tue, 31 Oct 2017 02:27:01 +0000 Subject: [PATCH 25/40] Improved statistics compositor definitions --- holoviews/plotting/__init__.py | 6 +++--- holoviews/plotting/util.py | 17 ++++++++++++----- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/holoviews/plotting/__init__.py b/holoviews/plotting/__init__.py index 0555d101e9..d0c64abc02 100644 --- a/holoviews/plotting/__init__.py +++ b/holoviews/plotting/__init__.py @@ -9,10 +9,10 @@ from ..core.options import Cycle, Compositor from .plot import Plot from .renderer import Renderer, HTML_TAGS # noqa (API import) -from .util import univariate_composite, bivariate_composite +from .util import univariate_compositor, bivariate_compositor -Compositor.register(Compositor("Distribution", univariate_composite, None, 'data')) -Compositor.register(Compositor("Bivariate", bivariate_composite, None, 'data')) +Compositor.register(Compositor("Distribution", univariate_compositor, None, 'data')) +Compositor.register(Compositor("Bivariate", bivariate_compositor, None, 'data')) def public(obj): if not isinstance(obj, type): return False diff --git a/holoviews/plotting/util.py b/holoviews/plotting/util.py index 365c4d55b3..7379271cf0 100644 --- a/holoviews/plotting/util.py +++ b/holoviews/plotting/util.py @@ -453,24 +453,31 @@ def get_min_distance(element): 
return 0 -class univariate_composite(Operation): +class univariate_compositor(Operation): output_type = Area def _process(self, element, key=None): - plot_opts = Store.lookup_options(Store.current_backend, element, 'plot').kwargs + backend = Store.current_backend + if self.output_type not in Store.registry[backend]: + return element + plot_opts = Store.lookup_options(backend, element, 'plot').kwargs bw = plot_opts.pop('bandwidth', univariate_kde.bandwidth) - transformed = univariate_kde(element, bandwidth=bw) + filled = plot_opts.pop('filled', univariate_kde.filled) + transformed = univariate_kde(element, bandwidth=bw, filled=filled) Store.transfer_options(element, transformed, ['bw']) return transformed -class bivariate_composite(Operation): +class bivariate_compositor(Operation): output_type = Polygons def _process(self, element, key=None): - plot_opts = Store.lookup_options(Store.current_backend, element, 'plot').kwargs + backend = Store.current_backend + if self.output_type not in Store.registry[backend]: + return element + plot_opts = Store.lookup_options(backend, element, 'plot').kwargs bw = plot_opts.pop('bandwidth', bivariate_kde.bandwidth) filled = plot_opts.pop('filled', bivariate_kde.filled) transformed = bivariate_kde(element, bandwidth=bw, filled=filled) From af6e84a48b460be7338088d721e0476e5c180d4d Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Tue, 31 Oct 2017 02:27:29 +0000 Subject: [PATCH 26/40] Guard against infinite Compositor recursion --- holoviews/core/options.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/holoviews/core/options.py b/holoviews/core/options.py index 94636186e9..bb0ce09510 100644 --- a/holoviews/core/options.py +++ b/holoviews/core/options.py @@ -816,6 +816,7 @@ def collapse_element(cls, overlay, key=None, ranges=None, mode='data'): overlay = Overlay([overlay]) unpack = True + prev_ids = tuple() while True: match = cls.strongest_match(overlay, mode) if match is None: @@ -838,6 +839,13 @@ def 
collapse_element(cls, overlay, key=None, ranges=None, mode='data'): result = list(zip(sliced.keys(), [result])) overlay = overlay.clone(values[:start]+result+values[stop:]) + # Guard against infinite recursion for no-ops + spec_fn = lambda x: not isinstance(x, CompositeOverlay) + new_ids = tuple(overlay.traverse(lambda x: id(x), [spec_fn])) + if new_ids == prev_ids: + return overlay + prev_ids = new_ids + @classmethod def collapse(cls, holomap, ranges=None, mode='data'): From dd2b84ec57ccc6f2c1b41b05df600d26ac9938f1 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Tue, 31 Oct 2017 03:16:20 +0000 Subject: [PATCH 27/40] Added transfer_options support to Compositor --- holoviews/core/options.py | 46 ++++++++++++++++++------------- holoviews/plotting/__init__.py | 11 ++++++-- holoviews/plotting/plotly/plot.py | 4 --- holoviews/plotting/util.py | 32 --------------------- 4 files changed, 35 insertions(+), 58 deletions(-) diff --git a/holoviews/core/options.py b/holoviews/core/options.py index bb0ce09510..b5c7fc42b0 100644 --- a/holoviews/core/options.py +++ b/holoviews/core/options.py @@ -784,11 +784,12 @@ class Compositor(param.Parameterized): kwargs = param.Dict(doc=""" Optional set of parameters to pass to the operation.""") + transfer_options = param.Boolean(default=False, doc=""" + Whether to transfer the options from the input to the output.""") operations = [] # The operations that can be used to define compositors. definitions = [] # The set of all the compositor instances - @classmethod def strongest_match(cls, overlay, mode): """ @@ -806,7 +807,7 @@ def strongest_match(cls, overlay, mode): @classmethod - def collapse_element(cls, overlay, key=None, ranges=None, mode='data'): + def collapse_element(cls, overlay, ranges=None, mode='data', backend=None): """ Finds any applicable compositor and applies it. 
""" @@ -830,7 +831,7 @@ def collapse_element(cls, overlay, key=None, ranges=None, mode='data'): else: values = overlay.items() sliced = overlay.clone(values[start:stop]) - result = applicable_op.apply(sliced, ranges, key=key) + result = applicable_op.apply(sliced, ranges, backend) if applicable_op.group: result = result.relabel(group=applicable_op.group) if isinstance(overlay, Overlay): @@ -860,7 +861,7 @@ def collapse(cls, holomap, ranges=None, mode='data'): clone = holomap.clone(shared_data=False) data = zip(ranges[1], holomap.data.values()) if ranges else holomap.data.items() for key, overlay in data: - clone[key] = cls.collapse_element(overlay, key, ranges, mode) + clone[key] = cls.collapse_element(overlay, ranges, mode) return clone @@ -874,7 +875,8 @@ def register(cls, compositor): cls.operations.append(compositor.operation) - def __init__(self, pattern, operation, group, mode, **kwargs): + def __init__(self, pattern, operation, group, mode, transfer_options=False, + output_type=None, **kwargs): self._pattern_spec, labels = [], [] for path in pattern.split('*'): @@ -891,11 +893,13 @@ def __init__(self, pattern, operation, group, mode, **kwargs): else: self.label = '' + self._output_type = output_type super(Compositor, self).__init__(group=group, pattern=pattern, operation=operation, mode=mode, - kwargs=kwargs) + kwargs=kwargs, + transfer_options=transfer_options) @property @@ -904,10 +908,7 @@ def output_type(self): Returns the operation output_type unless explicitly overridden in the kwargs. 
""" - if 'output_type' in self.kwargs: - return self.kwargs['output_type'] - else: - return self.operation.output_type + return self._output_type or self.operation.output_type def _slice_match_level(self, overlay_items): @@ -960,17 +961,24 @@ def match_level(self, overlay): return (best_lvl, match_slice) if best_lvl != 0 else None - def apply(self, value, input_ranges, key=None): + def apply(self, value, input_ranges, backend=None): """ Apply the compositor on the input with the given input ranges. """ from .overlay import CompositeOverlay + if backend is None: backend = Store.current_backend + kwargs = {k: v for k, v in self.kwargs.items() if k != 'output_type'} if isinstance(value, CompositeOverlay) and len(value) == 1: value = value.values()[0] - if key is None: - return self.operation(value, input_ranges=input_ranges, **self.kwargs) - return self.operation.instance(input_ranges=input_ranges, **self.kwargs).process_element(value, key) + if self.transfer_options: + plot_opts = Store.lookup_options(backend, value, 'plot').kwargs + kwargs.update({k: v for k, v in plot_opts.items() + if k in self.operation.params()}) + transformed = self.operation(value, input_ranges=input_ranges, **kwargs) + if self.transfer_options: + Store.transfer_options(value, transformed, backend) + return transformed class Store(object): @@ -1131,11 +1139,12 @@ def lookup(cls, backend, obj): @classmethod - def transfer_options(cls, obj, new_obj, drop=[]): + def transfer_options(cls, obj, new_obj, backend=None): """ Transfers options for all backends from one object to another. Drops any options defined in the supplied drop list. 
""" + backend = cls.current_backend if backend is None else backend type_name = type(new_obj).__name__ group = type_name if obj.group == type(obj).__name__ else obj.group spec = '.'.join([s for s in (type_name, group, obj.label) if s]) @@ -1143,10 +1152,9 @@ def transfer_options(cls, obj, new_obj, drop=[]): options = [] for group in ['plot', 'style', 'norm']: opts = cls.lookup_options(backend, obj, group).kwargs - opts = {k: v for k, v in opts.items() if k not in drop} - if opts: - options.append(Options(group, **opts)) - StoreOptions.set_options(new_obj, {spec: options}, backend) + if opts: options.append(Options(group, **opts)) + if options: + StoreOptions.set_options(new_obj, {spec: options}, backend) @classmethod diff --git a/holoviews/plotting/__init__.py b/holoviews/plotting/__init__.py index d0c64abc02..8d9a34041e 100644 --- a/holoviews/plotting/__init__.py +++ b/holoviews/plotting/__init__.py @@ -7,12 +7,17 @@ """ from ..core.options import Cycle, Compositor +from ..element import Area, Polygons from .plot import Plot from .renderer import Renderer, HTML_TAGS # noqa (API import) -from .util import univariate_compositor, bivariate_compositor +from ..operation.stats import univariate_kde, bivariate_kde -Compositor.register(Compositor("Distribution", univariate_compositor, None, 'data')) -Compositor.register(Compositor("Bivariate", bivariate_compositor, None, 'data')) +Compositor.register(Compositor("Distribution", univariate_kde, None, + 'data', transfer_options=True, + output_type=Area)) +Compositor.register(Compositor("Bivariate", bivariate_kde, None, + 'data', transfer_options=True, + output_type=Polygons)) def public(obj): if not isinstance(obj, type): return False diff --git a/holoviews/plotting/plotly/plot.py b/holoviews/plotting/plotly/plot.py index 1270bc227c..d2dabe3387 100644 --- a/holoviews/plotting/plotly/plot.py +++ b/holoviews/plotting/plotly/plot.py @@ -288,11 +288,7 @@ def __init__(self, layout, ranges=None, layout_num=1, **params): self.cols, 
self.rows = layout.shape self.subplots, self.layout = self._create_subplots(layout, ranges) - def _create_subplots(self, layout, ranges): - layout = layout.map(Compositor.collapse_element, [CompositeOverlay], - clone=False) - subplots = OrderedDict() frame_ranges = self.compute_ranges(layout, None, ranges) frame_ranges = OrderedDict([(key, self.compute_ranges(layout, key, frame_ranges)) diff --git a/holoviews/plotting/util.py b/holoviews/plotting/util.py index 7379271cf0..d194a1e57b 100644 --- a/holoviews/plotting/util.py +++ b/holoviews/plotting/util.py @@ -453,38 +453,6 @@ def get_min_distance(element): return 0 -class univariate_compositor(Operation): - - output_type = Area - - def _process(self, element, key=None): - backend = Store.current_backend - if self.output_type not in Store.registry[backend]: - return element - plot_opts = Store.lookup_options(backend, element, 'plot').kwargs - bw = plot_opts.pop('bandwidth', univariate_kde.bandwidth) - filled = plot_opts.pop('filled', univariate_kde.filled) - transformed = univariate_kde(element, bandwidth=bw, filled=filled) - Store.transfer_options(element, transformed, ['bw']) - return transformed - - -class bivariate_compositor(Operation): - - output_type = Polygons - - def _process(self, element, key=None): - backend = Store.current_backend - if self.output_type not in Store.registry[backend]: - return element - plot_opts = Store.lookup_options(backend, element, 'plot').kwargs - bw = plot_opts.pop('bandwidth', bivariate_kde.bandwidth) - filled = plot_opts.pop('filled', bivariate_kde.filled) - transformed = bivariate_kde(element, bandwidth=bw, filled=filled) - Store.transfer_options(element, transformed, ['bw', 'filled']) - return transformed - - def rgb2hex(rgb): """ Convert RGB(A) tuple to hex. 
From 59644e6b72aa0cf11f681c6505f38d4f14facef3 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Tue, 31 Oct 2017 03:19:59 +0000 Subject: [PATCH 28/40] Only apply Compositors if necessary --- holoviews/plotting/renderer.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/holoviews/plotting/renderer.py b/holoviews/plotting/renderer.py index 90d67b0930..ae0002f812 100644 --- a/holoviews/plotting/renderer.py +++ b/holoviews/plotting/renderer.py @@ -169,10 +169,12 @@ def get_plot(self_or_cls, obj, renderer=None): if not displayable(obj): obj = collate(obj) initialize_dynamic(obj) - obj = obj.map(lambda obj: Compositor.collapse_element(obj, mode='data'), - [CompositeOverlay]) - obj = obj.map(lambda obj: Compositor.collapse_element(obj, mode='data'), - [Element]) + if any(len(c._pattern_spec) > 1 for c in Compositor.definitions): + obj = obj.map(lambda obj: Compositor.collapse_element(obj, mode='data'), + [CompositeOverlay]) + if any(len(c._pattern_spec) == 1 for c in Compositor.definitions): + obj = obj.map(lambda obj: Compositor.collapse_element(obj, mode='data'), + [Element]) if not renderer: renderer = self_or_cls.instance() if not isinstance(obj, Plot): From 6ce24a818f672c3d1c05a51d1b73a26131ce24c7 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Tue, 31 Oct 2017 03:44:13 +0000 Subject: [PATCH 29/40] Various small fixes --- holoviews/core/options.py | 9 ++++----- holoviews/element/stats.py | 2 +- holoviews/plotting/renderer.py | 7 +++++-- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/holoviews/core/options.py b/holoviews/core/options.py index b5c7fc42b0..a434f2d606 100644 --- a/holoviews/core/options.py +++ b/holoviews/core/options.py @@ -1148,11 +1148,10 @@ def transfer_options(cls, obj, new_obj, backend=None): type_name = type(new_obj).__name__ group = type_name if obj.group == type(obj).__name__ else obj.group spec = '.'.join([s for s in (type_name, group, obj.label) if s]) - for backend in cls.renderers: - 
options = [] - for group in ['plot', 'style', 'norm']: - opts = cls.lookup_options(backend, obj, group).kwargs - if opts: options.append(Options(group, **opts)) + options = [] + for group in ['plot', 'style', 'norm']: + opts = cls.lookup_options(backend, obj, group).kwargs + if opts: options.append(Options(group, **opts)) if options: StoreOptions.set_options(new_obj, {spec: options}, backend) diff --git a/holoviews/element/stats.py b/holoviews/element/stats.py index 118f5ec859..cf82efffdf 100644 --- a/holoviews/element/stats.py +++ b/holoviews/element/stats.py @@ -98,7 +98,7 @@ class Bivariate(_StatisticsElement): kdims = param.List(default=[Dimension('x'), Dimension('y')], bounds=(2, 2)) - vdims = param.List(default=[Dimension('Density')], bounds=(1,1)) + vdims = param.List(default=[Dimension('Density')], bounds=(0,1)) group = param.String(default="Bivariate", constant=True) diff --git a/holoviews/plotting/renderer.py b/holoviews/plotting/renderer.py index ae0002f812..38d8dc9b22 100644 --- a/holoviews/plotting/renderer.py +++ b/holoviews/plotting/renderer.py @@ -164,16 +164,19 @@ def get_plot(self_or_cls, obj, renderer=None): """ # Initialize DynamicMaps with first data item initialize_dynamic(obj) + backend = self_or_cls.backend if not isinstance(obj, Plot): if not displayable(obj): obj = collate(obj) initialize_dynamic(obj) if any(len(c._pattern_spec) > 1 for c in Compositor.definitions): - obj = obj.map(lambda obj: Compositor.collapse_element(obj, mode='data'), + obj = obj.map(lambda obj: Compositor.collapse_element(obj, mode='data', + backend=backend), [CompositeOverlay]) if any(len(c._pattern_spec) == 1 for c in Compositor.definitions): - obj = obj.map(lambda obj: Compositor.collapse_element(obj, mode='data'), + obj = obj.map(lambda obj: Compositor.collapse_element(obj, mode='data', + backend=backend), [Element]) if not renderer: renderer = self_or_cls.instance() From 384f5735121c1f10a2a8f66f94f53918d73a75c9 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger 
Date: Tue, 31 Oct 2017 03:45:22 +0000 Subject: [PATCH 30/40] Fixed gridmatrix to work with statistics Elements --- holoviews/operation/element.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/holoviews/operation/element.py b/holoviews/operation/element.py index be65a15e11..8213499e13 100644 --- a/holoviews/operation/element.py +++ b/holoviews/operation/element.py @@ -784,7 +784,7 @@ def _process(self, p, element, ranges={}): datatype=['dataframe', 'dictionary']) else: values = element.dimension_values(d1) - el = p.diagonal_type(values, vdims=[d1]) + el = p.diagonal_type(values, kdims=[d1]) elif p.diagonal_operation is histogram or isinstance(p.diagonal_operation, histogram): bin_range = ranges.get(d1.name, element.range(d1)) opts = dict(axiswise=True, framewise=True) @@ -794,8 +794,9 @@ def _process(self, p, element, ranges={}): else: el = p.diagonal_operation(element, dimension=d1.name) else: - el = p.chart_type(el_data, kdims=[d1], - vdims=[d2], datatype=['dataframe', 'dictionary']) + kdims, vdims = ([d1, d2], []) if len(p.chart_type.kdims) == 2 else (d1, d2) + el = p.chart_type(el_data, kdims=kdims, vdims=vdims, + datatype=['dataframe', 'dictionary']) data[(d1.name, d2.name)] = el return data From 7be66e07e5e749d4c66a3ec4b3772345b124eacb Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Tue, 31 Oct 2017 13:30:29 +0000 Subject: [PATCH 31/40] Cleaned up KDE operations --- holoviews/operation/stats.py | 105 ++++++++++++++++++++++-------- holoviews/plotting/bokeh/stats.py | 6 ++ holoviews/plotting/mpl/stats.py | 6 ++ 3 files changed, 89 insertions(+), 28 deletions(-) diff --git a/holoviews/operation/stats.py b/holoviews/operation/stats.py index a33aaf4191..1282a28a0a 100644 --- a/holoviews/operation/stats.py +++ b/holoviews/operation/stats.py @@ -11,7 +11,27 @@ from .element import contours +def _kde_support(bin_range, bw, gridsize, cut, clip): + """Establish support for a kernel density estimate.""" + kmin, kmax = bin_range[0] - bw * 
cut, bin_range[1] + bw * cut + if clip[0] is not None and np.isfinite(clip[0]): + kmin = max(kmin, clip[0]) + if clip[1] is not None and np.isfinite(clip[1]): + kmax = max(kmax, clip[1]) + return np.linspace(kmin, kmax, gridsize) + + class univariate_kde(Operation): + """ + Computes a 1D kernel density estimate (KDE) along the supplied + dimension. Kernel density estimation is a non-parametric way to + estimate the probability density function of a random variable. + + The KDE works by placing a Gaussian kernel at each sample with + the supplied bandwidth. These kernels are then summed to produce + the density estimate. By default a good bandwidth is determined + using the bw_method but it may be overridden by an explicit value. + """ bw_method = param.ObjectSelector(default='scott', objects=['scott', 'silverman'], doc=""" Method of automatically determining KDE bandwidth""") @@ -19,6 +39,9 @@ class univariate_kde(Operation): bandwidth = param.Number(default=None, doc=""" Allows supplying explicit bandwidth value rather than relying on scott or silverman method.""") + cut = param.Number(default=3, doc=""" + Draw the estimate to cut * bw from the extreme data points.""") + bin_range = param.NumericTuple(default=None, length=2, doc=""" Specifies the range within which to compute the KDE.""") @@ -30,7 +53,7 @@ class univariate_kde(Operation): n_samples = param.Integer(default=100, doc=""" Number of samples to compute the KDE over.""") - + groupby = param.ClassSelector(default=None, class_=(basestring, Dimension), doc=""" Defines a dimension to group the Histogram returning an NdOverlay of Histograms.""") @@ -48,51 +71,72 @@ def _process(self, element, key=None): raise ImportError('%s operation requires SciPy to be installed.' 
% type(self).__name__) params = {} - dim_template = Dimension if isinstance(element, Distribution): selected_dim = element.kdims[0] if element.group != type(element).__name__: params['group'] = element.group params['label'] = element.label - dim_template = element.vdims[0] - elif self.p.dimension: - selected_dim = self.p.dimension + vdim = element.vdims[0] + vdim_name = '{}_density'.format(selected_dim.name) + vdim_label = '{} Density'.format(selected_dim.label) + vdims = [vdim(vdim_name, label=vdim_label) if vdim.name == 'Density' else vdim] else: - selected_dim = [d.name for d in element.vdims + element.kdims][0] - vdims = [dim_template('{}_density'.format(selected_dim), - label='{} Density'.format(selected_dim))] + if self.p.dimension: + selected_dim = element.get_dimension(self.p.dimension) + else: + selected_dim = [d.name for d in element.vdims + element.kdims][0] + vdim_name = '{}_density'.format(selected_dim.name) + vdim_label = '{} Density'.format(selected_dim.label) + vdims = [Dimension(vdim_nam, label=vdim_label)] data = element.dimension_values(selected_dim) - bin_range = find_minmax(element.range(selected_dim), (0, -float('inf')))\ - if self.p.bin_range is None else self.p.bin_range + bin_range = self.p.bin_range or element.range(selected_dim) + if bin_range == (0, 0) or any(not np.isfinite(r) for r in bin_range): + bin_range = (0, 1) - xs = np.linspace(bin_range[0], bin_range[1], self.p.n_samples) data = data[np.isfinite(data)] if len(data): kde = stats.gaussian_kde(data) if self.p.bandwidth: kde.set_bandwidth(self.p.bandwidth) + bw = kde.scotts_factor() * data.std(ddof=1) + xs = _kde_support(bin_range, bw, self.p.n_samples, self.p.cut, selected_dim.range) ys = kde.evaluate(xs) else: + xs = np.linspace(bin_range[0], bin_range[1], self.p.n_samples) ys = np.full_like(xs, 0) element_type = Area if self.p.filled else Curve return element_type((xs, ys), kdims=[selected_dim], vdims=vdims, **params) - -class bivariate_kde(Operation): - contours = 
param.Boolean(default=True) +class bivariate_kde(Operation): + """ + Computes a 2D kernel density estimate (KDE) of the first two + dimensions in the input data. Kernel density estimation is a + non-parametric way to estimate the probability density function of + a random variable. + + The KDE works by placing 2D Gaussian kernel at each sample with + the supplied bandwidth. These kernels are then summed to produce + the density estimate. By default a good bandwidth is determined + using the bw_method but it may be overridden by an explicit value. + """ + + contours = param.Boolean(default=True, doc=""" + Whether to compute contours from the KDE, determines whether to + return an Image or Contours/Polygons.""") bw_method = param.ObjectSelector(default='scott', objects=['scott', 'silverman'], doc=""" Method of automatically determining KDE bandwidth""") bandwidth = param.Number(default=None, doc=""" - Allows supplying explicit bandwidth value rather than relying on scott or silverman method.""") + Allows supplying explicit bandwidth value rather than relying + on scott or silverman method.""") - bin_range = param.NumericTuple(default=None, length=2, doc=""" - Specifies the range within which to compute the KDE.""") + cut = param.Number(default=3, doc=""" + Draw the estimate to cut * bw from the extreme data points.""") filled = param.Boolean(default=False, doc=""" Controls whether to return filled or unfilled contours.""") @@ -113,21 +157,26 @@ def _process(self, element, key=None): from scipy import stats except ImportError: raise ImportError('%s operation requires SciPy to be installed.' 
% type(self).__name__) - + + xdim, ydim = element.dimensions()[:2] data = element.array([0, 1]).T - bin_range = find_minmax((np.nanmin(data), np.nanmax(data)), (0, -float('inf')))\ - if self.p.bin_range is None else self.p.bin_range xmin, xmax = self.p.x_range or element.range(0) ymin, ymax = self.p.y_range or element.range(1) - kde = stats.gaussian_kde(data) - if self.p.bandwidth: - kde.set_bandwidth(self.p.bandwidth) - xs = np.linspace(xmin, xmax, self.p.n_samples) - ys = np.linspace(ymin, ymax, self.p.n_samples) - xx, yy = cartesian_product([xs, ys], False) - positions = np.vstack([xx.ravel(), yy.ravel()]) - f = np.reshape(kde(positions).T, xx.shape) + if len(data): + kde = stats.gaussian_kde(data) + if self.p.bandwidth: + kde.set_bandwidth(self.p.bandwidth) + bw = kde.scotts_factor() * data.std(ddof=1) + xs = _kde_support((xmin, xmax), bw, self.p.n_samples, self.p.cut, xdim.range) + ys = _kde_support((ymin, ymax), bw, self.p.n_samples, self.p.cut, ydim.range) + xx, yy = cartesian_product([xs, ys], False) + positions = np.vstack([xx.ravel(), yy.ravel()]) + f = np.reshape(kde(positions).T, xx.shape) + else: + xs = np.linspace(xmin, xmax, self.p.n_samples) + ys = np.linspace(ymin, ymax, self.p.n_samples) + f = np.zeros((self.p.nsamples, self.p.nsamples)) params = {} if isinstance(element, Bivariate): diff --git a/holoviews/plotting/bokeh/stats.py b/holoviews/plotting/bokeh/stats.py index d30ac1a53a..1703b1189f 100644 --- a/holoviews/plotting/bokeh/stats.py +++ b/holoviews/plotting/bokeh/stats.py @@ -12,6 +12,9 @@ class DistributionPlot(AreaPlot): bandwidth = param.Number(default=None, doc=""" The bandwidth of the kernel for the density estimate.""") + cut = param.Number(default=3, doc=""" + Draw the estimate to cut * bw from the extreme data points.""") + class BivariatePlot(PolygonPlot): """ @@ -24,5 +27,8 @@ class BivariatePlot(PolygonPlot): bandwidth = param.Number(default=None, doc=""" The bandwidth of the kernel for the density estimate.""") + cut = 
param.Number(default=3, doc=""" + Draw the estimate to cut * bw from the extreme data points.""") + filled = param.Boolean(default=False, doc=""" Whether the bivariate contours should be filled.""") diff --git a/holoviews/plotting/mpl/stats.py b/holoviews/plotting/mpl/stats.py index d30ac1a53a..1703b1189f 100644 --- a/holoviews/plotting/mpl/stats.py +++ b/holoviews/plotting/mpl/stats.py @@ -12,6 +12,9 @@ class DistributionPlot(AreaPlot): bandwidth = param.Number(default=None, doc=""" The bandwidth of the kernel for the density estimate.""") + cut = param.Number(default=3, doc=""" + Draw the estimate to cut * bw from the extreme data points.""") + class BivariatePlot(PolygonPlot): """ @@ -24,5 +27,8 @@ class BivariatePlot(PolygonPlot): bandwidth = param.Number(default=None, doc=""" The bandwidth of the kernel for the density estimate.""") + cut = param.Number(default=3, doc=""" + Draw the estimate to cut * bw from the extreme data points.""") + filled = param.Boolean(default=False, doc=""" Whether the bivariate contours should be filled.""") From 45f06c6923ae9a6e717309eda481a4ad28c498ca Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Tue, 31 Oct 2017 14:06:46 +0000 Subject: [PATCH 32/40] Added reference notebooks for statistics elements --- .../reference/elements/bokeh/Bivariate.ipynb | 124 ++++++++++++++++ .../elements/bokeh/Distribution.ipynb | 129 +++++++++++++++++ .../elements/matplotlib/Bivariate.ipynb | 124 ++++++++++++++++ .../elements/matplotlib/Distribution.ipynb | 132 ++++++++++++++++++ 4 files changed, 509 insertions(+) create mode 100644 examples/reference/elements/bokeh/Bivariate.ipynb create mode 100644 examples/reference/elements/bokeh/Distribution.ipynb create mode 100644 examples/reference/elements/matplotlib/Bivariate.ipynb create mode 100644 examples/reference/elements/matplotlib/Distribution.ipynb diff --git a/examples/reference/elements/bokeh/Bivariate.ipynb b/examples/reference/elements/bokeh/Bivariate.ipynb new file mode 100644 index 
0000000000..84a82feac2 --- /dev/null +++ b/examples/reference/elements/bokeh/Bivariate.ipynb @@ -0,0 +1,124 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "
\n", + "
Title
Bivariate Element
\n", + "
Dependencies
Bokeh, Matplotlib, SciPy
\n", + "
Backends
Bokeh
Matplotlib
\n", + "
\n", + "
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import holoviews as hv\n", + "hv.extension('bokeh')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "``Bivariate`` provides a convenient way to visualize a 2D distribution of values as a [Kernel density estimate](https://en.wikipedia.org/wiki/Kernel_density_estimation) and therefore provides a 2D extension to the ``Distribution`` element. Kernel density estimation is a non-parametric way to estimate the probability density function of a random variable.\n", + "\n", + "The KDE works by placing a Gaussian kernel at each sample with the supplied bandwidth, which are then summed to produce the density estimate. By default the bandwidth is determined using the Scott's method, which usually produces good results, but it may be overridden by an explicit value.\n", + "\n", + "To start with we will create a ``Bivariate`` with 1,000 normally distributed samples:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "normal = np.random.randn(1000, 2)\n", + "hv.Bivariate(normal)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A ``Bivariate`` might be filled or not and we can define a ``cmap`` to control the coloring:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%opts Bivariate [filled=True colorbar=True width=350 toolbar='above'] (cmap='Blues')\n", + "hv.Bivariate(normal)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can set explicit values for the ``bandwidth`` to see the effect. 
Since the densities will vary across the ``NdLayout``, we will enable axiswise normalization, ensuring they are normalized separately:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%opts Bivariate {+axiswise}\n", + "hv.NdLayout({bw: hv.Bivariate(normal).opts(plot=dict(bandwidth=bw))\n", + " for bw in [0.05, 0.1, 0.5, 1]}, 'Bandwidth').cols(2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Underlying the ``Bivariate`` element is the ``bivariate_kde`` operation, which computes the KDE for us automatically when we plot the element. We can also use this operation directly and print the output, highlighting the fact that the operation simply returns a ``Contours`` or ``Polygons`` element. It also affords more control over the parameters, letting us directly set not only the ``bandwidth`` and ``cut`` values but also an ``x_range``, ``y_range``, ``bw_method`` and the number of samples (``n_samples``) to approximate the KDE with:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from holoviews.operation.stats import bivariate_kde\n", + "dist = hv.Bivariate(normal)\n", + "kde = bivariate_kde(dist, x_range=(-4, 4), y_range=(-4, 4), bw_method='silverman', n_samples=20)\n", + "kde" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/reference/elements/bokeh/Distribution.ipynb b/examples/reference/elements/bokeh/Distribution.ipynb new file mode 100644 index 0000000000..998da03f9b --- /dev/null +++
b/examples/reference/elements/bokeh/Distribution.ipynb @@ -0,0 +1,129 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "
\n", + "
Title
Distribution Element
\n", + "
Dependencies
Bokeh, SciPy
\n", + "
Backends
Bokeh
Matplotlib
\n", + "
\n", + "
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import holoviews as hv\n", + "hv.extension('bokeh')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "``Distribution`` provides a convenient way to visualize a 1D distribution of values as a [Kernel density estimate](https://en.wikipedia.org/wiki/Kernel_density_estimation). Kernel density estimation is a non-parametric way to\n", + "estimate the probability density function of a random variable.\n", + "\n", + "The KDE works by placing a Gaussian kernel at each sample with the supplied bandwidth, which are then summed to produce the density estimate. By default the bandwidth is determined using the Scott's method, which usually produces good results, but it may be overridden by an explicit value.\n", + "\n", + "To start with we will create a ``Distribution`` with 1,000 normally distributed samples:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "normal = np.random.randn(1000)\n", + "hv.Distribution(normal)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can set explicit values for the ``bandwidth`` to see the effect and also declare whether we want the plot to be filled:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%opts Distribution [filled=False] (line_color=Cycle())\n", + "hv.NdOverlay({bw: hv.Distribution(normal).opts(plot=dict(bandwidth=bw)) for bw in [0.05, 0.1, 0.5, 1]})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The ``Distribution`` element is also useful to visualize the marginal distribution of a set of points. 
Here we will declare distributions for the x- and y-values of two sets of ``Points`` with slightly different spreads and means and then adjoin these plots:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "points = hv.Points(np.random.randn(100,2))\n", + "points2 = hv.Points(np.random.randn(100,2)*2+1)\n", + "\n", + "xdist, ydist = ((hv.Distribution(points2, kdims=[dim]) *\n", + " hv.Distribution(points, kdims=[dim])).redim.range(x=(-5, 5), y=(-5, 5))\n", + " for dim in 'xy')\n", + "(points2 * points) << ydist.opts(plot=dict(width=125)) << xdist.opts(plot=dict(height=125))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Underlying the ``Distribution`` element is the ``univariate_kde`` operation, which computes the KDE for us automatically when we plot the element. We can also use this operation directly and print the output highlighting the fact that the operation simply returns an ``Area`` or ``Curve`` element. 
It also affords more control over the parameters letting us directly set not only the ``bandwidth`` and ``cut`` values but also a ``bin_range``, ``bw_method`` and the number of samples (``n_samples``) to approximate the KDE with:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from holoviews.operation.stats import univariate_kde\n", + "dist = hv.Distribution(normal)\n", + "kde = univariate_kde(dist, bin_range=(-4, 4), bw_method='silverman', n_samples=20)\n", + "kde" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/reference/elements/matplotlib/Bivariate.ipynb b/examples/reference/elements/matplotlib/Bivariate.ipynb new file mode 100644 index 0000000000..04f376cf9f --- /dev/null +++ b/examples/reference/elements/matplotlib/Bivariate.ipynb @@ -0,0 +1,124 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "
\n", + "
Title
Bivariate Element
\n", + "
Dependencies
Matplotlib, SciPy
\n", + "
Backends
Matplotlib
Bokeh
\n", + "
\n", + "
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import holoviews as hv\n", + "hv.extension('matplotlib')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "``Bivariate`` provides a convenient way to visualize a 2D distribution of values as a [Kernel density estimate](https://en.wikipedia.org/wiki/Kernel_density_estimation) and therefore provides a 2D extension to the ``Distribution`` element. Kernel density estimation is a non-parametric way to estimate the probability density function of a random variable.\n", + "\n", + "The KDE works by placing a Gaussian kernel at each sample with the supplied bandwidth, which are then summed to produce the density estimate. By default the bandwidth is determined using the Scott's method, which usually produces good results, but it may be overridden by an explicit value.\n", + "\n", + "To start with we will create a ``Bivariate`` with 1,000 normally distributed samples:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "normal = np.random.randn(1000, 2)\n", + "hv.Bivariate(normal)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A ``Bivariate`` might be filled or not and we can define a ``cmap`` to control the coloring:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%opts Bivariate [filled=True colorbar=True] (cmap='Blues')\n", + "hv.Bivariate(normal)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can set explicit values for the ``bandwidth`` to see the effect. 
Since the densities will vary across the ``NdLayout``, we will enable axiswise normalization, ensuring they are normalized separately:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%opts Bivariate {+axiswise}\n", + "hv.NdLayout({bw: hv.Bivariate(normal).opts(plot=dict(bandwidth=bw))\n", + " for bw in [0.05, 0.1, 0.5, 1]}, 'Bandwidth')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Underlying the ``Bivariate`` element is the ``bivariate_kde`` operation, which computes the KDE for us automatically when we plot the element. We can also use this operation directly and print the output, highlighting the fact that the operation simply returns a ``Contours`` or ``Polygons`` element. It also affords more control over the parameters, letting us directly set not only the ``bandwidth`` and ``cut`` values but also an ``x_range``, ``y_range``, ``bw_method`` and the number of samples (``n_samples``) to approximate the KDE with:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from holoviews.operation.stats import bivariate_kde\n", + "dist = hv.Bivariate(normal)\n", + "kde = bivariate_kde(dist, x_range=(-4, 4), y_range=(-4, 4), bw_method='silverman', n_samples=20)\n", + "kde" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/reference/elements/matplotlib/Distribution.ipynb b/examples/reference/elements/matplotlib/Distribution.ipynb new file mode 100644 index 0000000000..af3d0027b8 --- /dev/null +++
b/examples/reference/elements/matplotlib/Distribution.ipynb @@ -0,0 +1,132 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "
\n", + "
Title
Distribution Element
\n", + "
Dependencies
Matplotlib, SciPy
\n", + "
Backends
\n", + "
Matplotlib
\n", + "
Bokeh
\n", + "
Plotly
\n", + "
\n", + "
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import holoviews as hv\n", + "hv.extension('matplotlib')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "``Distribution`` provides a convenient way to visualize a 1D distribution of values as a [Kernel density estimate](https://en.wikipedia.org/wiki/Kernel_density_estimation). Kernel density estimation is a non-parametric way to\n", + "estimate the probability density function of a random variable.\n", + "\n", + "The KDE works by placing a Gaussian kernel at each sample with the supplied bandwidth, which are then summed to produce the density estimate. By default the bandwidth is determined using the Scott's method, which usually produces good results, but it may be overridden by an explicit value.\n", + "\n", + "To start with we will create a ``Distribution`` with 1,000 normally distributed samples:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "normal = np.random.randn(1000)\n", + "hv.Distribution(normal)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can set explicit values for the ``bandwidth`` to see the effect and also declare whether we want the plot to be filled:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%opts Distribution [filled=False] (alpha=1)\n", + "hv.NdOverlay({bw: hv.Distribution(normal).opts(plot=dict(bandwidth=bw)) for bw in [0.05, 0.1, 0.5, 1]})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The ``Distribution`` element is also useful to visualize the marginal distribution of a set of points. 
Here we will declare distributions for the x- and y-values of two sets of ``Points`` with slightly different spreads and means and then adjoin these plots:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "points = hv.Points(np.random.randn(100,2))\n", + "points2 = hv.Points(np.random.randn(100,2)*2+1)\n", + "\n", + "xdist, ydist = ((hv.Distribution(points2, kdims=[dim]) *\n", + " hv.Distribution(points, kdims=[dim]))\n", + " for dim in 'xy')\n", + "((points2 * points) << ydist << xdist).redim.range(x=(-5, 5), y=(-5, 5))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Underlying the ``Distribution`` element is the ``univariate_kde`` operation, which computes the KDE for us automatically when we plot the element. We can also use this operation directly and print the output, highlighting the fact that the operation simply returns an ``Area`` or ``Curve`` element. It also affords more control over the parameters, letting us directly set not only the ``bandwidth`` and ``cut`` values but also a ``bin_range``, ``bw_method`` and the number of samples (``n_samples``) to approximate the KDE with:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from holoviews.operation.stats import univariate_kde\n", + "dist = hv.Distribution(normal)\n", + "kde = univariate_kde(dist, bin_range=(-4, 4), bw_method='silverman', n_samples=20)\n", + "kde" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 07b4fda56ecf38ce3f848368b689bd67c65c87db Mon Sep 17 00:00:00 
2001 From: Philipp Rudiger Date: Tue, 31 Oct 2017 14:46:56 +0000 Subject: [PATCH 33/40] Added density grid demo notebooks --- .../demos/bokeh/iris_density_grid.ipynb | 81 +++++++++++++++++++ .../demos/matplotlib/iris_density_grid.ipynb | 81 +++++++++++++++++++ 2 files changed, 162 insertions(+) create mode 100644 examples/gallery/demos/bokeh/iris_density_grid.ipynb create mode 100644 examples/gallery/demos/matplotlib/iris_density_grid.ipynb diff --git a/examples/gallery/demos/bokeh/iris_density_grid.ipynb b/examples/gallery/demos/bokeh/iris_density_grid.ipynb new file mode 100644 index 0000000000..62db4c91eb --- /dev/null +++ b/examples/gallery/demos/bokeh/iris_density_grid.ipynb @@ -0,0 +1,81 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Most examples work across multiple plotting backends, this example is also available for:\n", + "\n", + "* [Matplotlib - iris_density_grid](../matplotlib/iris_density_grid.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import holoviews as hv\n", + "hv.extension('bokeh', width=95)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Declaring data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from bokeh.sampledata.iris import flowers\n", + "from holoviews.operation import gridmatrix\n", + "\n", + "iris_ds = hv.Dataset(flowers)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%opts Bivariate [bandwidth=0.5] (cmap='Blues')\n", + "hv.operation.gridmatrix(iris_ds, diagonal_type=hv.Distribution, chart_type=hv.Bivariate)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + 
"language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/gallery/demos/matplotlib/iris_density_grid.ipynb b/examples/gallery/demos/matplotlib/iris_density_grid.ipynb new file mode 100644 index 0000000000..e9b592889b --- /dev/null +++ b/examples/gallery/demos/matplotlib/iris_density_grid.ipynb @@ -0,0 +1,81 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Most examples work across multiple plotting backends, this example is also available for:\n", + "\n", + "* [Bokeh - iris_density_grid](../bokeh/iris_density_grid.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import holoviews as hv\n", + "hv.extension('matplotlib', width=95)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Declaring data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from bokeh.sampledata.iris import flowers\n", + "from holoviews.operation import gridmatrix\n", + "\n", + "iris_ds = hv.Dataset(flowers)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%opts Bivariate [bandwidth=0.5] (cmap='Blues')\n", + "hv.operation.gridmatrix(iris_ds, diagonal_type=hv.Distribution, chart_type=hv.Bivariate)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + 
"name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 5c5a9d0a3300cb79e11b5e211ce967033960fb11 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Tue, 31 Oct 2017 15:24:37 +0000 Subject: [PATCH 34/40] Added filled options to DistributionPlots --- holoviews/plotting/bokeh/stats.py | 3 +++ holoviews/plotting/mpl/stats.py | 3 +++ 2 files changed, 6 insertions(+) diff --git a/holoviews/plotting/bokeh/stats.py b/holoviews/plotting/bokeh/stats.py index 1703b1189f..742770eb35 100644 --- a/holoviews/plotting/bokeh/stats.py +++ b/holoviews/plotting/bokeh/stats.py @@ -15,6 +15,9 @@ class DistributionPlot(AreaPlot): cut = param.Number(default=3, doc=""" Draw the estimate to cut * bw from the extreme data points.""") + filled = param.Boolean(default=True, doc=""" + Whether the bivariate contours should be filled.""") + class BivariatePlot(PolygonPlot): """ diff --git a/holoviews/plotting/mpl/stats.py b/holoviews/plotting/mpl/stats.py index 1703b1189f..742770eb35 100644 --- a/holoviews/plotting/mpl/stats.py +++ b/holoviews/plotting/mpl/stats.py @@ -15,6 +15,9 @@ class DistributionPlot(AreaPlot): cut = param.Number(default=3, doc=""" Draw the estimate to cut * bw from the extreme data points.""") + filled = param.Boolean(default=True, doc=""" + Whether the bivariate contours should be filled.""") + class BivariatePlot(PolygonPlot): """ From 0b53c06518fcbd82292cc6f0c9a1bb67bc704fc7 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Tue, 31 Oct 2017 15:25:05 +0000 Subject: [PATCH 35/40] Added a unit test for style transfer --- holoviews/core/options.py | 4 ++-- tests/testoptions.py | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/holoviews/core/options.py b/holoviews/core/options.py index a434f2d606..0b595201ff 100644 --- a/holoviews/core/options.py +++ b/holoviews/core/options.py @@ -1150,8 +1150,8 @@ def transfer_options(cls, obj, new_obj, 
backend=None): spec = '.'.join([s for s in (type_name, group, obj.label) if s]) options = [] for group in ['plot', 'style', 'norm']: - opts = cls.lookup_options(backend, obj, group).kwargs - if opts: options.append(Options(group, **opts)) + opts = cls.lookup_options(backend, obj, group) + if opts and opts.kwargs: options.append(Options(group, **opts.kwargs)) if options: StoreOptions.set_options(new_obj, {spec: options}, backend) diff --git a/tests/testoptions.py b/tests/testoptions.py index c92d950906..71e1908d33 100644 --- a/tests/testoptions.py +++ b/tests/testoptions.py @@ -493,6 +493,15 @@ def test_style_inheritance_override(self): # Check plot options works as expected self.assertEqual(self.lookup_options(hist2, 'plot').options, self.default_plot) + def test_style_transfer(self): + hist = self.hist.opts(style={'style1':'style_child'}) + hist2 = self.hist.opts() + opts = Store.lookup_options('matplotlib', hist2, 'style').kwargs + self.assertEqual(opts, {'style1': 'style1', 'style2': 'style2'}) + Store.transfer_options(hist, hist2, 'matplotlib') + opts = Store.lookup_options('matplotlib', hist2, 'style').kwargs + self.assertEqual(opts, {'style1': 'style_child', 'style2': 'style2'}) + @attr(optional=1) # Needs matplotlib class TestOptionTreeFind(ComparisonTestCase): From 37de41590f24df69c2fa08866f12ea4e91a5b51e Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Tue, 31 Oct 2017 15:25:43 +0000 Subject: [PATCH 36/40] Added tests for statistics elements --- tests/teststatselements.py | 146 +++++++++++++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100644 tests/teststatselements.py diff --git a/tests/teststatselements.py b/tests/teststatselements.py new file mode 100644 index 0000000000..4f289c73c9 --- /dev/null +++ b/tests/teststatselements.py @@ -0,0 +1,146 @@ +import numpy as np + +import holoviews +from holoviews.core.dimension import Dimension +from holoviews.core.options import Compositor, Store +from holoviews.element import 
(Distribution, Bivariate, Points, Image, + Curve, Area, Contours, Polygons) +from holoviews.element.comparison import ComparisonTestCase + + +class StatisticalElementTest(ComparisonTestCase): + + def test_distribution_array_constructor(self): + dist = Distribution(np.array([0, 1, 2])) + self.assertEqual(dist.kdims, [Dimension('Value')]) + self.assertEqual(dist.vdims, [Dimension('Density')]) + + def test_distribution_array_constructor_custom_vdim(self): + dist = Distribution(np.array([0, 1, 2]), vdims=['Test']) + self.assertEqual(dist.kdims, [Dimension('Value')]) + self.assertEqual(dist.vdims, [Dimension('Test')]) + + def test_bivariate_array_constructor(self): + dist = Bivariate(np.array([[0, 1, 2], [0, 1, 2]])) + self.assertEqual(dist.kdims, [Dimension('x'), Dimension('y')]) + self.assertEqual(dist.vdims, [Dimension('Density')]) + + def test_bivariate_array_constructor_custom_vdim(self): + dist = Bivariate(np.array([[0, 1, 2], [0, 1, 2]]), vdims=['Test']) + self.assertEqual(dist.kdims, [Dimension('x'), Dimension('y')]) + self.assertEqual(dist.vdims, [Dimension('Test')]) + + def test_distribution_array_range_kdims(self): + dist = Distribution(np.array([0, 1, 2])) + self.assertEqual(dist.range(0), (0, 2)) + + def test_bivariate_array_range_kdims(self): + dist = Bivariate(np.array([[0, 1], [1, 2], [2, 3]])) + self.assertEqual(dist.range(0), (0, 2)) + self.assertEqual(dist.range(1), (1, 3)) + + def test_distribution_array_range_vdims(self): + dist = Distribution(np.array([0, 1, 2])) + dmin, dmax = dist.range(1) + self.assertFalse(np.isfinite(dmin)) + self.assertFalse(np.isfinite(dmax)) + + def test_bivariate_array_range_vdims(self): + dist = Bivariate(np.array([[0, 1, 2], [0, 1, 3]])) + dmin, dmax = dist.range(2) + self.assertFalse(np.isfinite(dmin)) + self.assertFalse(np.isfinite(dmax)) + + def test_distribution_array_kdim_type(self): + dist = Distribution(np.array([0, 1, 2])) + self.assertEqual(dist.get_dimension_type(0), np.int64) + + def 
test_bivariate_array_kdim_type(self): + dist = Bivariate(np.array([[0, 1], [1, 2], [2, 3]])) + self.assertEqual(dist.get_dimension_type(0), np.int64) + self.assertEqual(dist.get_dimension_type(1), np.int64) + + def test_distribution_array_vdim_type(self): + dist = Distribution(np.array([0, 1, 2])) + self.assertEqual(dist.get_dimension_type(1), np.float64) + + def test_bivariate_array_vdim_type(self): + dist = Bivariate(np.array([[0, 1], [1, 2], [2, 3]])) + self.assertEqual(dist.get_dimension_type(2), np.float64) + + def test_distribution_from_image(self): + dist = Distribution(Image(np.arange(5)*np.arange(5)[:, np.newaxis]), 'z') + self.assertEqual(dist.range(0), (0, 16)) + + def test_bivariate_from_points(self): + points = Points(np.array([[0, 1], [1, 2], [2, 3]])) + dist = Bivariate(points) + self.assertEqual(dist.kdims, points.kdims) + + + +class StatisticalCompositorTest(ComparisonTestCase): + + def setUp(self): + self.renderer = holoviews.renderer('matplotlib') + np.random.seed(42) + + def test_distribution_composite(self): + dist = Distribution(np.array([0, 1, 2])) + area = Compositor.collapse_element(dist) + self.assertIsInstance(area, Area) + self.assertEqual(area.vdims, [Dimension(('Value_density', 'Value Density'))]) + + def test_distribution_composite_transfer_opts(self): + dist = Distribution(np.array([0, 1, 2])).opts(style=dict(color='red')) + area = Compositor.collapse_element(dist) + opts = Store.lookup_options('matplotlib', area, 'style').kwargs + self.assertEqual(opts.get('color', None), 'red') + + def test_distribution_composite_transfer_opts_with_group(self): + dist = Distribution(np.array([0, 1, 2]), group='Test').opts(style=dict(color='red')) + area = Compositor.collapse_element(dist) + opts = Store.lookup_options('matplotlib', area, 'style').kwargs + self.assertEqual(opts.get('color', None), 'red') + + def test_distribution_composite_custom_vdim(self): + dist = Distribution(np.array([0, 1, 2]), vdims=['Test']) + area = 
Compositor.collapse_element(dist) + self.assertIsInstance(area, Area) + self.assertEqual(area.vdims, [Dimension('Test')]) + + def test_distribution_composite_not_filled(self): + dist = Distribution(np.array([0, 1, 2])).opts(plot=dict(filled=False)) + curve = Compositor.collapse_element(dist) + self.assertIsInstance(curve, Curve) + self.assertEqual(curve.vdims, [Dimension(('Value_density', 'Value Density'))]) + + def test_bivariate_composite(self): + dist = Bivariate(np.random.rand(10, 2)) + contours = Compositor.collapse_element(dist) + self.assertIsInstance(contours, Contours) + self.assertEqual(contours.vdims, [Dimension('Density')]) + + def test_bivariate_composite_transfer_opts(self): + dist = Bivariate(np.random.rand(10, 2)).opts(style=dict(cmap='Blues')) + contours = Compositor.collapse_element(dist) + opts = Store.lookup_options('matplotlib', contours, 'style').kwargs + self.assertEqual(opts.get('cmap', None), 'Blues') + + def test_bivariate_composite_transfer_opts_with_group(self): + dist = Bivariate(np.random.rand(10, 2), group='Test').opts(style=dict(cmap='Blues')) + contours = Compositor.collapse_element(dist) + opts = Store.lookup_options('matplotlib', contours, 'style').kwargs + self.assertEqual(opts.get('cmap', None), 'Blues') + + def test_bivariate_composite_custom_vdim(self): + dist = Bivariate(np.random.rand(10, 2), vdims=['Test']) + contours = Compositor.collapse_element(dist) + self.assertIsInstance(contours, Contours) + self.assertEqual(contours.vdims, [Dimension('Test')]) + + def test_bivariate_composite_filled(self): + dist = Bivariate(np.random.rand(10, 2)).opts(plot=dict(filled=True)) + contours = Compositor.collapse_element(dist) + self.assertIsInstance(contours, Polygons) + self.assertEqual(contours.vdims, [Dimension('Density')]) From 6ad60d9a01d676d1aef8540af5209e82247e906d Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Tue, 31 Oct 2017 16:08:10 +0000 Subject: [PATCH 37/40] Fix for Compositor unit tests --- tests/testmagics.py | 5 
+++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/testmagics.py b/tests/testmagics.py index 5d16b479b4..f8b5702aa6 100644 --- a/tests/testmagics.py +++ b/tests/testmagics.py @@ -161,10 +161,11 @@ def setUp(self): super(TestCompositorMagic, self).setUp() self.cell("import numpy as np") self.cell("from holoviews.element import Image") - + self.definitions = Compositor.definitions + Compositor.definitions = [] def tearDown(self): - Compositor.definitions = [] + Compositor.definitions = self.definitions super(TestCompositorMagic, self).tearDown() def test_display_compositor_definition(self): From 33d3b0231603d10766ccb5058ecc4bfdadd07438 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Tue, 31 Oct 2017 16:12:03 +0000 Subject: [PATCH 38/40] Removed left over Regression element test --- tests/testplotinstantiation.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/tests/testplotinstantiation.py b/tests/testplotinstantiation.py index ac05dde199..a6af805399 100644 --- a/tests/testplotinstantiation.py +++ b/tests/testplotinstantiation.py @@ -36,13 +36,6 @@ except: mpl_renderer = None - -try: - import seaborn as sns - from holoviews.interface.seaborn import Regression -except: - sns = None - try: from holoviews.plotting.bokeh.util import bokeh_version bokeh_renderer = Store.renderers['bokeh'] @@ -111,14 +104,6 @@ def test_interleaved_overlay(self): o = Overlay([Curve(np.array([[0, 1]])) , Scatter([[1,1]]) , Curve(np.array([[0, 1]]))]) OverlayPlot(o) - def test_regression_plot_initializes(self): - if sns is None: - raise SkipTest("Seaborn required to test Regression plot") - reg = Regression(np.random.rand(20,2)) - plot = mpl_renderer.get_plot(reg) - plot.handles['axis'] - plot.initialize_plot() - @attr(optional=1) # Requires jinja2 def test_dynamic_nonoverlap(self): kdims = [Dimension('File', range=(0.01, 1)), From 782f11af98f08dfdb3917643c1ca9e22066d4874 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Tue, 31 Oct 2017 
18:02:13 +0000 Subject: [PATCH 39/40] Made suggested improvements to Compositor --- holoviews/core/options.py | 28 +++++++++++++++++++++++++--- holoviews/plotting/__init__.py | 2 ++ holoviews/plotting/renderer.py | 9 +-------- 3 files changed, 28 insertions(+), 11 deletions(-) diff --git a/holoviews/core/options.py b/holoviews/core/options.py index 0b595201ff..6dc08e3529 100644 --- a/holoviews/core/options.py +++ b/holoviews/core/options.py @@ -787,6 +787,9 @@ class Compositor(param.Parameterized): transfer_options = param.Boolean(default=False, doc=""" Whether to transfer the options from the input to the output.""") + transfer_parameters = param.Boolean(default=False, doc=""" + Whether to transfer plot options which match to the operation.""") + operations = [] # The operations that can be used to define compositors. definitions = [] # The set of all the compositor instances @@ -865,6 +868,24 @@ def collapse(cls, holomap, ranges=None, mode='data'): return clone + @classmethod + def map(cls, obj, mode='data', backend=None): + """ + Applies compositor operations to any HoloViews element or container + using the map method. 
+ """ + from .overlay import Overlay, CompositeOverlay + element_compositors = [c for c in cls.definitions if len(c._pattern_spec) == 1] + overlay_compositors = [c for c in cls.definitions if len(c._pattern_spec) > 1] + if overlay_compositors: + obj = obj.map(lambda obj: cls.collapse_element(obj, mode=mode, backend=backend), + [CompositeOverlay]) + if element_compositors: + obj = obj.map(lambda obj: cls.collapse_element(obj, mode=mode, backend=backend), + [c.pattern for c in element_compositors]) + return obj + + @classmethod def register(cls, compositor): defined_patterns = [op.pattern for op in cls.definitions] @@ -876,7 +897,7 @@ def register(cls, compositor): def __init__(self, pattern, operation, group, mode, transfer_options=False, - output_type=None, **kwargs): + transfer_parameters=False, output_type=None, **kwargs): self._pattern_spec, labels = [], [] for path in pattern.split('*'): @@ -899,7 +920,8 @@ def __init__(self, pattern, operation, group, mode, transfer_options=False, operation=operation, mode=mode, kwargs=kwargs, - transfer_options=transfer_options) + transfer_options=transfer_options, + transfer_parameters=transfer_parameters) @property @@ -970,7 +992,7 @@ def apply(self, value, input_ranges, backend=None): kwargs = {k: v for k, v in self.kwargs.items() if k != 'output_type'} if isinstance(value, CompositeOverlay) and len(value) == 1: value = value.values()[0] - if self.transfer_options: + if self.transfer_parameters: plot_opts = Store.lookup_options(backend, value, 'plot').kwargs kwargs.update({k: v for k, v in plot_opts.items() if k in self.operation.params()}) diff --git a/holoviews/plotting/__init__.py b/holoviews/plotting/__init__.py index 8d9a34041e..c07b88a955 100644 --- a/holoviews/plotting/__init__.py +++ b/holoviews/plotting/__init__.py @@ -14,9 +14,11 @@ Compositor.register(Compositor("Distribution", univariate_kde, None, 'data', transfer_options=True, + transfer_parameters=True, output_type=Area)) 
Compositor.register(Compositor("Bivariate", bivariate_kde, None, 'data', transfer_options=True, + transfer_parameters=True, output_type=Polygons)) def public(obj): diff --git a/holoviews/plotting/renderer.py b/holoviews/plotting/renderer.py index 38d8dc9b22..be88a901e5 100644 --- a/holoviews/plotting/renderer.py +++ b/holoviews/plotting/renderer.py @@ -170,14 +170,7 @@ def get_plot(self_or_cls, obj, renderer=None): if not displayable(obj): obj = collate(obj) initialize_dynamic(obj) - if any(len(c._pattern_spec) > 1 for c in Compositor.definitions): - obj = obj.map(lambda obj: Compositor.collapse_element(obj, mode='data', - backend=backend), - [CompositeOverlay]) - if any(len(c._pattern_spec) == 1 for c in Compositor.definitions): - obj = obj.map(lambda obj: Compositor.collapse_element(obj, mode='data', - backend=backend), - [Element]) + obj = Compositor.map(obj, mode='data', backend=self_or_cls.backend) if not renderer: renderer = self_or_cls.instance() if not isinstance(obj, Plot): From e3efb3a8f256caffde1cfb259a1c6eb603557105 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Tue, 31 Oct 2017 18:03:02 +0000 Subject: [PATCH 40/40] Removed underscore from StatisticsElement baseclass --- holoviews/element/__init__.py | 3 ++- holoviews/element/stats.py | 12 +++++++----- holoviews/ipython/display_hooks.py | 6 ++---- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/holoviews/element/__init__.py b/holoviews/element/__init__.py index d07f5a4281..22b9847aca 100644 --- a/holoviews/element/__init__.py +++ b/holoviews/element/__init__.py @@ -97,7 +97,8 @@ def vectorfield(self, kdims=None, vdims=None, groupby=None, **kwargs): def public(obj): - if not isinstance(obj, type): return False + if not isinstance(obj, type) or getattr(obj, 'abstract', False): + return False return issubclass(obj, Element) __all__ = list(set([_k for _k, _v in locals().items() if public(_v)])) diff --git a/holoviews/element/stats.py b/holoviews/element/stats.py index 
cf82efffdf..32d45990e9 100644 --- a/holoviews/element/stats.py +++ b/holoviews/element/stats.py @@ -7,7 +7,7 @@ from .chart import Chart, Scatter -class _StatisticsElement(Chart): +class StatisticsElement(Chart): """ StatisticsElement provides a baseclass for Element types that compute statistics based on the input data. The baseclass @@ -15,13 +15,15 @@ class _StatisticsElement(Chart): of the value dimensions. """ + __abstract = True + def __init__(self, data, kdims=None, vdims=None, **params): if isinstance(data, Element): params.update(get_param_values(data)) kdims = kdims or data.dimensions()[:len(self.kdims)] data = tuple(data.dimension_values(d) for d in kdims) params.update(dict(kdims=kdims, vdims=[], _validate_vdims=False)) - super(_StatisticsElement, self).__init__(data, **params) + super(StatisticsElement, self).__init__(data, **params) if not vdims: self.vdims = [Dimension('Density')] elif len(vdims) > 1: @@ -33,7 +35,7 @@ def __init__(self, data, kdims=None, vdims=None, **params): def range(self, dim, data_range=True): iskdim = self.get_dimension(dim) not in self.vdims - return super(_StatisticsElement, self).range(dim, data_range=iskdim) + return super(StatisticsElement, self).range(dim, data_range=iskdim) def dimension_values(self, dim, expanded=True, flat=True): @@ -87,7 +89,7 @@ def columns(self, dimensions=None): -class Bivariate(_StatisticsElement): +class Bivariate(StatisticsElement): """ Bivariate Views are containers for two dimensional data, which is to be visualized as a kernel density estimate. The @@ -104,7 +106,7 @@ class Bivariate(_StatisticsElement): -class Distribution(_StatisticsElement): +class Distribution(StatisticsElement): """ Distribution Views provide a container for data to be visualized as a one-dimensional distribution. 
The data should diff --git a/holoviews/ipython/display_hooks.py b/holoviews/ipython/display_hooks.py index 62a661009b..feb3863f6f 100644 --- a/holoviews/ipython/display_hooks.py +++ b/holoviews/ipython/display_hooks.py @@ -12,7 +12,7 @@ import holoviews from holoviews.plotting import Plot from ..core.options import (Store, StoreOptions, SkipRendering, - AbbreviatedException, Compositor) + AbbreviatedException) from ..core import (ViewableElement, UniformNdMapping, HoloMap, AdjointLayout, NdLayout, GridSpace, Layout, CompositeOverlay, DynamicMap) @@ -168,9 +168,7 @@ def element_display(element, max_frames): return backend = Store.current_backend - eltype = type(element) - if (eltype not in Store.registry[backend] and - all(eltype.__name__ != d.pattern for d in Compositor.definitions)): + if type(element) not in Store.registry[backend]: return None # Drop back to png if pdf selected, notebook PDF rendering is buggy