diff --git a/asv_bench/benchmarks/combine.py b/asv_bench/benchmarks/combine.py index 9314361e998..aa9662d44f9 100644 --- a/asv_bench/benchmarks/combine.py +++ b/asv_bench/benchmarks/combine.py @@ -1,4 +1,5 @@ import numpy as np + import xarray as xr diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index cd212895d99..c4cfbbbdfdf 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -125,3 +125,16 @@ def setup(self, key): requires_dask() super().setup(key) self.ds = self.ds.chunk({"x": 100, "y": 50, "t": 50}) + + +class BooleanIndexing: + # https://github.com/pydata/xarray/issues/2227 + def setup(self): + self.ds = xr.Dataset( + {"a": ("time", np.arange(10_000_000))}, + coords={"time": np.arange(10_000_000)}, + ) + self.time_filter = self.ds.time > 50_000 + + def time_indexing(self): + self.ds.isel(time=self.time_filter) diff --git a/doc/api.rst b/doc/api.rst index 872e7786e1b..256a1dbf3af 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -8,7 +8,7 @@ This page provides an auto-generated summary of xarray's API. For more details and examples, refer to the relevant chapters in the main part of the documentation. -See also: :ref:`public api`_. +See also: :ref:`public api` Top-level functions =================== @@ -117,6 +117,9 @@ Indexing Dataset.loc Dataset.isel Dataset.sel + Dataset.head + Dataset.tail + Dataset.thin Dataset.squeeze Dataset.interp Dataset.interp_like @@ -279,6 +282,9 @@ Indexing DataArray.loc DataArray.isel DataArray.sel + DataArray.head + DataArray.tail + DataArray.thin DataArray.squeeze DataArray.interp DataArray.interp_like @@ -604,6 +610,7 @@ Plotting Dataset.plot DataArray.plot + Dataset.plot.scatter plot.plot plot.contourf plot.contour diff --git a/doc/dask.rst b/doc/dask.rst index b0ffd0c449d..19cbc11292c 100644 --- a/doc/dask.rst +++ b/doc/dask.rst @@ -75,13 +75,14 @@ entirely equivalent to opening a dataset using ``open_dataset`` and then chunking the data using the ``chunk`` method, e.g., ``xr.open_dataset('example-data.nc').chunk({'time': 10})``. -To open multiple files simultaneously, use :py:func:`~xarray.open_mfdataset`:: +To open multiple files simultaneously in parallel using Dask delayed, +use :py:func:`~xarray.open_mfdataset`:: - xr.open_mfdataset('my/files/*.nc') + xr.open_mfdataset('my/files/*.nc', parallel=True) This function will automatically concatenate and merge dataset into one in the simple cases that it understands (see :py:func:`~xarray.auto_combine` -for the full disclaimer). By default, ``open_mfdataset`` will chunk each +for the full disclaimer). By default, :py:func:`~xarray.open_mfdataset` will chunk each netCDF file into a single Dask array; again, supply the ``chunks`` argument to control the size of the resulting Dask arrays. In more complex cases, you can open each file individually using ``open_dataset`` and merge the result, as @@ -132,6 +133,13 @@ A dataset can also be converted to a Dask DataFrame using :py:meth:`~xarray.Data Dask DataFrames do not support multi-indexes so the coordinate variables from the dataset are included as columns in the Dask DataFrame. +.. ipython:: python + :suppress: + + import os + os.remove('example-data.nc') + os.remove('manipulated-example-data.nc') + Using Dask with xarray ---------------------- @@ -373,12 +381,6 @@ one million elements (e.g., a 1000x1000 matrix). With large arrays (10+ GB), the cost of queueing up Dask operations can be noticeable, and you may need even larger chunksizes. -.. 
ipython:: python - :suppress: - - import os - os.remove('example-data.nc') - Optimization Tips ----------------- diff --git a/doc/gallery/plot_cartopy_facetgrid.py b/doc/gallery/plot_cartopy_facetgrid.py index a0afa7ad92e..af04ad6856a 100644 --- a/doc/gallery/plot_cartopy_facetgrid.py +++ b/doc/gallery/plot_cartopy_facetgrid.py @@ -41,6 +41,6 @@ ax.set_extent([-160, -30, 5, 75]) # Without this aspect attributes the maps will look chaotic and the # "extent" attribute above will be ignored - ax.set_aspect("equal", "box-forced") + ax.set_aspect("equal") plt.show() diff --git a/doc/indexing.rst b/doc/indexing.rst index 4c5b93db0b4..9ee8f1dddf8 100644 --- a/doc/indexing.rst +++ b/doc/indexing.rst @@ -236,9 +236,8 @@ The :py:meth:`~xarray.Dataset.drop` method returns a new object with the listed index labels along a dimension dropped: .. ipython:: python - :okwarning: - ds.drop(['IN', 'IL'], dim='space') + ds.drop(space=['IN', 'IL']) ``drop`` is both a ``Dataset`` and ``DataArray`` method. @@ -393,14 +392,6 @@ These methods may also be applied to ``Dataset`` objects You may find increased performance by loading your data into memory first, e.g., with :py:meth:`~xarray.Dataset.load`. -.. note:: - - Vectorized indexing is a new feature in v0.10. - In older versions of xarray, dimensions of indexers are ignored. - Dedicated methods for some advanced indexing use cases, - ``isel_points`` and ``sel_points`` are now deprecated. - See :ref:`more_advanced_indexing` for their alternative. - .. note:: If an indexer is a :py:meth:`~xarray.DataArray`, its coordinates should not diff --git a/doc/io.rst b/doc/io.rst index 4a61b59ac2a..775d915188e 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -99,7 +99,9 @@ netCDF The recommended way to store xarray data structures is `netCDF`__, which is a binary file format for self-described datasets that originated in the geosciences. xarray is based on the netCDF data model, so netCDF files -on disk directly correspond to :py:class:`~xarray.Dataset` objects. +on disk directly correspond to :py:class:`~xarray.Dataset` objects (more accurately, +a group in a netCDF file directly corresponds to a to :py:class:`~xarray.Dataset` object. +See :ref:`io.netcdf_groups` for more.) NetCDF is supported on almost all platforms, and parsers exist for the vast majority of scientific programming languages. Recent versions of @@ -121,7 +123,7 @@ read/write netCDF V4 files and use the compression options described below). __ https://github.com/Unidata/netcdf4-python We can save a Dataset to disk using the -:py:attr:`Dataset.to_netcdf ` method: +:py:meth:`~Dataset.to_netcdf` method: .. ipython:: python @@ -147,19 +149,6 @@ convert the ``DataArray`` to a ``Dataset`` before saving, and then convert back when loading, ensuring that the ``DataArray`` that is loaded is always exactly the same as the one that was saved. -NetCDF groups are not supported as part of the -:py:class:`~xarray.Dataset` data model. Instead, groups can be loaded -individually as Dataset objects. -To do so, pass a ``group`` keyword argument to the -``open_dataset`` function. The group can be specified as a path-like -string, e.g., to access subgroup 'bar' within group 'foo' pass -'/foo/bar' as the ``group`` argument. -In a similar way, the ``group`` keyword argument can be given to the -:py:meth:`~xarray.Dataset.to_netcdf` method to write to a group -in a netCDF file. -When writing multiple groups in one file, pass ``mode='a'`` to ``to_netcdf`` -to ensure that each call does not delete the file. 
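A minimal sketch of the group round-trip described here (and in the new ``Groups`` section added further below); the file and group names are made up::

    import xarray as xr

    ds_foo = xr.Dataset({"a": ("x", [1, 2, 3])})
    ds_bar = xr.Dataset({"b": ("y", [4.0, 5.0])})

    # the first call creates the file; later calls append with mode='a'
    ds_foo.to_netcdf("groups.nc", group="/foo")
    ds_bar.to_netcdf("groups.nc", group="/foo/bar", mode="a")

    # each group is opened as its own Dataset
    xr.open_dataset("groups.nc", group="/foo/bar")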
- Data is always loaded lazily from netCDF files. You can manipulate, slice and subset Dataset and DataArray objects, and no array values are loaded into memory until you try to perform some sort of actual computation. For an example of how these @@ -195,6 +184,24 @@ It is possible to append or overwrite netCDF variables using the ``mode='a'`` argument. When using this option, all variables in the dataset will be written to the original netCDF file, regardless if they exist in the original dataset. + +.. _io.netcdf_groups: + +Groups +~~~~~~ + +NetCDF groups are not supported as part of the :py:class:`~xarray.Dataset` data model. +Instead, groups can be loaded individually as Dataset objects. +To do so, pass a ``group`` keyword argument to the +:py:func:`~xarray.open_dataset` function. The group can be specified as a path-like +string, e.g., to access subgroup ``'bar'`` within group ``'foo'`` pass +``'/foo/bar'`` as the ``group`` argument. +In a similar way, the ``group`` keyword argument can be given to the +:py:meth:`~xarray.Dataset.to_netcdf` method to write to a group +in a netCDF file. +When writing multiple groups in one file, pass ``mode='a'`` to +:py:meth:`~xarray.Dataset.to_netcdf` to ensure that each call does not delete the file. + .. _io.encoding: Reading encoded data @@ -203,7 +210,7 @@ Reading encoded data NetCDF files follow some conventions for encoding datetime arrays (as numbers with a "units" attribute) and for packing and unpacking data (as described by the "scale_factor" and "add_offset" attributes). If the argument -``decode_cf=True`` (default) is given to ``open_dataset``, xarray will attempt +``decode_cf=True`` (default) is given to :py:func:`~xarray.open_dataset`, xarray will attempt to automatically decode the values in the netCDF objects according to `CF conventions`_. Sometimes this will fail, for example, if a variable has an invalid "units" or "calendar" attribute. For these cases, you can @@ -247,6 +254,130 @@ will remove encoding information. import os os.remove('saved_on_disk.nc') + +.. _combining multiple files: + +Reading multi-file datasets +........................... + +NetCDF files are often encountered in collections, e.g., with different files +corresponding to different model runs or one file per timestamp. +xarray can straightforwardly combine such files into a single Dataset by making use of +:py:func:`~xarray.concat`, :py:func:`~xarray.merge`, :py:func:`~xarray.combine_nested` and +:py:func:`~xarray.combine_by_coords`. For details on the difference between these +functions see :ref:`combining data`. + +Xarray includes support for manipulating datasets that don't fit into memory +with dask_. If you have dask installed, you can open multiple files +simultaneously in parallel using :py:func:`~xarray.open_mfdataset`:: + + xr.open_mfdataset('my/files/*.nc', parallel=True) + +This function automatically concatenates and merges multiple files into a +single xarray dataset. +It is the recommended way to open multiple files with xarray. +For more details on parallel reading, see :ref:`combining.multi`, :ref:`dask.io` and a +`blog post`_ by Stephan Hoyer. +:py:func:`~xarray.open_mfdataset` takes many kwargs that allow you to +control its behaviour (for e.g. ``parallel``, ``combine``, ``compat``, ``join``, ``concat_dim``). +See its docstring for more details. + + +.. note:: + + A common use-case involves a dataset distributed across a large number of files with + each file containing a large number of variables. 
Commonly a few of these variables + need to be concatenated along a dimension (say ``"time"``), while the rest are equal + across the datasets (ignoring floating point differences). The following command + with suitable modifications (such as ``parallel=True``) works well with such datasets:: + + xr.open_mfdataset('my/files/*.nc', concat_dim="time", + data_vars='minimal', coords='minimal', compat='override') + + This command concatenates variables along the ``"time"`` dimension, but only those that + already contain the ``"time"`` dimension (``data_vars='minimal', coords='minimal'``). + Variables that lack the ``"time"`` dimension are taken from the first dataset + (``compat='override'``). + + +.. _dask: http://dask.pydata.org +.. _blog post: http://stephanhoyer.com/2015/06/11/xray-dask-out-of-core-labeled-arrays/ + +Sometimes multi-file datasets are not conveniently organized for easy use of :py:func:`~xarray.open_mfdataset`. +One can use the ``preprocess`` argument to provide a function that takes a dataset +and returns a modified Dataset. +:py:func:`~xarray.open_mfdataset` will call ``preprocess`` on every dataset +(corresponding to each file) prior to combining them. + + +If :py:func:`~xarray.open_mfdataset` does not meet your needs, other approaches are possible. +The general pattern for parallel reading of multiple files +using dask, modifying those datasets and then combining into a single ``Dataset`` is:: + + def modify(ds): + # modify ds here + return ds + + + # this is basically what open_mfdataset does + open_kwargs = dict(decode_cf=True, decode_times=False) + open_tasks = [dask.delayed(xr.open_dataset)(f, **open_kwargs) for f in file_names] + tasks = [dask.delayed(modify)(task) for task in open_tasks] + datasets = dask.compute(tasks) # get a list of xarray.Datasets + combined = xr.combine_nested(datasets) # or some combination of concat, merge + + +As an example, here's how we could approximate ``MFDataset`` from the netCDF4 +library:: + + from glob import glob + import xarray as xr + + def read_netcdfs(files, dim): + # glob expands paths with * to a list of files, like the unix shell + paths = sorted(glob(files)) + datasets = [xr.open_dataset(p) for p in paths] + combined = xr.concat(dataset, dim) + return combined + + combined = read_netcdfs('/all/my/files/*.nc', dim='time') + +This function will work in many cases, but it's not very robust. First, it +never closes files, which means it will fail one you need to load more than +a few thousands file. Second, it assumes that you want all the data from each +file and that it can all fit into memory. In many situations, you only need +a small subset or an aggregated summary of the data from each file. 
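For the :py:func:`~xarray.open_mfdataset` route, the ``preprocess`` argument described above covers this kind of per-file subsetting; a minimal sketch, with a hypothetical variable name ``precip``::

    import xarray as xr

    def keep_precip(ds):
        # select only what we need from each file before the datasets are combined
        return ds[["precip"]].reset_coords(drop=True)

    combined = xr.open_mfdataset("my/files/*.nc", preprocess=keep_precip)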
+ +Here's a slightly more sophisticated example of how to remedy these +deficiencies:: + + def read_netcdfs(files, dim, transform_func=None): + def process_one_path(path): + # use a context manager, to ensure the file gets closed after use + with xr.open_dataset(path) as ds: + # transform_func should do some sort of selection or + # aggregation + if transform_func is not None: + ds = transform_func(ds) + # load all data from the transformed dataset, to ensure we can + # use it after closing each original file + ds.load() + return ds + + paths = sorted(glob(files)) + datasets = [process_one_path(p) for p in paths] + combined = xr.concat(datasets, dim) + return combined + + # here we suppose we only care about the combined mean of each file; + # you might also use indexing operations like .sel to subset datasets + combined = read_netcdfs('/all/my/files/*.nc', dim='time', + transform_func=lambda ds: ds.mean()) + +This pattern works well and is very robust. We've used similar code to process +tens of thousands of files constituting 100s of GB of data. + + .. _io.netcdf.writing_encoded: Writing encoded data @@ -743,6 +874,13 @@ be done directly from zarr, as described in the .. _io.cfgrib: +.. ipython:: python + :suppress: + + import shutil + shutil.rmtree('foo.zarr') + shutil.rmtree('path/to/directory.zarr') + GRIB format via cfgrib ---------------------- @@ -810,84 +948,3 @@ For CSV files, one might also consider `xarray_extras`_. .. _xarray_extras: https://xarray-extras.readthedocs.io/en/latest/api/csv.html .. _IO tools: http://pandas.pydata.org/pandas-docs/stable/io.html - - -.. _combining multiple files: - - -Combining multiple files ------------------------- - -NetCDF files are often encountered in collections, e.g., with different files -corresponding to different model runs. xarray can straightforwardly combine such -files into a single Dataset by making use of :py:func:`~xarray.concat`, -:py:func:`~xarray.merge`, :py:func:`~xarray.combine_nested` and -:py:func:`~xarray.combine_by_coords`. For details on the difference between these -functions see :ref:`combining data`. - -.. note:: - - Xarray includes support for manipulating datasets that don't fit into memory - with dask_. If you have dask installed, you can open multiple files - simultaneously using :py:func:`~xarray.open_mfdataset`:: - - xr.open_mfdataset('my/files/*.nc') - - This function automatically concatenates and merges multiple files into a - single xarray dataset. - It is the recommended way to open multiple files with xarray. - For more details, see :ref:`combining.multi`, :ref:`dask.io` and a - `blog post`_ by Stephan Hoyer. - -.. _dask: http://dask.pydata.org -.. _blog post: http://stephanhoyer.com/2015/06/11/xray-dask-out-of-core-labeled-arrays/ - -For example, here's how we could approximate ``MFDataset`` from the netCDF4 -library:: - - from glob import glob - import xarray as xr - - def read_netcdfs(files, dim): - # glob expands paths with * to a list of files, like the unix shell - paths = sorted(glob(files)) - datasets = [xr.open_dataset(p) for p in paths] - combined = xr.concat(dataset, dim) - return combined - - combined = read_netcdfs('/all/my/files/*.nc', dim='time') - -This function will work in many cases, but it's not very robust. First, it -never closes files, which means it will fail one you need to load more than -a few thousands file. Second, it assumes that you want all the data from each -file and that it can all fit into memory. 
In many situations, you only need -a small subset or an aggregated summary of the data from each file. - -Here's a slightly more sophisticated example of how to remedy these -deficiencies:: - - def read_netcdfs(files, dim, transform_func=None): - def process_one_path(path): - # use a context manager, to ensure the file gets closed after use - with xr.open_dataset(path) as ds: - # transform_func should do some sort of selection or - # aggregation - if transform_func is not None: - ds = transform_func(ds) - # load all data from the transformed dataset, to ensure we can - # use it after closing each original file - ds.load() - return ds - - paths = sorted(glob(files)) - datasets = [process_one_path(p) for p in paths] - combined = xr.concat(datasets, dim) - return combined - - # here we suppose we only care about the combined mean of each file; - # you might also use indexing operations like .sel to subset datasets - combined = read_netcdfs('/all/my/files/*.nc', dim='time', - transform_func=lambda ds: ds.mean()) - -This pattern works well and is very robust. We've used similar code to process -tens of thousands of files constituting 100s of GB of data. diff --git a/doc/related-projects.rst b/doc/related-projects.rst index 58b9a7c22c9..647db5fd8e4 100644 --- a/doc/related-projects.rst +++ b/doc/related-projects.rst @@ -11,7 +11,7 @@ Geosciences ~~~~~~~~~~~ - `aospy `_: Automated analysis and management of gridded climate data. -- `climpred `_: Analysis of ensemble forecast models for climate prediction. +- `climpred `_: Analysis of ensemble forecast models for climate prediction. - `infinite-diff `_: xarray-based finite-differencing, focused on gridded climate/meterology data - `marc_analysis `_: Analysis package for CESM/MARC experiments and output. - `MetPy `_: A collection of tools in Python for reading, visualizing, and performing calculations with weather data. @@ -26,7 +26,7 @@ Geosciences subclass. - `Regionmask `_: plotting and creation of masks of spatial regions - `salem `_: Adds geolocalised subsetting, masking, and plotting operations to xarray's data structures via accessors. -- `SatPy `_ : Library for reading and manipulating meteorological remote sensing data and writing it to various image and data file formats. +- `SatPy `_ : Library for reading and manipulating meteorological remote sensing data and writing it to various image and data file formats. - `Spyfit `_: FTIR spectroscopy of the atmosphere - `windspharm `_: Spherical harmonic wind analysis in Python. @@ -56,6 +56,7 @@ Extend xarray capabilities ~~~~~~~~~~~~~~~~~~~~~~~~~~ - `Collocate `_: Collocate xarray trajectories in arbitrary physical dimensions - `eofs `_: EOF analysis in Python. +- `hypothesis-gufunc `_: Extension to hypothesis. Makes it easy to write unit tests with xarray objects as input. - `xarray_extras `_: Advanced algorithms for xarray objects (e.g. integrations/interpolations). - `xrft `_: Fourier transforms for xarray data. - `xr-scipy `_: A lightweight scipy wrapper for xarray. diff --git a/doc/reshaping.rst b/doc/reshaping.rst index b3abfc5afb0..51202f9be41 100644 --- a/doc/reshaping.rst +++ b/doc/reshaping.rst @@ -156,6 +156,7 @@ represented by a :py:class:`pandas.MultiIndex` object. These methods are used like this: .. 
ipython:: python + data = xr.Dataset( data_vars={'a': (('x', 'y'), [[0, 1, 2], [3, 4, 5]]), 'b': ('x', [6, 7])}, diff --git a/doc/weather-climate.rst b/doc/weather-climate.rst index a17ecd2f2a4..96641c2b97e 100644 --- a/doc/weather-climate.rst +++ b/doc/weather-climate.rst @@ -137,6 +137,12 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: da.to_netcdf('example-no-leap.nc') xr.open_dataset('example-no-leap.nc') +.. ipython:: python + :suppress: + + import os + os.remove('example-no-leap.nc') + - And resampling along the time dimension for data indexed by a :py:class:`~xarray.CFTimeIndex`: .. ipython:: python diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 1fc96019c4d..39ca1c204c6 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -13,13 +13,79 @@ What's New import xarray as xr np.random.seed(123456) -.. _whats-new.0.13.0: +.. _whats-new.0.13.1: -v0.13.0 (unreleased) +v0.13.1 (unreleased) -------------------- -This release increases the minimum required Python version from 3.5.0 to 3.5.3 -(:issue:`3089`). By `Guido Imperiale `_. +Bug fixes +~~~~~~~~~ +- Reintroduce support for :mod:`weakref` (broken in v0.13.0). Support has been + reinstated for :class:`DataArray` and :class:`Dataset` objects only. Internal xarray + objects remain unaddressable by weakref in order to save memory. + (:issue:`3317`) by `Guido Imperiale `_. + +Documentation +~~~~~~~~~~~~~ +- Add examples for :py:meth:`Dataset.swap_dims` and :py:meth:`DataArray.swap_dims`. + By `Justus Magin `_. + +.. _whats-new.0.13.0: + +v0.13.0 (17 Sep 2019) +--------------------- + +This release includes many exciting changes: wrapping of +`NEP18 `_ compliant +numpy-like arrays; new :py:meth:`~Dataset.plot.scatter` plotting method that can scatter +two ``DataArrays`` in a ``Dataset`` against each other; support for converting pandas +DataFrames to xarray objects that wrap ``pydata/sparse``; and more! + +Breaking changes +~~~~~~~~~~~~~~~~ + +- This release increases the minimum required Python version from 3.5.0 to 3.5.3 + (:issue:`3089`). By `Guido Imperiale `_. +- The ``isel_points`` and ``sel_points`` methods are removed, having been deprecated + since v0.10.0. These are redundant with the ``isel`` / ``sel`` methods. + See :ref:`vectorized_indexing` for the details + By `Maximilian Roos `_ +- The ``inplace`` kwarg for public methods now raises an error, having been deprecated + since v0.11.0. + By `Maximilian Roos `_ +- :py:func:`~xarray.concat` now requires the ``dim`` argument. Its ``indexers``, ``mode`` + and ``concat_over`` kwargs have now been removed. + By `Deepak Cherian `_ +- Passing a list of colors in ``cmap`` will now raise an error, having been deprecated since + v0.6.1. +- Most xarray objects now define ``__slots__``. This reduces overall RAM usage by ~22% + (not counting the underlying numpy buffers); on CPython 3.7/x64, a trivial DataArray + has gone down from 1.9kB to 1.5kB. + + Caveats: + + - Pickle streams produced by older versions of xarray can't be loaded using this + release, and vice versa. + - Any user code that was accessing the ``__dict__`` attribute of + xarray objects will break. The best practice to attach custom metadata to xarray + objects is to use the ``attrs`` dictionary. + - Any user code that defines custom subclasses of xarray classes must now explicitly + define ``__slots__`` itself. Subclasses that don't add any attributes must state so + by defining ``__slots__ = ()`` right after the class header. 
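For example, a subclass that adds no attributes of its own would now be written as (class name hypothetical)::

    import xarray as xr

    class MyDataArray(xr.DataArray):
        __slots__ = ()  # no additional instance attributes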
+ Omitting ``__slots__`` will now cause a ``FutureWarning`` to be logged, and will raise an + error in a later release. + + (:issue:`3250`) by `Guido Imperiale `_. +- The default dimension for :py:meth:`Dataset.groupby`, :py:meth:`Dataset.resample`, + :py:meth:`DataArray.groupby` and :py:meth:`DataArray.resample` reductions is now the + grouping or resampling dimension. +- :py:meth:`DataArray.to_dataset` requires ``name`` to be passed as a kwarg (previously ambiguous + positional arguments were deprecated) +- Reindexing with variables of a different dimension now raise an error (previously deprecated) +- :py:func:`~xarray.broadcast_array` is removed (previously deprecated in favor of + :py:func:`~xarray.broadcast`) +- :py:meth:`Variable.expand_dims` is removed (previously deprecated in favor of + :py:meth:`Variable.set_dims`) New functions/methods ~~~~~~~~~~~~~~~~~~~~~ @@ -28,10 +94,16 @@ New functions/methods `NEP18 `_ compliant numpy-like library (important: read notes about NUMPY_EXPERIMENTAL_ARRAY_FUNCTION in the above link). Added explicit test coverage for - `sparse `_. (:issue:`3117`, :issue:`3202`) - By `Nezar Abdennur `_ + `sparse `_. (:issue:`3117`, :issue:`3202`). + This requires `sparse>=0.8.0`. By `Nezar Abdennur `_ and `Guido Imperiale `_. +- :py:meth:`~Dataset.from_dataframe` and :py:meth:`~DataArray.from_series` now + support ``sparse=True`` for converting pandas objects into xarray objects + wrapping sparse arrays. This is particularly useful with sparsely populated + hierarchical indexes. (:issue:`3206`) + By `Stephan Hoyer `_. + - The xarray package is now discoverable by mypy (although typing hints coverage is not complete yet). mypy type checking is now enforced by CI. Libraries that depend on xarray and use mypy can now remove from their setup.cfg the lines:: @@ -45,13 +117,17 @@ New functions/methods and `Maximilian Roos `_. - Added :py:meth:`DataArray.broadcast_like` and :py:meth:`Dataset.broadcast_like`. - By `Deepak Cherian `_ and `David Mertz + By `Deepak Cherian `_ and `David Mertz `_. -- Dataset plotting API for visualizing dependencies between two `DataArray`s! +- Dataset plotting API for visualizing dependencies between two DataArrays! Currently only :py:meth:`Dataset.plot.scatter` is implemented. By `Yohai Bar Sinai `_ and `Deepak Cherian `_ +- Added :py:meth:`DataArray.head`, :py:meth:`DataArray.tail` and :py:meth:`DataArray.thin`; + as well as :py:meth:`Dataset.head`, :py:meth:`Dataset.tail` and :py:meth:`Dataset.thin` methods. + (:issue:`319`) By `Gerardo Rivera `_. + Enhancements ~~~~~~~~~~~~ @@ -61,19 +137,46 @@ Enhancements By `Robert Hetland ` - Added ``join='override'``. This only checks that index sizes are equal among objects and skips checking indexes for equality. By `Deepak Cherian `_. +- Multiple enhancements to :py:func:`~xarray.concat` and :py:func:`~xarray.open_mfdataset`. + By `Deepak Cherian `_ + + - Added ``compat='override'``. When merging, this option picks the variable from the first dataset + and skips all comparisons. + + - Added ``join='override'``. When aligning, this only checks that index sizes are equal among objects + and skips checking indexes for equality. + + - :py:func:`~xarray.concat` and :py:func:`~xarray.open_mfdataset` now support the ``join`` kwarg. + It is passed down to :py:func:`~xarray.align`. + + - :py:func:`~xarray.concat` now calls :py:func:`~xarray.merge` on variables that are not concatenated + (i.e. variables without ``concat_dim`` when ``data_vars`` or ``coords`` are ``"minimal"``). 
+ :py:func:`~xarray.concat` passes its new ``compat`` kwarg down to :py:func:`~xarray.merge`. + (:issue:`2064`) -- :py:func:`~xarray.concat` and :py:func:`~xarray.open_mfdataset` now support the ``join`` kwarg. - It is passed down to :py:func:`~xarray.align`. By `Deepak Cherian `_. + Users can avoid a common bottleneck when using :py:func:`~xarray.open_mfdataset` on a large number of + files with variables that are known to be aligned and some of which need not be concatenated. + Slow equality comparisons can now be avoided, for e.g.:: + + data = xr.open_mfdataset(files, concat_dim='time', data_vars='minimal', + coords='minimal', compat='override', join='override') - In :py:meth:`~xarray.Dataset.to_zarr`, passing ``mode`` is not mandatory if ``append_dim`` is set, as it will automatically be set to ``'a'`` internally. By `David Brochart `_. + +- Added the ability to initialize an empty or full DataArray + with a single value. (:issue:`277`) + By `Gerardo Rivera `_. + - :py:func:`~xarray.Dataset.to_netcdf()` now supports the ``invalid_netcdf`` kwarg when used with ``engine="h5netcdf"``. It is passed to :py:func:`h5netcdf.File`. By `Ulrich Herter `_. - :py:meth:`~xarray.Dataset.drop` now supports keyword arguments; dropping index - labels by specifying both ``dim`` and ``labels`` is deprecated (:issue:`2910`). + labels by using both ``dim`` and ``labels`` or using a + :py:class:`~xarray.core.coordinates.DataArrayCoordinates` object are + deprecated (:issue:`2910`). By `Gregory Gundersen `_. - Added examples of :py:meth:`Dataset.set_index` and @@ -81,9 +184,15 @@ Enhancements when the user passes invalid arguments (:issue:`3176`). By `Gregory Gundersen `_. +- :py:func:`filter_by_attrs` now filters the coordinates as well as the variables. + By `Spencer Jones `_. + Bug fixes ~~~~~~~~~ +- Improve "missing dimensions" error message for :py:func:`~xarray.apply_ufunc` + (:issue:`2078`). + By `Rick Russotto `_. - :py:meth:`~xarray.DataArray.assign_coords` now supports dictionary arguments (:issue:`3231`). By `Gregory Gundersen `_. @@ -110,8 +219,12 @@ Bug fixes - Fix error that arises when using open_mfdataset on a series of netcdf files having differing values for a variable attribute of type list. (:issue:`3034`) By `Hasan Ahmad `_. - -.. _whats-new.0.12.3: +- Prevent :py:meth:`~xarray.DataArray.argmax` and :py:meth:`~xarray.DataArray.argmin` from calling + dask compute (:issue:`3237`). By `Ulrich Herter `_. +- Plots in 2 dimensions (pcolormesh, contour) now allow to specify levels as numpy + array (:issue:`3284`). By `Mathias Hauser `_. +- Fixed bug in :meth:`DataArray.quantile` failing to keep attributes when + `keep_attrs` was True (:issue:`3304`). By David Huard ``_. Documentation ~~~~~~~~~~~~~ @@ -120,6 +233,12 @@ Documentation or pushing new commits. By `Gregory Gundersen `_. +- Fixed documentation to clean up unwanted files created in ``ipython`` examples + (:issue:`3227`). + By `Gregory Gundersen `_. + +.. _whats-new.0.12.3: + v0.12.3 (10 July 2019) ---------------------- @@ -134,14 +253,14 @@ New functions/methods as described in :ref:`reshape.stacking_different`. By `Noah Brenowitz `_. +Enhancements +~~~~~~~~~~~~ + - Support for renaming ``Dataset`` variables and dimensions independently with :py:meth:`~Dataset.rename_vars` and :py:meth:`~Dataset.rename_dims` (:issue:`3026`). By `Julia Kent `_. -Enhancements -~~~~~~~~~~~~ - - Add ``scales``, ``offsets``, ``units`` and ``descriptions`` attributes to :py:class:`~xarray.DataArray` returned by :py:func:`~xarray.open_rasterio`. 
(:issue:`3013`) diff --git a/doc/why-xarray.rst b/doc/why-xarray.rst index d0a6c591b29..25d558d99d5 100644 --- a/doc/why-xarray.rst +++ b/doc/why-xarray.rst @@ -62,9 +62,8 @@ The power of the dataset over a plain dictionary is that, in addition to pulling out arrays by name, it is possible to select or combine data along a dimension across all arrays simultaneously. Like a :py:class:`~pandas.DataFrame`, datasets facilitate array operations with -heterogeneous data -- the difference is that the arrays in a dataset can not -only have different data types, but can also have different numbers of -dimensions. +heterogeneous data -- the difference is that the arrays in a dataset can have +not only different data types, but also different numbers of dimensions. This data model is borrowed from the netCDF_ file format, which also provides xarray with a natural and portable serialization format. NetCDF is very popular diff --git a/setup.cfg b/setup.cfg index 6cb58d2b9a2..114f71f4a9f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -25,8 +25,6 @@ ignore= E731 # line break before binary operator W503 - # Unused imports; TODO: Allow typing to work without triggering errors - F401 exclude= doc diff --git a/xarray/__init__.py b/xarray/__init__.py index a3df034f7c7..cdca708e28c 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -6,7 +6,7 @@ __version__ = get_versions()["version"] del get_versions -from .core.alignment import align, broadcast, broadcast_arrays +from .core.alignment import align, broadcast from .core.common import full_like, zeros_like, ones_like from .core.concat import concat from .core.combine import combine_by_coords, combine_nested, auto_combine diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 887af0023fb..0d6dedac57e 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -6,6 +6,7 @@ from pathlib import Path from textwrap import dedent from typing import ( + TYPE_CHECKING, Callable, Dict, Hashable, @@ -13,21 +14,19 @@ Mapping, Tuple, Union, - TYPE_CHECKING, ) import numpy as np -from .. import Dataset, DataArray, backends, conventions, coding +from .. import DataArray, Dataset, auto_combine, backends, coding, conventions from ..core import indexing -from .. import auto_combine from ..core.combine import ( - combine_by_coords, - _nested_combine, _infer_concat_order_from_positions, + _nested_combine, + combine_by_coords, ) from ..core.utils import close_on_error, is_grib_path, is_remote_uri -from .common import ArrayWriter, AbstractDataStore +from .common import AbstractDataStore, ArrayWriter from .locks import _get_scheduler if TYPE_CHECKING: @@ -695,6 +694,8 @@ def open_dataarray( class _MultiFileCloser: + __slots__ = ("file_objs",) + def __init__(self, file_objs): self.file_objs = file_objs @@ -760,7 +761,7 @@ def open_mfdataset( `xarray.auto_combine` is used, but in the future this behavior will switch to use `xarray.combine_by_coords` by default. compat : {'identical', 'equals', 'broadcast_equals', - 'no_conflicts'}, optional + 'no_conflicts', 'override'}, optional String indicating how to compare variables of the same name for potential conflicts when merging: * 'broadcast_equals': all values must be equal when variables are @@ -771,6 +772,7 @@ def open_mfdataset( * 'no_conflicts': only values which are not null in both datasets must be equal. The returned dataset then contains the combination of all non-null values. 
+ * 'override': skip comparing and pick variable from first dataset preprocess : callable, optional If provided, call this function on each dataset prior to concatenation. You can find the file-name from which each dataset was loaded in @@ -913,7 +915,7 @@ def open_mfdataset( # Remove this after deprecation cycle from #2616 is complete basic_msg = dedent( """\ - In xarray version 0.13 the default behaviour of `open_mfdataset` + In xarray version 0.14 the default behaviour of `open_mfdataset` will change. To retain the existing behavior, pass combine='nested'. To use future default behavior, pass combine='by_coords'. See diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 7ee11052192..455b77907f9 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -68,12 +68,16 @@ def robust_getitem(array, key, catch=Exception, max_retries=6, initial_delay=500 class BackendArray(NdimSizeLenMixin, indexing.ExplicitlyIndexed): + __slots__ = () + def __array__(self, dtype=None): key = indexing.BasicIndexer((slice(None),) * self.ndim) return np.asarray(self[key], dtype=dtype) class AbstractDataStore(Mapping): + __slots__ = () + def __iter__(self): return iter(self.variables) @@ -165,6 +169,8 @@ def __exit__(self, exception_type, exception_value, traceback): class ArrayWriter: + __slots__ = ("sources", "targets", "regions", "lock") + def __init__(self, lock=None): self.sources = [] self.targets = [] @@ -205,6 +211,8 @@ def sync(self, compute=True): class AbstractWritableDataStore(AbstractDataStore): + __slots__ = () + def encode(self, variables, attributes): """ Encode the variables and attributes in this store @@ -371,6 +379,8 @@ def set_dimensions(self, variables, unlimited_dims=None): class WritableCFDataStore(AbstractWritableDataStore): + __slots__ = () + def encode(self, variables, attributes): # All NetCDF files get CF encoded by default, without this attempting # to write times, for example, would fail. diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index edc28c7b0ff..0c5fe9087d2 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -5,7 +5,7 @@ from .. import Variable from ..core import indexing -from ..core.utils import FrozenOrderedDict, close_on_error +from ..core.utils import FrozenOrderedDict from .common import WritableCFDataStore from .file_manager import CachingFileManager from .locks import HDF5_LOCK, combine_locks, ensure_lock, get_write_lock diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 9866a2fe344..813942c2f32 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -10,7 +10,7 @@ from .. 
import Variable, coding from ..coding.variables import pop_to from ..core import indexing -from ..core.utils import FrozenOrderedDict, close_on_error, is_remote_uri +from ..core.utils import FrozenOrderedDict, is_remote_uri from .common import ( BackendArray, WritableCFDataStore, @@ -30,6 +30,8 @@ class BaseNetCDF4Array(BackendArray): + __slots__ = ("datastore", "dtype", "shape", "variable_name") + def __init__(self, variable_name, datastore): self.datastore = datastore self.variable_name = variable_name @@ -52,8 +54,13 @@ def __setitem__(self, key, value): if self.datastore.autoclose: self.datastore.close(needs_lock=False) + def get_array(self, needs_lock=True): + raise NotImplementedError("Virtual Method") + class NetCDF4ArrayWrapper(BaseNetCDF4Array): + __slots__ = () + def get_array(self, needs_lock=True): ds = self.datastore._acquire(needs_lock) variable = ds.variables[self.variable_name] @@ -294,6 +301,17 @@ class NetCDF4DataStore(WritableCFDataStore): This store supports NetCDF3, NetCDF4 and OpenDAP datasets. """ + __slots__ = ( + "autoclose", + "format", + "is_remote", + "lock", + "_filename", + "_group", + "_manager", + "_mode", + ) + def __init__( self, manager, group=None, mode=None, lock=NETCDF4_PYTHON_LOCK, autoclose=False ): diff --git a/xarray/backends/rasterio_.py b/xarray/backends/rasterio_.py index 1d832d4f671..316f13470b7 100644 --- a/xarray/backends/rasterio_.py +++ b/xarray/backends/rasterio_.py @@ -322,11 +322,14 @@ def open_rasterio(filename, parse_coordinates=None, chunks=None, cache=None, loc attrs["units"] = riods.units # Parse extra metadata from tags, if supported - parsers = {"ENVI": _parse_envi} + parsers = {"ENVI": _parse_envi, "GTiff": lambda m: m} driver = riods.driver if driver in parsers: - meta = parsers[driver](riods.tags(ns=driver)) + if driver == "GTiff": + meta = parsers[driver](riods.tags()) + else: + meta = parsers[driver](riods.tags(ns=driver)) for k, v in meta.items(): # Add values as coordinates if they match the band count, diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 31997d258c8..9a115de55ef 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -29,6 +29,8 @@ def _encode_zarr_attr_value(value): class ZarrArrayWrapper(BackendArray): + __slots__ = ("datastore", "dtype", "shape", "variable_name") + def __init__(self, variable_name, datastore): self.datastore = datastore self.variable_name = variable_name @@ -231,6 +233,15 @@ class ZarrStore(AbstractWritableDataStore): """Store for reading and writing data via zarr """ + __slots__ = ( + "append_dim", + "ds", + "_consolidate_on_close", + "_group", + "_read_only", + "_synchronizer", + ) + @classmethod def open_group( cls, diff --git a/xarray/conventions.py b/xarray/conventions.py index c15e5c40e73..1e40d254e96 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -31,6 +31,8 @@ class NativeEndiannessArray(indexing.ExplicitlyIndexedNDArrayMixin): dtype('int16') """ + __slots__ = ("array",) + def __init__(self, array): self.array = indexing.as_indexable(array) @@ -60,6 +62,8 @@ class BoolTypeArray(indexing.ExplicitlyIndexedNDArrayMixin): dtype('bool') """ + __slots__ = ("array",) + def __init__(self, array): self.array = indexing.as_indexable(array) diff --git a/xarray/core/accessor_str.py b/xarray/core/accessor_str.py index 03a6d37b01e..8838e71e6ca 100644 --- a/xarray/core/accessor_str.py +++ b/xarray/core/accessor_str.py @@ -75,6 +75,8 @@ class StringAccessor: """ + __slots__ = ("_obj",) + def __init__(self, obj): self._obj = obj diff --git 
a/xarray/core/alignment.py b/xarray/core/alignment.py index bb44f48fb9b..d63718500bc 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -1,9 +1,8 @@ import functools import operator -import warnings from collections import OrderedDict, defaultdict from contextlib import suppress -from typing import Any, Dict, Hashable, Mapping, Optional, Tuple, Union, TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Dict, Hashable, Mapping, Optional, Tuple, Union import numpy as np import pandas as pd @@ -14,8 +13,8 @@ from .variable import IndexVariable, Variable if TYPE_CHECKING: - from .dataarray import DataArray - from .dataset import Dataset + from .dataarray import DataArray # noqa: F401 + from .dataset import Dataset # noqa: F401 def _get_joiner(join): @@ -387,14 +386,9 @@ def reindex_variables( for dim, indexer in indexers.items(): if isinstance(indexer, DataArray) and indexer.dims != (dim,): - warnings.warn( + raise ValueError( "Indexer has dimensions {:s} that are different " - "from that to be indexed along {:s}. " - "This will behave differently in the future.".format( - str(indexer.dims), dim - ), - FutureWarning, - stacklevel=3, + "from that to be indexed along {:s}".format(str(indexer.dims), dim) ) target = new_indexes[dim] = utils.safe_cast_to_index(indexers[dim]) @@ -592,14 +586,3 @@ def broadcast(*args, exclude=None): result.append(_broadcast_helper(arg, exclude, dims_map, common_coords)) return tuple(result) - - -def broadcast_arrays(*args): - import warnings - - warnings.warn( - "xarray.broadcast_arrays is deprecated: use " "xarray.broadcast instead", - DeprecationWarning, - stacklevel=2, - ) - return broadcast(*args) diff --git a/xarray/core/arithmetic.py b/xarray/core/arithmetic.py index 5e8c8758ef5..137db034c95 100644 --- a/xarray/core/arithmetic.py +++ b/xarray/core/arithmetic.py @@ -14,6 +14,8 @@ class SupportsArithmetic: Used by Dataset, DataArray, Variable and GroupBy. """ + __slots__ = () + # TODO: implement special methods for arithmetic here rather than injecting # them in xarray/core/ops.py. Ideally, do so by inheriting from # numpy.lib.mixins.NDArrayOperatorsMixin. diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 3aae12c3b66..be7fd86555c 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -5,10 +5,10 @@ import pandas as pd +from . import dtypes +from .concat import concat from .dataarray import DataArray from .dataset import Dataset -from .concat import concat -from . import dtypes from .merge import merge @@ -243,6 +243,7 @@ def _combine_1d( dim=concat_dim, data_vars=data_vars, coords=coords, + compat=compat, fill_value=fill_value, join=join, ) @@ -351,7 +352,7 @@ def combine_nested( Must be the same length as the depth of the list passed to ``datasets``. compat : {'identical', 'equals', 'broadcast_equals', - 'no_conflicts'}, optional + 'no_conflicts', 'override'}, optional String indicating how to compare variables of the same name for potential merge conflicts: @@ -363,6 +364,7 @@ def combine_nested( - 'no_conflicts': only values which are not null in both datasets must be equal. The returned dataset then contains the combination of all non-null values. + - 'override': skip comparing and pick variable from first dataset data_vars : {'minimal', 'different', 'all' or list of str}, optional Details are in the documentation of concat coords : {'minimal', 'different', 'all' or list of str}, optional @@ -504,7 +506,7 @@ def combine_by_coords( datasets : sequence of xarray.Dataset Dataset objects to combine. 
compat : {'identical', 'equals', 'broadcast_equals', - 'no_conflicts'}, optional + 'no_conflicts', 'override'}, optional String indicating how to compare variables of the same name for potential conflicts: @@ -516,6 +518,7 @@ def combine_by_coords( - 'no_conflicts': only values which are not null in both datasets must be equal. The returned dataset then contains the combination of all non-null values. + - 'override': skip comparing and pick variable from first dataset data_vars : {'minimal', 'different', 'all' or list of str}, optional Details are in the documentation of concat coords : {'minimal', 'different', 'all' or list of str}, optional @@ -598,6 +601,7 @@ def combine_by_coords( concat_dims=concat_dims, data_vars=data_vars, coords=coords, + compat=compat, fill_value=fill_value, join=join, ) @@ -667,7 +671,7 @@ def auto_combine( component files. Set ``concat_dim=None`` explicitly to disable concatenation. compat : {'identical', 'equals', 'broadcast_equals', - 'no_conflicts'}, optional + 'no_conflicts', 'override'}, optional String indicating how to compare variables of the same name for potential conflicts: - 'broadcast_equals': all values must be equal when variables are @@ -678,6 +682,7 @@ def auto_combine( - 'no_conflicts': only values which are not null in both datasets must be equal. The returned dataset then contains the combination of all non-null values. + - 'override': skip comparing and pick variable from first dataset data_vars : {'minimal', 'different', 'all' or list of str}, optional Details are in the documentation of concat coords : {'minimal', 'different', 'all' o list of str}, optional @@ -711,7 +716,7 @@ def auto_combine( if not from_openmfds: basic_msg = dedent( """\ - In xarray version 0.13 `auto_combine` will be deprecated. See + In xarray version 0.14 `auto_combine` will be deprecated. See http://xarray.pydata.org/en/stable/combining.html#combining-multi""" ) warnings.warn(basic_msg, FutureWarning, stacklevel=2) @@ -753,7 +758,7 @@ def auto_combine( message += dedent( """\ The datasets supplied require both concatenation and merging. From - xarray version 0.13 this will operation will require either using the + xarray version 0.14 this will operation will require either using the new `combine_nested` function (or the `combine='nested'` option to open_mfdataset), with a nested list structure such that you can combine along the dimensions {}. Alternatively if your datasets have global @@ -832,6 +837,7 @@ def _old_auto_combine( dim=dim, data_vars=data_vars, coords=coords, + compat=compat, fill_value=fill_value, join=join, ) @@ -850,6 +856,7 @@ def _auto_concat( coords="different", fill_value=dtypes.NA, join="outer", + compat="no_conflicts", ): if len(datasets) == 1 and dim is None: # There is nothing more to combine, so kick out early. 
@@ -876,5 +883,10 @@ def _auto_concat( ) dim, = concat_dims return concat( - datasets, dim=dim, data_vars=data_vars, coords=coords, fill_value=fill_value + datasets, + dim=dim, + data_vars=data_vars, + coords=coords, + fill_value=fill_value, + compat=compat, ) diff --git a/xarray/core/common.py b/xarray/core/common.py index 2e834492521..ab9e7616ce1 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -1,3 +1,4 @@ +import warnings from collections import OrderedDict from contextlib import suppress from textwrap import dedent @@ -35,6 +36,8 @@ class ImplementsArrayReduce: + __slots__ = () + @classmethod def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool): if include_skipna: @@ -72,6 +75,8 @@ def wrapped_func(self, dim=None, axis=None, **kwargs): # type: ignore class ImplementsDatasetReduce: + __slots__ = () + @classmethod def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool): if include_skipna: @@ -110,6 +115,8 @@ class AbstractArray(ImplementsArrayReduce): """Shared base class for DataArray and Variable. """ + __slots__ = () + def __bool__(self: Any) -> bool: return bool(self.values) @@ -180,7 +187,25 @@ class AttrAccessMixin: """Mixin class that allows getting keys with attribute access """ - _initialized = False + __slots__ = () + + def __init_subclass__(cls): + """Verify that all subclasses explicitly define ``__slots__``. If they don't, + raise error in the core xarray module and a FutureWarning in third-party + extensions. + This check is only triggered in Python 3.6+. + """ + if not hasattr(object.__new__(cls), "__dict__"): + cls.__setattr__ = cls._setattr_slots + elif cls.__module__.startswith("xarray."): + raise AttributeError("%s must explicitly define __slots__" % cls.__name__) + else: + cls.__setattr__ = cls._setattr_dict + warnings.warn( + "xarray subclass %s should explicitly define __slots__" % cls.__name__, + FutureWarning, + stacklevel=2, + ) @property def _attr_sources(self) -> List[Mapping[Hashable, Any]]: @@ -195,7 +220,7 @@ def _item_sources(self) -> List[Mapping[Hashable, Any]]: return [] def __getattr__(self, name: str) -> Any: - if name != "__setstate__": + if name not in {"__dict__", "__setstate__"}: # this avoids an infinite loop when pickle looks for the # __setstate__ attribute before the xarray object is initialized for source in self._attr_sources: @@ -205,20 +230,52 @@ def __getattr__(self, name: str) -> Any: "%r object has no attribute %r" % (type(self).__name__, name) ) - def __setattr__(self, name: str, value: Any) -> None: - if self._initialized: - try: - # Allow setting instance variables if they already exist - # (e.g., _attrs). We use __getattribute__ instead of hasattr - # to avoid key lookups with attribute-style access. - self.__getattribute__(name) - except AttributeError: - raise AttributeError( - "cannot set attribute %r on a %r object. Use __setitem__ " - "style assignment (e.g., `ds['name'] = ...`) instead to " - "assign variables." % (name, type(self).__name__) - ) + # This complicated three-method design boosts overall performance of simple + # operations - particularly DataArray methods that perform a _to_temp_dataset() + # round-trip - by a whopping 8% compared to a single method that checks + # hasattr(self, "__dict__") at runtime before every single assignment (like + # _setattr_py35 does). All of this is just temporary until the FutureWarning can be + # changed into a hard crash. 
+ def _setattr_dict(self, name: str, value: Any) -> None: + """Deprecated third party subclass (see ``__init_subclass__`` above) + """ object.__setattr__(self, name, value) + if name in self.__dict__: + # Custom, non-slotted attr, or improperly assigned variable? + warnings.warn( + "Setting attribute %r on a %r object. Explicitly define __slots__ " + "to suppress this warning for legitimate custom attributes and " + "raise an error when attempting variables assignments." + % (name, type(self).__name__), + FutureWarning, + stacklevel=2, + ) + + def _setattr_slots(self, name: str, value: Any) -> None: + """Objects with ``__slots__`` raise AttributeError if you try setting an + undeclared attribute. This is desirable, but the error message could use some + improvement. + """ + try: + object.__setattr__(self, name, value) + except AttributeError as e: + # Don't accidentally shadow custom AttributeErrors, e.g. + # DataArray.dims.setter + if str(e) != "%r object has no attribute %r" % (type(self).__name__, name): + raise + raise AttributeError( + "cannot set attribute %r on a %r object. Use __setitem__ style" + "assignment (e.g., `ds['name'] = ...`) instead of assigning variables." + % (name, type(self).__name__) + ) from e + + def _setattr_py35(self, name: str, value: Any) -> None: + if hasattr(self, "__dict__"): + return self._setattr_dict(name, value) + return self._setattr_slots(name, value) + + # Overridden in Python >=3.6 by __init_subclass__ + __setattr__ = _setattr_py35 def __dir__(self) -> List[str]: """Provide method name lookup and completion. Only provide 'public' @@ -283,6 +340,8 @@ def get_squeeze_dims( class DataWithCoords(SupportsArithmetic, AttrAccessMixin): """Shared base class for Dataset and DataArray.""" + __slots__ = () + _rolling_exp_cls = RollingExp def squeeze( diff --git a/xarray/core/computation.py b/xarray/core/computation.py index cb3a0d5db7d..424ab5be87a 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -7,6 +7,7 @@ from collections import Counter, OrderedDict from distutils.version import LooseVersion from typing import ( + TYPE_CHECKING, AbstractSet, Any, Callable, @@ -17,7 +18,6 @@ Sequence, Tuple, Union, - TYPE_CHECKING, ) import numpy as np @@ -51,6 +51,14 @@ class _UFuncSignature: Core dimension names on each output variable. 
""" + __slots__ = ( + "input_core_dims", + "output_core_dims", + "_all_input_core_dims", + "_all_output_core_dims", + "_all_core_dims", + ) + def __init__(self, input_core_dims, output_core_dims=((),)): self.input_core_dims = tuple(tuple(a) for a in input_core_dims) self.output_core_dims = tuple(tuple(a) for a in output_core_dims) @@ -502,9 +510,10 @@ def broadcast_compat_data(variable, broadcast_dims, core_dims): missing_core_dims = [d for d in core_dims if d not in set_old_dims] if missing_core_dims: raise ValueError( - "operand to apply_ufunc has required core dimensions %r, but " - "some of these are missing on the input variable: %r" - % (list(core_dims), missing_core_dims) + "operand to apply_ufunc has required core dimensions {}, but " + "some of these dimensions are absent on an input variable: {}".format( + list(core_dims), missing_core_dims + ) ) set_new_dims = set(new_dims) @@ -648,7 +657,6 @@ def func(*arrays): def _apply_blockwise( func, args, input_dims, output_dims, signature, output_dtypes, output_sizes=None ): - import dask.array as da from .dask_array_compat import blockwise if signature.num_outputs > 1: diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 014b615f2a7..e68c247d880 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -1,24 +1,21 @@ -import warnings from collections import OrderedDict import pandas as pd -from . import utils, dtypes +from . import dtypes, utils from .alignment import align +from .merge import unique_variable, _VALID_COMPAT from .variable import IndexVariable, Variable, as_variable from .variable import concat as concat_vars def concat( objs, - dim=None, + dim, data_vars="all", coords="different", compat="equals", positions=None, - indexers=None, - mode=None, - concat_over=None, fill_value=dtypes.NA, join="outer", ): @@ -63,12 +60,19 @@ def concat( those corresponding to other dimensions. * list of str: The listed coordinate variables will be concatenated, in addition to the 'minimal' coordinates. - compat : {'equals', 'identical'}, optional - String indicating how to compare non-concatenated variables and - dataset global attributes for potential conflicts. 'equals' means - that all variable values and dimensions must be the same; - 'identical' means that variable attributes and global attributes - must also be equal. + compat : {'identical', 'equals', 'broadcast_equals', 'no_conflicts', 'override'}, optional + String indicating how to compare non-concatenated variables of the same name for + potential conflicts. This is passed down to merge. + + - 'broadcast_equals': all values must be equal when variables are + broadcast against each other to ensure common dimensions. + - 'equals': all values and dimensions must be the same. + - 'identical': all values, dimensions and attributes must be the + same. + - 'no_conflicts': only values which are not null in both datasets + must be equal. The returned dataset then contains the combination + of all non-null values. + - 'override': skip comparing and pick variable from first dataset positions : None or list of integer arrays, optional List of integer arrays which specifies the integer positions to which to assign each dataset along the concatenated dimension. 
If not @@ -111,36 +115,10 @@ def concat( except StopIteration: raise ValueError("must supply at least one object to concatenate") - if dim is None: - warnings.warn( - "the `dim` argument to `concat` will be required " - "in a future version of xarray; for now, setting it to " - "the old default of 'concat_dim'", - FutureWarning, - stacklevel=2, - ) - dim = "concat_dims" - - if indexers is not None: # pragma: no cover - warnings.warn( - "indexers has been renamed to positions; the alias " - "will be removed in a future version of xarray", - FutureWarning, - stacklevel=2, - ) - positions = indexers - - if mode is not None: - raise ValueError( - "`mode` is no longer a valid argument to " - "xarray.concat; it has been split into the " - "`data_vars` and `coords` arguments" - ) - if concat_over is not None: + if compat not in _VALID_COMPAT: raise ValueError( - "`concat_over` is no longer a valid argument to " - "xarray.concat; it has been split into the " - "`data_vars` and `coords` arguments" + "compat=%r invalid: must be 'broadcast_equals', 'equals', 'identical', 'no_conflicts' or 'override'" + % compat ) if isinstance(first_obj, DataArray): @@ -179,23 +157,39 @@ def _calc_concat_dim_coord(dim): return dim, coord -def _calc_concat_over(datasets, dim, data_vars, coords): +def _calc_concat_over(datasets, dim, dim_names, data_vars, coords, compat): """ Determine which dataset variables need to be concatenated in the result, - and which can simply be taken from the first dataset. """ # Return values concat_over = set() equals = {} - if dim in datasets[0]: + if dim in dim_names: + concat_over_existing_dim = True concat_over.add(dim) + else: + concat_over_existing_dim = False + + concat_dim_lengths = [] for ds in datasets: + if concat_over_existing_dim: + if dim not in ds.dims: + if dim in ds: + ds = ds.set_coords(dim) + else: + raise ValueError("%r is not present in all datasets" % dim) concat_over.update(k for k, v in ds.variables.items() if dim in v.dims) + concat_dim_lengths.append(ds.dims.get(dim, 1)) def process_subset_opt(opt, subset): if isinstance(opt, str): if opt == "different": + if compat == "override": + raise ValueError( + "Cannot specify both %s='different' and compat='override'." 
+ % subset + ) # all nonindexes that are not the same in each dataset for k in getattr(datasets[0], subset): if k not in concat_over: @@ -209,7 +203,7 @@ def process_subset_opt(opt, subset): for ds_rhs in datasets[1:]: v_rhs = ds_rhs.variables[k].compute() computed.append(v_rhs) - if not v_lhs.equals(v_rhs): + if not getattr(v_lhs, compat)(v_rhs): concat_over.add(k) equals[k] = False # computed variables are not to be re-computed @@ -245,7 +239,29 @@ def process_subset_opt(opt, subset): process_subset_opt(data_vars, "data_vars") process_subset_opt(coords, "coords") - return concat_over, equals + return concat_over, equals, concat_dim_lengths + + +# determine dimensional coordinate names and a dict mapping name to DataArray +def _parse_datasets(datasets): + + dims = set() + all_coord_names = set() + data_vars = set() # list of data_vars + dim_coords = dict() # maps dim name to variable + dims_sizes = {} # shared dimension sizes to expand variables + + for ds in datasets: + dims_sizes.update(ds.dims) + all_coord_names.update(ds.coords) + data_vars.update(ds.data_vars) + + for dim in set(ds.dims) - dims: + if dim not in dim_coords: + dim_coords[dim] = ds.coords[dim].variable + dims = dims | set(ds.dims) + + return dim_coords, dims_sizes, all_coord_names, data_vars def _dataset_concat( @@ -263,11 +279,6 @@ def _dataset_concat( """ from .dataset import Dataset - if compat not in ["equals", "identical"]: - raise ValueError( - "compat=%r invalid: must be 'equals' " "or 'identical'" % compat - ) - dim, coord = _calc_concat_dim_coord(dim) # Make sure we're working on a copy (we'll be loading variables) datasets = [ds.copy() for ds in datasets] @@ -275,62 +286,65 @@ def _dataset_concat( *datasets, join=join, copy=False, exclude=[dim], fill_value=fill_value ) - concat_over, equals = _calc_concat_over(datasets, dim, data_vars, coords) + dim_coords, dims_sizes, coord_names, data_names = _parse_datasets(datasets) + dim_names = set(dim_coords) + unlabeled_dims = dim_names - coord_names - def insert_result_variable(k, v): - assert isinstance(v, Variable) - if k in datasets[0].coords: - result_coord_names.add(k) - result_vars[k] = v + both_data_and_coords = coord_names & data_names + if both_data_and_coords: + raise ValueError( + "%r is a coordinate in some datasets but not others." % both_data_and_coords + ) + # we don't want the concat dimension in the result dataset yet + dim_coords.pop(dim, None) + dims_sizes.pop(dim, None) + + # case where concat dimension is a coordinate or data_var but not a dimension + if (dim in coord_names or dim in data_names) and dim not in dim_names: + datasets = [ds.expand_dims(dim) for ds in datasets] + + # determine which variables to concatentate + concat_over, equals, concat_dim_lengths = _calc_concat_over( + datasets, dim, dim_names, data_vars, coords, compat + ) + + # determine which variables to merge, and then merge them according to compat + variables_to_merge = (coord_names | data_names) - concat_over - dim_names + + result_vars = {} + if variables_to_merge: + to_merge = {var: [] for var in variables_to_merge} + + for ds in datasets: + absent_merge_vars = variables_to_merge - set(ds.variables) + if absent_merge_vars: + raise ValueError( + "variables %r are present in some datasets but not others. 
" + % absent_merge_vars + ) - # create the new dataset and add constant variables - result_vars = OrderedDict() - result_coord_names = set(datasets[0].coords) + for var in variables_to_merge: + to_merge[var].append(ds.variables[var]) + + for var in variables_to_merge: + result_vars[var] = unique_variable( + var, to_merge[var], compat=compat, equals=equals.get(var, None) + ) + else: + result_vars = OrderedDict() + result_vars.update(dim_coords) + + # assign attrs and encoding from first dataset result_attrs = datasets[0].attrs result_encoding = datasets[0].encoding - for k, v in datasets[0].variables.items(): - if k not in concat_over: - insert_result_variable(k, v) - - # check that global attributes and non-concatenated variables are fixed - # across all datasets + # check that global attributes are fixed across all datasets if necessary for ds in datasets[1:]: if compat == "identical" and not utils.dict_equiv(ds.attrs, result_attrs): - raise ValueError("dataset global attributes not equal") - for k, v in ds.variables.items(): - if k not in result_vars and k not in concat_over: - raise ValueError("encountered unexpected variable %r" % k) - elif (k in result_coord_names) != (k in ds.coords): - raise ValueError( - "%r is a coordinate in some datasets but not " "others" % k - ) - elif k in result_vars and k != dim: - # Don't use Variable.identical as it internally invokes - # Variable.equals, and we may already know the answer - if compat == "identical" and not utils.dict_equiv( - v.attrs, result_vars[k].attrs - ): - raise ValueError("variable %s not identical across datasets" % k) - - # Proceed with equals() - try: - # May be populated when using the "different" method - is_equal = equals[k] - except KeyError: - result_vars[k].load() - is_equal = v.equals(result_vars[k]) - if not is_equal: - raise ValueError("variable %s not equal across datasets" % k) + raise ValueError("Dataset global attributes not equal.") # we've already verified everything is consistent; now, calculate # shared dimension sizes so we can expand the necessary variables - dim_lengths = [ds.dims.get(dim, 1) for ds in datasets] - non_concat_dims = {} - for ds in datasets: - non_concat_dims.update(ds.dims) - non_concat_dims.pop(dim, None) - def ensure_common_dims(vars): # ensure each variable with the given name shares the same # dimensions and the same shape for all of them except along the @@ -338,25 +352,27 @@ def ensure_common_dims(vars): common_dims = tuple(pd.unique([d for v in vars for d in v.dims])) if dim not in common_dims: common_dims = (dim,) + common_dims - for var, dim_len in zip(vars, dim_lengths): + for var, dim_len in zip(vars, concat_dim_lengths): if var.dims != common_dims: - common_shape = tuple( - non_concat_dims.get(d, dim_len) for d in common_dims - ) + common_shape = tuple(dims_sizes.get(d, dim_len) for d in common_dims) var = var.set_dims(common_dims, common_shape) yield var # stack up each variable to fill-out the dataset (in order) + # n.b. this loop preserves variable order, needed for groupby. 
for k in datasets[0].variables: if k in concat_over: vars = ensure_common_dims([ds.variables[k] for ds in datasets]) combined = concat_vars(vars, dim, positions) - insert_result_variable(k, combined) + assert isinstance(combined, Variable) + result_vars[k] = combined result = Dataset(result_vars, attrs=result_attrs) - result = result.set_coords(result_coord_names) + result = result.set_coords(coord_names) result.encoding = result_encoding + result = result.drop(unlabeled_dims, errors="ignore") + if coord is not None: # add concat dimension last to ensure that its in the final Dataset result[coord.name] = coord @@ -378,7 +394,7 @@ def _dataarray_concat( if data_vars != "all": raise ValueError( - "data_vars is not a valid argument when " "concatenating DataArray objects" + "data_vars is not a valid argument when concatenating DataArray objects" ) datasets = [] diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 562d30dd6c7..ddea5739fff 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -4,12 +4,12 @@ TYPE_CHECKING, Any, Hashable, - Mapping, Iterator, - Union, + Mapping, + Sequence, Set, Tuple, - Sequence, + Union, cast, ) @@ -35,7 +35,7 @@ class AbstractCoordinates(Mapping[Hashable, "DataArray"]): - _data = None # type: Union["DataArray", "Dataset"] + __slots__ = () def __getitem__(self, key: Hashable) -> "DataArray": raise NotImplementedError() @@ -53,7 +53,7 @@ def dims(self) -> Union[Mapping[Hashable, int], Tuple[Hashable, ...]]: @property def indexes(self) -> Indexes: - return self._data.indexes + return self._data.indexes # type: ignore @property def variables(self): @@ -108,9 +108,9 @@ def to_index(self, ordered_dims: Sequence[Hashable] = None) -> pd.Index: raise ValueError("no valid index for a 0-dimensional object") elif len(ordered_dims) == 1: (dim,) = ordered_dims - return self._data.get_index(dim) + return self._data.get_index(dim) # type: ignore else: - indexes = [self._data.get_index(k) for k in ordered_dims] + indexes = [self._data.get_index(k) for k in ordered_dims] # type: ignore names = list(ordered_dims) return pd.MultiIndex.from_product(indexes, names=names) @@ -187,7 +187,7 @@ class DatasetCoordinates(AbstractCoordinates): objects. """ - _data = None # type: Dataset + __slots__ = ("_data",) def __init__(self, dataset: "Dataset"): self._data = dataset @@ -258,7 +258,7 @@ class DataArrayCoordinates(AbstractCoordinates): dimensions and the values given by corresponding DataArray objects. """ - _data = None # type: DataArray + __slots__ = ("_data",) def __init__(self, dataarray: "DataArray"): self._data = dataarray @@ -314,6 +314,8 @@ class LevelCoordinatesSource(Mapping[Hashable, Any]): by any public methods. 
""" + __slots__ = ("_data",) + def __init__(self, data_object: "Union[DataArray, Dataset]"): self._data = data_object diff --git a/xarray/core/dask_array_compat.py b/xarray/core/dask_array_compat.py index 5d4ff849b57..fe2cdc5c553 100644 --- a/xarray/core/dask_array_compat.py +++ b/xarray/core/dask_array_compat.py @@ -4,7 +4,6 @@ import numpy as np from dask import __version__ as dask_version - try: blockwise = da.blockwise except AttributeError: diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 52c11429e2b..e63b6c9975f 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -4,6 +4,7 @@ from collections import OrderedDict from numbers import Number from typing import ( + TYPE_CHECKING, Any, Callable, Dict, @@ -17,7 +18,6 @@ Union, cast, overload, - TYPE_CHECKING, ) import numpy as np @@ -38,9 +38,9 @@ from .accessor_dt import DatetimeAccessor from .accessor_str import StringAccessor from .alignment import ( - align, _broadcast_helper, _get_broadcast_dims_map_common_coords, + align, reindex_like_indexers, ) from .common import AbstractArray, DataWithCoords @@ -54,7 +54,7 @@ from .formatting import format_item from .indexes import Indexes, default_indexes from .options import OPTIONS -from .utils import _check_inplace, either_dict_or_kwargs, ReprObject +from .utils import ReprObject, _check_inplace, either_dict_or_kwargs from .variable import ( IndexVariable, Variable, @@ -113,6 +113,11 @@ def _infer_coords_and_dims( coord = as_variable(coord, name=dims[n]).to_index_variable() dims[n] = coord.name dims = tuple(dims) + elif len(dims) != len(shape): + raise ValueError( + "different number of dimensions on data " + "and dims: %s vs %s" % (len(shape), len(dims)) + ) else: for d in dims: if not isinstance(d, str): @@ -158,7 +163,27 @@ def _infer_coords_and_dims( return new_coords, dims +def _check_data_shape(data, coords, dims): + if data is dtypes.NA: + data = np.nan + if coords is not None and utils.is_scalar(data, include_0d=False): + if utils.is_dict_like(coords): + if dims is None: + return data + else: + data_shape = tuple( + as_variable(coords[k], k).size if k in coords.keys() else 1 + for k in dims + ) + else: + data_shape = tuple(as_variable(coord, "foo").size for coord in coords) + data = np.full(data_shape, data) + return data + + class _LocIndexer: + __slots__ = ("data_array",) + def __init__(self, data_array: "DataArray"): self.data_array = data_array @@ -223,6 +248,16 @@ class DataArray(AbstractArray, DataWithCoords): Dictionary for holding arbitrary metadata. """ + __slots__ = ( + "_accessors", + "_coords", + "_file_obj", + "_name", + "_indexes", + "_variable", + "__weakref__", + ) + _groupby_cls = groupby.DataArrayGroupBy _rolling_cls = rolling.DataArrayRolling _coarsen_cls = rolling.DataArrayCoarsen @@ -234,7 +269,7 @@ class DataArray(AbstractArray, DataWithCoords): def __init__( self, - data: Any, + data: Any = dtypes.NA, coords: Union[Sequence[Tuple], Mapping[Hashable, Any], None] = None, dims: Union[Hashable, Sequence[Hashable], None] = None, name: Hashable = None, @@ -288,7 +323,7 @@ def __init__( if encoding is not None: warnings.warn( "The `encoding` argument to `DataArray` is deprecated, and . " - "will be removed in 0.13. " + "will be removed in 0.14. 
" "Instead, specify the encoding when writing to disk or " "set the `encoding` attribute directly.", FutureWarning, @@ -323,6 +358,7 @@ def __init__( if encoding is None: encoding = getattr(data, "encoding", None) + data = _check_data_shape(data, coords, dims) data = as_compatible_data(data) coords, dims = _infer_coords_and_dims(data.shape, coords, dims) variable = Variable(dims, data, attrs, encoding, fastpath=True) @@ -332,6 +368,7 @@ def __init__( assert isinstance(coords, OrderedDict) self._coords = coords # type: OrderedDict[Any, Variable] self._name = name # type: Optional[Hashable] + self._accessors = None # type: Optional[Dict[str, Any]] # TODO(shoyer): document this argument, once it becomes part of the # public interface. @@ -339,8 +376,6 @@ def __init__( self._file_obj = None - self._initialized = True # type: bool - def _replace( self, variable: Variable = None, @@ -444,7 +479,7 @@ def _to_dataset_whole( dataset = Dataset._from_vars_and_coord_names(variables, coord_names) return dataset - def to_dataset(self, dim: Hashable = None, name: Hashable = None) -> Dataset: + def to_dataset(self, dim: Hashable = None, *, name: Hashable = None) -> Dataset: """Convert a DataArray to a Dataset. Parameters @@ -462,15 +497,9 @@ def to_dataset(self, dim: Hashable = None, name: Hashable = None) -> Dataset: dataset : Dataset """ if dim is not None and dim not in self.dims: - warnings.warn( - "the order of the arguments on DataArray.to_dataset " - "has changed; you now need to supply ``name`` as " - "a keyword argument", - FutureWarning, - stacklevel=2, + raise TypeError( + "{} is not a dim. If supplying a ``name``, pass as a kwarg.".format(dim) ) - name = dim - dim = None if dim is not None: if name is not None: @@ -700,34 +729,21 @@ def reset_coords( drop : bool, optional If True, remove coordinates instead of converting them into variables. - inplace : bool, optional - If True, modify this object in place. Otherwise, create a new - object. Returns ------- - Dataset, or DataArray if ``drop == True``, or None if - ``inplace == True`` + Dataset, or DataArray if ``drop == True`` """ - inplace = _check_inplace(inplace) - if inplace and not drop: - raise ValueError( - "cannot reset coordinates in-place on a " - "DataArray without ``drop == True``" - ) + _check_inplace(inplace) if names is None: names = set(self.coords) - set(self.dims) dataset = self.coords.to_dataset().reset_coords(names, drop) if drop: - if inplace: - self._coords = dataset._variables - return None - else: - return self._replace(coords=dataset._variables) + return self._replace(coords=dataset._variables) else: if self.name is None: raise ValueError( - "cannot reset_coords with drop=False " "on an unnamed DataArrray" + "cannot reset_coords with drop=False on an unnamed DataArrray" ) dataset[self.name] = self.variable return dataset @@ -1026,30 +1042,55 @@ def sel( ) return self._from_temp_dataset(ds) - def isel_points(self, dim="points", **indexers) -> "DataArray": - """Return a new DataArray whose data is given by pointwise integer - indexing along the specified dimension(s). + def head( + self, + indexers: Union[Mapping[Hashable, int], int] = None, + **indexers_kwargs: Any + ) -> "DataArray": + """Return a new DataArray whose data is given by the the first `n` + values along the specified dimension(s). 
Default `n` = 5 See Also -------- - Dataset.isel_points + Dataset.head + DataArray.tail + DataArray.thin """ - ds = self._to_temp_dataset().isel_points(dim=dim, **indexers) + ds = self._to_temp_dataset().head(indexers, **indexers_kwargs) return self._from_temp_dataset(ds) - def sel_points( - self, dim="points", method=None, tolerance=None, **indexers + def tail( + self, + indexers: Union[Mapping[Hashable, int], int] = None, + **indexers_kwargs: Any ) -> "DataArray": - """Return a new DataArray whose dataset is given by pointwise selection - of index labels along the specified dimension(s). + """Return a new DataArray whose data is given by the the last `n` + values along the specified dimension(s). Default `n` = 5 See Also -------- - Dataset.sel_points + Dataset.tail + DataArray.head + DataArray.thin """ - ds = self._to_temp_dataset().sel_points( - dim=dim, method=method, tolerance=tolerance, **indexers - ) + ds = self._to_temp_dataset().tail(indexers, **indexers_kwargs) + return self._from_temp_dataset(ds) + + def thin( + self, + indexers: Union[Mapping[Hashable, int], int] = None, + **indexers_kwargs: Any + ) -> "DataArray": + """Return a new DataArray whose data is given by each `n` value + along the specified dimension(s). Default `n` = 5 + + See Also + -------- + Dataset.thin + DataArray.head + DataArray.tail + """ + ds = self._to_temp_dataset().thin(indexers, **indexers_kwargs) return self._from_temp_dataset(ds) def broadcast_like( @@ -1412,9 +1453,26 @@ def swap_dims(self, dims_dict: Mapping[Hashable, Hashable]) -> "DataArray": Returns ------- - renamed : Dataset + swapped : DataArray DataArray with swapped dimensions. + Examples + -------- + >>> arr = xr.DataArray(data=[0, 1], dims="x", + coords={"x": ["a", "b"], "y": ("x", [0, 1])}) + >>> arr + + array([0, 1]) + Coordinates: + * x (x) >> arr.swap_dims({"x": "y"}) + + array([0, 1]) + Coordinates: + x (y) >> arr = xr.DataArray(data=np.ones((2, 3)), ... dims=['x', 'y'], ... coords={'x': @@ -1552,14 +1604,10 @@ def set_index( -------- DataArray.reset_index """ - inplace = _check_inplace(inplace) + _check_inplace(inplace) indexes = either_dict_or_kwargs(indexes, indexes_kwargs, "set_index") coords, _ = merge_indexes(indexes, self._coords, set(), append=append) - if inplace: - self._coords = coords - return None - else: - return self._replace(coords=coords) + return self._replace(coords=coords) def reset_index( self, @@ -1577,36 +1625,29 @@ def reset_index( drop : bool, optional If True, remove the specified indexes and/or multi-index levels instead of extracting them as new coordinates (default: False). - inplace : bool, optional - If True, modify the dataarray in-place. Otherwise, return a new - DataArray object. Returns ------- obj : DataArray Another dataarray, with this dataarray's data but replaced - coordinates. If ``inplace == True``, return None. + coordinates. See Also -------- DataArray.set_index """ - inplace = _check_inplace(inplace) + _check_inplace(inplace) coords, _ = split_indexes( dims_or_levels, self._coords, set(), self._level_coords, drop=drop ) - if inplace: - self._coords = coords - return None - else: - return self._replace(coords=coords) + return self._replace(coords=coords) def reorder_levels( self, dim_order: Mapping[Hashable, Sequence[int]] = None, inplace: bool = None, **dim_order_kwargs: Sequence[int] - ) -> Optional["DataArray"]: + ) -> "DataArray": """Rearrange index levels using input order. 
Parameters @@ -1615,9 +1656,6 @@ def reorder_levels( Mapping from names matching dimensions and values given by lists representing new level orders. Every given dimension must have a multi-index. - inplace : bool, optional - If True, modify the dataarray in-place. Otherwise, return a new - DataArray object. **dim_order_kwargs: optional The keyword arguments form of ``dim_order``. One of dim_order or dim_order_kwargs must be provided. @@ -1626,9 +1664,9 @@ def reorder_levels( ------- obj : DataArray Another dataarray, with this dataarray's data but replaced - coordinates. If ``inplace == True``, return None. + coordinates. """ - inplace = _check_inplace(inplace) + _check_inplace(inplace) dim_order = either_dict_or_kwargs(dim_order, dim_order_kwargs, "reorder_levels") replace_coords = {} for dim, order in dim_order.items(): @@ -1639,11 +1677,7 @@ def reorder_levels( replace_coords[dim] = IndexVariable(coord.dims, index.reorder_levels(order)) coords = self._coords.copy() coords.update(replace_coords) - if inplace: - self._coords = coords - return None - else: - return self._replace(coords=coords) + return self._replace(coords=coords) def stack( self, @@ -2319,19 +2353,27 @@ def from_dict(cls, d: dict) -> "DataArray": return obj @classmethod - def from_series(cls, series: pd.Series) -> "DataArray": + def from_series(cls, series: pd.Series, sparse: bool = False) -> "DataArray": """Convert a pandas.Series into an xarray.DataArray. If the series's index is a MultiIndex, it will be expanded into a tensor product of one-dimensional coordinates (filling in missing values with NaN). Thus this operation should be the inverse of the `to_series` method. + + If sparse=True, creates a sparse array instead of a dense NumPy array. + Requires the pydata/sparse package. + + See also + -------- + xarray.Dataset.from_dataframe """ - # TODO: add a 'name' parameter - name = series.name - df = pd.DataFrame({name: series}) - ds = Dataset.from_dataframe(df) - return ds[name] + temp_name = "__temporary_name" + df = pd.DataFrame({temp_name: series}) + ds = Dataset.from_dataframe(df, sparse=sparse) + result = cast(DataArray, ds[temp_name]) + result.name = series.name + return result def to_cdms2(self) -> "cdms2_Variable": """Convert this array into a cdms2.Variable @@ -2516,10 +2558,12 @@ def plot(self) -> _PlotMethods: >>> d = DataArray([[1, 2], [3, 4]]) For convenience just call this directly + >>> d.plot() Or use it as a namespace to use xarray.plot functions as DataArray methods + >>> d.plot.imshow() # equivalent to xarray.plot.imshow(d) """ @@ -2746,7 +2790,7 @@ def dot( """ if isinstance(other, Dataset): raise NotImplementedError( - "dot products are not yet supported " "with Dataset objects." + "dot products are not yet supported with Dataset objects." ) if not isinstance(other, DataArray): raise TypeError("dot only operates on DataArrays.") diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 076b97e8623..ea087ce3ce1 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -7,6 +7,7 @@ from numbers import Number from pathlib import Path from typing import ( + TYPE_CHECKING, Any, Callable, DefaultDict, @@ -24,28 +25,28 @@ Union, cast, overload, - TYPE_CHECKING, ) import numpy as np import pandas as pd + import xarray as xr from ..coding.cftimeindex import _parse_array_of_cftime_strings +from ..plot.dataset_plot import _Dataset_PlotMethods from . 
import ( alignment, dtypes, duck_array_ops, formatting, groupby, - indexing, ops, pdcompat, resample, rolling, utils, ) -from .alignment import align, _broadcast_helper, _get_broadcast_dims_map_common_coords +from .alignment import _broadcast_helper, _get_broadcast_dims_map_common_coords, align from .common import ( ALL_DIMS, DataWithCoords, @@ -54,7 +55,6 @@ ) from .coordinates import ( DatasetCoordinates, - DataArrayCoordinates, LevelCoordinatesSource, assert_coordinate_consistent, remap_label_indexers, @@ -77,9 +77,10 @@ either_dict_or_kwargs, hashable, maybe_wrap_array, + is_dict_like, + is_list_like, ) from .variable import IndexVariable, Variable, as_variable, broadcast_variables -from ..plot.dataset_plot import _Dataset_PlotMethods if TYPE_CHECKING: from ..backends import AbstractDataStore, ZarrStore @@ -344,6 +345,8 @@ def as_dataset(obj: Any) -> "Dataset": class DataVariables(Mapping[Hashable, "DataArray"]): + __slots__ = ("_dataset",) + def __init__(self, dataset: "Dataset"): self._dataset = dataset @@ -383,6 +386,8 @@ def _ipython_key_completions_(self): class _LocIndexer: + __slots__ = ("dataset",) + def __init__(self, dataset: "Dataset"): self.dataset = dataset @@ -406,6 +411,18 @@ class Dataset(Mapping, ImplementsDatasetReduce, DataWithCoords): coordinates used for label based indexing. """ + __slots__ = ( + "_accessors", + "_attrs", + "_coord_names", + "_dims", + "_encoding", + "_file_obj", + "_indexes", + "_variables", + "__weakref__", + ) + _groupby_cls = groupby.DatasetGroupBy _rolling_cls = rolling.DatasetRolling _coarsen_cls = rolling.DatasetCoarsen @@ -473,7 +490,7 @@ def __init__( if compat is not None: warnings.warn( "The `compat` argument to Dataset is deprecated and will be " - "removed in 0.13." + "removed in 0.14." "Instead, use `merge` to control how variables are combined", FutureWarning, stacklevel=2, @@ -484,6 +501,7 @@ def __init__( self._variables = OrderedDict() # type: OrderedDict[Any, Variable] self._coord_names = set() # type: Set[Hashable] self._dims = {} # type: Dict[Any, int] + self._accessors = None # type: Optional[Dict[str, Any]] self._attrs = None # type: Optional[OrderedDict] self._file_obj = None if data_vars is None: @@ -499,7 +517,6 @@ def __init__( self._attrs = OrderedDict(attrs) self._encoding = None # type: Optional[Dict] - self._initialized = True def _set_init_vars_and_dims(self, data_vars, coords, compat): """Set the initial value of Dataset variables and dimensions @@ -838,7 +855,7 @@ def _construct_direct( obj._attrs = attrs obj._file_obj = file_obj obj._encoding = encoding - obj._initialized = True + obj._accessors = None return obj __default = object() @@ -1213,12 +1230,13 @@ def loc(self) -> _LocIndexer: """ return _LocIndexer(self) - def __getitem__(self, key: object) -> "Union[DataArray, Dataset]": + def __getitem__(self, key: Any) -> "Union[DataArray, Dataset]": """Access variables or coordinates this dataset as a :py:class:`~xarray.DataArray`. Indexing with a list of names will return a new ``Dataset`` object. """ + # TODO(shoyer): type this properly: https://github.com/python/mypy/issues/7328 if utils.is_dict_like(key): return self.isel(**cast(Mapping, key)) @@ -1353,9 +1371,6 @@ def set_coords( ---------- names : hashable or iterable of hashables Name(s) of variables in this dataset to convert into coordinates. - inplace : bool, optional - If True, modify this dataset inplace. Otherwise, create a new - object. Returns ------- @@ -1369,13 +1384,13 @@ def set_coords( # DataFrame.set_index? # nb. 
check in self._variables, not self.data_vars to insure that the # operation is idempotent - inplace = _check_inplace(inplace) + _check_inplace(inplace) if isinstance(names, str) or not isinstance(names, Iterable): names = [names] else: names = list(names) self._assert_all_in_dataset(names) - obj = self if inplace else self.copy() + obj = self.copy() obj._coord_names.update(names) return obj @@ -1395,15 +1410,12 @@ def reset_coords( drop : bool, optional If True, remove coordinates instead of converting them into variables. - inplace : bool, optional - If True, modify this dataset inplace. Otherwise, create a new - object. Returns ------- Dataset """ - inplace = _check_inplace(inplace) + _check_inplace(inplace) if names is None: names = self._coord_names - set(self.dims) else: @@ -1417,7 +1429,7 @@ def reset_coords( raise ValueError( "cannot remove index coordinates with reset_coords: %s" % bad_coords ) - obj = self if inplace else self.copy() + obj = self.copy() obj._coord_names.difference_update(names) if drop: for name in names: @@ -1769,7 +1781,7 @@ def _validate_indexers( elif isinstance(v, Dataset): raise TypeError("cannot use a Dataset as an indexer") elif isinstance(v, Sequence) and len(v) == 0: - v = IndexVariable((k,), np.zeros((0,), dtype="int64")) + v = Variable((k,), np.zeros((0,), dtype="int64")) else: v = np.asarray(v) @@ -1783,16 +1795,13 @@ def _validate_indexers( if v.ndim == 0: v = Variable((), v) elif v.ndim == 1: - v = IndexVariable((k,), v) + v = Variable((k,), v) else: raise IndexError( "Unlabeled multi-dimensional array cannot be " "used for indexing: {}".format(k) ) - if v.ndim == 1: - v = v.to_index_variable() - indexers_list.append((k, v)) return indexers_list @@ -1997,213 +2006,152 @@ def sel( result = self.isel(indexers=pos_indexers, drop=drop) return result._overwrite_indexes(new_indexes) - def isel_points(self, dim: Any = "points", **indexers: Any) -> "Dataset": - """Returns a new dataset with each array indexed pointwise along the - specified dimension(s). - - This method selects pointwise values from each array and is akin to - the NumPy indexing behavior of `arr[[0, 1], [0, 1]]`, except this - method does not require knowing the order of each array's dimensions. + def head( + self, + indexers: Union[Mapping[Hashable, int], int] = None, + **indexers_kwargs: Any + ) -> "Dataset": + """Returns a new dataset with the first `n` values of each array + for the specified dimension(s). Parameters ---------- - dim : hashable or DataArray or pandas.Index or other list-like object, - optional - Name of the dimension to concatenate along. If dim is provided as a - hashable, it must be a new dimension name, in which case it is added - along axis=0. If dim is provided as a DataArray or Index or - list-like object, its name, which must not be present in the - dataset, is used as the dimension to concatenate along and the - values are added as a coordinate. - **indexers : {dim: indexer, ...} - Keyword arguments with names matching dimensions and values given - by array-like objects. All indexers must be the same length and - 1 dimensional. + indexers : dict or int, default: 5 + A dict with keys matching dimensions and integer values `n` + or a single integer `n` applied over all dimensions. + One of indexers or indexers_kwargs must be provided. + **indexers_kwargs : {dim: n, ...}, optional + The keyword arguments form of ``indexers``. + One of indexers or indexers_kwargs must be provided. 
- Returns - ------- - obj : Dataset - A new Dataset with the same contents as this dataset, except each - array and dimension is indexed by the appropriate indexers. With - pointwise indexing, the new Dataset will always be a copy of the - original. See Also -------- - Dataset.sel - Dataset.isel - Dataset.sel_points - DataArray.isel_points - """ # noqa - warnings.warn( - "Dataset.isel_points is deprecated: use Dataset.isel()" "instead.", - DeprecationWarning, - stacklevel=2, - ) - - indexer_dims = set(indexers) - - def take(variable, slices): - # Note: remove helper function when once when numpy - # supports vindex https://github.com/numpy/numpy/pull/6075 - if hasattr(variable.data, "vindex"): - # Special case for dask backed arrays to use vectorised list - # indexing - sel = variable.data.vindex[slices] - else: - # Otherwise assume backend is numpy array with 'fancy' indexing - sel = variable.data[slices] - return sel - - def relevant_keys(mapping): - return [ - k for k, v in mapping.items() if any(d in indexer_dims for d in v.dims) - ] - - coords = relevant_keys(self.coords) - indexers = {k: np.asarray(v) for k, v in indexers.items()} - non_indexed_dims = set(self.dims) - indexer_dims - non_indexed_coords = set(self.coords) - set(coords) - - # All the indexers should be iterables - # Check that indexers are valid dims, integers, and 1D + Dataset.tail + Dataset.thin + DataArray.head + """ + if not indexers_kwargs: + if indexers is None: + indexers = 5 + if not isinstance(indexers, int) and not is_dict_like(indexers): + raise TypeError("indexers must be either dict-like or a single integer") + if isinstance(indexers, int): + indexers = {dim: indexers for dim in self.dims} + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "head") for k, v in indexers.items(): - if k not in self.dims: - raise ValueError("dimension %s does not exist" % k) - if v.dtype.kind != "i": # type: ignore - raise TypeError("Indexers must be integers") - if v.ndim != 1: # type: ignore - raise ValueError("Indexers must be 1 dimensional") - - # all the indexers should have the same length - lengths = {len(v) for k, v in indexers.items()} - if len(lengths) > 1: - raise ValueError("All indexers must be the same length") - - # Existing dimensions are not valid choices for the dim argument - if isinstance(dim, str): - if dim in self.dims: - # dim is an invalid string - raise ValueError( - "Existing dimension names are not valid " - "choices for the dim argument in sel_points" + if not isinstance(v, int): + raise TypeError( + "expected integer type indexer for " + "dimension %r, found %r" % (k, type(v)) ) - - elif hasattr(dim, "dims"): - # dim is a DataArray or Coordinate - if dim.name in self.dims: - # dim already exists + elif v < 0: raise ValueError( - "Existing dimensions are not valid choices " - "for the dim argument in sel_points" + "expected positive integer as indexer " + "for dimension %r, found %s" % (k, v) ) + indexers_slices = {k: slice(val) for k, val in indexers.items()} + return self.isel(indexers_slices) - # Set the new dim_name, and optionally the new dim coordinate - # dim is either an array-like or a string - if not utils.is_scalar(dim): - # dim is array like get name or assign 'points', get as variable - dim_name = "points" if not hasattr(dim, "name") else dim.name - dim_coord = as_variable(dim, name=dim_name) - else: - # dim is a string - dim_name = dim - dim_coord = None # type: ignore - - reordered = self.transpose(*list(indexer_dims), *list(non_indexed_dims)) - - variables = OrderedDict() # 
type: ignore - - for name, var in reordered.variables.items(): - if name in indexers or any(d in indexer_dims for d in var.dims): - # slice if var is an indexer or depends on an indexed dim - slc = [indexers.get(k, slice(None)) for k in var.dims] + def tail( + self, + indexers: Union[Mapping[Hashable, int], int] = None, + **indexers_kwargs: Any + ) -> "Dataset": + """Returns a new dataset with the last `n` values of each array + for the specified dimension(s). - var_dims = [dim_name] + [d for d in var.dims if d in non_indexed_dims] - selection = take(var, tuple(slc)) - var_subset = type(var)(var_dims, selection, var.attrs) - variables[name] = var_subset - else: - # If not indexed just add it back to variables or coordinates - variables[name] = var + Parameters + ---------- + indexers : dict or int, default: 5 + A dict with keys matching dimensions and integer values `n` + or a single integer `n` applied over all dimensions. + One of indexers or indexers_kwargs must be provided. + **indexers_kwargs : {dim: n, ...}, optional + The keyword arguments form of ``indexers``. + One of indexers or indexers_kwargs must be provided. - coord_names = (set(coords) & set(variables)) | non_indexed_coords - dset = self._replace_vars_and_dims(variables, coord_names=coord_names) - # Add the dim coord to the new dset. Must be done after creation - # because_replace_vars_and_dims can only access existing coords, - # not add new ones - if dim_coord is not None: - dset.coords[dim_name] = dim_coord - return dset + See Also + -------- + Dataset.head + Dataset.thin + DataArray.tail + """ + if not indexers_kwargs: + if indexers is None: + indexers = 5 + if not isinstance(indexers, int) and not is_dict_like(indexers): + raise TypeError("indexers must be either dict-like or a single integer") + if isinstance(indexers, int): + indexers = {dim: indexers for dim in self.dims} + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "tail") + for k, v in indexers.items(): + if not isinstance(v, int): + raise TypeError( + "expected integer type indexer for " + "dimension %r, found %r" % (k, type(v)) + ) + elif v < 0: + raise ValueError( + "expected positive integer as indexer " + "for dimension %r, found %s" % (k, v) + ) + indexers_slices = { + k: slice(-val, None) if val != 0 else slice(val) + for k, val in indexers.items() + } + return self.isel(indexers_slices) - def sel_points( + def thin( self, - dim: Any = "points", - method: str = None, - tolerance: Number = None, - **indexers: Any - ): - """Returns a new dataset with each array indexed pointwise by tick - labels along the specified dimension(s). - - In contrast to `Dataset.isel_points`, indexers for this method should - use labels instead of integers. - - In contrast to `Dataset.sel`, this method selects points along the - diagonal of multi-dimensional arrays, not the intersection. + indexers: Union[Mapping[Hashable, int], int] = None, + **indexers_kwargs: Any + ) -> "Dataset": + """Returns a new dataset with each array indexed along every `n`th + value for the specified dimension(s) Parameters ---------- - dim : hashable or DataArray or pandas.Index or other list-like object, - optional - Name of the dimension to concatenate along. If dim is provided as a - hashable, it must be a new dimension name, in which case it is added - along axis=0. If dim is provided as a DataArray or Index or - list-like object, its name, which must not be present in the - dataset, is used as the dimension to concatenate along and the - values are added as a coordinate. 
- method : {None, 'nearest', 'pad'/'ffill', 'backfill'/'bfill'}, optional - Method to use for inexact matches (requires pandas>=0.16): - - * None (default): only exact matches - * pad / ffill: propagate last valid index value forward - * backfill / bfill: propagate next valid index value backward - * nearest: use nearest valid index value - tolerance : optional - Maximum distance between original and new labels for inexact - matches. The values of the index at the matching locations must - satisfy the equation ``abs(index[indexer] - target) <= tolerance``. - Requires pandas>=0.17. - **indexers : {dim: indexer, ...} - Keyword arguments with names matching dimensions and values given - by array-like objects. All indexers must be the same length and - 1 dimensional. + indexers : dict or int, default: 5 + A dict with keys matching dimensions and integer values `n` + or a single integer `n` applied over all dimensions. + One of indexers or indexers_kwargs must be provided. + **indexers_kwargs : {dim: n, ...}, optional + The keyword arguments form of ``indexers``. + One of indexers or indexers_kwargs must be provided. - Returns - ------- - obj : Dataset - A new Dataset with the same contents as this dataset, except each - array and dimension is indexed by the appropriate indexers. With - pointwise indexing, the new Dataset will always be a copy of the - original. See Also -------- - Dataset.sel - Dataset.isel - Dataset.isel_points - DataArray.sel_points - """ # noqa - warnings.warn( - "Dataset.sel_points is deprecated: use Dataset.sel()" "instead.", - DeprecationWarning, - stacklevel=2, - ) - - pos_indexers, _ = indexing.remap_label_indexers( - self, indexers, method=method, tolerance=tolerance - ) - return self.isel_points(dim=dim, **pos_indexers) + Dataset.head + Dataset.tail + DataArray.thin + """ + if ( + not indexers_kwargs + and not isinstance(indexers, int) + and not is_dict_like(indexers) + ): + raise TypeError("indexers must be either dict-like or a single integer") + if isinstance(indexers, int): + indexers = {dim: indexers for dim in self.dims} + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "thin") + for k, v in indexers.items(): + if not isinstance(v, int): + raise TypeError( + "expected integer type indexer for " + "dimension %r, found %r" % (k, type(v)) + ) + elif v < 0: + raise ValueError( + "expected positive integer as indexer " + "for dimension %r, found %s" % (k, v) + ) + elif v == 0: + raise ValueError("step cannot be zero") + indexers_slices = {k: slice(None, None, val) for k, val in indexers.items()} + return self.isel(indexers_slices) def broadcast_like( self, other: Union["Dataset", "DataArray"], exclude: Iterable[Hashable] = None @@ -2416,7 +2364,10 @@ def interp( if kwargs is None: kwargs = {} coords = either_dict_or_kwargs(coords, coords_kwargs, "interp") - indexers = OrderedDict(self._validate_indexers(coords)) + indexers = OrderedDict( + (k, v.to_index_variable() if isinstance(v, Variable) and v.ndim == 1 else v) + for k, v in self._validate_indexers(coords) + ) obj = self if assume_sorted else self.sortby([k for k in coords]) @@ -2604,9 +2555,6 @@ def rename( name_dict : dict-like, optional Dictionary whose keys are current variable or dimension names and whose values are the desired names. - inplace : bool, optional - If True, rename variables and dimensions in-place. Otherwise, - return a new dataset object. **names, optional Keyword form of ``name_dict``. One of name_dict or names must be provided. 
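For illustration, a minimal sketch of calling the new ``head``, ``tail`` and ``thin`` methods added above (the example dataset is invented; an integer applies to every dimension, while a dict or keyword arguments select specific dimensions)::

    import numpy as np
    import xarray as xr

    ds = xr.Dataset(
        {"a": (("x", "y"), np.arange(100).reshape(10, 10))},
        coords={"x": np.arange(10), "y": np.arange(10)},
    )

    ds.head(3)          # first 3 values along every dimension
    ds.tail(x=3)        # last 3 values along "x" only
    ds.thin({"x": 2})   # every 2nd value along "x"
    ds.a.head(x=5)      # the DataArray methods defer to the Dataset ones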
@@ -2623,7 +2571,7 @@ def rename( Dataset.rename_dims DataArray.rename """ - inplace = _check_inplace(inplace) + _check_inplace(inplace) name_dict = either_dict_or_kwargs(name_dict, names, "rename") for k in name_dict.keys(): if k not in self and k not in self.dims: @@ -2635,9 +2583,7 @@ def rename( variables, coord_names, dims, indexes = self._rename_all( name_dict=name_dict, dims_dict=name_dict ) - return self._replace( - variables, coord_names, dims=dims, indexes=indexes, inplace=inplace - ) + return self._replace(variables, coord_names, dims=dims, indexes=indexes) def rename_dims( self, dims_dict: Mapping[Hashable, Hashable] = None, **dims: Hashable @@ -2727,15 +2673,35 @@ def swap_dims( Dictionary whose keys are current dimension names and whose values are new names. Each value must already be a variable in the dataset. - inplace : bool, optional - If True, swap dimensions in-place. Otherwise, return a new dataset - object. Returns ------- - renamed : Dataset + swapped : Dataset Dataset with swapped dimensions. + Examples + -------- + >>> ds = xr.Dataset(data_vars={"a": ("x", [5, 7]), "b": ("x", [0.1, 2.4])}, + coords={"x": ["a", "b"], "y": ("x", [0, 1])}) + >>> ds + + Dimensions: (x: 2) + Coordinates: + * x (x) >> ds.swap_dims({"x": "y"}) + + Dimensions: (y: 2) + Coordinates: + x (y) "Dataset": - mapping {var name: (dimension name, array-like)} - mapping {var name: (tuple of dimension names, array-like)} - inplace : bool, optional - If True, merge the other dataset into this dataset in-place. - Otherwise, return a new dataset object. Returns ------- @@ -3398,12 +3346,10 @@ def update(self, other: "DatasetLike", inplace: bool = None) -> "Dataset": If any dimensions would have inconsistent sizes in the updated dataset. """ - inplace = _check_inplace(inplace, default=True) + _check_inplace(inplace) variables, coord_names, dims = dataset_update_method(self, other) - return self._replace_vars_and_dims( - variables, coord_names, dims, inplace=inplace - ) + return self._replace_vars_and_dims(variables, coord_names, dims, inplace=True) def merge( self, @@ -3425,9 +3371,6 @@ def merge( ---------- other : Dataset or castable to Dataset Dataset or variables to merge with this dataset. - inplace : bool, optional - If True, merge the other dataset into this dataset in-place. - Otherwise, return a new dataset object. overwrite_vars : Hashable or iterable of Hashable, optional If provided, update variables of these name(s) without checking for conflicts in this dataset. @@ -3464,7 +3407,7 @@ def merge( MergeError If any variables conflict (see ``compat``). 
""" - inplace = _check_inplace(inplace) + _check_inplace(inplace) variables, coord_names, dims = dataset_merge_method( self, other, @@ -3474,9 +3417,7 @@ def merge( fill_value=fill_value, ) - return self._replace_vars_and_dims( - variables, coord_names, dims, inplace=inplace - ) + return self._replace_vars_and_dims(variables, coord_names, dims) def _assert_all_in_dataset( self, names: Iterable[Hashable], virtual_okay: bool = False @@ -3554,9 +3495,23 @@ def drop( # noqa: F811 if errors not in ["raise", "ignore"]: raise ValueError('errors must be either "raise" or "ignore"') - labels_are_coords = isinstance(labels, DataArrayCoordinates) - if labels_kwargs or (utils.is_dict_like(labels) and not labels_are_coords): - labels_kwargs = utils.either_dict_or_kwargs(labels, labels_kwargs, "drop") + if is_dict_like(labels) and not isinstance(labels, dict): + warnings.warn( + "dropping coordinates using key values of dict-like labels is " + "deprecated; use drop_vars or a list of coordinates.", + FutureWarning, + stacklevel=2, + ) + if dim is not None and is_list_like(labels): + warnings.warn( + "dropping dimensions using list-like labels is deprecated; use " + "dict-like arguments.", + DeprecationWarning, + stacklevel=2, + ) + + if labels_kwargs or isinstance(labels, dict): + labels_kwargs = either_dict_or_kwargs(labels, labels_kwargs, "drop") if dim is not None: raise ValueError("cannot specify dim and dict-like arguments.") ds = self @@ -3570,13 +3525,6 @@ def drop( # noqa: F811 labels = set(labels) return self._drop_vars(labels, errors=errors) else: - if utils.is_list_like(labels): - warnings.warn( - "dropping dimensions using list-like labels is deprecated; " - "use dict-like arguments.", - DeprecationWarning, - stacklevel=2, - ) return self._drop_labels(labels, dim, errors=errors) def _drop_labels(self, labels=None, dim=None, errors="raise"): @@ -3951,9 +3899,7 @@ def reduce( Dataset with this object's DataArrays replaced with new DataArrays of summarized data and the indicated dimension(s) removed. """ - if dim is ALL_DIMS: - dim = None - if dim is None: + if dim is None or dim is ALL_DIMS: dims = set(self.dims) elif isinstance(dim, str) or not isinstance(dim, Iterable): dims = {dim} @@ -4155,8 +4101,61 @@ def to_dataframe(self): """ return self._to_dataframe(self.dims) + def _set_sparse_data_from_dataframe( + self, dataframe: pd.DataFrame, dims: tuple, shape: Tuple[int, ...] + ) -> None: + from sparse import COO + + idx = dataframe.index + if isinstance(idx, pd.MultiIndex): + try: + codes = idx.codes + except AttributeError: + # deprecated since pandas 0.24 + codes = idx.labels + coords = np.stack([np.asarray(code) for code in codes], axis=0) + is_sorted = idx.is_lexsorted + else: + coords = np.arange(idx.size).reshape(1, -1) + is_sorted = True + + for name, series in dataframe.items(): + # Cast to a NumPy array first, in case the Series is a pandas + # Extension array (which doesn't have a valid NumPy dtype) + values = np.asarray(series) + + # In virtually all real use cases, the sparse array will now have + # missing values and needs a fill_value. For consistency, don't + # special case the rare exceptions (e.g., dtype=int without a + # MultiIndex). 
+ dtype, fill_value = dtypes.maybe_promote(values.dtype) + values = np.asarray(values, dtype=dtype) + + data = COO( + coords, + values, + shape, + has_duplicates=False, + sorted=is_sorted, + fill_value=fill_value, + ) + self[name] = (dims, data) + + def _set_numpy_data_from_dataframe( + self, dataframe: pd.DataFrame, dims: tuple, shape: Tuple[int, ...] + ) -> None: + idx = dataframe.index + if isinstance(idx, pd.MultiIndex): + # expand the DataFrame to include the product of all levels + full_idx = pd.MultiIndex.from_product(idx.levels, names=idx.names) + dataframe = dataframe.reindex(full_idx) + + for name, series in dataframe.items(): + data = np.asarray(series).reshape(shape) + self[name] = (dims, data) + @classmethod - def from_dataframe(cls, dataframe): + def from_dataframe(cls, dataframe: pd.DataFrame, sparse: bool = False) -> "Dataset": """Convert a pandas.DataFrame into an xarray.Dataset Each column will be converted into an independent variable in the @@ -4165,7 +4164,24 @@ def from_dataframe(cls, dataframe): values with NaN). This method will produce a Dataset very similar to that on which the 'to_dataframe' method was called, except with possibly redundant dimensions (since all dataset variables will have - the same dimensionality). + the same dimensionality) + + Parameters + ---------- + dataframe : pandas.DataFrame + DataFrame from which to copy data and indices. + sparse : bool + If true, create a sparse arrays instead of dense numpy arrays. This + can potentially save a large amount of memory if the DataFrame has + a MultiIndex. Requires the sparse package (sparse.pydata.org). + + Returns + ------- + New Dataset. + + See also + -------- + xarray.DataArray.from_series """ # TODO: Add an option to remove dimensions along which the variables # are constant, to enable consistent serialization to/from a dataframe, @@ -4178,25 +4194,23 @@ def from_dataframe(cls, dataframe): obj = cls() if isinstance(idx, pd.MultiIndex): - # it's a multi-index - # expand the DataFrame to include the product of all levels - full_idx = pd.MultiIndex.from_product(idx.levels, names=idx.names) - dataframe = dataframe.reindex(full_idx) - dims = [ + dims = tuple( name if name is not None else "level_%i" % n for n, name in enumerate(idx.names) - ] + ) for dim, lev in zip(dims, idx.levels): obj[dim] = (dim, lev) - shape = [lev.size for lev in idx.levels] + shape = tuple(lev.size for lev in idx.levels) else: - dims = (idx.name if idx.name is not None else "index",) - obj[dims[0]] = (dims, idx) - shape = -1 + index_name = idx.name if idx.name is not None else "index" + dims = (index_name,) + obj[index_name] = (dims, idx) + shape = (idx.size,) - for name, series in dataframe.items(): - data = np.asarray(series).reshape(shape) - obj[name] = (dims, data) + if sparse: + obj._set_sparse_data_from_dataframe(dataframe, dims, shape) + else: + obj._set_numpy_data_from_dataframe(dataframe, dims, shape) return obj def to_dask_dataframe(self, dim_order=None, set_index=False): @@ -4811,7 +4825,7 @@ def quantile( if isinstance(dim, str): dims = {dim} - elif dim is None: + elif dim is None or dim is ALL_DIMS: dims = set(self.dims) else: dims = set(dim) @@ -4839,7 +4853,10 @@ def quantile( # the former is often more efficient reduce_dims = None variables[name] = var.quantile( - q, dim=reduce_dims, interpolation=interpolation + q, + dim=reduce_dims, + interpolation=interpolation, + keep_attrs=keep_attrs, ) else: @@ -5139,7 +5156,7 @@ def filter_by_attrs(self, **kwargs): """ # noqa selection = [] - for var_name, variable in 
self.data_vars.items(): + for var_name, variable in self.variables.items(): has_value_flag = False for attr_name, pattern in kwargs.items(): attr_value = variable.attrs.get(attr_name) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 3d7e7cc64bc..fcd0400566f 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -13,7 +13,7 @@ from . import dask_array_ops, dtypes, npcompat, nputils from .nputils import nanfirst, nanlast -from .pycompat import dask_array_type, sparse_array_type +from .pycompat import dask_array_type try: import dask.array as dask_array diff --git a/xarray/core/extensions.py b/xarray/core/extensions.py index 302a7fb2ec6..f473eaa497d 100644 --- a/xarray/core/extensions.py +++ b/xarray/core/extensions.py @@ -19,6 +19,14 @@ def __get__(self, obj, cls): if obj is None: # we're accessing the attribute of the class, i.e., Dataset.geo return self._accessor + + try: + return obj._accessors[self._name] + except TypeError: + obj._accessors = {} + except KeyError: + pass + try: accessor_obj = self._accessor(obj) except AttributeError: @@ -26,11 +34,8 @@ def __get__(self, obj, cls): # raised when initializing the accessor, so we need to raise as # something else (GH933): raise RuntimeError("error initializing %r accessor." % self._name) - # Replace the property with the accessor object. Inspired by: - # http://www.pydanny.com/cached-property.html - # We need to use object.__setattr__ because we overwrite __setattr__ on - # AttrAccessMixin. - object.__setattr__(obj, self._name, accessor_obj) + + obj._accessors[self._name] = accessor_obj return accessor_obj diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 51664fb3e32..c6b2537c958 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -96,7 +96,7 @@ def last_item(array): return [] indexer = (slice(-1, None),) * array.ndim - return np.ravel(array[indexer]).tolist() + return np.ravel(np.asarray(array[indexer])).tolist() def format_timestamp(t): diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 3ed3491b582..bae3057aabe 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -5,18 +5,18 @@ import numpy as np import pandas as pd -from . import dtypes, duck_array_ops, nputils, ops, utils +from . import dtypes, duck_array_ops, nputils, ops from .arithmetic import SupportsArithmetic +from .common import ImplementsArrayReduce, ImplementsDatasetReduce from .concat import concat -from .common import ALL_DIMS, ImplementsArrayReduce, ImplementsDatasetReduce from .options import _get_keep_attrs from .pycompat import integer_types from .utils import ( + either_dict_or_kwargs, hashable, maybe_wrap_array, peek_at, safe_cast_to_index, - either_dict_or_kwargs, ) from .variable import IndexVariable, Variable, as_variable @@ -139,13 +139,24 @@ class _DummyGroup: Should not be user visible. 
""" + __slots__ = ("name", "coords", "size") + def __init__(self, obj, name, coords): self.name = name self.coords = coords - self.dims = (name,) - self.ndim = 1 self.size = obj.sizes[name] - self.values = range(self.size) + + @property + def dims(self): + return (self.name,) + + @property + def ndim(self): + return 1 + + @property + def values(self): + return range(self.size) def _ensure_1d(group, obj): @@ -216,6 +227,19 @@ class GroupBy(SupportsArithmetic): DataArray.groupby """ + __slots__ = ( + "_full_index", + "_inserted_dims", + "_group", + "_group_dim", + "_group_indices", + "_groups", + "_obj", + "_restore_coord_dims", + "_stacked_dim", + "_unique_coord", + ) + def __init__( self, obj, @@ -676,19 +700,8 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): numpy.nanpercentile, pandas.Series.quantile, Dataset.quantile, DataArray.quantile """ - if dim == DEFAULT_DIMS: - dim = ALL_DIMS - # TODO change this to dim = self._group_dim after - # the deprecation process - if self._obj.ndim > 1: - warnings.warn( - "Default reduction dimension will be changed to the " - "grouped dimension in a future version of xarray. To " - "silence this warning, pass dim=xarray.ALL_DIMS " - "explicitly.", - FutureWarning, - stacklevel=2, - ) + if dim is None: + dim = self._group_dim out = self.apply( self._obj.__class__.quantile, @@ -734,20 +747,6 @@ def reduce( Array with summarized data and the indicated dimension(s) removed. """ - if dim == DEFAULT_DIMS: - dim = ALL_DIMS - # TODO change this to dim = self._group_dim after - # the deprecation process - if self._obj.ndim > 1: - warnings.warn( - "Default reduction dimension will be changed to the " - "grouped dimension in a future version of xarray. To " - "silence this warning, pass dim=xarray.ALL_DIMS " - "explicitly.", - FutureWarning, - stacklevel=2, - ) - if keep_attrs is None: keep_attrs = _get_keep_attrs(default=False) @@ -756,43 +755,6 @@ def reduce_array(ar): return self.apply(reduce_array, shortcut=shortcut) - # TODO remove the following class method and DEFAULT_DIMS after the - # deprecation cycle - @classmethod - def _reduce_method(cls, func, include_skipna, numeric_only): - if include_skipna: - - def wrapped_func( - self, - dim=DEFAULT_DIMS, - axis=None, - skipna=None, - keep_attrs=None, - **kwargs - ): - return self.reduce( - func, - dim, - axis, - keep_attrs=keep_attrs, - skipna=skipna, - allow_lazy=True, - **kwargs - ) - - else: - - def wrapped_func( # type: ignore - self, dim=DEFAULT_DIMS, axis=None, keep_attrs=None, **kwargs - ): - return self.reduce( - func, dim, axis, keep_attrs=keep_attrs, allow_lazy=True, **kwargs - ) - - return wrapped_func - - -DEFAULT_DIMS = utils.ReprObject("") ops.inject_reduce_methods(DataArrayGroupBy) ops.inject_binary_ops(DataArrayGroupBy) @@ -874,19 +836,7 @@ def reduce(self, func, dim=None, keep_attrs=None, **kwargs): Array with summarized data and the indicated dimension(s) removed. """ - if dim == DEFAULT_DIMS: - dim = ALL_DIMS - # TODO change this to dim = self._group_dim after - # the deprecation process. Do not forget to remove _reduce_method - warnings.warn( - "Default reduction dimension will be changed to the " - "grouped dimension in a future version of xarray. 
To " - "silence this warning, pass dim=xarray.ALL_DIMS " - "explicitly.", - FutureWarning, - stacklevel=2, - ) - elif dim is None: + if dim is None: dim = self._group_dim if keep_attrs is None: @@ -897,31 +847,6 @@ def reduce_dataset(ds): return self.apply(reduce_dataset) - # TODO remove the following class method and DEFAULT_DIMS after the - # deprecation cycle - @classmethod - def _reduce_method(cls, func, include_skipna, numeric_only): - if include_skipna: - - def wrapped_func(self, dim=DEFAULT_DIMS, skipna=None, **kwargs): - return self.reduce( - func, - dim, - skipna=skipna, - numeric_only=numeric_only, - allow_lazy=True, - **kwargs - ) - - else: - - def wrapped_func(self, dim=DEFAULT_DIMS, **kwargs): # type: ignore - return self.reduce( - func, dim, numeric_only=numeric_only, allow_lazy=True, **kwargs - ) - - return wrapped_func - def assign(self, **kwargs): """Assign data variables by group. diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index 5917f7c7a2d..94188fabc92 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -11,6 +11,8 @@ class Indexes(collections.abc.Mapping): """Immutable proxy for Dataset or DataArrary indexes.""" + __slots__ = ("_indexes",) + def __init__(self, indexes): """Not for public consumption. diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index c5c3cadf7a2..c6a8f6f35e4 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -1,16 +1,17 @@ +import enum import functools import operator from collections import defaultdict from contextlib import suppress from datetime import timedelta -from typing import Any, Tuple, Sequence, Union +from typing import Any, Callable, Sequence, Tuple, Union import numpy as np import pandas as pd from . import duck_array_ops, nputils, utils from .npcompat import DTypeLike -from .pycompat import dask_array_type, integer_types +from .pycompat import dask_array_type, integer_types, sparse_array_type from .utils import is_dict_like, maybe_cast_to_coords_dtype @@ -327,6 +328,8 @@ class ExplicitIndexer: sub-classes BasicIndexer, OuterIndexer or VectorizedIndexer. """ + __slots__ = ("_key",) + def __init__(self, key): if type(self) is ExplicitIndexer: # noqa raise TypeError("cannot instantiate base ExplicitIndexer objects") @@ -359,6 +362,8 @@ class BasicIndexer(ExplicitIndexer): indexed with an integer are dropped from the result. """ + __slots__ = () + def __init__(self, key): if not isinstance(key, tuple): raise TypeError("key must be a tuple: {!r}".format(key)) @@ -389,6 +394,8 @@ class OuterIndexer(ExplicitIndexer): indexing works like MATLAB/Fortran. """ + __slots__ = () + def __init__(self, key): if not isinstance(key, tuple): raise TypeError("key must be a tuple: {!r}".format(key)) @@ -432,6 +439,8 @@ class VectorizedIndexer(ExplicitIndexer): https://github.com/numpy/numpy/pull/6256 """ + __slots__ = () + def __init__(self, key): if not isinstance(key, tuple): raise TypeError("key must be a tuple: {!r}".format(key)) @@ -468,10 +477,15 @@ def __init__(self, key): class ExplicitlyIndexed: - """Mixin to mark support for Indexer subclasses in indexing.""" + """Mixin to mark support for Indexer subclasses in indexing. 
+ """ + + __slots__ = () class ExplicitlyIndexedNDArrayMixin(utils.NDArrayMixin, ExplicitlyIndexed): + __slots__ = () + def __array__(self, dtype=None): key = BasicIndexer((slice(None),) * self.ndim) return np.asarray(self[key], dtype=dtype) @@ -480,6 +494,8 @@ def __array__(self, dtype=None): class ImplicitToExplicitIndexingAdapter(utils.NDArrayMixin): """Wrap an array, converting tuples into the indicated explicit indexer.""" + __slots__ = ("array", "indexer_cls") + def __init__(self, array, indexer_cls=BasicIndexer): self.array = as_indexable(array) self.indexer_cls = indexer_cls @@ -502,6 +518,8 @@ class LazilyOuterIndexedArray(ExplicitlyIndexedNDArrayMixin): """Wrap an array to make basic and outer indexing lazy. """ + __slots__ = ("array", "key") + def __init__(self, array, key=None): """ Parameters @@ -577,6 +595,8 @@ class LazilyVectorizedIndexedArray(ExplicitlyIndexedNDArrayMixin): """Wrap an array to make vectorized indexing lazy. """ + __slots__ = ("array", "key") + def __init__(self, array, key): """ Parameters @@ -631,6 +651,8 @@ def _wrap_numpy_scalars(array): class CopyOnWriteArray(ExplicitlyIndexedNDArrayMixin): + __slots__ = ("array", "_copied") + def __init__(self, array): self.array = as_indexable(array) self._copied = False @@ -655,6 +677,8 @@ def __setitem__(self, key, value): class MemoryCachedArray(ExplicitlyIndexedNDArrayMixin): + __slots__ = ("array",) + def __init__(self, array): self.array = _wrap_numpy_scalars(as_indexable(array)) @@ -783,18 +807,24 @@ def _combine_indexers(old_key, shape, new_key): ) -class IndexingSupport: # could inherit from enum.Enum on Python 3 +@enum.unique +class IndexingSupport(enum.Enum): # for backends that support only basic indexer - BASIC = "BASIC" + BASIC = 0 # for backends that support basic / outer indexer - OUTER = "OUTER" + OUTER = 1 # for backends that support outer indexer including at most 1 vector. - OUTER_1VECTOR = "OUTER_1VECTOR" + OUTER_1VECTOR = 2 # for backends that support full vectorized indexer. - VECTORIZED = "VECTORIZED" + VECTORIZED = 3 -def explicit_indexing_adapter(key, shape, indexing_support, raw_indexing_method): +def explicit_indexing_adapter( + key: ExplicitIndexer, + shape: Tuple[int, ...], + indexing_support: IndexingSupport, + raw_indexing_method: Callable, +) -> Any: """Support explicit indexing by delegating to a raw indexing method. 
Outer and/or vectorized indexers are supported by indexing a second time @@ -824,7 +854,9 @@ def explicit_indexing_adapter(key, shape, indexing_support, raw_indexing_method) return result -def decompose_indexer(indexer, shape, indexing_support): +def decompose_indexer( + indexer: ExplicitIndexer, shape: Tuple[int, ...], indexing_support: IndexingSupport +) -> Tuple[ExplicitIndexer, ExplicitIndexer]: if isinstance(indexer, VectorizedIndexer): return _decompose_vectorized_indexer(indexer, shape, indexing_support) if isinstance(indexer, (BasicIndexer, OuterIndexer)): @@ -848,7 +880,11 @@ def _decompose_slice(key, size): return slice(start, stop, -step), slice(None, None, -1) -def _decompose_vectorized_indexer(indexer, shape, indexing_support): +def _decompose_vectorized_indexer( + indexer: VectorizedIndexer, + shape: Tuple[int, ...], + indexing_support: IndexingSupport, +) -> Tuple[ExplicitIndexer, ExplicitIndexer]: """ Decompose vectorized indexer to the successive two indexers, where the first indexer will be used to index backend arrays, while the second one @@ -884,45 +920,49 @@ def _decompose_vectorized_indexer(indexer, shape, indexing_support): if indexing_support is IndexingSupport.VECTORIZED: return indexer, BasicIndexer(()) - backend_indexer = [] - np_indexer = [] + backend_indexer_elems = [] + np_indexer_elems = [] # convert negative indices - indexer = [ + indexer_elems = [ np.where(k < 0, k + s, k) if isinstance(k, np.ndarray) else k for k, s in zip(indexer.tuple, shape) ] - for k, s in zip(indexer, shape): + for k, s in zip(indexer_elems, shape): if isinstance(k, slice): # If it is a slice, then we will slice it as-is # (but make its step positive) in the backend, # and then use all of it (slice(None)) for the in-memory portion. bk_slice, np_slice = _decompose_slice(k, s) - backend_indexer.append(bk_slice) - np_indexer.append(np_slice) + backend_indexer_elems.append(bk_slice) + np_indexer_elems.append(np_slice) else: # If it is a (multidimensional) np.ndarray, just pickup the used # keys without duplication and store them as a 1d-np.ndarray. oind, vind = np.unique(k, return_inverse=True) - backend_indexer.append(oind) - np_indexer.append(vind.reshape(*k.shape)) + backend_indexer_elems.append(oind) + np_indexer_elems.append(vind.reshape(*k.shape)) - backend_indexer = OuterIndexer(tuple(backend_indexer)) - np_indexer = VectorizedIndexer(tuple(np_indexer)) + backend_indexer = OuterIndexer(tuple(backend_indexer_elems)) + np_indexer = VectorizedIndexer(tuple(np_indexer_elems)) if indexing_support is IndexingSupport.OUTER: return backend_indexer, np_indexer # If the backend does not support outer indexing, # backend_indexer (OuterIndexer) is also decomposed. 
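A minimal sketch (not part of the patch) of what the decomposition described above does for a backend that only supports basic indexing; it uses the internal xarray.core.indexing names typed in this hunk, and the example array shape and index values are invented for illustration:

import numpy as np
from xarray.core.indexing import IndexingSupport, OuterIndexer, decompose_indexer

# An outer indexer picking rows 0, 2, 2 and every column of a (5, 4) array.
indexer = OuterIndexer((np.array([0, 2, 2]), slice(None)))

# For a backend limited to basic indexing, the first (backend) indexer is made
# of slices, and the second one is applied in memory to the loaded block to
# reproduce the requested selection.
backend_indexer, np_indexer = decompose_indexer(
    indexer, shape=(5, 4), indexing_support=IndexingSupport.BASIC
)
print(backend_indexer)  # slices covering rows 0-2 and all columns
print(np_indexer)       # duplicates/reorders rows within the loaded block
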
- backend_indexer, np_indexer1 = _decompose_outer_indexer( + backend_indexer1, np_indexer1 = _decompose_outer_indexer( backend_indexer, shape, indexing_support ) np_indexer = _combine_indexers(np_indexer1, shape, np_indexer) - return backend_indexer, np_indexer + return backend_indexer1, np_indexer -def _decompose_outer_indexer(indexer, shape, indexing_support): +def _decompose_outer_indexer( + indexer: Union[BasicIndexer, OuterIndexer], + shape: Tuple[int, ...], + indexing_support: IndexingSupport, +) -> Tuple[ExplicitIndexer, ExplicitIndexer]: """ Decompose outer indexer to the successive two indexers, where the first indexer will be used to index backend arrays, while the second one @@ -930,7 +970,7 @@ def _decompose_outer_indexer(indexer, shape, indexing_support): Parameters ---------- - indexer: VectorizedIndexer + indexer: OuterIndexer or BasicIndexer indexing_support: One of the entries of IndexingSupport Returns @@ -968,7 +1008,7 @@ def _decompose_outer_indexer(indexer, shape, indexing_support): pos_indexer.append(k + s) else: pos_indexer.append(k) - indexer = pos_indexer + indexer_elems = pos_indexer if indexing_support is IndexingSupport.OUTER_1VECTOR: # some backends such as h5py supports only 1 vector in indexers @@ -977,11 +1017,11 @@ def _decompose_outer_indexer(indexer, shape, indexing_support): (np.max(k) - np.min(k) + 1.0) / len(np.unique(k)) if isinstance(k, np.ndarray) else 0 - for k in indexer + for k in indexer_elems ] array_index = np.argmax(np.array(gains)) if len(gains) > 0 else None - for i, (k, s) in enumerate(zip(indexer, shape)): + for i, (k, s) in enumerate(zip(indexer_elems, shape)): if isinstance(k, np.ndarray) and i != array_index: # np.ndarray key is converted to slice that covers the entire # entries of this key. @@ -1002,7 +1042,7 @@ def _decompose_outer_indexer(indexer, shape, indexing_support): return (OuterIndexer(tuple(backend_indexer)), OuterIndexer(tuple(np_indexer))) if indexing_support == IndexingSupport.OUTER: - for k, s in zip(indexer, shape): + for k, s in zip(indexer_elems, shape): if isinstance(k, slice): # slice: convert positive step slice for backend bk_slice, np_slice = _decompose_slice(k, s) @@ -1024,7 +1064,7 @@ def _decompose_outer_indexer(indexer, shape, indexing_support): # basic indexer assert indexing_support == IndexingSupport.BASIC - for k, s in zip(indexer, shape): + for k, s in zip(indexer_elems, shape): if isinstance(k, np.ndarray): # np.ndarray key is converted to slice that covers the entire # entries of this key. @@ -1076,19 +1116,30 @@ def _logical_any(args): return functools.reduce(operator.or_, args) -def _masked_result_drop_slice(key, chunks_hint=None): +def _masked_result_drop_slice(key, data=None): + key = (k for k in key if not isinstance(k, slice)) - if chunks_hint is not None: - key = [ - _dask_array_with_chunks_hint(k, chunks_hint) - if isinstance(k, np.ndarray) - else k - for k in key - ] - return _logical_any(k == -1 for k in key) + chunks_hint = getattr(data, "chunks", None) + + new_keys = [] + for k in key: + if isinstance(k, np.ndarray): + if isinstance(data, dask_array_type): + new_keys.append(_dask_array_with_chunks_hint(k, chunks_hint)) + elif isinstance(data, sparse_array_type): + import sparse + + new_keys.append(sparse.COO.from_numpy(k)) + else: + new_keys.append(k) + else: + new_keys.append(k) + + mask = _logical_any(k == -1 for k in new_keys) + return mask -def create_mask(indexer, shape, chunks_hint=None): +def create_mask(indexer, shape, data=None): """Create a mask for indexing with a fill-value. 
Parameters @@ -1098,25 +1149,24 @@ def create_mask(indexer, shape, chunks_hint=None): the result that should be masked. shape : tuple Shape of the array being indexed. - chunks_hint : tuple, optional - Optional tuple indicating desired chunks for the result. If provided, - used as a hint for chunks on the resulting dask. Must have a hint for - each dimension on the result array. + data : optional + Data for which mask is being created. If data is a dask arrays, its chunks + are used as a hint for chunks on the resulting mask. If data is a sparse + array, the returned mask is also a sparse array. Returns ------- - mask : bool, np.ndarray or dask.array.Array with dtype=bool - Dask array if chunks_hint is provided, otherwise a NumPy array. Has the - same shape as the indexing result. + mask : bool, np.ndarray, SparseArray or dask.array.Array with dtype=bool + Same type as data. Has the same shape as the indexing result. """ if isinstance(indexer, OuterIndexer): key = _outer_to_vectorized_indexer(indexer, shape).tuple assert not any(isinstance(k, slice) for k in key) - mask = _masked_result_drop_slice(key, chunks_hint) + mask = _masked_result_drop_slice(key, data) elif isinstance(indexer, VectorizedIndexer): key = indexer.tuple - base_mask = _masked_result_drop_slice(key, chunks_hint) + base_mask = _masked_result_drop_slice(key, data) slice_shape = tuple( np.arange(*k.indices(size)).size for k, size in zip(key, shape) @@ -1189,6 +1239,8 @@ def posify_mask_indexer(indexer): class NumpyIndexingAdapter(ExplicitlyIndexedNDArrayMixin): """Wrap a NumPy array to use explicit indexing.""" + __slots__ = ("array",) + def __init__(self, array): # In NumpyIndexingAdapter we only allow to store bare np.ndarray if not isinstance(array, np.ndarray): @@ -1239,6 +1291,8 @@ def __setitem__(self, key, value): class NdArrayLikeIndexingAdapter(NumpyIndexingAdapter): + __slots__ = ("array",) + def __init__(self, array): if not hasattr(array, "__array_function__"): raise TypeError( @@ -1251,6 +1305,8 @@ def __init__(self, array): class DaskIndexingAdapter(ExplicitlyIndexedNDArrayMixin): """Wrap a dask array to support explicit indexing.""" + __slots__ = ("array",) + def __init__(self, array): """ This adapter is created in Variable.__getitem__ in Variable._broadcast_indexes. @@ -1292,6 +1348,8 @@ class PandasIndexAdapter(ExplicitlyIndexedNDArrayMixin): """Wrap a pandas.Index to preserve dtypes and handle explicit indexing. """ + __slots__ = ("array", "_dtype") + def __init__(self, array: Any, dtype: DTypeLike = None): self.array = utils.safe_cast_to_index(array) if dtype is None: diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 882667dbaaa..6dba659f992 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -1,5 +1,6 @@ from collections import OrderedDict from typing import ( + TYPE_CHECKING, Any, Dict, Hashable, @@ -11,7 +12,6 @@ Set, Tuple, Union, - TYPE_CHECKING, ) import pandas as pd @@ -44,6 +44,7 @@ "broadcast_equals": 2, "minimal": 3, "no_conflicts": 4, + "override": 5, } ) @@ -70,8 +71,8 @@ class MergeError(ValueError): # TODO: move this to an xarray.exceptions module? -def unique_variable(name, variables, compat="broadcast_equals"): - # type: (Any, List[Variable], str) -> Variable +def unique_variable(name, variables, compat="broadcast_equals", equals=None): + # type: (Any, List[Variable], str, bool) -> Variable """Return the unique variable from a list of variables or raise MergeError. 
Parameters @@ -81,8 +82,10 @@ def unique_variable(name, variables, compat="broadcast_equals"): variables : list of xarray.Variable List of Variable objects, all of which go by the same name in different inputs. - compat : {'identical', 'equals', 'broadcast_equals', 'no_conflicts'}, optional + compat : {'identical', 'equals', 'broadcast_equals', 'no_conflicts', 'override'}, optional Type of equality check to use. + equals: None or bool, + corresponding to result of compat test Returns ------- @@ -93,30 +96,38 @@ def unique_variable(name, variables, compat="broadcast_equals"): MergeError: if any of the variables are not equal. """ # noqa out = variables[0] - if len(variables) > 1: - combine_method = None - if compat == "minimal": - compat = "broadcast_equals" + if len(variables) == 1 or compat == "override": + return out + + combine_method = None + + if compat == "minimal": + compat = "broadcast_equals" + + if compat == "broadcast_equals": + dim_lengths = broadcast_dimension_size(variables) + out = out.set_dims(dim_lengths) - if compat == "broadcast_equals": - dim_lengths = broadcast_dimension_size(variables) - out = out.set_dims(dim_lengths) + if compat == "no_conflicts": + combine_method = "fillna" - if compat == "no_conflicts": - combine_method = "fillna" + if equals is None: + out = out.compute() + for var in variables[1:]: + equals = getattr(out, compat)(var) + if not equals: + break + + if not equals: + raise MergeError( + "conflicting values for variable %r on objects to be combined. You can skip this check by specifying compat='override'." + % (name) + ) + if combine_method: for var in variables[1:]: - if not getattr(out, compat)(var): - raise MergeError( - "conflicting values for variable %r on " - "objects to be combined:\n" - "first value: %r\nsecond value: %r" % (name, out, var) - ) - if combine_method: - # TODO: add preservation of attrs into fillna - out = getattr(out, combine_method)(var) - out.attrs = var.attrs + out = getattr(out, combine_method)(var) return out @@ -152,7 +163,7 @@ def merge_variables( priority_vars : mapping with Variable or None values, optional If provided, variables are always taken from this dict in preference to the input variable dictionaries, without checking for conflicts. - compat : {'identical', 'equals', 'broadcast_equals', 'minimal', 'no_conflicts'}, optional + compat : {'identical', 'equals', 'broadcast_equals', 'minimal', 'no_conflicts', 'override'}, optional Type of equality check to use when checking for conflicts. Returns @@ -449,7 +460,7 @@ def merge_core( ---------- objs : list of mappings All values must be convertable to labeled arrays. - compat : {'identical', 'equals', 'broadcast_equals', 'no_conflicts'}, optional + compat : {'identical', 'equals', 'broadcast_equals', 'no_conflicts', 'override'}, optional Compatibility checks to use when merging variables. join : {'outer', 'inner', 'left', 'right'}, optional How to combine objects with different indexes. @@ -519,7 +530,7 @@ def merge(objects, compat="no_conflicts", join="outer", fill_value=dtypes.NA): objects : Iterable[Union[xarray.Dataset, xarray.DataArray, dict]] Merge together all variables from these objects. If any of them are DataArray objects, they must have a name. 
- compat : {'identical', 'equals', 'broadcast_equals', 'no_conflicts'}, optional + compat : {'identical', 'equals', 'broadcast_equals', 'no_conflicts', 'override'}, optional String indicating how to compare variables of the same name for potential conflicts: @@ -531,6 +542,7 @@ def merge(objects, compat="no_conflicts", join="outer", fill_value=dtypes.NA): - 'no_conflicts': only values which are not null in both datasets must be equal. The returned dataset then contains the combination of all non-null values. + - 'override': skip comparing and pick variable from first dataset join : {'outer', 'inner', 'left', 'right', 'exact'}, optional String indicating how to combine differing indexes in objects. diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py index 9ba4eae29ae..17240faf007 100644 --- a/xarray/core/nanops.py +++ b/xarray/core/nanops.py @@ -88,38 +88,21 @@ def nanmax(a, axis=None, out=None): def nanargmin(a, axis=None): - fill_value = dtypes.get_pos_infinity(a.dtype) if a.dtype.kind == "O": + fill_value = dtypes.get_pos_infinity(a.dtype) return _nan_argminmax_object("argmin", fill_value, a, axis=axis) - a, mask = _replace_nan(a, fill_value) - if isinstance(a, dask_array_type): - res = dask_array.argmin(a, axis=axis) - else: - res = np.argmin(a, axis=axis) - if mask is not None: - mask = mask.all(axis=axis) - if mask.any(): - raise ValueError("All-NaN slice encountered") - return res + module = dask_array if isinstance(a, dask_array_type) else nputils + return module.nanargmin(a, axis=axis) def nanargmax(a, axis=None): - fill_value = dtypes.get_neg_infinity(a.dtype) if a.dtype.kind == "O": + fill_value = dtypes.get_neg_infinity(a.dtype) return _nan_argminmax_object("argmax", fill_value, a, axis=axis) - a, mask = _replace_nan(a, fill_value) - if isinstance(a, dask_array_type): - res = dask_array.argmax(a, axis=axis) - else: - res = np.argmax(a, axis=axis) - - if mask is not None: - mask = mask.all(axis=axis) - if mask.any(): - raise ValueError("All-NaN slice encountered") - return res + module = dask_array if isinstance(a, dask_array_type) else nputils + return module.nanargmax(a, axis=axis) def nansum(a, axis=None, dtype=None, out=None, min_count=None): diff --git a/xarray/core/npcompat.py b/xarray/core/npcompat.py index ecaadae726e..22c14d9ff40 100644 --- a/xarray/core/npcompat.py +++ b/xarray/core/npcompat.py @@ -29,10 +29,10 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
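A small usage sketch of the compat='override' option documented for merge above; the dataset contents are invented for illustration:

import xarray as xr

ds1 = xr.Dataset({"a": ("x", [1, 2, 3])})
ds2 = xr.Dataset({"a": ("x", [10, 20, 30]), "b": ("x", [4, 5, 6])})

# The default compat="no_conflicts" would raise a MergeError because the
# values of "a" conflict; compat="override" skips the check and keeps the
# variable from the first dataset.
merged = xr.merge([ds1, ds2], compat="override")
print(merged["a"].values)  # expected to be [1, 2, 3], taken from ds1
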
import builtins +import operator from distutils.version import LooseVersion from typing import Union -import operator import numpy as np try: diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py index a9971e7125a..df36c98f94c 100644 --- a/xarray/core/nputils.py +++ b/xarray/core/nputils.py @@ -209,6 +209,7 @@ def f(values, axis=None, **kwargs): if ( _USE_BOTTLENECK + and isinstance(values, np.ndarray) and bn_func is not None and not isinstance(axis, tuple) and values.dtype.kind in "uifc" @@ -236,3 +237,5 @@ def f(values, axis=None, **kwargs): nanprod = _create_bottleneck_method("nanprod") nancumsum = _create_bottleneck_method("nancumsum") nancumprod = _create_bottleneck_method("nancumprod") +nanargmin = _create_bottleneck_method("nanargmin") +nanargmax = _create_bottleneck_method("nanargmax") diff --git a/xarray/core/pdcompat.py b/xarray/core/pdcompat.py index 654a43b505e..91998482e3e 100644 --- a/xarray/core/pdcompat.py +++ b/xarray/core/pdcompat.py @@ -38,10 +38,10 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. from distutils.version import LooseVersion + import numpy as np import pandas as pd - # allow ourselves to type checks for Panel even after it's removed if LooseVersion(pd.__version__) < "0.25.0": Panel = pd.Panel diff --git a/xarray/core/resample.py b/xarray/core/resample.py index de70ebb6950..1f2e5c0be43 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -1,5 +1,5 @@ from . import ops -from .groupby import DEFAULT_DIMS, DataArrayGroupBy, DatasetGroupBy +from .groupby import DataArrayGroupBy, DatasetGroupBy RESAMPLE_DIM = "__resample_dim__" @@ -307,9 +307,6 @@ def reduce(self, func, dim=None, keep_attrs=None, **kwargs): Array with summarized data and the indicated dimension(s) removed. """ - if dim == DEFAULT_DIMS: - dim = None - return super().reduce(func, dim, keep_attrs, **kwargs) diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 592cae9007e..a812e7472ca 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -43,7 +43,8 @@ class Rolling: DataArray.rolling """ - _attributes = ["window", "min_periods", "center", "dim"] + __slots__ = ("obj", "window", "min_periods", "center", "dim") + _attributes = ("window", "min_periods", "center", "dim") def __init__(self, obj, windows, min_periods=None, center=False): """ @@ -93,17 +94,17 @@ def __init__(self, obj, windows, min_periods=None, center=False): # attributes self.window = window + if min_periods is not None and min_periods <= 0: + raise ValueError("min_periods must be greater than zero or None") self.min_periods = min_periods - if min_periods is None: - self._min_periods = window - else: - if min_periods <= 0: - raise ValueError("min_periods must be greater than zero or None") - self._min_periods = min_periods self.center = center self.dim = dim + @property + def _min_periods(self): + return self.min_periods if self.min_periods is not None else self.window + def __repr__(self): """provide a nice str repr of our rolling object""" @@ -152,6 +153,8 @@ def count(self): class DataArrayRolling(Rolling): + __slots__ = ("window_labels",) + def __init__(self, obj, windows, min_periods=None, center=False): """ Moving window object for DataArray. @@ -381,6 +384,8 @@ def _numpy_or_bottleneck_reduce( class DatasetRolling(Rolling): + __slots__ = ("rollings",) + def __init__(self, obj, windows, min_periods=None, center=False): """ Moving window object for Dataset. 
@@ -516,7 +521,8 @@ class Coarsen: DataArray.coarsen """ - _attributes = ["windows", "side", "trim_excess"] + __slots__ = ("obj", "boundary", "coord_func", "windows", "side", "trim_excess") + _attributes = ("windows", "side", "trim_excess") def __init__(self, obj, windows, boundary, side, coord_func): """ @@ -569,6 +575,8 @@ def __repr__(self): class DataArrayCoarsen(Coarsen): + __slots__ = () + @classmethod def _reduce_method(cls, func): """ @@ -599,6 +607,8 @@ def wrapped_func(self, **kwargs): class DatasetCoarsen(Coarsen): + __slots__ = () + @classmethod def _reduce_method(cls, func): """ diff --git a/xarray/core/utils.py b/xarray/core/utils.py index ba478686d61..0d730edeaeb 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -29,27 +29,18 @@ import numpy as np import pandas as pd -from .pycompat import dask_array_type - - K = TypeVar("K") V = TypeVar("V") T = TypeVar("T") -def _check_inplace(inplace: Optional[bool], default: bool = False) -> bool: - if inplace is None: - inplace = default - else: - warnings.warn( - "The inplace argument has been deprecated and will be " - "removed in a future version of xarray.", - FutureWarning, - stacklevel=3, +def _check_inplace(inplace: Optional[bool]) -> None: + if inplace is not None: + raise TypeError( + "The `inplace` argument has been removed from xarray. " + "You can achieve an identical effect with python's standard assignment." ) - return inplace - def alias_message(old_name: str, new_name: str) -> str: return "%s has been deprecated. Use %s instead." % (old_name, new_name) @@ -276,16 +267,20 @@ def either_dict_or_kwargs( return cast(Mapping[Hashable, T], kw_kwargs) -def is_scalar(value: Any) -> bool: +def is_scalar(value: Any, include_0d: bool = True) -> bool: """Whether to treat a value as a scalar. Any non-iterable, string, or 0-D array """ + from .variable import NON_NUMPY_SUPPORTED_ARRAY_TYPES + + if include_0d: + include_0d = getattr(value, "ndim", None) == 0 return ( - getattr(value, "ndim", None) == 0 + include_0d or isinstance(value, (str, bytes)) or not ( - isinstance(value, (Iterable,) + dask_array_type) + isinstance(value, (Iterable,) + NON_NUMPY_SUPPORTED_ARRAY_TYPES) or hasattr(value, "__array_function__") ) ) @@ -381,7 +376,7 @@ class Frozen(Mapping[K, V]): saved under the `mapping` attribute. """ - __slots__ = ["mapping"] + __slots__ = ("mapping",) def __init__(self, mapping: Mapping[K, V]): self.mapping = mapping @@ -412,7 +407,7 @@ class SortedKeysDict(MutableMapping[K, V]): mapping. """ - __slots__ = ["mapping"] + __slots__ = ("mapping",) def __init__(self, mapping: MutableMapping[K, V] = None): self.mapping = {} if mapping is None else mapping @@ -446,6 +441,8 @@ class OrderedSet(MutableSet[T]): elements, like an OrderedDict. """ + __slots__ = ("_ordered_dict",) + def __init__(self, values: AbstractSet[T] = None): self._ordered_dict = OrderedDict() # type: MutableMapping[T, None] if values is not None: @@ -486,6 +483,8 @@ class NdimSizeLenMixin: one that also defines ``ndim``, ``size`` and ``__len__``. """ + __slots__ = () + @property def ndim(self: Any) -> int: return len(self.shape) @@ -510,6 +509,8 @@ class NDArrayMixin(NdimSizeLenMixin): `dtype`, `shape` and `__getitem__`. """ + __slots__ = () + @property def dtype(self: Any) -> np.dtype: return self.array.dtype @@ -623,6 +624,8 @@ class HiddenKeyDict(MutableMapping[K, V]): """Acts like a normal dictionary, but hides certain keys. """ + __slots__ = ("_data", "_hidden_keys") + # ``__init__`` method required to create instance from class. 
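Since the reworked _check_inplace above now raises a TypeError for any non-None inplace argument, the replacement pattern is plain reassignment; a minimal sketch with invented data:

import xarray as xr

ds = xr.Dataset({"a": ("x", [1, 2, 3])}, coords={"c": 42})

# Previously: ds.reset_coords(inplace=True)  -- this now raises TypeError.
# The supported spelling is ordinary assignment of the returned object:
ds = ds.reset_coords()
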
def __init__(self, data: MutableMapping[K, V], hidden_keys: Iterable[K]): diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 4c095f3a062..b4b01f7ee49 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -3,7 +3,7 @@ from collections import OrderedDict, defaultdict from datetime import timedelta from distutils.version import LooseVersion -from typing import Any, Hashable, Mapping, MutableMapping, Union +from typing import Any, Hashable, Mapping, Union import numpy as np import pandas as pd @@ -18,9 +18,9 @@ VectorizedIndexer, as_indexable, ) +from .npcompat import IS_NEP18_ACTIVE from .options import _get_keep_attrs from .pycompat import dask_array_type, integer_types -from .npcompat import IS_NEP18_ACTIVE from .utils import ( OrderedSet, decode_numpy_dict_values, @@ -267,6 +267,8 @@ class Variable( they can use more complete metadata in context of coordinate labels. """ + __slots__ = ("_dims", "_data", "_attrs", "_encoding") + def __init__(self, dims, data, attrs=None, encoding=None, fastpath=False): """ Parameters @@ -710,8 +712,7 @@ def _getitem_with_mask(self, key, fill_value=dtypes.NA): actual_indexer = indexer data = as_indexable(self._data)[actual_indexer] - chunks_hint = getattr(data, "chunks", None) - mask = indexing.create_mask(indexer, self.shape, chunks_hint) + mask = indexing.create_mask(indexer, self.shape, data) data = duck_array_ops.where(mask, fill_value, data) else: # array cannot be indexed along dimensions of size 0, so just @@ -1225,16 +1226,6 @@ def transpose(self, *dims) -> "Variable": def T(self) -> "Variable": return self.transpose() - def expand_dims(self, *args): - import warnings - - warnings.warn( - "Variable.expand_dims is deprecated: use " "Variable.set_dims instead", - DeprecationWarning, - stacklevel=2, - ) - return self.expand_dims(*args) - def set_dims(self, dims, shape=None): """Return a new variable with given set of dimensions. This method might be used to attach new dimension(s) to variable. @@ -1601,7 +1592,7 @@ def no_conflicts(self, other): """ return self.broadcast_equals(other, equiv=duck_array_ops.array_notnull_equiv) - def quantile(self, q, dim=None, interpolation="linear"): + def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): """Compute the qth quantile of the data along the specified dimension. Returns the qth quantiles(s) of the array elements. @@ -1624,6 +1615,10 @@ def quantile(self, q, dim=None, interpolation="linear"): * higher: ``j``. * nearest: ``i`` or ``j``, whichever is nearest. * midpoint: ``(i + j) / 2``. + keep_attrs : bool, optional + If True, the variable's attributes (`attrs`) will be copied from + the original object to the new one. If False (default), the new + object will be returned without attributes. Returns ------- @@ -1632,7 +1627,7 @@ def quantile(self, q, dim=None, interpolation="linear"): is a scalar. If multiple percentiles are given, first axis of the result corresponds to the quantile and a quantile dimension is added to the return array. The other dimensions are the - dimensions that remain after the reduction of the array. + dimensions that remain after the reduction of the array. 
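The keep_attrs parameter documented above behaves as in this short sketch; the attribute name and values are arbitrary examples:

import xarray as xr

da = xr.DataArray([1.0, 2.0, 3.0, 4.0], dims="x", attrs={"units": "m"})

print(da.quantile(0.5).attrs)                   # {} -- attrs dropped by default
print(da.quantile(0.5, keep_attrs=True).attrs)  # expected: {'units': 'm'}
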
See Also -------- @@ -1660,14 +1655,19 @@ def quantile(self, q, dim=None, interpolation="linear"): axis = None new_dims = [] - # only add the quantile dimension if q is array like + # Only add the quantile dimension if q is array-like if q.ndim != 0: new_dims = ["quantile"] + new_dims qs = np.nanpercentile( self.data, q * 100.0, axis=axis, interpolation=interpolation ) - return Variable(new_dims, qs) + + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) + attrs = self._attrs if keep_attrs else None + + return Variable(new_dims, qs, attrs) def rank(self, dim, pct=False): """Ranks the data. @@ -1697,18 +1697,24 @@ def rank(self, dim, pct=False): """ import bottleneck as bn - if isinstance(self.data, dask_array_type): + data = self.data + + if isinstance(data, dask_array_type): raise TypeError( "rank does not work for arrays stored as dask " "arrays. Load the data via .compute() or .load() " "prior to calling this method." ) + elif not isinstance(data, np.ndarray): + raise TypeError( + "rank is not implemented for {} objects.".format(type(data)) + ) axis = self.get_axis_num(dim) func = bn.nanrankdata if self.dtype.kind == "f" else bn.rankdata - ranked = func(self.data, axis=axis) + ranked = func(data, axis=axis) if pct: - count = np.sum(~np.isnan(self.data), axis=axis, keepdims=True) + count = np.sum(~np.isnan(data), axis=axis, keepdims=True) ranked /= count return Variable(self.dims, ranked) @@ -1931,6 +1937,8 @@ class IndexVariable(Variable): unless another name is given. """ + __slots__ = () + def __init__(self, dims, data, attrs=None, encoding=None, fastpath=False): super().__init__(dims, data, attrs, encoding, fastpath) if self.ndim != 1: diff --git a/xarray/plot/facetgrid.py b/xarray/plot/facetgrid.py index 79f94077c8f..ec51ff26c07 100644 --- a/xarray/plot/facetgrid.py +++ b/xarray/plot/facetgrid.py @@ -67,7 +67,6 @@ class FacetGrid: Contains dictionaries mapping coordinate names to values. None is used as a sentinel value for axes which should remain empty, ie. sometimes the bottom right grid - """ def __init__( diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py index 14f03d42fe7..8ca62ef58f1 100644 --- a/xarray/plot/plot.py +++ b/xarray/plot/plot.py @@ -452,6 +452,8 @@ class _PlotMethods: For example, DataArray.plot.imshow """ + __slots__ = ("_da",) + def __init__(self, darray): self._da = darray diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index 2d50734f519..f69a8af7a2f 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -731,17 +731,15 @@ def _process_cmap_cbar_kwargs( # colors is only valid when levels is supplied or the plot is of type # contour or contourf - if colors and (("contour" not in func.__name__) and (not levels)): + if colors and (("contour" not in func.__name__) and (levels is None)): raise ValueError("Can only specify colors with contour or levels") # we should not be getting a list of colors in cmap anymore # is there a better way to do this test? if isinstance(cmap, (list, tuple)): - warnings.warn( + raise ValueError( "Specifying a list of colors in cmap is deprecated. " - "Use colors keyword instead.", - DeprecationWarning, - stacklevel=3, + "Use colors keyword instead." 
) cmap_kwargs = { diff --git a/xarray/testing.py b/xarray/testing.py index 3c92eef04c6..9fa58b64001 100644 --- a/xarray/testing.py +++ b/xarray/testing.py @@ -5,12 +5,11 @@ import numpy as np import pandas as pd -from xarray.core import duck_array_ops -from xarray.core import formatting +from xarray.core import duck_array_ops, formatting from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset -from xarray.core.variable import IndexVariable, Variable from xarray.core.indexes import default_indexes +from xarray.core.variable import IndexVariable, Variable def _decode_string_data(data): @@ -198,8 +197,6 @@ def _assert_dataarray_invariants(da: DataArray): if da._indexes is not None: _assert_indexes_invariants_checks(da._indexes, da._coords, da.dims) - assert da._initialized is True - def _assert_dataset_invariants(ds: Dataset): assert isinstance(ds._variables, OrderedDict), type(ds._variables) @@ -236,7 +233,6 @@ def _assert_dataset_invariants(ds: Dataset): assert isinstance(ds._encoding, (type(None), dict)) assert isinstance(ds._attrs, (type(None), OrderedDict)) - assert ds._initialized is True def _assert_internal_invariants(xarray_obj: Union[DataArray, Dataset, Variable],): diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 044ba75e87f..ab1d2714b9d 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -1,4 +1,5 @@ import importlib +import platform import re import warnings from contextlib import contextmanager @@ -32,7 +33,6 @@ except ImportError: pass -import platform arm_xfail = pytest.mark.xfail( platform.machine() == "aarch64" or "arm" in platform.machine(), @@ -84,6 +84,7 @@ def LooseVersion(vstring): has_iris, requires_iris = _importorskip("iris") has_cfgrib, requires_cfgrib = _importorskip("cfgrib") has_numbagg, requires_numbagg = _importorskip("numbagg") +has_sparse, requires_sparse = _importorskip("sparse") # some special cases has_h5netcdf07, requires_h5netcdf07 = _importorskip("h5netcdf", minversion="0.7") diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index dd102f8e2e1..f6254b32f4f 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -38,6 +38,7 @@ from xarray.tests import mock from . 
import ( + arm_xfail, assert_allclose, assert_array_equal, assert_equal, @@ -61,14 +62,13 @@ requires_scipy, requires_scipy_or_netCDF4, requires_zarr, - arm_xfail, ) from .test_coding_times import ( _ALL_CALENDARS, _NON_STANDARD_CALENDARS, _STANDARD_CALENDARS, ) -from .test_dataset import create_test_data, create_append_test_data +from .test_dataset import create_append_test_data, create_test_data try: import netCDF4 as nc4 @@ -2163,6 +2163,7 @@ def test_encoding_unlimited_dims(self): @requires_h5netcdf @requires_netCDF4 +@pytest.mark.filterwarnings("ignore:use make_scale(name) instead") class TestH5NetCDFData(NetCDF4Base): engine = "h5netcdf" @@ -2173,16 +2174,25 @@ def create_store(self): @pytest.mark.filterwarnings("ignore:complex dtypes are supported by h5py") @pytest.mark.parametrize( - "invalid_netcdf, warns, num_warns", + "invalid_netcdf, warntype, num_warns", [(None, FutureWarning, 1), (False, FutureWarning, 1), (True, None, 0)], ) - def test_complex(self, invalid_netcdf, warns, num_warns): + def test_complex(self, invalid_netcdf, warntype, num_warns): expected = Dataset({"x": ("y", np.ones(5) + 1j * np.ones(5))}) save_kwargs = {"invalid_netcdf": invalid_netcdf} - with pytest.warns(warns) as record: + with pytest.warns(warntype) as record: with self.roundtrip(expected, save_kwargs=save_kwargs) as actual: assert_equal(expected, actual) - assert len(record) == num_warns + + recorded_num_warns = 0 + if warntype: + for warning in record: + if issubclass(warning.category, warntype) and ( + "complex dtypes" in str(warning.message) + ): + recorded_num_warns += 1 + + assert recorded_num_warns == num_warns def test_cross_engine_read_write_netcdf4(self): # Drop dim3, because its labels include strings. These appear to be @@ -2451,6 +2461,7 @@ def skip_if_not_engine(engine): @requires_dask +@pytest.mark.filterwarnings("ignore:use make_scale(name) instead") def test_open_mfdataset_manyfiles( readengine, nfiles, parallel, chunks, file_cache_maxsize ): @@ -3923,6 +3934,12 @@ def test_ENVI_tags(self): assert isinstance(rioda.attrs["map_info"], str) assert isinstance(rioda.attrs["samples"], str) + def test_geotiff_tags(self): + # Create a geotiff file with some tags + with create_tmp_geotiff() as (tmp_file, _): + with xr.open_rasterio(tmp_file) as rioda: + assert isinstance(rioda.attrs["AREA_OR_POINT"], str) + def test_no_mftime(self): # rasterio can accept "filename" urguments that are actually urls, # including paths to remote files. diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index ab5ed20d531..615a7e00172 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -19,13 +19,13 @@ from xarray.testing import assert_equal from . 
import ( + arm_xfail, assert_array_equal, has_cftime, has_cftime_or_netCDF4, has_dask, requires_cftime, requires_cftime_or_netCDF4, - arm_xfail, ) try: diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index e3801d02bc8..6037669ac07 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -1,23 +1,29 @@ from collections import OrderedDict -from itertools import product from datetime import datetime +from itertools import product import numpy as np import pytest -from xarray import DataArray, Dataset, concat, combine_by_coords, combine_nested -from xarray import auto_combine +from xarray import ( + DataArray, + Dataset, + auto_combine, + combine_by_coords, + combine_nested, + concat, +) from xarray.core import dtypes from xarray.core.combine import ( - _new_tile_id, _check_shape_tile_ids, _combine_all_along_first_dim, _combine_nd, - _infer_concat_order_from_positions, _infer_concat_order_from_coords, + _infer_concat_order_from_positions, + _new_tile_id, ) -from . import assert_identical, assert_equal, raises_regex +from . import assert_equal, assert_identical, raises_regex from .test_dataset import create_test_data @@ -321,13 +327,13 @@ class TestCheckShapeTileIDs: def test_check_depths(self): ds = create_test_data(0) combined_tile_ids = {(0,): ds, (0, 1): ds} - with raises_regex(ValueError, "sub-lists do not have " "consistent depths"): + with raises_regex(ValueError, "sub-lists do not have consistent depths"): _check_shape_tile_ids(combined_tile_ids) def test_check_lengths(self): ds = create_test_data(0) combined_tile_ids = {(0, 0): ds, (0, 1): ds, (0, 2): ds, (1, 0): ds, (1, 1): ds} - with raises_regex(ValueError, "sub-lists do not have " "consistent lengths"): + with raises_regex(ValueError, "sub-lists do not have consistent lengths"): _check_shape_tile_ids(combined_tile_ids) @@ -559,11 +565,6 @@ def test_combine_concat_over_redundant_nesting(self): expected = Dataset({"x": [0]}) assert_identical(expected, actual) - def test_combine_nested_but_need_auto_combine(self): - objs = [Dataset({"x": [0, 1]}), Dataset({"x": [2], "wall": [0]})] - with raises_regex(ValueError, "cannot be combined"): - combine_nested(objs, concat_dim="x") - @pytest.mark.parametrize("fill_value", [dtypes.NA, 2, 2.0]) def test_combine_nested_fill_value(self, fill_value): datasets = [ @@ -612,7 +613,7 @@ def test_combine_by_coords(self): assert_equal(actual, expected) objs = [Dataset({"x": 0}), Dataset({"x": 1})] - with raises_regex(ValueError, "Could not find any dimension " "coordinates"): + with raises_regex(ValueError, "Could not find any dimension coordinates"): combine_by_coords(objs) objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [0]})] @@ -713,7 +714,7 @@ def test_check_for_impossible_ordering(self): @pytest.mark.filterwarnings( - "ignore:In xarray version 0.13 `auto_combine` " "will be deprecated" + "ignore:In xarray version 0.14 `auto_combine` " "will be deprecated" ) @pytest.mark.filterwarnings("ignore:Also `open_mfdataset` will no longer") @pytest.mark.filterwarnings("ignore:The datasets supplied") @@ -755,7 +756,7 @@ def test_auto_combine(self): auto_combine(objs) objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [0]})] - with pytest.raises(KeyError): + with raises_regex(ValueError, "'y' is not present in all datasets"): auto_combine(objs) def test_auto_combine_previously_failed(self): diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index 4adcc0d5c49..00428f70966 100644 --- a/xarray/tests/test_concat.py +++ 
b/xarray/tests/test_concat.py @@ -5,7 +5,7 @@ import pytest from xarray import DataArray, Dataset, Variable, concat -from xarray.core import dtypes +from xarray.core import dtypes, merge from . import ( InaccessibleArray, assert_array_equal, @@ -17,6 +17,34 @@ from .test_dataset import create_test_data +def test_concat_compat(): + ds1 = Dataset( + { + "has_x_y": (("y", "x"), [[1, 2]]), + "has_x": ("x", [1, 2]), + "no_x_y": ("z", [1, 2]), + }, + coords={"x": [0, 1], "y": [0], "z": [-1, -2]}, + ) + ds2 = Dataset( + { + "has_x_y": (("y", "x"), [[3, 4]]), + "has_x": ("x", [1, 2]), + "no_x_y": (("q", "z"), [[1, 2]]), + }, + coords={"x": [0, 1], "y": [1], "z": [-1, -2], "q": [0]}, + ) + + result = concat([ds1, ds2], dim="y", data_vars="minimal", compat="broadcast_equals") + assert_equal(ds2.no_x_y, result.no_x_y.transpose()) + + for var in ["has_x", "no_x_y"]: + assert "y" not in result[var] + + with raises_regex(ValueError, "'q' is not present in all datasets"): + concat([ds1, ds2], dim="q", data_vars="all", compat="broadcast_equals") + + class TestConcatDataset: @pytest.fixture def data(self): @@ -91,7 +119,7 @@ def test_concat_coords(self): actual = concat(objs, dim="x", coords=coords) assert_identical(expected, actual) for coords in ["minimal", []]: - with raises_regex(ValueError, "not equal across"): + with raises_regex(merge.MergeError, "conflicting values"): concat(objs, dim="x", coords=coords) def test_concat_constant_index(self): @@ -102,8 +130,10 @@ def test_concat_constant_index(self): for mode in ["different", "all", ["foo"]]: actual = concat([ds1, ds2], "y", data_vars=mode) assert_identical(expected, actual) - with raises_regex(ValueError, "not equal across datasets"): - concat([ds1, ds2], "y", data_vars="minimal") + with raises_regex(merge.MergeError, "conflicting values"): + # previously dim="y", and raised error which makes no sense. + # "foo" has dimension "y" so minimal should concatenate it? 
+ concat([ds1, ds2], "new_dim", data_vars="minimal") def test_concat_size0(self): data = create_test_data() @@ -133,6 +163,14 @@ def test_concat_errors(self): data = create_test_data() split_data = [data.isel(dim1=slice(3)), data.isel(dim1=slice(3, None))] + with raises_regex(ValueError, "must supply at least one"): + concat([], "dim1") + + with raises_regex(ValueError, "Cannot specify both .*='different'"): + concat( + [data, data], dim="concat_dim", data_vars="different", compat="override" + ) + with raises_regex(ValueError, "must supply at least one"): concat([], "dim1") @@ -145,7 +183,7 @@ def test_concat_errors(self): concat([data0, data1], "dim1", compat="identical") assert_identical(data, concat([data0, data1], "dim1", compat="equals")) - with raises_regex(ValueError, "encountered unexpected"): + with raises_regex(ValueError, "present in some datasets"): data0, data1 = deepcopy(split_data) data1["foo"] = ("bar", np.random.randn(10)) concat([data0, data1], "dim1") @@ -162,11 +200,6 @@ def test_concat_errors(self): with raises_regex(ValueError, "coordinate in some datasets but not others"): concat([Dataset({"x": 0}), Dataset({}, {"x": 1})], dim="z") - with raises_regex(ValueError, "no longer a valid"): - concat([data, data], "new_dim", mode="different") - with raises_regex(ValueError, "no longer a valid"): - concat([data, data], "new_dim", concat_over="different") - def test_concat_join_kwarg(self): ds1 = Dataset({"a": (("x", "y"), [[0]])}, coords={"x": [0], "y": [0]}) ds2 = Dataset({"a": (("x", "y"), [[0]])}, coords={"x": [1], "y": [0.0001]}) diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py index 36c1d845f8e..5d80abb4661 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_conventions.py @@ -278,6 +278,26 @@ def test_decode_cf_with_dask(self): ) assert_identical(decoded, conventions.decode_cf(original).compute()) + @requires_dask + def test_decode_dask_times(self): + original = Dataset.from_dict( + { + "coords": {}, + "dims": {"time": 5}, + "data_vars": { + "average_T1": { + "dims": ("time",), + "attrs": {"units": "days since 1958-01-01 00:00:00"}, + "data": [87659.0, 88024.0, 88389.0, 88754.0, 89119.0], + } + }, + } + ) + assert_identical( + conventions.decode_cf(original.chunk()), + conventions.decode_cf(original).chunk(), + ) + class CFEncodedInMemoryStore(WritableCFDataStore, InMemoryDataStore): def encode_variable(self, var): diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index e3fc6f65e0f..76b3ed1a8d6 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -27,14 +27,49 @@ dd = pytest.importorskip("dask.dataframe") +class CountingScheduler: + """ Simple dask scheduler counting the number of computes. + + Reference: https://stackoverflow.com/questions/53289286/ """ + + def __init__(self, max_computes=0): + self.total_computes = 0 + self.max_computes = max_computes + + def __call__(self, dsk, keys, **kwargs): + self.total_computes += 1 + if self.total_computes > self.max_computes: + raise RuntimeError( + "Too many computes. Total: %d > max: %d." + % (self.total_computes, self.max_computes) + ) + return dask.get(dsk, keys, **kwargs) + + +def _set_dask_scheduler(scheduler=dask.get): + """ Backwards compatible way of setting scheduler. 
""" + if LooseVersion(dask.__version__) >= LooseVersion("0.18.0"): + return dask.config.set(scheduler=scheduler) + return dask.set_options(get=scheduler) + + +def raise_if_dask_computes(max_computes=0): + scheduler = CountingScheduler(max_computes) + return _set_dask_scheduler(scheduler) + + +def test_raise_if_dask_computes(): + data = da.from_array(np.random.RandomState(0).randn(4, 6), chunks=(2, 2)) + with raises_regex(RuntimeError, "Too many computes"): + with raise_if_dask_computes(): + data.compute() + + class DaskTestCase: def assertLazyAnd(self, expected, actual, test): - - with ( - dask.config.set(scheduler="single-threaded") - if LooseVersion(dask.__version__) >= LooseVersion("0.18.0") - else dask.set_options(get=dask.get) - ): + with _set_dask_scheduler(dask.get): + # dask.get is the syncronous scheduler, which get's set also by + # dask.config.set(scheduler="syncronous") in current versions. test(actual, expected) if isinstance(actual, Dataset): @@ -174,7 +209,12 @@ def test_reduce(self): v = self.lazy_var self.assertLazyAndAllClose(u.mean(), v.mean()) self.assertLazyAndAllClose(u.std(), v.std()) - self.assertLazyAndAllClose(u.argmax(dim="x"), v.argmax(dim="x")) + with raise_if_dask_computes(): + actual = v.argmax(dim="x") + self.assertLazyAndAllClose(u.argmax(dim="x"), actual) + with raise_if_dask_computes(): + actual = v.argmin(dim="x") + self.assertLazyAndAllClose(u.argmin(dim="x"), actual) self.assertLazyAndAllClose((u > 1).any(), (v > 1).any()) self.assertLazyAndAllClose((u < 1).all("x"), (v < 1).all("x")) with raises_regex(NotImplementedError, "dask"): @@ -785,7 +825,6 @@ def kernel(name): """Dask kernel to test pickling/unpickling and __repr__. Must be global to make it pickleable. """ - print("kernel(%s)" % name) global kernel_call_count kernel_call_count += 1 return np.ones(1, dtype=np.int64) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 506c437c2bf..9ba3eecc5a0 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -24,12 +24,12 @@ assert_identical, raises_regex, requires_bottleneck, - requires_cftime, requires_dask, requires_iris, requires_np113, requires_numbagg, requires_scipy, + requires_sparse, source_ndarray, ) @@ -1002,63 +1002,53 @@ def test_isel_drop(self): selected = data.isel(x=0, drop=False) assert_identical(expected, selected) - @pytest.mark.filterwarnings("ignore:Dataset.isel_points") - def test_isel_points(self): - shape = (10, 5, 6) - np_array = np.random.random(shape) - da = DataArray( - np_array, dims=["time", "y", "x"], coords={"time": np.arange(0, 100, 10)} - ) - y = [1, 3] - x = [3, 0] - - expected = da.values[:, y, x] - - actual = da.isel_points(y=y, x=x, dim="test_coord") - assert actual.coords["test_coord"].shape == (len(y),) - assert list(actual.coords) == ["time"] - assert actual.dims == ("test_coord", "time") - - actual = da.isel_points(y=y, x=x) - assert "points" in actual.dims - # Note that because xarray always concatenates along the first - # dimension, We must transpose the result to match the numpy style of - # concatenation. 
- np.testing.assert_equal(actual.T, expected) - - # a few corner cases - da.isel_points(time=[1, 2], x=[2, 2], y=[3, 4]) - np.testing.assert_allclose( - da.isel_points(time=[1], x=[2], y=[4]).values.squeeze(), - np_array[1, 4, 2].squeeze(), - ) - da.isel_points(time=[1, 2]) - y = [-1, 0] - x = [-2, 2] - expected = da.values[:, y, x] - actual = da.isel_points(x=x, y=y).values - np.testing.assert_equal(actual.T, expected) - - # test that the order of the indexers doesn't matter - assert_identical(da.isel_points(y=y, x=x), da.isel_points(x=x, y=y)) - - # make sure we're raising errors in the right places - with raises_regex(ValueError, "All indexers must be the same length"): - da.isel_points(y=[1, 2], x=[1, 2, 3]) - with raises_regex(ValueError, "dimension bad_key does not exist"): - da.isel_points(bad_key=[1, 2]) - with raises_regex(TypeError, "Indexers must be integers"): - da.isel_points(y=[1.5, 2.2]) - with raises_regex(TypeError, "Indexers must be integers"): - da.isel_points(x=[1, 2, 3], y=slice(3)) - with raises_regex(ValueError, "Indexers must be 1 dimensional"): - da.isel_points(y=1, x=2) - with raises_regex(ValueError, "Existing dimension names are not"): - da.isel_points(y=[1, 2], x=[1, 2], dim="x") - - # using non string dims - actual = da.isel_points(y=[1, 2], x=[1, 2], dim=["A", "B"]) - assert "points" in actual.coords + def test_head(self): + assert_equal(self.dv.isel(x=slice(5)), self.dv.head(x=5)) + assert_equal(self.dv.isel(x=slice(0)), self.dv.head(x=0)) + assert_equal( + self.dv.isel({dim: slice(6) for dim in self.dv.dims}), self.dv.head(6) + ) + assert_equal( + self.dv.isel({dim: slice(5) for dim in self.dv.dims}), self.dv.head() + ) + with raises_regex(TypeError, "either dict-like or a single int"): + self.dv.head([3]) + with raises_regex(TypeError, "expected integer type"): + self.dv.head(x=3.1) + with raises_regex(ValueError, "expected positive int"): + self.dv.head(-3) + + def test_tail(self): + assert_equal(self.dv.isel(x=slice(-5, None)), self.dv.tail(x=5)) + assert_equal(self.dv.isel(x=slice(0)), self.dv.tail(x=0)) + assert_equal( + self.dv.isel({dim: slice(-6, None) for dim in self.dv.dims}), + self.dv.tail(6), + ) + assert_equal( + self.dv.isel({dim: slice(-5, None) for dim in self.dv.dims}), self.dv.tail() + ) + with raises_regex(TypeError, "either dict-like or a single int"): + self.dv.tail([3]) + with raises_regex(TypeError, "expected integer type"): + self.dv.tail(x=3.1) + with raises_regex(ValueError, "expected positive int"): + self.dv.tail(-3) + + def test_thin(self): + assert_equal(self.dv.isel(x=slice(None, None, 5)), self.dv.thin(x=5)) + assert_equal( + self.dv.isel({dim: slice(None, None, 6) for dim in self.dv.dims}), + self.dv.thin(6), + ) + with raises_regex(TypeError, "either dict-like or a single int"): + self.dv.thin([3]) + with raises_regex(TypeError, "expected integer type"): + self.dv.thin(x=3.1) + with raises_regex(ValueError, "expected positive int"): + self.dv.thin(-3) + with raises_regex(ValueError, "cannot be zero"): + self.dv.thin(time=0) def test_loc(self): self.ds["x"] = ("x", np.array(list("abcdefghij"))) @@ -1350,9 +1340,8 @@ def test_reset_coords(self): ) assert_identical(actual, expected) - with pytest.warns(FutureWarning, match="The inplace argument"): - with raises_regex(ValueError, "cannot reset coord"): - data = data.reset_coords(inplace=True) + with pytest.raises(TypeError): + data = data.reset_coords(inplace=True) with raises_regex(ValueError, "cannot be found"): data.reset_coords("foo", drop=True) with 
raises_regex(ValueError, "cannot be found"): @@ -1454,13 +1443,11 @@ def test_reindex_like_no_index(self): with raises_regex(ValueError, "different size for unlabeled"): foo.reindex_like(bar) - @pytest.mark.filterwarnings("ignore:Indexer has dimensions") def test_reindex_regressions(self): - # regression test for #279 - expected = DataArray(np.random.randn(5), coords=[("time", range(5))]) + da = DataArray(np.random.randn(5), coords=[("time", range(5))]) time2 = DataArray(np.arange(5), dims="time2") - actual = expected.reindex(time=time2) - assert_identical(actual, expected) + with pytest.raises(ValueError): + da.reindex(time=time2) # regression test for #736, reindex can not change complex nums dtype x = np.array([1, 2, 3], dtype=np.complex) @@ -1506,6 +1493,32 @@ def test_rename(self): renamed_kwargs = self.dv.x.rename(x="z").rename("z") assert_identical(renamed, renamed_kwargs) + def test_init_value(self): + expected = DataArray( + np.full((3, 4), 3), dims=["x", "y"], coords=[range(3), range(4)] + ) + actual = DataArray(3, dims=["x", "y"], coords=[range(3), range(4)]) + assert_identical(expected, actual) + + expected = DataArray( + np.full((1, 10, 2), 0), + dims=["w", "x", "y"], + coords={"x": np.arange(10), "y": ["north", "south"]}, + ) + actual = DataArray(0, dims=expected.dims, coords=expected.coords) + assert_identical(expected, actual) + + expected = DataArray( + np.full((10, 2), np.nan), coords=[("x", np.arange(10)), ("y", ["a", "b"])] + ) + actual = DataArray(coords=[("x", np.arange(10)), ("y", ["a", "b"])]) + assert_identical(expected, actual) + + with raises_regex(ValueError, "different number of dim"): + DataArray(np.array(1), coords={"x": np.arange(10)}, dims=["x"]) + with raises_regex(ValueError, "does not match the 0 dim"): + DataArray(np.array(1), coords=[("x", np.arange(10))]) + def test_swap_dims(self): array = DataArray(np.random.randn(3), {"y": ("x", list("abc"))}, "x") expected = DataArray(array.values, {"y": list("abc")}, dims="y") @@ -1761,10 +1774,9 @@ def test_reorder_levels(self): obj = self.mda.reorder_levels(x=["level_2", "level_1"]) assert_identical(obj, expected) - with pytest.warns(FutureWarning, match="The inplace argument"): + with pytest.raises(TypeError): array = self.mda.copy() array.reorder_levels(x=["level_2", "level_1"], inplace=True) - assert_identical(array, expected) array = DataArray([1, 2], dims="x") with pytest.raises(KeyError): @@ -2321,17 +2333,17 @@ def test_reduce_out(self): with pytest.raises(TypeError): orig.mean(out=np.ones(orig.shape)) - # skip due to bug in older versions of numpy.nanpercentile def test_quantile(self): for q in [0.25, [0.50], [0.25, 0.75]]: for axis, dim in zip( [None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]] ): - actual = self.dv.quantile(q, dim=dim) + actual = DataArray(self.va).quantile(q, dim=dim, keep_attrs=True) expected = np.nanpercentile( self.dv.values, np.array(q) * 100, axis=axis ) np.testing.assert_allclose(actual.values, expected) + assert actual.attrs == self.attrs def test_reduce_keep_attrs(self): # Test dropped attrs @@ -2487,16 +2499,6 @@ def test_groupby_sum(self): assert_allclose(expected_sum_axis1, grouped.reduce(np.sum, "y")) assert_allclose(expected_sum_axis1, grouped.sum("y")) - def test_groupby_warning(self): - array = self.make_groupby_example_array() - grouped = array.groupby("y") - with pytest.warns(FutureWarning): - grouped.sum() - - @pytest.mark.skipif( - LooseVersion(xr.__version__) < LooseVersion("0.13"), - reason="not to forget the behavior change", - ) def 
test_groupby_sum_default(self): array = self.make_groupby_example_array() grouped = array.groupby("abc") @@ -2517,7 +2519,7 @@ def test_groupby_sum_default(self): } )["foo"] - assert_allclose(expected_sum_all, grouped.sum()) + assert_allclose(expected_sum_all, grouped.sum(dim="y")) def test_groupby_count(self): array = DataArray( @@ -3433,6 +3435,19 @@ def test_to_and_from_series(self): expected_da = self.dv.rename(None) assert_identical(expected_da, DataArray.from_series(actual).drop(["x", "y"])) + @requires_sparse + def test_from_series_sparse(self): + import sparse + + series = pd.Series([1, 2], index=[("a", 1), ("b", 2)]) + + actual_sparse = DataArray.from_series(series, sparse=True) + actual_dense = DataArray.from_series(series, sparse=False) + + assert isinstance(actual_sparse.data, sparse.COO) + actual_sparse.data = actual_sparse.data.todense() + assert_identical(actual_sparse, actual_dense) + def test_to_and_from_empty_series(self): # GH697 expected = pd.Series([]) @@ -3693,10 +3708,8 @@ def test_to_dataset_whole(self): expected = Dataset({"foo": ("x", [1, 2])}) assert_identical(expected, actual) - expected = Dataset({"bar": ("x", [1, 2])}) - with pytest.warns(FutureWarning): + with pytest.raises(TypeError): actual = named.to_dataset("bar") - assert_identical(expected, actual) def test_to_dataset_split(self): array = DataArray([1, 2, 3], coords=[("x", list("abc"))], attrs={"a": 1}) @@ -4637,3 +4650,36 @@ def test_rolling_exp(da, dim, window_type, window): ) assert_allclose(expected.variable, result.variable) + + +def test_no_dict(): + d = DataArray() + with pytest.raises(AttributeError): + d.__dict__ + + +@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires python3.6 or higher") +def test_subclass_slots(): + """Test that DataArray subclasses must explicitly define ``__slots__``. + + .. note:: + As of 0.13.0, this is actually mitigated into a FutureWarning for any class + defined outside of the xarray package. + """ + with pytest.raises(AttributeError) as e: + + class MyArray(DataArray): + pass + + assert str(e.value) == "MyArray must explicitly define __slots__" + + +def test_weakref(): + """Classes with __slots__ are incompatible with the weakref module unless they + explicitly state __weakref__ among their slots + """ + from weakref import ref + + a = DataArray(1) + r = ref(a) + assert r() is a diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 55358e47e41..f02990a1be9 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -31,8 +31,8 @@ from xarray.core.pycompat import integer_types from . 
import ( - LooseVersion, InaccessibleArray, + LooseVersion, UnexpectedDataAccess, assert_allclose, assert_array_equal, @@ -46,6 +46,7 @@ requires_dask, requires_numbagg, requires_scipy, + requires_sparse, source_ndarray, ) @@ -1410,115 +1411,77 @@ def test_isel_drop(self): selected = data.isel(x=0, drop=False) assert_identical(expected, selected) - @pytest.mark.filterwarnings("ignore:Dataset.isel_points") - def test_isel_points(self): + def test_head(self): data = create_test_data() - pdim1 = [1, 2, 3] - pdim2 = [4, 5, 1] - pdim3 = [1, 2, 3] - actual = data.isel_points(dim1=pdim1, dim2=pdim2, dim3=pdim3, dim="test_coord") - assert "test_coord" in actual.dims - assert actual.coords["test_coord"].shape == (len(pdim1),) - - actual = data.isel_points(dim1=pdim1, dim2=pdim2) - assert "points" in actual.dims - assert "dim3" in actual.dims - assert "dim3" not in actual.data_vars - np.testing.assert_array_equal(data["dim2"][pdim2], actual["dim2"]) + expected = data.isel(time=slice(5), dim2=slice(6)) + actual = data.head(time=5, dim2=6) + assert_equal(expected, actual) - # test that the order of the indexers doesn't matter - assert_identical( - data.isel_points(dim1=pdim1, dim2=pdim2), - data.isel_points(dim2=pdim2, dim1=pdim1), - ) + expected = data.isel(time=slice(0)) + actual = data.head(time=0) + assert_equal(expected, actual) - # make sure we're raising errors in the right places - with raises_regex(ValueError, "All indexers must be the same length"): - data.isel_points(dim1=[1, 2], dim2=[1, 2, 3]) - with raises_regex(ValueError, "dimension bad_key does not exist"): - data.isel_points(bad_key=[1, 2]) - with raises_regex(TypeError, "Indexers must be integers"): - data.isel_points(dim1=[1.5, 2.2]) - with raises_regex(TypeError, "Indexers must be integers"): - data.isel_points(dim1=[1, 2, 3], dim2=slice(3)) - with raises_regex(ValueError, "Indexers must be 1 dimensional"): - data.isel_points(dim1=1, dim2=2) - with raises_regex(ValueError, "Existing dimension names are not valid"): - data.isel_points(dim1=[1, 2], dim2=[1, 2], dim="dim2") + expected = data.isel({dim: slice(6) for dim in data.dims}) + actual = data.head(6) + assert_equal(expected, actual) - # test to be sure we keep around variables that were not indexed - ds = Dataset({"x": [1, 2, 3, 4], "y": 0}) - actual = ds.isel_points(x=[0, 1, 2]) - assert_identical(ds["y"], actual["y"]) + expected = data.isel({dim: slice(5) for dim in data.dims}) + actual = data.head() + assert_equal(expected, actual) - # tests using index or DataArray as a dim - stations = Dataset() - stations["station"] = ("station", ["A", "B", "C"]) - stations["dim1s"] = ("station", [1, 2, 3]) - stations["dim2s"] = ("station", [4, 5, 1]) + with raises_regex(TypeError, "either dict-like or a single int"): + data.head([3]) + with raises_regex(TypeError, "expected integer type"): + data.head(dim2=3.1) + with raises_regex(ValueError, "expected positive int"): + data.head(time=-3) - actual = data.isel_points( - dim1=stations["dim1s"], dim2=stations["dim2s"], dim=stations["station"] - ) - assert "station" in actual.coords - assert "station" in actual.dims - assert_identical(actual["station"].drop(["dim2"]), stations["station"]) + def test_tail(self): + data = create_test_data() - # make sure we get the default 'points' coordinate when passed a list - actual = data.isel_points( - dim1=stations["dim1s"], dim2=stations["dim2s"], dim=["A", "B", "C"] - ) - assert "points" in actual.coords - assert actual.coords["points"].values.tolist() == ["A", "B", "C"] + expected = 
data.isel(time=slice(-5, None), dim2=slice(-6, None)) + actual = data.tail(time=5, dim2=6) + assert_equal(expected, actual) - # test index - actual = data.isel_points( - dim1=stations["dim1s"].values, - dim2=stations["dim2s"].values, - dim=pd.Index(["A", "B", "C"], name="letters"), - ) - assert "letters" in actual.coords + expected = data.isel(dim1=slice(0)) + actual = data.tail(dim1=0) + assert_equal(expected, actual) - # can pass a numpy array - data.isel_points( - dim1=stations["dim1s"], dim2=stations["dim2s"], dim=np.array([4, 5, 6]) - ) + expected = data.isel({dim: slice(-6, None) for dim in data.dims}) + actual = data.tail(6) + assert_equal(expected, actual) - @pytest.mark.filterwarnings("ignore:Dataset.sel_points") - @pytest.mark.filterwarnings("ignore:Dataset.isel_points") - def test_sel_points(self): - data = create_test_data() + expected = data.isel({dim: slice(-5, None) for dim in data.dims}) + actual = data.tail() + assert_equal(expected, actual) - # add in a range() index - data["dim1"] = data.dim1 + with raises_regex(TypeError, "either dict-like or a single int"): + data.tail([3]) + with raises_regex(TypeError, "expected integer type"): + data.tail(dim2=3.1) + with raises_regex(ValueError, "expected positive int"): + data.tail(time=-3) - pdim1 = [1, 2, 3] - pdim2 = [4, 5, 1] - pdim3 = [1, 2, 3] - expected = data.isel_points( - dim1=pdim1, dim2=pdim2, dim3=pdim3, dim="test_coord" - ) - actual = data.sel_points( - dim1=data.dim1[pdim1], - dim2=data.dim2[pdim2], - dim3=data.dim3[pdim3], - dim="test_coord", - ) - assert_identical(expected, actual) + def test_thin(self): + data = create_test_data() - data = Dataset({"foo": (("x", "y"), np.arange(9).reshape(3, 3))}) - expected = Dataset({"foo": ("points", [0, 4, 8])}) - actual = data.sel_points(x=[0, 1, 2], y=[0, 1, 2]) - assert_identical(expected, actual) + expected = data.isel(time=slice(None, None, 5), dim2=slice(None, None, 6)) + actual = data.thin(time=5, dim2=6) + assert_equal(expected, actual) - data.coords.update({"x": [0, 1, 2], "y": [0, 1, 2]}) - expected.coords.update({"x": ("points", [0, 1, 2]), "y": ("points", [0, 1, 2])}) - actual = data.sel_points(x=[0.1, 1.1, 2.5], y=[0, 1.2, 2.0], method="pad") - assert_identical(expected, actual) + expected = data.isel({dim: slice(None, None, 6) for dim in data.dims}) + actual = data.thin(6) + assert_equal(expected, actual) - with pytest.raises(KeyError): - data.sel_points(x=[2.5], y=[2.0], method="pad", tolerance=1e-3) + with raises_regex(TypeError, "either dict-like or a single int"): + data.thin([3]) + with raises_regex(TypeError, "expected integer type"): + data.thin(dim2=3.1) + with raises_regex(ValueError, "cannot be zero"): + data.thin(time=0) + with raises_regex(ValueError, "expected positive int"): + data.thin(time=-3) @pytest.mark.filterwarnings("ignore::DeprecationWarning") def test_sel_fancy(self): @@ -1766,9 +1729,8 @@ def test_reindex(self): # regression test for #279 expected = Dataset({"x": ("time", np.random.randn(5))}, {"time": range(5)}) time2 = DataArray(np.arange(5), dims="time2") - with pytest.warns(FutureWarning): + with pytest.raises(ValueError): actual = expected.reindex(time=time2) - assert_identical(actual, expected) # another regression test ds = Dataset( @@ -1784,11 +1746,10 @@ def test_reindex(self): def test_reindex_warning(self): data = create_test_data() - with pytest.warns(FutureWarning) as ws: + with pytest.raises(ValueError): # DataArray with different dimension raises Future warning ind = xr.DataArray([0.0, 1.0], dims=["new_dim"], name="ind") 
data.reindex(dim2=ind) - assert any(["Indexer has dimensions " in str(w.message) for w in ws]) # Should not warn ind = xr.DataArray([0.0, 1.0], dims=["dim2"], name="ind") @@ -2214,7 +2175,7 @@ def test_drop_labels_by_keyword(self): # Basic functionality. assert len(data.coords["x"]) == 2 - # This API is allowed but deprecated. + # In the future, this will break. with pytest.warns(DeprecationWarning): ds1 = data.drop(["a"], dim="x") ds2 = data.drop(x="a") @@ -2222,6 +2183,13 @@ def test_drop_labels_by_keyword(self): ds4 = data.drop(x=["a", "b"]) ds5 = data.drop(x=["a", "b"], y=range(0, 6, 2)) + # In the future, this will result in different behavior. + arr = DataArray(range(3), dims=["c"]) + with pytest.warns(FutureWarning): + data.drop(arr.coords) + with pytest.warns(FutureWarning): + data.drop(arr.indexes) + assert_array_equal(ds1.coords["x"], ["b"]) assert_array_equal(ds2.coords["x"], ["b"]) assert_array_equal(ds3.coords["x"], ["b"]) @@ -2428,18 +2396,11 @@ def test_rename_same_name(self): renamed = data.rename(newnames) assert_identical(renamed, data) - @pytest.mark.filterwarnings("ignore:The inplace argument") def test_rename_inplace(self): times = pd.date_range("2000-01-01", periods=3) data = Dataset({"z": ("x", [2, 3, 4]), "t": ("t", times)}) - copied = data.copy() - renamed = data.rename({"x": "y"}) - data.rename({"x": "y"}, inplace=True) - assert_identical(data, renamed) - assert not data.equals(copied) - assert data.dims == {"y": 3, "t": 3} - # check virtual variables - assert_array_equal(data["t.dayofyear"], [1, 2, 3]) + with pytest.raises(TypeError): + data.rename({"x": "y"}, inplace=True) def test_rename_dims(self): original = Dataset({"x": ("x", [0, 1, 2]), "y": ("x", [10, 11, 12]), "z": 42}) @@ -2702,7 +2663,7 @@ def test_set_index(self): obj = ds.set_index(x=mindex.names) assert_identical(obj, expected) - with pytest.warns(FutureWarning, match="The inplace argument"): + with pytest.raises(TypeError): ds.set_index(x=mindex.names, inplace=True) assert_identical(ds, expected) @@ -2727,9 +2688,8 @@ def test_reset_index(self): obj = ds.reset_index("x") assert_identical(obj, expected) - with pytest.warns(FutureWarning, match="The inplace argument"): + with pytest.raises(TypeError): ds.reset_index("x", inplace=True) - assert_identical(ds, expected) def test_reorder_levels(self): ds = create_test_multiindex() @@ -2740,9 +2700,8 @@ def test_reorder_levels(self): reindexed = ds.reorder_levels(x=["level_2", "level_1"]) assert_identical(reindexed, expected) - with pytest.warns(FutureWarning, match="The inplace argument"): + with pytest.raises(TypeError): ds.reorder_levels(x=["level_2", "level_1"], inplace=True) - assert_identical(ds, expected) ds = Dataset({}, coords={"x": [1, 2]}) with raises_regex(ValueError, "has no MultiIndex"): @@ -2882,11 +2841,8 @@ def test_update(self): assert actual_result is actual assert_identical(expected, actual) - with pytest.warns(FutureWarning, match="The inplace argument"): + with pytest.raises(TypeError): actual = data.update(data, inplace=False) - expected = data - assert actual is not expected - assert_identical(expected, actual) other = Dataset(attrs={"new": "attr"}) actual = data.copy() @@ -3411,18 +3367,6 @@ def test_groupby_reduce(self): actual = data.groupby("letters").mean(ALL_DIMS) assert_allclose(expected, actual) - def test_groupby_warn(self): - data = Dataset( - { - "xy": (["x", "y"], np.random.randn(3, 4)), - "xonly": ("x", np.random.randn(3)), - "yonly": ("y", np.random.randn(4)), - "letters": ("y", ["a", "a", "b", "b"]), - } - ) - with 
pytest.warns(FutureWarning): - data.groupby("x").mean() - def test_groupby_math(self): def reorder_dims(x): return x.transpose("dim1", "dim2", "dim3", "time") @@ -3768,6 +3712,28 @@ def test_to_and_from_dataframe(self): expected = pd.DataFrame([[]], index=idx) assert expected.equals(actual), (expected, actual) + @requires_sparse + def test_from_dataframe_sparse(self): + import sparse + + df_base = pd.DataFrame( + {"x": range(10), "y": list("abcdefghij"), "z": np.arange(0, 100, 10)} + ) + + ds_sparse = Dataset.from_dataframe(df_base.set_index("x"), sparse=True) + ds_dense = Dataset.from_dataframe(df_base.set_index("x"), sparse=False) + assert isinstance(ds_sparse["y"].data, sparse.COO) + assert isinstance(ds_sparse["z"].data, sparse.COO) + ds_sparse["y"].data = ds_sparse["y"].data.todense() + ds_sparse["z"].data = ds_sparse["z"].data.todense() + assert_identical(ds_dense, ds_sparse) + + ds_sparse = Dataset.from_dataframe(df_base.set_index(["x", "y"]), sparse=True) + ds_dense = Dataset.from_dataframe(df_base.set_index(["x", "y"]), sparse=False) + assert isinstance(ds_sparse["z"].data, sparse.COO) + ds_sparse["z"].data = ds_sparse["z"].data.todense() + assert_identical(ds_dense, ds_sparse) + def test_to_and_from_empty_dataframe(self): # GH697 expected = pd.DataFrame({"foo": []}) @@ -4920,7 +4886,7 @@ def test_filter_by_attrs(self): "temperature_10": (["t"], [0], temp10), "precipitation": (["t"], [0], precip), }, - coords={"time": (["t"], [0], dict(axis="T"))}, + coords={"time": (["t"], [0], dict(axis="T", long_name="time_in_seconds"))}, ) # Test return empty Dataset. @@ -4934,6 +4900,11 @@ def test_filter_by_attrs(self): assert_equal(new_ds["precipitation"], ds["precipitation"]) + # Test filter coordinates + new_ds = ds.filter_by_attrs(long_name="time_in_seconds") + assert new_ds["time"].long_name == "time_in_seconds" + assert not bool(new_ds.data_vars) + # Test return more than one DataArray. new_ds = ds.filter_by_attrs(standard_name="air_potential_temperature") assert len(new_ds.data_vars) == 2 @@ -5793,3 +5764,36 @@ def test_trapz_datetime(dask, which_datetime): actual2 = da.integrate("time", datetime_unit="h") assert_allclose(actual, actual2 / 24.0) + + +def test_no_dict(): + d = Dataset() + with pytest.raises(AttributeError): + d.__dict__ + + +@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires python3.6 or higher") +def test_subclass_slots(): + """Test that Dataset subclasses must explicitly define ``__slots__``. + + .. note:: + As of 0.13.0, this is actually mitigated into a FutureWarning for any class + defined outside of the xarray package. + """ + with pytest.raises(AttributeError) as e: + + class MyDS(Dataset): + pass + + assert str(e.value) == "MyDS must explicitly define __slots__" + + +def test_weakref(): + """Classes with __slots__ are incompatible with the weakref module unless they + explicitly state __weakref__ among their slots + """ + from weakref import ref + + ds = Dataset() + r = ref(ds) + assert r() is ds diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index ec63c9651eb..766a391b57f 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -25,13 +25,13 @@ from xarray.testing import assert_allclose, assert_equal from . 
import ( + arm_xfail, assert_array_equal, has_dask, has_np113, raises_regex, requires_cftime, requires_dask, - arm_xfail, ) @@ -245,9 +245,9 @@ def construct_dataarray(dim_num, dtype, contains_nan, dask): def from_series_or_scalar(se): - try: + if isinstance(se, pd.Series): return DataArray.from_series(se) - except AttributeError: # scalar case + else: # scalar case return DataArray(se) diff --git a/xarray/tests/test_formatting.py b/xarray/tests/test_formatting.py index 56fba20ffc0..c518f528537 100644 --- a/xarray/tests/test_formatting.py +++ b/xarray/tests/test_formatting.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -from textwrap import dedent import sys +from textwrap import dedent import numpy as np import pandas as pd diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 9127eb71cb7..ee17cc39064 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -134,21 +134,21 @@ def test_da_groupby_quantile(): [("x", [1, 1, 1, 2, 2]), ("y", [0, 0, 1])], ) - actual_x = array.groupby("x").quantile(0) + actual_x = array.groupby("x").quantile(0, dim=xr.ALL_DIMS) expected_x = xr.DataArray([1, 4], [("x", [1, 2])]) assert_identical(expected_x, actual_x) - actual_y = array.groupby("y").quantile(0) + actual_y = array.groupby("y").quantile(0, dim=xr.ALL_DIMS) expected_y = xr.DataArray([1, 22], [("y", [0, 1])]) assert_identical(expected_y, actual_y) - actual_xx = array.groupby("x").quantile(0, dim="x") + actual_xx = array.groupby("x").quantile(0) expected_xx = xr.DataArray( [[1, 11, 22], [4, 15, 24]], [("x", [1, 2]), ("y", [0, 0, 1])] ) assert_identical(expected_xx, actual_xx) - actual_yy = array.groupby("y").quantile(0, dim="y") + actual_yy = array.groupby("y").quantile(0) expected_yy = xr.DataArray( [[1, 26], [2, 22], [3, 23], [4, 24], [5, 25]], [("x", [1, 1, 1, 2, 2]), ("y", [0, 1])], @@ -164,7 +164,7 @@ def test_da_groupby_quantile(): ) g = foo.groupby(foo.time.dt.month) - actual = g.quantile(0) + actual = g.quantile(0, dim=xr.ALL_DIMS) expected = xr.DataArray( [ 0.0, diff --git a/xarray/tests/test_indexing.py b/xarray/tests/test_indexing.py index f37f8d98ca8..82ee9b63f9d 100644 --- a/xarray/tests/test_indexing.py +++ b/xarray/tests/test_indexing.py @@ -708,7 +708,9 @@ def test_create_mask_dask(): indexer = indexing.OuterIndexer((1, slice(2), np.array([0, -1, 2]))) expected = np.array(2 * [[False, True, False]]) - actual = indexing.create_mask(indexer, (5, 5, 5), chunks_hint=((1, 1), (2, 1))) + actual = indexing.create_mask( + indexer, (5, 5, 5), da.empty((2, 3), chunks=((1, 1), (2, 1))) + ) assert actual.chunks == ((1, 1), (2, 1)) np.testing.assert_array_equal(expected, actual) @@ -716,12 +718,14 @@ def test_create_mask_dask(): (np.array([0, -1, 2]), slice(None), np.array([0, 1, -1])) ) expected = np.array([[False, True, True]] * 2).T - actual = indexing.create_mask(indexer, (5, 2), chunks_hint=((3,), (2,))) + actual = indexing.create_mask( + indexer, (5, 2), da.empty((3, 2), chunks=((3,), (2,))) + ) assert isinstance(actual, da.Array) np.testing.assert_array_equal(expected, actual) with pytest.raises(ValueError): - indexing.create_mask(indexer, (5, 2), chunks_hint=()) + indexing.create_mask(indexer, (5, 2), da.empty((5,), chunks=(1,))) def test_create_mask_error(): diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index ed1453ce95d..c1e6c7a5ce8 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -196,6 +196,8 @@ def test_merge_compat(self): with raises_regex(ValueError, "compat=.* invalid"): ds1.merge(ds2, 
compat="foobar") + assert ds1.identical(ds1.merge(ds2, compat="override")) + def test_merge_auto_align(self): ds1 = xr.Dataset({"a": ("x", [1, 2]), "x": [0, 1]}) ds2 = xr.Dataset({"b": ("x", [3, 4]), "x": [1, 2]}) diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index 36e7a38151d..020a49b0114 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -8,7 +8,6 @@ import xarray as xr import xarray.plot as xplt from xarray import DataArray, Dataset -from xarray.coding.times import _import_cftime from xarray.plot.dataset_plot import _infer_meta_data from xarray.plot.plot import _infer_interval_breaks from xarray.plot.utils import ( @@ -1284,33 +1283,45 @@ class TestContour(Common2dMixin, PlotTestCase): plotfunc = staticmethod(xplt.contour) + # matplotlib cmap.colors gives an rgbA ndarray + # when seaborn is used, instead we get an rgb tuple + @staticmethod + def _color_as_tuple(c): + return tuple(c[:3]) + def test_colors(self): - # matplotlib cmap.colors gives an rgbA ndarray - # when seaborn is used, instead we get an rgb tuple - def _color_as_tuple(c): - return tuple(c[:3]) # with single color, we don't want rgb array artist = self.plotmethod(colors="k") assert artist.cmap.colors[0] == "k" artist = self.plotmethod(colors=["k", "b"]) - assert _color_as_tuple(artist.cmap.colors[1]) == (0.0, 0.0, 1.0) + assert self._color_as_tuple(artist.cmap.colors[1]) == (0.0, 0.0, 1.0) artist = self.darray.plot.contour( levels=[-0.5, 0.0, 0.5, 1.0], colors=["k", "r", "w", "b"] ) - assert _color_as_tuple(artist.cmap.colors[1]) == (1.0, 0.0, 0.0) - assert _color_as_tuple(artist.cmap.colors[2]) == (1.0, 1.0, 1.0) + assert self._color_as_tuple(artist.cmap.colors[1]) == (1.0, 0.0, 0.0) + assert self._color_as_tuple(artist.cmap.colors[2]) == (1.0, 1.0, 1.0) + # the last color is now under "over" + assert self._color_as_tuple(artist.cmap._rgba_over) == (0.0, 0.0, 1.0) + + def test_colors_np_levels(self): + + # https://github.com/pydata/xarray/issues/3284 + levels = np.array([-0.5, 0.0, 0.5, 1.0]) + artist = self.darray.plot.contour(levels=levels, colors=["k", "r", "w", "b"]) + assert self._color_as_tuple(artist.cmap.colors[1]) == (1.0, 0.0, 0.0) + assert self._color_as_tuple(artist.cmap.colors[2]) == (1.0, 1.0, 1.0) # the last color is now under "over" - assert _color_as_tuple(artist.cmap._rgba_over) == (0.0, 0.0, 1.0) + assert self._color_as_tuple(artist.cmap._rgba_over) == (0.0, 0.0, 1.0) def test_cmap_and_color_both(self): with pytest.raises(ValueError): self.plotmethod(colors="k", cmap="RdBu") - def list_of_colors_in_cmap_deprecated(self): - with pytest.raises(Exception): + def list_of_colors_in_cmap_raises_error(self): + with raises_regex(ValueError, "list of colors"): self.plotmethod(cmap=["k", "b"]) @pytest.mark.slow diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py index 74805b225fa..80f80a93a1c 100644 --- a/xarray/tests/test_sparse.py +++ b/xarray/tests/test_sparse.py @@ -1,16 +1,17 @@ -from textwrap import dedent import pickle +from textwrap import dedent + import numpy as np import pandas as pd +import pytest -from xarray import DataArray, Variable -from xarray.core.npcompat import IS_NEP18_ACTIVE import xarray as xr import xarray.ufuncs as xu +from xarray import DataArray, Variable +from xarray.core.npcompat import IS_NEP18_ACTIVE +from xarray.core.pycompat import sparse_array_type -from . import assert_equal, assert_identical, LooseVersion - -import pytest +from . 
import assert_equal, assert_identical param = pytest.param xfail = pytest.mark.xfail @@ -21,8 +22,12 @@ ) sparse = pytest.importorskip("sparse") -from sparse.utils import assert_eq as assert_sparse_eq # noqa -from sparse import COO, SparseArray # noqa + + +def assert_sparse_equal(a, b): + assert isinstance(a, sparse_array_type) + assert isinstance(b, sparse_array_type) + np.testing.assert_equal(a.todense(), b.todense()) def make_ndarray(shape): @@ -107,21 +112,9 @@ def test_variable_property(prop): (do("to_base_variable"), True), (do("transpose"), True), (do("unstack", dimensions={"x": {"x1": 5, "x2": 2}}), True), - param( - do("broadcast_equals", make_xrvar({"x": 10, "y": 5})), - False, - marks=xfail(reason="https://github.com/pydata/sparse/issues/270"), - ), - param( - do("equals", make_xrvar({"x": 10, "y": 5})), - False, - marks=xfail(reason="https://github.com/pydata/sparse/issues/270"), - ), - param( - do("identical", make_xrvar({"x": 10, "y": 5})), - False, - marks=xfail(reason="https://github.com/pydata/sparse/issues/270"), - ), + (do("broadcast_equals", make_xrvar({"x": 10, "y": 5})), False), + (do("equals", make_xrvar({"x": 10, "y": 5})), False), + (do("identical", make_xrvar({"x": 10, "y": 5})), False), param( do("argmax"), True, @@ -163,21 +156,19 @@ def test_variable_property(prop): True, marks=xfail(reason="Missing implementation for np.nancumsum"), ), - param( - do("fillna", 0), - True, - marks=xfail(reason="Missing implementation for np.result_type"), - ), + (do("fillna", 0), True), param( do("item", (1, 1)), False, marks=xfail(reason="'COO' object has no attribute 'item'"), ), - param(do("max"), False, marks=xfail(reason="Coercion to dense via bottleneck")), param( - do("median"), False, marks=xfail(reason="Coercion to dense via bottleneck") + do("median"), + False, + marks=xfail(reason="Missing implementation for np.nanmedian"), ), - param(do("min"), False, marks=xfail(reason="Coercion to dense via bottleneck")), + param(do("max"), False), + param(do("min"), False), param( do("no_conflicts", other=make_xrvar({"x": 10, "y": 5})), True, @@ -188,11 +179,7 @@ def test_variable_property(prop): True, # noqa marks=xfail(reason="Missing implementation for np.pad"), ), - param( - do("prod"), - False, - marks=xfail(reason="Missing implementation for np.result_type"), - ), + (do("prod"), False), param( do("quantile", q=0.5), True, @@ -201,7 +188,7 @@ def test_variable_property(prop): param( do("rank", dim="x"), False, - marks=xfail(reason="Coercion to dense via bottleneck"), + marks=xfail(reason="Only implemented for NumPy arrays (via bottleneck)"), ), param( do("reduce", func=np.sum, dim="x"), @@ -216,19 +203,15 @@ def test_variable_property(prop): param( do("shift", x=2), True, marks=xfail(reason="mixed sparse-dense operation") ), - param(do("std"), False, marks=xfail(reason="Coercion to dense via bottleneck")), param( - do("sum"), - False, - marks=xfail(reason="Missing implementation for np.result_type"), + do("std"), False, marks=xfail(reason="Missing implementation for np.nanstd") ), - param(do("var"), False, marks=xfail(reason="Coercion to dense via bottleneck")), - param(do("to_dict"), False, marks=xfail(reason="Coercion to dense")), + (do("sum"), False), param( - do("where", cond=make_xrvar({"x": 10, "y": 5}) > 0.5), - True, - marks=xfail(reason="Coercion of dense to sparse when using sparse mask"), - ), # noqa + do("var"), False, marks=xfail(reason="Missing implementation for np.nanvar") + ), + param(do("to_dict"), False, marks=xfail(reason="Coercion to dense")), + 
(do("where", cond=make_xrvar({"x": 10, "y": 5}) > 0.5), True), ], ids=repr, ) @@ -239,7 +222,7 @@ def test_variable_method(func, sparse_output): ret_d = func(var_d) if sparse_output: - assert isinstance(ret_s.data, SparseArray) + assert isinstance(ret_s.data, sparse.SparseArray) assert np.allclose(ret_s.data.todense(), ret_d.data, equal_nan=True) else: assert np.allclose(ret_s, ret_d, equal_nan=True) @@ -265,7 +248,7 @@ def test_1d_variable_method(func, sparse_output): ret_d = func(var_d) if sparse_output: - assert isinstance(ret_s.data, SparseArray) + assert isinstance(ret_s.data, sparse.SparseArray) assert np.allclose(ret_s.data.todense(), ret_d.data) else: assert np.allclose(ret_s, ret_d) @@ -278,16 +261,18 @@ def setUp(self): self.var = xr.Variable(("x", "y"), self.data) def test_unary_op(self): - assert_sparse_eq(-self.var.data, -self.data) - assert_sparse_eq(abs(self.var).data, abs(self.data)) - assert_sparse_eq(self.var.round().data, self.data.round()) + assert_sparse_equal(-self.var.data, -self.data) + assert_sparse_equal(abs(self.var).data, abs(self.data)) + assert_sparse_equal(self.var.round().data, self.data.round()) + @pytest.mark.filterwarnings("ignore::PendingDeprecationWarning") def test_univariate_ufunc(self): - assert_sparse_eq(np.sin(self.data), xu.sin(self.var).data) + assert_sparse_equal(np.sin(self.data), xu.sin(self.var).data) + @pytest.mark.filterwarnings("ignore::PendingDeprecationWarning") def test_bivariate_ufunc(self): - assert_sparse_eq(np.maximum(self.data, 0), xu.maximum(self.var, 0).data) - assert_sparse_eq(np.maximum(self.data, 0), xu.maximum(0, self.var).data) + assert_sparse_equal(np.maximum(self.data, 0), xu.maximum(self.var, 0).data) + assert_sparse_equal(np.maximum(self.data, 0), xu.maximum(0, self.var).data) def test_repr(self): expected = dedent( @@ -300,12 +285,11 @@ def test_repr(self): def test_pickle(self): v1 = self.var v2 = pickle.loads(pickle.dumps(v1)) - assert_sparse_eq(v1.data, v2.data) + assert_sparse_equal(v1.data, v2.data) - @pytest.mark.xfail(reason="Missing implementation for np.result_type") def test_missing_values(self): a = np.array([0, 1, np.nan, 3]) - s = COO.from_numpy(a) + s = sparse.COO.from_numpy(a) var_s = Variable("x", s) assert np.all(var_s.fillna(2).data.todense() == np.arange(4)) assert np.all(var_s.count() == 3) @@ -380,16 +364,8 @@ def test_dataarray_property(prop): # TODO # set_index # swap_dims - param( - do("broadcast_equals", make_xrvar({"x": 10, "y": 5})), - False, - marks=xfail(reason="https://github.com/pydata/sparse/issues/270"), - ), - param( - do("equals", make_xrvar({"x": 10, "y": 5})), - False, - marks=xfail(reason="https://github.com/pydata/sparse/issues/270"), - ), + (do("broadcast_equals", make_xrvar({"x": 10, "y": 5})), False), + (do("equals", make_xrvar({"x": 10, "y": 5})), False), param( do("argmax"), True, @@ -410,11 +386,7 @@ def test_dataarray_property(prop): False, marks=xfail(reason="Missing implementation for np.flip"), ), - param( - do("combine_first", make_xrarray({"x": 10, "y": 5})), - True, - marks=xfail(reason="mixed sparse-dense operation"), - ), + (do("combine_first", make_xrarray({"x": 10, "y": 5})), True), param( do("conjugate"), False, @@ -441,16 +413,8 @@ def test_dataarray_property(prop): marks=xfail(reason="Missing implementation for np.einsum"), ), param(do("dropna", "x"), False, marks=xfail(reason="Coercion to dense")), - param( - do("ffill", "x"), - False, - marks=xfail(reason="Coercion to dense via bottleneck.push"), - ), - param( - do("fillna", 0), - True, - 
marks=xfail(reason="Missing implementation for np.result_type"), - ), + param(do("ffill", "x"), False, marks=xfail(reason="Coercion to dense")), + (do("fillna", 0), True), param( do("interp", coords={"x": np.arange(10) + 0.5}), True, @@ -478,26 +442,16 @@ def test_dataarray_property(prop): False, marks=xfail(reason="'COO' object has no attribute 'item'"), ), - param(do("max"), False, marks=xfail(reason="Coercion to dense via bottleneck")), - param( - do("median"), False, marks=xfail(reason="Coercion to dense via bottleneck") - ), - param(do("min"), False, marks=xfail(reason="Coercion to dense via bottleneck")), + param(do("max"), False), + param(do("min"), False), param( - do("notnull"), + do("median"), False, - marks=xfail(reason="'COO' object has no attribute 'notnull'"), - ), - param( - do("pipe", np.sum, axis=1), - True, - marks=xfail(reason="Missing implementation for np.result_type"), - ), - param( - do("prod"), - False, - marks=xfail(reason="Missing implementation for np.result_type"), + marks=xfail(reason="Missing implementation for np.nanmedian"), ), + (do("notnull"), True), + (do("pipe", np.sum, axis=1), True), + (do("prod"), False), param( do("quantile", q=0.5), False, @@ -506,7 +460,7 @@ def test_dataarray_property(prop): param( do("rank", "x"), False, - marks=xfail(reason="Coercion to dense via bottleneck"), + marks=xfail(reason="Only implemented for NumPy arrays (via bottleneck)"), ), param( do("reduce", np.sum, dim="x"), @@ -524,23 +478,19 @@ def test_dataarray_property(prop): True, marks=xfail(reason="Indexing COO with more than one iterable index"), ), # noqa - param( - do("roll", x=2), - True, - marks=xfail(reason="Missing implementation for np.result_type"), - ), + (do("roll", x=2, roll_coords=True), True), param( do("sel", x=[0, 1, 2], y=[2, 3]), True, marks=xfail(reason="Indexing COO with more than one iterable index"), ), # noqa - param(do("std"), False, marks=xfail(reason="Coercion to dense via bottleneck")), param( - do("sum"), - False, - marks=xfail(reason="Missing implementation for np.result_type"), + do("std"), False, marks=xfail(reason="Missing implementation for np.nanstd") + ), + (do("sum"), False), + param( + do("var"), False, marks=xfail(reason="Missing implementation for np.nanvar") ), - param(do("var"), False, marks=xfail(reason="Coercion to dense via bottleneck")), param( do("where", make_xrarray({"x": 10, "y": 5}) > 0.5), False, @@ -558,7 +508,7 @@ def test_dataarray_method(func, sparse_output): ret_d = func(arr_d) if sparse_output: - assert isinstance(ret_s.data, SparseArray) + assert isinstance(ret_s.data, sparse.SparseArray) assert np.allclose(ret_s.data.todense(), ret_d.data, equal_nan=True) else: assert np.allclose(ret_s, ret_d, equal_nan=True) @@ -582,7 +532,7 @@ def test_datarray_1d_method(func, sparse_output): ret_d = func(arr_d) if sparse_output: - assert isinstance(ret_s.data, SparseArray) + assert isinstance(ret_s.data, sparse.SparseArray) assert np.allclose(ret_s.data.todense(), ret_d.data, equal_nan=True) else: assert np.allclose(ret_s, ret_d, equal_nan=True) @@ -600,17 +550,20 @@ def setUp(self): self.ds_ar, coords={"x": range(4)}, dims=("x", "y"), name="foo" ) - @pytest.mark.xfail(reason="Missing implementation for np.result_type") def test_to_dataset_roundtrip(self): x = self.sp_xr assert_equal(x, x.to_dataset("x").to_array("x")) def test_align(self): a1 = xr.DataArray( - COO.from_numpy(np.arange(4)), dims=["x"], coords={"x": ["a", "b", "c", "d"]} + sparse.COO.from_numpy(np.arange(4)), + dims=["x"], + coords={"x": ["a", "b", "c", "d"]}, 
) b1 = xr.DataArray( - COO.from_numpy(np.arange(4)), dims=["x"], coords={"x": ["a", "b", "d", "e"]} + sparse.COO.from_numpy(np.arange(4)), + dims=["x"], + coords={"x": ["a", "b", "d", "e"]}, ) a2, b2 = xr.align(a1, b1, join="inner") assert isinstance(a2.data, sparse.SparseArray) @@ -647,33 +600,35 @@ def test_align_2d(self): assert np.all(B1.coords["x"] == B2.coords["x"]) assert np.all(B1.coords["y"] == B2.coords["y"]) - @pytest.mark.xfail(reason="fill value leads to sparse-dense operation") def test_align_outer(self): a1 = xr.DataArray( - COO.from_numpy(np.arange(4)), dims=["x"], coords={"x": ["a", "b", "c", "d"]} + sparse.COO.from_numpy(np.arange(4)), + dims=["x"], + coords={"x": ["a", "b", "c", "d"]}, ) b1 = xr.DataArray( - COO.from_numpy(np.arange(4)), dims=["x"], coords={"x": ["a", "b", "d", "e"]} + sparse.COO.from_numpy(np.arange(4)), + dims=["x"], + coords={"x": ["a", "b", "d", "e"]}, ) a2, b2 = xr.align(a1, b1, join="outer") assert isinstance(a2.data, sparse.SparseArray) assert isinstance(b2.data, sparse.SparseArray) - assert np.all(a2.coords["x"].data == ["a", "b", "c", "d"]) - assert np.all(b2.coords["x"].data == ["a", "b", "c", "d"]) + assert np.all(a2.coords["x"].data == ["a", "b", "c", "d", "e"]) + assert np.all(b2.coords["x"].data == ["a", "b", "c", "d", "e"]) - @pytest.mark.xfail(reason="Missing implementation for np.result_type") def test_concat(self): ds1 = xr.Dataset(data_vars={"d": self.sp_xr}) ds2 = xr.Dataset(data_vars={"d": self.sp_xr}) ds3 = xr.Dataset(data_vars={"d": self.sp_xr}) out = xr.concat([ds1, ds2, ds3], dim="x") - assert_sparse_eq( + assert_sparse_equal( out["d"].data, sparse.concatenate([self.sp_ar, self.sp_ar, self.sp_ar], axis=0), ) out = xr.concat([self.sp_xr, self.sp_xr, self.sp_xr], dim="y") - assert_sparse_eq( + assert_sparse_equal( out.data, sparse.concatenate([self.sp_ar, self.sp_ar, self.sp_ar], axis=1) ) @@ -692,15 +647,16 @@ def test_stack(self): roundtripped = stacked.unstack() assert arr.identical(roundtripped) + @pytest.mark.filterwarnings("ignore::PendingDeprecationWarning") def test_ufuncs(self): x = self.sp_xr assert_equal(np.sin(x), xu.sin(x)) def test_dataarray_repr(self): a = xr.DataArray( - COO.from_numpy(np.ones(4)), + sparse.COO.from_numpy(np.ones(4)), dims=["x"], - coords={"y": ("x", COO.from_numpy(np.arange(4)))}, + coords={"y": ("x", sparse.COO.from_numpy(np.arange(4)))}, ) expected = dedent( """\ @@ -714,8 +670,8 @@ def test_dataarray_repr(self): def test_dataset_repr(self): ds = xr.Dataset( - data_vars={"a": ("x", COO.from_numpy(np.ones(4)))}, - coords={"y": ("x", COO.from_numpy(np.arange(4)))}, + data_vars={"a": ("x", sparse.COO.from_numpy(np.ones(4)))}, + coords={"y": ("x", sparse.COO.from_numpy(np.arange(4)))}, ) expected = dedent( """\ @@ -731,7 +687,9 @@ def test_dataset_repr(self): def test_sparse_dask_dataset_repr(self): pytest.importorskip("dask", minversion="2.0") - ds = xr.Dataset(data_vars={"a": ("x", COO.from_numpy(np.ones(4)))}).chunk() + ds = xr.Dataset( + data_vars={"a": ("x", sparse.COO.from_numpy(np.ones(4)))} + ).chunk() expected = dedent( """\ @@ -744,17 +702,17 @@ def test_sparse_dask_dataset_repr(self): def test_dataarray_pickle(self): a1 = xr.DataArray( - COO.from_numpy(np.ones(4)), + sparse.COO.from_numpy(np.ones(4)), dims=["x"], - coords={"y": ("x", COO.from_numpy(np.arange(4)))}, + coords={"y": ("x", sparse.COO.from_numpy(np.arange(4)))}, ) a2 = pickle.loads(pickle.dumps(a1)) assert_identical(a1, a2) def test_dataset_pickle(self): ds1 = xr.Dataset( - data_vars={"a": ("x", COO.from_numpy(np.ones(4)))}, - 
coords={"y": ("x", COO.from_numpy(np.arange(4)))}, + data_vars={"a": ("x", sparse.COO.from_numpy(np.ones(4)))}, + coords={"y": ("x", sparse.COO.from_numpy(np.arange(4)))}, ) ds2 = pickle.loads(pickle.dumps(ds1)) assert_identical(ds1, ds2) @@ -814,8 +772,8 @@ def test_groupby_first(self): def test_groupby_bins(self): x1 = self.ds_xr x2 = self.sp_xr - m1 = x1.groupby_bins("x", bins=[0, 3, 7, 10]).sum() - m2 = x2.groupby_bins("x", bins=[0, 3, 7, 10]).sum() + m1 = x1.groupby_bins("x", bins=[0, 3, 7, 10]).sum(xr.ALL_DIMS) + m2 = x2.groupby_bins("x", bins=[0, 3, 7, 10]).sum(xr.ALL_DIMS) assert isinstance(m2.data, sparse.SparseArray) assert np.allclose(m1.data, m2.data.todense()) @@ -829,7 +787,7 @@ def test_resample(self): dims="time", ) t2 = t1.copy() - t2.data = COO(t2.data) + t2.data = sparse.COO(t2.data) m1 = t1.resample(time="QS-DEC").mean() m2 = t2.resample(time="QS-DEC").mean() assert isinstance(m2.data, sparse.SparseArray) @@ -860,7 +818,7 @@ def test_where(self): cond = a > 3 xr.DataArray(a).where(cond) - s = COO.from_numpy(a) + s = sparse.COO.from_numpy(a) cond = s > 3 xr.DataArray(s).where(cond) @@ -873,9 +831,9 @@ class TestSparseCoords: @pytest.mark.xfail(reason="Coercion of coords to dense") def test_sparse_coords(self): xr.DataArray( - COO.from_numpy(np.arange(4)), + sparse.COO.from_numpy(np.arange(4)), dims=["x"], - coords={"x": COO.from_numpy([1, 2, 3, 4])}, + coords={"x": sparse.COO.from_numpy([1, 2, 3, 4])}, ) diff --git a/xarray/util/print_versions.py b/xarray/util/print_versions.py index 85bb9db8360..4ba327913bc 100755 --- a/xarray/util/print_versions.py +++ b/xarray/util/print_versions.py @@ -1,5 +1,4 @@ """Utility functions for printing version information.""" -import codecs import importlib import locale import os