From 4578d63d7e6c7962e8203d50e3be6eb2a5a43612 Mon Sep 17 00:00:00 2001
From: Isaac Virshup <ivirshup@gmail.com>
Date: Fri, 19 Feb 2021 15:59:56 +1100
Subject: [PATCH 1/6] Cleanup normalize_total

---
 scanpy/preprocessing/_normalization.py | 91 ++++++++++++++------------
 1 file changed, 50 insertions(+), 41 deletions(-)

diff --git a/scanpy/preprocessing/_normalization.py b/scanpy/preprocessing/_normalization.py
index 2d745634f6..f5d13c2297 100644
--- a/scanpy/preprocessing/_normalization.py
+++ b/scanpy/preprocessing/_normalization.py
@@ -1,4 +1,5 @@
 from typing import Optional, Union, Iterable, Dict
+from warnings import warn
 
 import numpy as np
 from anndata import AnnData
@@ -8,6 +9,7 @@
 from .. import logging as logg
 from .._compat import Literal
 from .._utils import view_to_actual
+from scanpy.get import _get_obs_rep, _set_obs_rep
 
 
 def _normalize_data(X, counts, after=None, copy=False):
@@ -31,6 +33,7 @@ def normalize_total(
     exclude_highly_expressed: bool = False,
     max_fraction: float = 0.05,
     key_added: Optional[str] = None,
+    layer: Optional[str] = None,
     layers: Union[Literal['all'], Iterable[str]] = None,
     layer_norm: Optional[str] = None,
     inplace: bool = True,
@@ -72,20 +75,8 @@ def normalize_total(
     key_added
         Name of the field in `adata.obs` where the normalization factor is
         stored.
-    layers
-        List of layers to normalize. Set to `'all'` to normalize all layers.
-    layer_norm
-        Specifies how to normalize layers:
-
-        * If `None`, after normalization, for each layer in *layers* each cell
-          has a total count equal to the median of the *counts_per_cell* before
-          normalization of the layer.
-        * If `'after'`, for each layer in *layers* each cell has
-          a total count equal to `target_sum`.
-        * If `'X'`, for each layer in *layers* each cell has a total count
-          equal to the median of total counts for observations (cells) of
-          `adata.X` before normalization.
-
+    layer
+        Layer to normalize instead of `X`. If `None`, `X` is normalized.
     inplace
         Whether to update `adata` or return dictionary with normalized copies of
         `adata.X` and `adata.layers`.
@@ -130,6 +121,22 @@ def normalize_total(
     if max_fraction < 0 or max_fraction > 1:
         raise ValueError('Choose max_fraction between 0 and 1.')
 
+    # Deprecated features
+    if layers is not None:
+        warn(
+            FutureWarning(
+                "The `layers` argument is deprecated. Instead, specify individual "
+                "layers to normalize with `layer`."
+            )
+        )
+    if layer_norm is not None:
+        warn(
+            FutureWarning(
+                "The `layer_norm` argument is deprecated. Specify the target size "
+                "factor directly with `target_sum`."
+            )
+        )
+
     if layers == 'all':
         layers = adata.layers.keys()
     elif isinstance(layers, str):
@@ -139,32 +146,47 @@ def normalize_total(
 
     view_to_actual(adata)
 
+    X = _get_obs_rep(adata, layer=layer)
+
     gene_subset = None
     msg = 'normalizing counts per cell'
     if exclude_highly_expressed:
-        counts_per_cell = adata.X.sum(1)  # original counts per cell
+        counts_per_cell = X.sum(1)  # original counts per cell
         counts_per_cell = np.ravel(counts_per_cell)
 
         # at least one cell as more than max_fraction of counts per cell
-        gene_subset = (adata.X > counts_per_cell[:, None] * max_fraction).sum(0)
+
+        gene_subset = (X > counts_per_cell[:, None] * max_fraction).sum(0)
         gene_subset = np.ravel(gene_subset) == 0
 
         msg += (
             ' The following highly-expressed genes are not considered during '
             f'normalization factor computation:\n{adata.var_names[~gene_subset].tolist()}'
         )
+        counts_per_cell = X[:, gene_subset].sum(1)
+    else:
+        counts_per_cell = X.sum(1)
     start = logg.info(msg)
-
-    # counts per cell for subset, if max_fraction!=1
-    X = adata.X if gene_subset is None else adata[:, gene_subset].X
-    counts_per_cell = X.sum(1)
-    # get rid of adata view
-    counts_per_cell = np.ravel(counts_per_cell).copy()
+    counts_per_cell = np.ravel(counts_per_cell)
 
     cell_subset = counts_per_cell > 0
     if not np.all(cell_subset):
-        logg.warning('Some cells have total count of genes equal to zero')
+        warn(UserWarning('Some cells have zero counts'))
+
+    if inplace:
+        if key_added is not None:
+            adata.obs[key_added] = counts_per_cell
+        _set_obs_rep(
+            adata, _normalize_data(X, counts_per_cell, target_sum), layer=layer
+        )
+    else:
+        # not recarray because need to support sparse
+        dat = dict(
+            X=_normalize_data(X, counts_per_cell, target_sum, copy=True),
+            norm_factor=counts_per_cell,
+        )
 
+    # Deprecated features
     if layer_norm == 'after':
         after = target_sum
     elif layer_norm == 'X':
@@ -173,26 +195,13 @@ def normalize_total(
         after = None
     else:
         raise ValueError('layer_norm should be "after", "X" or None')
-    del cell_subset
 
-    if inplace:
-        if key_added is not None:
-            adata.obs[key_added] = counts_per_cell
-        adata.X = _normalize_data(adata.X, counts_per_cell, target_sum)
-    else:
-        # not recarray because need to support sparse
-        dat = dict(
-            X=_normalize_data(adata.X, counts_per_cell, target_sum, copy=True),
-            norm_factor=counts_per_cell,
+    for layer_to_norm in layers if layers is not None else ():
+        res = normalize_total(
+            adata, layer=layer_to_norm, target_sum=after, inplace=inplace
         )
-
-    for layer_name in layers or ():
-        layer = adata.layers[layer_name]
-        counts = np.ravel(layer.sum(1))
-        if inplace:
-            adata.layers[layer_name] = _normalize_data(layer, counts, after)
-        else:
-            dat[layer_name] = _normalize_data(layer, counts, after, copy=True)
+        if not inplace:
+            dat[layer_to_norm] = res["X"]
 
     logg.info(
         '    finished ({time_passed})',

From 4d34bf0d3980675aff57284237dd4c2d4d6363f8 Mon Sep 17 00:00:00 2001
From: Isaac Virshup <ivirshup@gmail.com>
Date: Tue, 2 Mar 2021 13:43:40 +1100
Subject: [PATCH 2/6] Add modification tests and copy kwarg for normalize_total

---
 scanpy/preprocessing/_normalization.py | 13 +++++++-
 scanpy/tests/helpers.py                | 44 ++++++++++++++++----------
 scanpy/tests/test_normalization.py     | 12 ++++++-
 3 files changed, 51 insertions(+), 18 deletions(-)

diff --git a/scanpy/preprocessing/_normalization.py b/scanpy/preprocessing/_normalization.py
index f5d13c2297..badd3d835c 100644
--- a/scanpy/preprocessing/_normalization.py
+++ b/scanpy/preprocessing/_normalization.py
@@ -37,6 +37,7 @@ def normalize_total(
     layers: Union[Literal['all'], Iterable[str]] = None,
     layer_norm: Optional[str] = None,
     inplace: bool = True,
+    copy: bool = False,
 ) -> Optional[Dict[str, np.ndarray]]:
     """\
     Normalize counts per cell.
@@ -80,6 +81,8 @@ def normalize_total(
     inplace
         Whether to update `adata` or return dictionary with normalized copies of
         `adata.X` and `adata.layers`.
+    copy
+        Whether to modify copied input object. Not compatible with inplace=False.
 
     Returns
     -------
@@ -118,6 +121,11 @@ def normalize_total(
            [ 0.5,  0.5,  0.5,  1. ,  1. ],
            [ 0.5, 11. ,  0.5,  1. ,  1. ]], dtype=float32)
     """
+    if copy:
+        if not inplace:
+            raise ValueError()
+        adata = adata.copy()
+
     if max_fraction < 0 or max_fraction > 1:
         raise ValueError('Choose max_fraction between 0 and 1.')
 
@@ -212,4 +220,7 @@ def normalize_total(
             f'and added {key_added!r}, counts per cell before normalization (adata.obs)'
         )
 
-    return dat if not inplace else None
+    if copy:
+        return adata
+    elif not inplace:
+        return dat
diff --git a/scanpy/tests/helpers.py b/scanpy/tests/helpers.py
index 35253bad69..a8bf6ec088 100644
--- a/scanpy/tests/helpers.py
+++ b/scanpy/tests/helpers.py
@@ -2,6 +2,8 @@
 This file contains helper functions for the scanpy test suite.
 """
 
+from itertools import permutations
+
 import scanpy as sc
 import numpy as np
 
@@ -13,26 +15,36 @@
 # These functions can be used to check that functions are correctly using arugments like `layers`, `obsm`, etc.
 
 
-def check_rep_mutation(func, X, **kwargs):
+def check_rep_mutation(func, X, fields=["layer", "obsm"], **kwargs):
     """Check that only the array meant to be modified is modified."""
-    adata = sc.AnnData(
-        X=X.copy(),
-        layers={"layer": X.copy()},
-        obsm={"obsm": X.copy()},
-        dtype=X.dtype,
-    )
+    adata = sc.AnnData(X=X.copy(), dtype=X.dtype)
+    for field in fields:
+        sc.get._set_obs_rep(adata, X, **{field: field})
+    X_array = asarray(X)
+
     adata_X = func(adata, copy=True, **kwargs)
-    adata_layer = func(adata, layer="layer", copy=True, **kwargs)
-    adata_obsm = func(adata, obsm="obsm", copy=True, **kwargs)
+    adatas_proc = {
+        field: func(adata, copy=True, **{field: field}, **kwargs) for field in fields
+    }
 
-    assert np.array_equal(asarray(adata_X.X), asarray(adata_layer.layers["layer"]))
-    assert np.array_equal(asarray(adata_X.X), asarray(adata_obsm.obsm["obsm"]))
+    # Modified fields
+    for field in fields:
+        result_array = asarray(
+            sc.get._get_obs_rep(adatas_proc[field], **{field: field})
+        )
+        np.testing.assert_array_equal(asarray(adata_X.X), result_array)
 
-    assert np.array_equal(asarray(adata_layer.X), asarray(adata_layer.obsm["obsm"]))
-    assert np.array_equal(asarray(adata_obsm.X), asarray(adata_obsm.layers["layer"]))
-    assert np.array_equal(
-        asarray(adata_X.layers["layer"]), asarray(adata_X.obsm["obsm"])
-    )
+    # Unmodified fields
+    for field in fields:
+        np.testing.assert_array_equal(X_array, asarray(adatas_proc[field].X))
+        np.testing.assert_array_equal(
+            X_array, asarray(sc.get._get_obs_rep(adata_X, **{field: field}))
+        )
+    for field_a, field_b in permutations(fields, 2):
+        result_array = asarray(
+            sc.get._get_obs_rep(adatas_proc[field_a], **{field_b: field_b})
+        )
+        np.testing.assert_array_equal(X_array, result_array)
 
 
 def check_rep_results(func, X, **kwargs):
diff --git a/scanpy/tests/test_normalization.py b/scanpy/tests/test_normalization.py
index 9b84699d9a..a815025449 100644
--- a/scanpy/tests/test_normalization.py
+++ b/scanpy/tests/test_normalization.py
@@ -2,9 +2,11 @@
 import numpy as np
 from anndata import AnnData
 from scipy.sparse import csr_matrix
+from scipy import sparse
 
 import scanpy as sc
-from anndata.tests.helpers import assert_equal
+from scanpy.tests.helpers import check_rep_mutation
+from anndata.tests.helpers import assert_equal, asarray
 
 X_total = [[1, 0], [3, 0], [5, 6]]
 X_frac = [[1, 0, 1], [3, 0, 1], [5, 6, 1]]
@@ -24,6 +26,14 @@ def test_normalize_total(typ, dtype):
     assert np.allclose(np.ravel(adata.X[:, 1:3].sum(axis=1)), [1.0, 1.0, 1.0])
 
 
+@pytest.mark.parametrize('typ', [asarray, csr_matrix], ids=lambda x: x.__name__)
+@pytest.mark.parametrize('dtype', ['float32', 'int64'])
+def test_normalize_total_rep(typ, dtype):
+    # Test that layer kwarg works
+    X = typ(sparse.random(100, 50, format="csr", density=0.2, dtype=dtype))
+    check_rep_mutation(sc.pp.normalize_total, X, fields=["layer"])
+
+
 @pytest.mark.parametrize('typ', [np.array, csr_matrix], ids=lambda x: x.__name__)
 @pytest.mark.parametrize('dtype', ['float32', 'int64'])
 def test_normalize_total_layers(typ, dtype):

From 01288605eda5c77885be4aa5c644ea9ae03dd611 Mon Sep 17 00:00:00 2001
From: Isaac Virshup <ivirshup@gmail.com>
Date: Tue, 2 Mar 2021 14:04:26 +1100
Subject: [PATCH 3/6] Test that 'layers' argument is deprecated

---
 scanpy/tests/test_normalization.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scanpy/tests/test_normalization.py b/scanpy/tests/test_normalization.py
index a815025449..ce38f703ba 100644
--- a/scanpy/tests/test_normalization.py
+++ b/scanpy/tests/test_normalization.py
@@ -39,7 +39,8 @@ def test_normalize_total_rep(typ, dtype):
 def test_normalize_total_layers(typ, dtype):
     adata = AnnData(typ(X_total), dtype=dtype)
     adata.layers["layer"] = adata.X.copy()
-    sc.pp.normalize_total(adata, layers=["layer"])
+    with pytest.warns(FutureWarning, match=r".*layers.*deprecated"):
+        sc.pp.normalize_total(adata, layers=["layer"])
     assert np.allclose(adata.layers["layer"].sum(axis=1), [3.0, 3.0, 3.0])
 
 

From 8c5fd98f5ac3d3b2c493013a78591d6bf6f76512 Mon Sep 17 00:00:00 2001
From: Isaac Virshup <ivirshup@gmail.com>
Date: Wed, 3 Mar 2021 15:11:13 +1100
Subject: [PATCH 4/6] Added more mutation checks for normalize_total

---
 scanpy/tests/helpers.py            | 52 ++++++++++++++++--------------
 scanpy/tests/test_normalization.py |  3 +-
 2 files changed, 30 insertions(+), 25 deletions(-)

diff --git a/scanpy/tests/helpers.py b/scanpy/tests/helpers.py
index a8bf6ec088..61fc35e23e 100644
--- a/scanpy/tests/helpers.py
+++ b/scanpy/tests/helpers.py
@@ -9,13 +9,16 @@
 
 from anndata.tests.helpers import asarray, assert_equal
 
+# TODO: Report more context on the fields being compared on error
+# TODO: Allow specifying paths to ignore on comparison
+
 ###########################
 # Representation choice
 ###########################
 # These functions can be used to check that functions are correctly using arugments like `layers`, `obsm`, etc.
 
 
-def check_rep_mutation(func, X, fields=["layer", "obsm"], **kwargs):
+def check_rep_mutation(func, X, *, fields=["layer", "obsm"], **kwargs):
     """Check that only the array meant to be modified is modified."""
     adata = sc.AnnData(X=X.copy(), dtype=X.dtype)
     for field in fields:
@@ -47,35 +50,36 @@ def check_rep_mutation(func, X, fields=["layer", "obsm"], **kwargs):
         np.testing.assert_array_equal(X_array, result_array)
 
 
-def check_rep_results(func, X, **kwargs):
+def check_rep_results(func, X, *, fields=["layer", "obsm"], **kwargs):
     """Checks that the results of a computation add values/ mutate the anndata object in a consistent way."""
     # Gen data
-    adata_X = sc.AnnData(
-        X=X.copy(),
-        layers={"layer": np.zeros(shape=X.shape, dtype=X.dtype)},
-        obsm={"obsm": np.zeros(shape=X.shape, dtype=X.dtype)},
-    )
-    adata_layer = sc.AnnData(
-        X=np.zeros(shape=X.shape, dtype=X.dtype),
-        layers={"layer": X.copy()},
-        obsm={"obsm": np.zeros(shape=X.shape, dtype=X.dtype)},
-    )
-    adata_obsm = sc.AnnData(
-        X=np.zeros(shape=X.shape, dtype=X.dtype),
-        layers={"layer": np.zeros(shape=X.shape, dtype=X.dtype)},
-        obsm={"obsm": X.copy()},
+    empty_X = np.zeros(shape=X.shape, dtype=X.dtype)
+    adata = sc.AnnData(
+        X=empty_X.copy(),
+        layers={"layer": empty_X.copy()},
+        obsm={"obsm": empty_X.copy()},
     )
 
+    adata_X = adata.copy()
+    adata_X.X = X.copy()
+
+    adatas_proc = {}
+    for field in fields:
+        cur = adata.copy()
+        sc.get._set_obs_rep(cur, X.copy(), **{field: field})
+        adatas_proc[field] = cur
+
     # Apply function
     func(adata_X, **kwargs)
-    func(adata_layer, layer="layer", **kwargs)
-    func(adata_obsm, obsm="obsm", **kwargs)
+    for field in fields:
+        func(adatas_proc[field], **{field: field}, **kwargs)
 
     # Reset X
-    adata_X.X = np.zeros(shape=X.shape, dtype=X.dtype)
-    adata_layer.layers["layer"] = np.zeros(shape=X.shape, dtype=X.dtype)
-    adata_obsm.obsm["obsm"] = np.zeros(shape=X.shape, dtype=X.dtype)
+    adata_X.X = empty_X.copy()
+    for field in fields:
+        sc.get._set_obs_rep(adatas_proc[field], empty_X.copy(), **{field: field})
 
-    # Check equality
-    assert_equal(adata_X, adata_layer)
-    assert_equal(adata_X, adata_obsm)
+    for field_a, field_b in permutations(fields, 2):
+        assert_equal(adatas_proc[field_a], adatas_proc[field_b])
+    for field in fields:
+        assert_equal(adata_X, adatas_proc[field])
diff --git a/scanpy/tests/test_normalization.py b/scanpy/tests/test_normalization.py
index ce38f703ba..0f5dbb102d 100644
--- a/scanpy/tests/test_normalization.py
+++ b/scanpy/tests/test_normalization.py
@@ -5,7 +5,7 @@
 from scipy import sparse
 
 import scanpy as sc
-from scanpy.tests.helpers import check_rep_mutation
+from scanpy.tests.helpers import check_rep_mutation, check_rep_results
 from anndata.tests.helpers import assert_equal, asarray
 
 X_total = [[1, 0], [3, 0], [5, 6]]
@@ -32,6 +32,7 @@ def test_normalize_total_rep(typ, dtype):
     # Test that layer kwarg works
     X = typ(sparse.random(100, 50, format="csr", density=0.2, dtype=dtype))
     check_rep_mutation(sc.pp.normalize_total, X, fields=["layer"])
+    check_rep_results(sc.pp.normalize_total, X, fields=["layer"])
 
 
 @pytest.mark.parametrize('typ', [np.array, csr_matrix], ids=lambda x: x.__name__)

From e3ed02b192dd8f1ff30c055c92c4fa70be50d54a Mon Sep 17 00:00:00 2001
From: Isaac Virshup <ivirshup@gmail.com>
Date: Wed, 3 Mar 2021 15:18:17 +1100
Subject: [PATCH 5/6] release note

---
 docs/release-notes/1.8.0.rst | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/docs/release-notes/1.8.0.rst b/docs/release-notes/1.8.0.rst
index b98676558a..d7d709ecd0 100644
--- a/docs/release-notes/1.8.0.rst
+++ b/docs/release-notes/1.8.0.rst
@@ -5,6 +5,7 @@
 
 - Switched to flit_ for building and deploying the package,
   a simple tool with an easy to understand command line interface and metadata.
+- Added `layer` and `copy` kwargs to :func:`~scanpy.pp.normalize_total` :pr:`1667` :smaller:`I Virshup`
 
 .. _flit: https://flit.readthedocs.io/en/latest/
 
@@ -15,3 +16,5 @@
 .. rubric:: Bug fixes
 
 .. rubric:: Deprecations
+
+- Deprecated `layers` and `layers_norm` kwargs to :func:`~scanpy.pp.normalize_total` :pr:`1667` :smaller:`I Virshup`

From 6a9d3fc7643f6b728d37f5d6fc23dd8ad8707176 Mon Sep 17 00:00:00 2001
From: Isaac Virshup <ivirshup@gmail.com>
Date: Wed, 3 Mar 2021 15:35:11 +1100
Subject: [PATCH 6/6] Error message

---
 scanpy/preprocessing/_normalization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scanpy/preprocessing/_normalization.py b/scanpy/preprocessing/_normalization.py
index badd3d835c..542afd38ca 100644
--- a/scanpy/preprocessing/_normalization.py
+++ b/scanpy/preprocessing/_normalization.py
@@ -123,7 +123,7 @@ def normalize_total(
     """
     if copy:
         if not inplace:
-            raise ValueError()
+            raise ValueError("`copy=True` cannot be used with `inplace=False`.")
         adata = adata.copy()
 
     if max_fraction < 0 or max_fraction > 1: