From f2b335eead655e3a219bff2465502f0b546710bd Mon Sep 17 00:00:00 2001
From: corentinravoux <corentin.ravoux.research@gmail.com>
Date: Tue, 25 Apr 2023 10:48:13 +0200
Subject: [PATCH 01/15] removing unnecessary warning

---
 py/picca/pk1d/postproc_pk1d.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/py/picca/pk1d/postproc_pk1d.py b/py/picca/pk1d/postproc_pk1d.py
index 4571b6339..636aa2a72 100644
--- a/py/picca/pk1d/postproc_pk1d.py
+++ b/py/picca/pk1d/postproc_pk1d.py
@@ -737,10 +737,6 @@ def compute_average_pk_redshift(
                 data_snr = p1d_table["forest_snr"][select]
                 mask = np.isnan(data_values) | np.isnan(data_snr)
                 if len(mask[mask]) != 0:
-                    userprint(
-                        "Warning: A nan value was detected in the following table:\n",
-                        data_values[mask],
-                    )
                     data_snr = data_snr[~mask]
                     data_values = data_values[~mask]
                 # Fit function to observed dispersion:

From 1f739d0c6dcc0ba107842066e29a24142c3d8fcd Mon Sep 17 00:00:00 2001
From: corentinravoux <corentin.ravoux.research@gmail.com>
Date: Tue, 25 Apr 2023 10:48:40 +0200
Subject: [PATCH 02/15] removing obsolete simple_snr weighting method

---
 bin/picca_Pk1D_postprocess.py  |  2 +-
 py/picca/pk1d/postproc_pk1d.py | 19 -------------------
 2 files changed, 1 insertion(+), 20 deletions(-)

diff --git a/bin/picca_Pk1D_postprocess.py b/bin/picca_Pk1D_postprocess.py
index 8cdcba72a..f5001837d 100755
--- a/bin/picca_Pk1D_postprocess.py
+++ b/bin/picca_Pk1D_postprocess.py
@@ -118,7 +118,7 @@ def main(cmdargs):
                         type=str,
                         default='no_weights',
                         help='SNR weighting scheme for the mean P1D computation,'
-                             'Possible options: no_weights, simple_snr, fit_snr')
+                             'Possible options: no_weights, fit_snr')
 
     parser.add_argument('--apply_z_weights',
                         action='store_true',
diff --git a/py/picca/pk1d/postproc_pk1d.py b/py/picca/pk1d/postproc_pk1d.py
index 636aa2a72..8d272a3b7 100644
--- a/py/picca/pk1d/postproc_pk1d.py
+++ b/py/picca/pk1d/postproc_pk1d.py
@@ -173,8 +173,6 @@ def compute_mean_pk1d(
     3 possible options:
         'fit_snr': Compute mean P1D with weights estimated by fitting dispersion vs SNR
         'no_weights': Compute mean P1D without weights
-        'simple_snr' (obsolete): Compute mean P1D with weights computed directly from SNR values
-                    (SNR as given in compute_Pk1D outputs)
 
     apply_z_weights: Bool
     If True, each chunk contributes to two nearest redshift bins with a linear weighting scheme.
@@ -779,23 +777,6 @@ def compute_average_pk_redshift(
                     ]
                     snrfit_array[ikbin, 4:] = standard_dev
 
-            elif weight_method == "simple_snr":
-                # - We keep this for record, we do not recommand to use it
-                # for forests with snr>snr_limit,
-                # the weight is fixed to (snr_limit - 1)**2 = 9
-                snr_limit = 4
-                forest_snr = p1d_table["forest_snr"][select]
-                # w, = np.where(forest_snr <= 1)
-                # if len(w)>0: raise RuntimeError('Cannot add weights with SNR<=1.')
-                if (forest_snr <= 1).sum() > 0:
-                    raise RuntimeError("Cannot add weights with SNR<=1.")
-                weights = (forest_snr - 1) ** 2
-                weights[forest_snr > snr_limit] = (snr_limit - 1) ** 2
-                mean = np.average(p1d_table[col][select], weights=weights)
-                # Need to rescale the weights to find the error:
-                #   weights_true = weights * (num_chunks - 1) / alpha
-                alpha = np.sum(weights * ((p1d_table[col][select] - mean) ** 2))
-                error = np.sqrt(alpha / (np.sum(weights) * (num_chunks - 1)))
 
             elif weight_method == "no_weights":
                 if apply_z_weights:

From 51e1e392bb3ef0b8a4c295b762ac329619338699 Mon Sep 17 00:00:00 2001
From: corentinravoux <corentin.ravoux.research@gmail.com>
Date: Tue, 25 Apr 2023 14:19:12 +0200
Subject: [PATCH 03/15] using output_snrfit onbly for outputing

---
 py/picca/pk1d/postproc_pk1d.py | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/py/picca/pk1d/postproc_pk1d.py b/py/picca/pk1d/postproc_pk1d.py
index 8d272a3b7..9832040d1 100644
--- a/py/picca/pk1d/postproc_pk1d.py
+++ b/py/picca/pk1d/postproc_pk1d.py
@@ -170,7 +170,7 @@ def compute_mean_pk1d(
     Edges of the wavenumber bins we want to use, either in (Angstrom)-1 or s/km
 
     weight_method: string
-    3 possible options:
+    2 possible options:
         'fit_snr': Compute mean P1D with weights estimated by fitting dispersion vs SNR
         'no_weights': Compute mean P1D without weights
 
@@ -307,7 +307,7 @@ def compute_mean_pk1d(
     metadata_table["N_chunks"] = n_chunks
 
     zbin_centers = np.around((zbin_edges[1:] + zbin_edges[:-1]) / 2, 5)
-    if output_snrfit is not None:
+    if weight_method == "fit_snr":
         snrfit_table = np.zeros(
             (nbins_z * nbins_k, 13)
         )  # 13 entries: z k a b + 9 SNR bins used for the fit
@@ -324,7 +324,6 @@ def compute_mean_pk1d(
         p1d_table_cols,
         weight_method,
         apply_z_weights,
-        output_snrfit,
         nomedians,
         nbins_z,
         zbin_centers,
@@ -345,7 +344,7 @@ def compute_mean_pk1d(
         output_mean,
         mean_p1d_table,
         p1d_table_cols,
-        output_snrfit,
+        weight_method,
         snrfit_table,
         nomedians,
     )
@@ -369,6 +368,7 @@ def compute_mean_pk1d(
             kbin_edges,
             k_index,
             nbins_k,
+            snrfit_table,
         )
         if number_worker == 1:
             output_cov = [func(p) for p in params_pool]
@@ -423,6 +423,7 @@ def compute_mean_pk1d(
             kbin_edges,
             k_index,
             nbins_k,
+            snrfit_table,
         )
         if number_worker == 1:
             output_cov = [func(p) for p in params_pool]
@@ -469,7 +470,7 @@ def fill_average_pk(
     output_mean,
     mean_p1d_table,
     p1d_table_cols,
-    output_snrfit,
+    weight_method,
     snrfit_table,
     nomedians,
 ):
@@ -492,8 +493,10 @@ def fill_average_pk(
     p1d_table_cols: List of str,
     Column names in the input table to be averaged.
 
-    output_snrfit: bool,
-    If not None, write the fit to the SNR to a file.
+    weight_method: string
+    2 possible options:
+        'fit_snr': Compute mean P1D with weights estimated by fitting dispersion vs SNR
+        'no_weights': Compute mean P1D without weights
 
     snrfit_table: numpy ndarray,
     Table containing SNR fit infos
@@ -531,7 +534,7 @@ def fill_average_pk(
             mean_p1d_table["max" + col][i_min:i_max] = max_array[icol]
             if not nomedians:
                 mean_p1d_table["median" + col][i_min:i_max] = median_array[icol]
-        if output_snrfit is not None:
+        if weight_method == "fit_snr":
             snrfit_table[i_min:i_max, :] = snrfit_array
 
 
@@ -540,7 +543,6 @@ def compute_average_pk_redshift(
     p1d_table_cols,
     weight_method,
     apply_z_weights,
-    output_snrfit,
     nomedians,
     nbins_z,
     zbin_centers,
@@ -572,9 +574,6 @@ def compute_average_pk_redshift(
     apply_z_weights: bool,
     If True, apply redshift weights.
 
-    output_snrfit: bool,
-    If not None, write the fit to the SNR to a file.
-
     nomedians: bool,
     If True, do not use median values in the fit to the SNR.
 
@@ -622,7 +621,7 @@ def compute_average_pk_redshift(
         median_array = []
     else:
         median_array = None
-    if output_snrfit is not None:
+    if weight_method == "fit_snr":
         snrfit_array = np.zeros((nbins_k, 13))
     else:
         snrfit_array = None
@@ -646,7 +645,7 @@ def compute_average_pk_redshift(
             max_array[icol][:] = np.nan
             if not nomedians:
                 median_array[icol][:] = np.nan
-        if output_snrfit is not None:
+        if weight_method == "fit_snr":
             snrfit_array[:] = np.nan
 
         return (
@@ -768,7 +767,7 @@ def compute_average_pk_redshift(
                     )
                 else:
                     error = np.sqrt(1.0 / np.sum(weights))
-                if output_snrfit is not None and col == "Pk":
+                if col == "Pk":
                     snrfit_array[ikbin, 0:4] = [
                         zbin_centers[izbin],
                         (kbin + kbin_edges[ikbin + 1]) / 2.0,
@@ -824,6 +823,7 @@ def compute_cov(
     kbin_edges,
     k_index,
     nbins_k,
+    snrfit_table,
     izbin,
     select_z,
     sub_forest_ids,

From cf145cfd1ea8630ee2173c1242353d6019023181 Mon Sep 17 00:00:00 2001
From: corentinravoux <corentin.ravoux.research@gmail.com>
Date: Tue, 25 Apr 2023 14:19:27 +0200
Subject: [PATCH 04/15] fix

---
 py/picca/pk1d/postproc_pk1d.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/py/picca/pk1d/postproc_pk1d.py b/py/picca/pk1d/postproc_pk1d.py
index 9832040d1..185e396cb 100644
--- a/py/picca/pk1d/postproc_pk1d.py
+++ b/py/picca/pk1d/postproc_pk1d.py
@@ -333,7 +333,7 @@ def compute_mean_pk1d(
         kbin_edges,
     )
     if number_worker == 1:
-        output_mean = [func(p) for p in params_pool]
+        output_mean = [func(p[0]) for p in params_pool]
     else:
         with Pool(number_worker) as pool:
             output_mean = pool.starmap(func, params_pool)
@@ -371,7 +371,7 @@ def compute_mean_pk1d(
             snrfit_table,
         )
         if number_worker == 1:
-            output_cov = [func(p) for p in params_pool]
+            output_cov = [func(*p) for p in params_pool]
         else:
             with Pool(number_worker) as pool:
                 output_cov = pool.starmap(func, params_pool)

From e86662ef5987e79101647fade6040ca441dce965 Mon Sep 17 00:00:00 2001
From: corentinravoux <corentin.ravoux.research@gmail.com>
Date: Tue, 25 Apr 2023 16:59:42 +0200
Subject: [PATCH 05/15] fix

---
 py/picca/pk1d/postproc_pk1d.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/py/picca/pk1d/postproc_pk1d.py b/py/picca/pk1d/postproc_pk1d.py
index 185e396cb..ab844adcb 100644
--- a/py/picca/pk1d/postproc_pk1d.py
+++ b/py/picca/pk1d/postproc_pk1d.py
@@ -426,7 +426,7 @@ def compute_mean_pk1d(
             snrfit_table,
         )
         if number_worker == 1:
-            output_cov = [func(p) for p in params_pool]
+            output_cov = [func(*p) for p in params_pool]
         else:
             with Pool(number_worker) as pool:
                 output_cov = pool.starmap(func, params_pool)

From 485c2c69d5a2fbfc73f40a873937582d3b61c7f2 Mon Sep 17 00:00:00 2001
From: corentinravoux <corentin.ravoux.research@gmail.com>
Date: Wed, 26 Apr 2023 11:20:30 +0200
Subject: [PATCH 06/15] Applying the weight method for the covariance matrix
 calculation

---
 py/picca/pk1d/postproc_pk1d.py | 87 ++++++++++++++++++++++++----------
 1 file changed, 63 insertions(+), 24 deletions(-)

diff --git a/py/picca/pk1d/postproc_pk1d.py b/py/picca/pk1d/postproc_pk1d.py
index ab844adcb..df2f3ec69 100644
--- a/py/picca/pk1d/postproc_pk1d.py
+++ b/py/picca/pk1d/postproc_pk1d.py
@@ -257,14 +257,14 @@ def compute_mean_pk1d(
             compute_covariance, compute_bootstrap = False, False
             cov_table = None
 
-        elif weight_method != "no_weights":
-            userprint(
-                """Covariance calculations are not compatible with SNR weighting method yet.
-                Skipping calculation"""
-            )
+        # elif weight_method != "no_weights":
+        #     userprint(
+        #         """Covariance calculations are not compatible with SNR weighting method yet.
+        #         Skipping calculation"""
+        #     )
 
-            compute_covariance, compute_bootstrap = False, False
-            cov_table = None
+        #     compute_covariance, compute_bootstrap = False, False
+        #     cov_table = None
 
         elif apply_z_weights:
             userprint(
@@ -368,6 +368,7 @@ def compute_mean_pk1d(
             kbin_edges,
             k_index,
             nbins_k,
+            weight_method,
             snrfit_table,
         )
         if number_worker == 1:
@@ -423,6 +424,7 @@ def compute_mean_pk1d(
             kbin_edges,
             k_index,
             nbins_k,
+            weight_method,
             snrfit_table,
         )
         if number_worker == 1:
@@ -768,6 +770,10 @@ def compute_average_pk_redshift(
                 else:
                     error = np.sqrt(1.0 / np.sum(weights))
                 if col == "Pk":
+                    if len(mask[mask]) != 0:
+                        standard_dev = np.concatenate(
+                            [standard_dev, np.full(len(mask[mask]), np.nan)]
+                        )
                     snrfit_array[ikbin, 0:4] = [
                         zbin_centers[izbin],
                         (kbin + kbin_edges[ikbin + 1]) / 2.0,
@@ -776,7 +782,6 @@ def compute_average_pk_redshift(
                     ]
                     snrfit_array[ikbin, 4:] = standard_dev
 
-
             elif weight_method == "no_weights":
                 if apply_z_weights:
                     mean = np.average(p1d_table[col][select], weights=redshift_weights)
@@ -823,6 +828,7 @@ def compute_cov(
     kbin_edges,
     k_index,
     nbins_k,
+    weight_method,
     snrfit_table,
     izbin,
     select_z,
@@ -850,6 +856,12 @@ def compute_cov(
     nbins_k (int):
     Number of k bins.
 
+    weight_method: str,
+    Method to weight the data.
+
+    snrfit_table: numpy ndarray,
+    Table containing SNR fit infos
+
     izbin (int):
     Current redshift bin being considered.
 
@@ -896,26 +908,53 @@ def compute_cov(
             k1_array,
             k2_array,
         )
-
     for sub_forest_id in sub_forest_ids:  # First loop 1) id sub-forest bins
         select_id = select_z & (p1d_table["sub_forest_id"] == sub_forest_id)
         selected_pk = p1d_table["Pk"][select_id]
         selected_ikbin = k_index[select_id]
 
-        for ipk, _ in enumerate(selected_pk):  # First loop 2) selected pk
-            ikbin = selected_ikbin[ipk]
-            if ikbin != -1:
-                for ipk2 in range(ipk, len(selected_pk)):  # First loop 3) selected pk
-                    ikbin2 = selected_ikbin[ipk2]
-
-                    if ikbin2 != -1:
-                        # index of the (ikbin,ikbin2) coefficient on the top of the matrix
-                        index = (nbins_k * ikbin) + ikbin2
-                        covariance_array[index] = (
-                            covariance_array[index]
-                            + selected_pk[ipk] * selected_pk[ipk2]
-                        )
-                        n_array[index] = n_array[index] + 1
+        if weight_method == "fit_snr":
+            selected_snr = p1d_table["forest_snr"][select_id]
+            snrfit_z = snrfit_table[izbin * nbins_k : (izbin + 1) * nbins_k, :]
+            selected_variance_estimated = fitfunc_variance_pk1d(
+                selected_snr, snrfit_z[selected_ikbin, 2], snrfit_z[selected_ikbin, 3]
+            )
+
+            for ipk, _ in enumerate(selected_pk):  # First loop 2) selected pk
+                ikbin = selected_ikbin[ipk]
+                if ikbin != -1:
+                    for ipk2 in range(
+                        ipk, len(selected_pk)
+                    ):  # First loop 3) selected pk
+                        ikbin2 = selected_ikbin[ipk2]
+                        if ikbin2 != -1:
+                            # index of the (ikbin,ikbin2) coefficient on the top of the matrix
+                            index = (nbins_k * ikbin) + ikbin2
+                            weight = 1 / (
+                                selected_variance_estimated[ipk]
+                                * selected_variance_estimated[ipk2]
+                            )
+                            covariance_array[index] = (
+                                covariance_array[index]
+                                + selected_pk[ipk] * selected_pk[ipk2] * weight
+                            )
+                            n_array[index] = n_array[index] + weight
+        else:
+            for ipk, _ in enumerate(selected_pk):  # First loop 2) selected pk
+                ikbin = selected_ikbin[ipk]
+                if ikbin != -1:
+                    for ipk2 in range(
+                        ipk, len(selected_pk)
+                    ):  # First loop 3) selected pk
+                        ikbin2 = selected_ikbin[ipk2]
+                        if ikbin2 != -1:
+                            # index of the (ikbin,ikbin2) coefficient on the top of the matrix
+                            index = (nbins_k * ikbin) + ikbin2
+                            covariance_array[index] = (
+                                covariance_array[index]
+                                + selected_pk[ipk] * selected_pk[ipk2]
+                            )
+                            n_array[index] = n_array[index] + 1
 
     for ikbin in range(nbins_k):  # Second loop 1) k bins
         mean_ikbin = mean_p1d_table["meanPk"][(nbins_k * izbin) + ikbin]
@@ -927,7 +966,7 @@ def compute_cov(
             index = (nbins_k * ikbin) + ikbin2
             covariance_array[index] = (
                 (covariance_array[index] / n_array[index]) - mean_ikbin * mean_ikbin2
-            ) / n_array[index]
+            ) / (2 * n_array[index])
 
             zbin_array[index] = zbin_centers[izbin]
             index_zbin_array[index] = izbin

From 6b5a8b214fa6414b18a61957bcf41e51c6fee9f4 Mon Sep 17 00:00:00 2001
From: corentinravoux <corentin.ravoux.research@gmail.com>
Date: Thu, 27 Apr 2023 14:18:36 +0200
Subject: [PATCH 07/15] good solution for no_weights calculation -- tested

---
 py/picca/pk1d/postproc_pk1d.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/py/picca/pk1d/postproc_pk1d.py b/py/picca/pk1d/postproc_pk1d.py
index df2f3ec69..4ccbef027 100644
--- a/py/picca/pk1d/postproc_pk1d.py
+++ b/py/picca/pk1d/postproc_pk1d.py
@@ -965,9 +965,9 @@ def compute_cov(
             # index of the (ikbin,ikbin2) coefficient on the top of the matrix
             index = (nbins_k * ikbin) + ikbin2
             covariance_array[index] = (
-                (covariance_array[index] / n_array[index]) - mean_ikbin * mean_ikbin2
-            ) / (2 * n_array[index])
-
+                covariance_array[index] / n_array[index]
+            ) - mean_ikbin * mean_ikbin2
+            covariance_array[index] = covariance_array[index] / n_array[index]
             zbin_array[index] = zbin_centers[izbin]
             index_zbin_array[index] = izbin
             k1_array[index] = kbin_centers[ikbin]

From a5067e883a8f7fff0e8f03d459bd9339002dadeb Mon Sep 17 00:00:00 2001
From: corentinravoux <corentin.ravoux.research@gmail.com>
Date: Thu, 27 Apr 2023 14:18:47 +0200
Subject: [PATCH 08/15] aestetic changes

---
 py/picca/pk1d/postproc_pk1d.py | 51 ++++++++++++++++++----------------
 1 file changed, 27 insertions(+), 24 deletions(-)

diff --git a/py/picca/pk1d/postproc_pk1d.py b/py/picca/pk1d/postproc_pk1d.py
index 4ccbef027..bde05dded 100644
--- a/py/picca/pk1d/postproc_pk1d.py
+++ b/py/picca/pk1d/postproc_pk1d.py
@@ -352,7 +352,8 @@ def compute_mean_pk1d(
     if compute_covariance:
         userprint("Computing covariance matrix")
         params_pool = []
-        for izbin in range(nbins_z):  # Main loop 1) z bins
+        # Main loop 1) z bins
+        for izbin in range(nbins_z):
             select_z = (p1d_table["forest_z"] < zbin_edges[izbin + 1]) & (
                 p1d_table["forest_z"] > zbin_edges[izbin]
             )
@@ -376,8 +377,8 @@ def compute_mean_pk1d(
         else:
             with Pool(number_worker) as pool:
                 output_cov = pool.starmap(func, params_pool)
-
-        for izbin in range(nbins_z):  # Main loop 1) z bins
+        # Main loop 1) z bins
+        for izbin in range(nbins_z):
             (
                 zbin_array,
                 index_zbin_array,
@@ -399,7 +400,8 @@ def compute_mean_pk1d(
         userprint("Computing covariance matrix with bootstrap method")
 
         params_pool = []
-        for izbin in range(nbins_z):  # Main loop 1) z bins - can be paralelized
+        # Main loop 1) z bins
+        for izbin in range(nbins_z):
             select_z = (p1d_table["forest_z"] < zbin_edges[izbin + 1]) & (
                 p1d_table["forest_z"] > zbin_edges[izbin]
             )
@@ -411,8 +413,9 @@ def compute_mean_pk1d(
                 ).astype(int)
             else:
                 bootid = np.full(number_bootstrap, None)
+
+            # Main loop 2) number of bootstrap samples
             for iboot in range(number_bootstrap):
-                # Main loop 2) number of bootstrap samples - can be paralelized
                 params_pool.append([izbin, select_z, sub_forest_ids[bootid[iboot]]])
 
         func = partial(
@@ -432,12 +435,11 @@ def compute_mean_pk1d(
         else:
             with Pool(number_worker) as pool:
                 output_cov = pool.starmap(func, params_pool)
-
-        for izbin in range(nbins_z):  # Main loop 1) z bins - can be paralelized
+        # Main loop 1) z bins
+        for izbin in range(nbins_z):
             boot_cov = []
-            for iboot in range(
-                number_bootstrap
-            ):  # Main loop 2) number of bootstrap samples - can be paralelized
+            # Main loop 2) number of bootstrap samples
+            for iboot in range(number_bootstrap):
                 (
                     zbin_array,
                     index_zbin_array,
@@ -887,7 +889,7 @@ def compute_cov(
     """
     zbin_array = np.zeros(nbins_k * nbins_k)
     index_zbin_array = np.zeros(nbins_k * nbins_k, dtype=int)
-    n_array = np.zeros(nbins_k * nbins_k, dtype=int)
+    n_array = np.zeros(nbins_k * nbins_k)
     covariance_array = np.zeros(nbins_k * nbins_k)
     k1_array = np.zeros(nbins_k * nbins_k)
     k2_array = np.zeros(nbins_k * nbins_k)
@@ -908,7 +910,8 @@ def compute_cov(
             k1_array,
             k2_array,
         )
-    for sub_forest_id in sub_forest_ids:  # First loop 1) id sub-forest bins
+    # First loop 1) id sub-forest bins
+    for sub_forest_id in sub_forest_ids:
         select_id = select_z & (p1d_table["sub_forest_id"] == sub_forest_id)
         selected_pk = p1d_table["Pk"][select_id]
         selected_ikbin = k_index[select_id]
@@ -919,13 +922,12 @@ def compute_cov(
             selected_variance_estimated = fitfunc_variance_pk1d(
                 selected_snr, snrfit_z[selected_ikbin, 2], snrfit_z[selected_ikbin, 3]
             )
-
-            for ipk, _ in enumerate(selected_pk):  # First loop 2) selected pk
+            # First loop 2) selected pk
+            for ipk, _ in enumerate(selected_pk):
                 ikbin = selected_ikbin[ipk]
                 if ikbin != -1:
-                    for ipk2 in range(
-                        ipk, len(selected_pk)
-                    ):  # First loop 3) selected pk
+                    # First loop 3) selected pk
+                    for ipk2 in range(ipk, len(selected_pk)):
                         ikbin2 = selected_ikbin[ipk2]
                         if ikbin2 != -1:
                             # index of the (ikbin,ikbin2) coefficient on the top of the matrix
@@ -940,12 +942,12 @@ def compute_cov(
                             )
                             n_array[index] = n_array[index] + weight
         else:
-            for ipk, _ in enumerate(selected_pk):  # First loop 2) selected pk
+            # First loop 2) selected pk
+            for ipk, _ in enumerate(selected_pk):
                 ikbin = selected_ikbin[ipk]
                 if ikbin != -1:
-                    for ipk2 in range(
-                        ipk, len(selected_pk)
-                    ):  # First loop 3) selected pk
+                    # First loop 3) selected pk
+                    for ipk2 in range(ipk, len(selected_pk)):
                         ikbin2 = selected_ikbin[ipk2]
                         if ikbin2 != -1:
                             # index of the (ikbin,ikbin2) coefficient on the top of the matrix
@@ -955,11 +957,12 @@ def compute_cov(
                                 + selected_pk[ipk] * selected_pk[ipk2]
                             )
                             n_array[index] = n_array[index] + 1
-
-    for ikbin in range(nbins_k):  # Second loop 1) k bins
+    # Second loop 1) k bins
+    for ikbin in range(nbins_k):
         mean_ikbin = mean_p1d_table["meanPk"][(nbins_k * izbin) + ikbin]
 
-        for ikbin2 in range(ikbin, nbins_k):  # Second loop 2) k bins
+        # Second loop 2) k bins
+        for ikbin2 in range(ikbin, nbins_k):
             mean_ikbin2 = mean_p1d_table["meanPk"][(nbins_k * izbin) + ikbin2]
 
             # index of the (ikbin,ikbin2) coefficient on the top of the matrix

From 793df27c5a61e4e523797aa7cc63bc08a1b4e07b Mon Sep 17 00:00:00 2001
From: corentinravoux <corentin.ravoux.research@gmail.com>
Date: Thu, 27 Apr 2023 16:00:27 +0200
Subject: [PATCH 09/15] Improvement of weighting for fit_snr. Covariance gives
 proper diagonal for no_weights but no fit_snr

---
 py/picca/pk1d/postproc_pk1d.py | 32 +++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/py/picca/pk1d/postproc_pk1d.py b/py/picca/pk1d/postproc_pk1d.py
index bde05dded..b2097d318 100644
--- a/py/picca/pk1d/postproc_pk1d.py
+++ b/py/picca/pk1d/postproc_pk1d.py
@@ -893,6 +893,8 @@ def compute_cov(
     covariance_array = np.zeros(nbins_k * nbins_k)
     k1_array = np.zeros(nbins_k * nbins_k)
     k2_array = np.zeros(nbins_k * nbins_k)
+    if weight_method == "fit_snr":
+        weight_array = np.zeros(nbins_k * nbins_k)
 
     kbin_centers = (kbin_edges[1:] + kbin_edges[:-1]) / 2
     if n_chunks[izbin] == 0:  # Fill rows with NaNs
@@ -932,15 +934,17 @@ def compute_cov(
                         if ikbin2 != -1:
                             # index of the (ikbin,ikbin2) coefficient on the top of the matrix
                             index = (nbins_k * ikbin) + ikbin2
-                            weight = 1 / (
-                                selected_variance_estimated[ipk]
-                                * selected_variance_estimated[ipk2]
+                            weight = 1 / selected_variance_estimated[ipk]
+                            weight2 = 1 / selected_variance_estimated[ipk2]
+                            covariance_array[index] = covariance_array[
+                                index
+                            ] + selected_pk[ipk] * selected_pk[ipk2] * np.sqrt(
+                                weight * weight2
                             )
-                            covariance_array[index] = (
-                                covariance_array[index]
-                                + selected_pk[ipk] * selected_pk[ipk2] * weight
+                            n_array[index] = n_array[index] + 1
+                            weight_array[index] = weight_array[index] + np.sqrt(
+                                weight * weight2
                             )
-                            n_array[index] = n_array[index] + weight
         else:
             # First loop 2) selected pk
             for ipk, _ in enumerate(selected_pk):
@@ -967,10 +971,16 @@ def compute_cov(
 
             # index of the (ikbin,ikbin2) coefficient on the top of the matrix
             index = (nbins_k * ikbin) + ikbin2
-            covariance_array[index] = (
-                covariance_array[index] / n_array[index]
-            ) - mean_ikbin * mean_ikbin2
-            covariance_array[index] = covariance_array[index] / n_array[index]
+            if weight_method == "fit_snr":
+                covariance_array[index] = (
+                    covariance_array[index] / weight_array[index]
+                ) - mean_ikbin * mean_ikbin2
+                covariance_array[index] = covariance_array[index] / weight_array[index]
+            else:
+                covariance_array[index] = (
+                    covariance_array[index] / n_array[index]
+                ) - mean_ikbin * mean_ikbin2
+                covariance_array[index] = covariance_array[index] / n_array[index]
             zbin_array[index] = zbin_centers[izbin]
             index_zbin_array[index] = izbin
             k1_array[index] = kbin_centers[ikbin]

From ce28d301353f245713825a52a2d9c3a1f22d29d4 Mon Sep 17 00:00:00 2001
From: corentinravoux <corentin.ravoux.research@gmail.com>
Date: Wed, 3 May 2023 09:14:47 +0200
Subject: [PATCH 10/15] Renormalization of the covariance matrix

---
 py/picca/pk1d/postproc_pk1d.py | 36 ++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/py/picca/pk1d/postproc_pk1d.py b/py/picca/pk1d/postproc_pk1d.py
index b2097d318..3eb7cc231 100644
--- a/py/picca/pk1d/postproc_pk1d.py
+++ b/py/picca/pk1d/postproc_pk1d.py
@@ -997,6 +997,42 @@ def compute_cov(
                 k1_array[index_2] = kbin_centers[ikbin]
                 k2_array[index_2] = kbin_centers[ikbin2]
 
+    # Renormalization of the covariance matrix for the fit_snr method.
+    if weight_method == "fit_snr":
+        # Second loop 1) k bins
+        for ikbin in range(nbins_k):
+            std_ikbin = mean_p1d_table["errorPk"][(nbins_k * izbin) + ikbin]
+            # Second loop 2) k bins
+            for ikbin2 in range(ikbin, nbins_k):
+                std_ikbin2 = mean_p1d_table["errorPk"][(nbins_k * izbin) + ikbin2]
+                covariance_array[(nbins_k * ikbin) + ikbin2] = (
+                    std_ikbin
+                    * std_ikbin2
+                    * covariance_array[(nbins_k * ikbin) + ikbin2]
+                    / np.sqrt(
+                        covariance_array[(nbins_k * ikbin) + ikbin]
+                        * covariance_array[(nbins_k * ikbin2) + ikbin2]
+                    )
+                )
+                if ikbin2 != ikbin:
+                    covariance_array[(nbins_k * ikbin2) + ikbin] = (
+                        std_ikbin
+                        * std_ikbin2
+                        * covariance_array[(nbins_k * ikbin2) + ikbin]
+                        / np.sqrt(
+                            covariance_array[(nbins_k * ikbin) + ikbin]
+                            * covariance_array[(nbins_k * ikbin2) + ikbin2]
+                        )
+                    )
+
+    # # Diagonal renormalization of the covariance matrix for the fit_snr method.
+    # if weight_method == "fit_snr":
+    #     # Second loop 1) k bins
+    #     for ikbin in range(nbins_k):
+    #         std_ikbin = mean_p1d_table["errorPk"][(nbins_k * izbin) + ikbin]
+    #         # Second loop 2) k bins
+    #         covariance_array[(nbins_k * ikbin) + ikbin] = std_ikbin**2
+
     return zbin_array, index_zbin_array, n_array, covariance_array, k1_array, k2_array
 
 

From 643542b4c81acd787ceefefca4f963d410b70f08 Mon Sep 17 00:00:00 2001
From: corentinravoux <corentin.ravoux.research@gmail.com>
Date: Wed, 3 May 2023 09:23:57 +0200
Subject: [PATCH 11/15] removing unecessary comments

---
 py/picca/pk1d/postproc_pk1d.py | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/py/picca/pk1d/postproc_pk1d.py b/py/picca/pk1d/postproc_pk1d.py
index 3eb7cc231..c3c442fe3 100644
--- a/py/picca/pk1d/postproc_pk1d.py
+++ b/py/picca/pk1d/postproc_pk1d.py
@@ -257,15 +257,6 @@ def compute_mean_pk1d(
             compute_covariance, compute_bootstrap = False, False
             cov_table = None
 
-        # elif weight_method != "no_weights":
-        #     userprint(
-        #         """Covariance calculations are not compatible with SNR weighting method yet.
-        #         Skipping calculation"""
-        #     )
-
-        #     compute_covariance, compute_bootstrap = False, False
-        #     cov_table = None
-
         elif apply_z_weights:
             userprint(
                 """Covariance calculations are not compatible redshift weighting yes.
@@ -1025,14 +1016,6 @@ def compute_cov(
                         )
                     )
 
-    # # Diagonal renormalization of the covariance matrix for the fit_snr method.
-    # if weight_method == "fit_snr":
-    #     # Second loop 1) k bins
-    #     for ikbin in range(nbins_k):
-    #         std_ikbin = mean_p1d_table["errorPk"][(nbins_k * izbin) + ikbin]
-    #         # Second loop 2) k bins
-    #         covariance_array[(nbins_k * ikbin) + ikbin] = std_ikbin**2
-
     return zbin_array, index_zbin_array, n_array, covariance_array, k1_array, k2_array
 
 

From fbda063af776b07502c52cc569c0f19f8e5aa2e5 Mon Sep 17 00:00:00 2001
From: corentinravoux <corentin.ravoux.research@gmail.com>
Date: Wed, 3 May 2023 09:24:07 +0200
Subject: [PATCH 12/15] pylint error

---
 py/picca/pk1d/postproc_pk1d.py | 56 ++++++++++++++++------------------
 1 file changed, 26 insertions(+), 30 deletions(-)

diff --git a/py/picca/pk1d/postproc_pk1d.py b/py/picca/pk1d/postproc_pk1d.py
index c3c442fe3..7149dd261 100644
--- a/py/picca/pk1d/postproc_pk1d.py
+++ b/py/picca/pk1d/postproc_pk1d.py
@@ -918,40 +918,36 @@ def compute_cov(
             # First loop 2) selected pk
             for ipk, _ in enumerate(selected_pk):
                 ikbin = selected_ikbin[ipk]
-                if ikbin != -1:
-                    # First loop 3) selected pk
-                    for ipk2 in range(ipk, len(selected_pk)):
-                        ikbin2 = selected_ikbin[ipk2]
-                        if ikbin2 != -1:
-                            # index of the (ikbin,ikbin2) coefficient on the top of the matrix
-                            index = (nbins_k * ikbin) + ikbin2
-                            weight = 1 / selected_variance_estimated[ipk]
-                            weight2 = 1 / selected_variance_estimated[ipk2]
-                            covariance_array[index] = covariance_array[
-                                index
-                            ] + selected_pk[ipk] * selected_pk[ipk2] * np.sqrt(
-                                weight * weight2
-                            )
-                            n_array[index] = n_array[index] + 1
-                            weight_array[index] = weight_array[index] + np.sqrt(
-                                weight * weight2
-                            )
+                # First loop 3) selected pk
+                for ipk2 in range(ipk, len(selected_pk)):
+                    ikbin2 = selected_ikbin[ipk2]
+                    if (ikbin2 != -1) & (ikbin != -1):
+                        # index of the (ikbin,ikbin2) coefficient on the top of the matrix
+                        index = (nbins_k * ikbin) + ikbin2
+                        weight = 1 / selected_variance_estimated[ipk]
+                        weight2 = 1 / selected_variance_estimated[ipk2]
+                        covariance_array[index] = covariance_array[index] + selected_pk[
+                            ipk
+                        ] * selected_pk[ipk2] * np.sqrt(weight * weight2)
+                        n_array[index] = n_array[index] + 1
+                        weight_array[index] = weight_array[index] + np.sqrt(
+                            weight * weight2
+                        )
         else:
             # First loop 2) selected pk
             for ipk, _ in enumerate(selected_pk):
                 ikbin = selected_ikbin[ipk]
-                if ikbin != -1:
-                    # First loop 3) selected pk
-                    for ipk2 in range(ipk, len(selected_pk)):
-                        ikbin2 = selected_ikbin[ipk2]
-                        if ikbin2 != -1:
-                            # index of the (ikbin,ikbin2) coefficient on the top of the matrix
-                            index = (nbins_k * ikbin) + ikbin2
-                            covariance_array[index] = (
-                                covariance_array[index]
-                                + selected_pk[ipk] * selected_pk[ipk2]
-                            )
-                            n_array[index] = n_array[index] + 1
+                # First loop 3) selected pk
+                for ipk2 in range(ipk, len(selected_pk)):
+                    ikbin2 = selected_ikbin[ipk2]
+                    if (ikbin2 != -1) & (ikbin != -1):
+                        # index of the (ikbin,ikbin2) coefficient on the top of the matrix
+                        index = (nbins_k * ikbin) + ikbin2
+                        covariance_array[index] = (
+                            covariance_array[index]
+                            + selected_pk[ipk] * selected_pk[ipk2]
+                        )
+                        n_array[index] = n_array[index] + 1
     # Second loop 1) k bins
     for ikbin in range(nbins_k):
         mean_ikbin = mean_p1d_table["meanPk"][(nbins_k * izbin) + ikbin]

From 0ee735795f3340ca223a64bdeb017fbe354310f5 Mon Sep 17 00:00:00 2001
From: corentinravoux <corentin.ravoux.research@gmail.com>
Date: Thu, 25 May 2023 10:19:00 +0200
Subject: [PATCH 13/15] Latest corrections for fit_snr covariance calculation

---
 py/picca/pk1d/postproc_pk1d.py | 41 ++++++++++++++--------------------
 1 file changed, 17 insertions(+), 24 deletions(-)

diff --git a/py/picca/pk1d/postproc_pk1d.py b/py/picca/pk1d/postproc_pk1d.py
index 7149dd261..dce506be1 100644
--- a/py/picca/pk1d/postproc_pk1d.py
+++ b/py/picca/pk1d/postproc_pk1d.py
@@ -910,6 +910,11 @@ def compute_cov(
         selected_ikbin = k_index[select_id]
 
         if weight_method == "fit_snr":
+            # Definition of weighted unbiased sample covariance taken from 
+            # "George R. Price, Ann. Hum. Genet., Lond, pp485-490, 
+            # Extension of covariance selection mathematics, 1972"
+            # adapted with weights w_i = np.sqrt(w_ipk * w_ipk2) for covariance
+            # to obtain the right definition of variance on the diagonal
             selected_snr = p1d_table["forest_snr"][select_id]
             snrfit_z = snrfit_table[izbin * nbins_k : (izbin + 1) * nbins_k, :]
             selected_variance_estimated = fitfunc_variance_pk1d(
@@ -962,12 +967,15 @@ def compute_cov(
                 covariance_array[index] = (
                     covariance_array[index] / weight_array[index]
                 ) - mean_ikbin * mean_ikbin2
-                covariance_array[index] = covariance_array[index] / weight_array[index]
             else:
                 covariance_array[index] = (
                     covariance_array[index] / n_array[index]
                 ) - mean_ikbin * mean_ikbin2
-                covariance_array[index] = covariance_array[index] / n_array[index]
+            
+            # Same normalization for fit_snr, equivalent to dividing to
+            # weight_array[index] if weights are normalized to give 
+            # sum(weight_array[index]) = n_array[index]
+            covariance_array[index] = covariance_array[index] / n_array[index]
             zbin_array[index] = zbin_centers[izbin]
             index_zbin_array[index] = izbin
             k1_array[index] = kbin_centers[ikbin]
@@ -984,33 +992,18 @@ def compute_cov(
                 k1_array[index_2] = kbin_centers[ikbin]
                 k2_array[index_2] = kbin_centers[ikbin2]
 
-    # Renormalization of the covariance matrix for the fit_snr method.
+
+    # For fit_snr method, due to the SNR fitting scheme used for weighting,
+    # the diagonal of the weigthed sample covariance matrix is not equal 
+    # to the error in mean P1D. This is tested on Ohio mocks.
+    # We choose to renormalize only the diagonal of the covariance matrix.
     if weight_method == "fit_snr":
         # Second loop 1) k bins
         for ikbin in range(nbins_k):
             std_ikbin = mean_p1d_table["errorPk"][(nbins_k * izbin) + ikbin]
             # Second loop 2) k bins
-            for ikbin2 in range(ikbin, nbins_k):
-                std_ikbin2 = mean_p1d_table["errorPk"][(nbins_k * izbin) + ikbin2]
-                covariance_array[(nbins_k * ikbin) + ikbin2] = (
-                    std_ikbin
-                    * std_ikbin2
-                    * covariance_array[(nbins_k * ikbin) + ikbin2]
-                    / np.sqrt(
-                        covariance_array[(nbins_k * ikbin) + ikbin]
-                        * covariance_array[(nbins_k * ikbin2) + ikbin2]
-                    )
-                )
-                if ikbin2 != ikbin:
-                    covariance_array[(nbins_k * ikbin2) + ikbin] = (
-                        std_ikbin
-                        * std_ikbin2
-                        * covariance_array[(nbins_k * ikbin2) + ikbin]
-                        / np.sqrt(
-                            covariance_array[(nbins_k * ikbin) + ikbin]
-                            * covariance_array[(nbins_k * ikbin2) + ikbin2]
-                        )
-                    )
+            covariance_array[(nbins_k * ikbin) + ikbin] = std_ikbin**2
+
 
     return zbin_array, index_zbin_array, n_array, covariance_array, k1_array, k2_array
 

From 16559a491f4ae4b20e5e89cba858c85404e4f4c3 Mon Sep 17 00:00:00 2001
From: corentinravoux <corentin.ravoux.research@gmail.com>
Date: Thu, 25 May 2023 10:27:32 +0200
Subject: [PATCH 14/15] pylint error

---
 py/picca/pk1d/postproc_pk1d.py | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/py/picca/pk1d/postproc_pk1d.py b/py/picca/pk1d/postproc_pk1d.py
index dce506be1..0e56009ad 100644
--- a/py/picca/pk1d/postproc_pk1d.py
+++ b/py/picca/pk1d/postproc_pk1d.py
@@ -910,10 +910,10 @@ def compute_cov(
         selected_ikbin = k_index[select_id]
 
         if weight_method == "fit_snr":
-            # Definition of weighted unbiased sample covariance taken from 
-            # "George R. Price, Ann. Hum. Genet., Lond, pp485-490, 
+            # Definition of weighted unbiased sample covariance taken from
+            # "George R. Price, Ann. Hum. Genet., Lond, pp485-490,
             # Extension of covariance selection mathematics, 1972"
-            # adapted with weights w_i = np.sqrt(w_ipk * w_ipk2) for covariance
+            # adapted with weights w_i = np.sqrt(w_ipk * w_ipk2) for covariance
             # to obtain the right definition of variance on the diagonal
             selected_snr = p1d_table["forest_snr"][select_id]
             snrfit_z = snrfit_table[izbin * nbins_k : (izbin + 1) * nbins_k, :]
@@ -971,10 +971,10 @@ def compute_cov(
                 covariance_array[index] = (
                     covariance_array[index] / n_array[index]
                 ) - mean_ikbin * mean_ikbin2
-            
-            # Same normalization for fit_snr, equivalent to dividing to
-            # weight_array[index] if weights are normalized to give 
-            # sum(weight_array[index]) = n_array[index]
+
+            # Same normalization for fit_snr, equivalent to dividing to
+            # weight_array[index] if weights are normalized to give
+            # sum(weight_array[index]) = n_array[index]
             covariance_array[index] = covariance_array[index] / n_array[index]
             zbin_array[index] = zbin_centers[izbin]
             index_zbin_array[index] = izbin
@@ -992,11 +992,10 @@ def compute_cov(
                 k1_array[index_2] = kbin_centers[ikbin]
                 k2_array[index_2] = kbin_centers[ikbin2]
 
-
     # For fit_snr method, due to the SNR fitting scheme used for weighting,
-    # the diagonal of the weigthed sample covariance matrix is not equal 
-    # to the error in mean P1D. This is tested on Ohio mocks.
-    # We choose to renormalize only the diagonal of the covariance matrix.
+    # the diagonal of the weigthed sample covariance matrix is not equal
+    # to the error in mean P1D. This is tested on Ohio mocks.
+    # We choose to renormalize only the diagonal of the covariance matrix.
     if weight_method == "fit_snr":
         # Second loop 1) k bins
         for ikbin in range(nbins_k):
@@ -1004,7 +1003,6 @@ def compute_cov(
             # Second loop 2) k bins
             covariance_array[(nbins_k * ikbin) + ikbin] = std_ikbin**2
 
-
     return zbin_array, index_zbin_array, n_array, covariance_array, k1_array, k2_array
 
 

From 9c7dd61590dc8b8052adc83634a92ccab79b0e25 Mon Sep 17 00:00:00 2001
From: corentinravoux <corentin.ravoux.research@gmail.com>
Date: Thu, 1 Jun 2023 12:27:16 +0200
Subject: [PATCH 15/15] Following comments, renormalization of all the matrix

---
 py/picca/pk1d/postproc_pk1d.py | 34 +++++++++++++++++++++++++++++-----
 1 file changed, 29 insertions(+), 5 deletions(-)

diff --git a/py/picca/pk1d/postproc_pk1d.py b/py/picca/pk1d/postproc_pk1d.py
index 501cda177..840efe3c8 100644
--- a/py/picca/pk1d/postproc_pk1d.py
+++ b/py/picca/pk1d/postproc_pk1d.py
@@ -727,7 +727,7 @@ def compute_average_pk_redshift(
 
                 data_values = p1d_table[col][select]
                 data_snr = p1d_table["forest_snr"][select]
-                p1d_values = p1d_table['Pk'][select]
+                p1d_values = p1d_table["Pk"][select]
                 mask = np.isnan(data_values) | np.isnan(data_snr)
                 if len(mask[mask]) != 0:
                     data_snr = data_snr[~mask]
@@ -998,13 +998,37 @@ def compute_cov(
     # For fit_snr method, due to the SNR fitting scheme used for weighting,
     # the diagonal of the weigthed sample covariance matrix is not equal
     # to the error in mean P1D. This is tested on Ohio mocks.
-    # We choose to renormalize only the diagonal of the covariance matrix.
+    # We choose to renormalize the covariance matrix.
     if weight_method == "fit_snr":
-        # Second loop 1) k bins
+        # Third loop 1) k bins
         for ikbin in range(nbins_k):
             std_ikbin = mean_p1d_table["errorPk"][(nbins_k * izbin) + ikbin]
-            # Second loop 2) k bins
-            covariance_array[(nbins_k * ikbin) + ikbin] = std_ikbin**2
+            # Third loop 2) k bins
+            for ikbin2 in range(ikbin, nbins_k):
+                std_ikbin2 = mean_p1d_table["errorPk"][(nbins_k * izbin) + ikbin2]
+
+                # index of the (ikbin,ikbin2) coefficient on the top of the matrix
+                index = (nbins_k * ikbin) + ikbin2
+                covariance_array[index] = (
+                    covariance_array[index]
+                    * (std_ikbin * std_ikbin2)
+                    / np.sqrt(
+                        covariance_array[(nbins_k * ikbin) + ikbin]
+                        * covariance_array[(nbins_k * ikbin2) + ikbin2]
+                    )
+                )
+
+                if ikbin2 != ikbin:
+                    # index of the (ikbin,ikbin2) coefficient on the bottom of the matrix
+                    index_2 = (nbins_k * ikbin2) + ikbin
+                    covariance_array[index_2] = (
+                        covariance_array[index_2]
+                        * (std_ikbin * std_ikbin2)
+                        / np.sqrt(
+                            covariance_array[(nbins_k * ikbin) + ikbin]
+                            * covariance_array[(nbins_k * ikbin2) + ikbin2]
+                        )
+                    )
 
     return zbin_array, index_zbin_array, n_array, covariance_array, k1_array, k2_array