From f2b335eead655e3a219bff2465502f0b546710bd Mon Sep 17 00:00:00 2001 From: corentinravoux Date: Tue, 25 Apr 2023 10:48:13 +0200 Subject: [PATCH 01/15] removing unnecessary warning --- py/picca/pk1d/postproc_pk1d.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/py/picca/pk1d/postproc_pk1d.py b/py/picca/pk1d/postproc_pk1d.py index 4571b6339..636aa2a72 100644 --- a/py/picca/pk1d/postproc_pk1d.py +++ b/py/picca/pk1d/postproc_pk1d.py @@ -737,10 +737,6 @@ def compute_average_pk_redshift( data_snr = p1d_table["forest_snr"][select] mask = np.isnan(data_values) | np.isnan(data_snr) if len(mask[mask]) != 0: - userprint( - "Warning: A nan value was detected in the following table:\n", - data_values[mask], - ) data_snr = data_snr[~mask] data_values = data_values[~mask] # Fit function to observed dispersion: From 1f739d0c6dcc0ba107842066e29a24142c3d8fcd Mon Sep 17 00:00:00 2001 From: corentinravoux Date: Tue, 25 Apr 2023 10:48:40 +0200 Subject: [PATCH 02/15] removing obsolete simple_snr weighting method --- bin/picca_Pk1D_postprocess.py | 2 +- py/picca/pk1d/postproc_pk1d.py | 19 ------------------- 2 files changed, 1 insertion(+), 20 deletions(-) diff --git a/bin/picca_Pk1D_postprocess.py b/bin/picca_Pk1D_postprocess.py index 8cdcba72a..f5001837d 100755 --- a/bin/picca_Pk1D_postprocess.py +++ b/bin/picca_Pk1D_postprocess.py @@ -118,7 +118,7 @@ def main(cmdargs): type=str, default='no_weights', help='SNR weighting scheme for the mean P1D computation,' - 'Possible options: no_weights, simple_snr, fit_snr') + 'Possible options: no_weights, fit_snr') parser.add_argument('--apply_z_weights', action='store_true', diff --git a/py/picca/pk1d/postproc_pk1d.py b/py/picca/pk1d/postproc_pk1d.py index 636aa2a72..8d272a3b7 100644 --- a/py/picca/pk1d/postproc_pk1d.py +++ b/py/picca/pk1d/postproc_pk1d.py @@ -173,8 +173,6 @@ def compute_mean_pk1d( 3 possible options: 'fit_snr': Compute mean P1D with weights estimated by fitting dispersion vs SNR 'no_weights': Compute mean P1D without weights - 'simple_snr' (obsolete): Compute mean P1D with weights computed directly from SNR values - (SNR as given in compute_Pk1D outputs) apply_z_weights: Bool If True, each chunk contributes to two nearest redshift bins with a linear weighting scheme. @@ -779,23 +777,6 @@ def compute_average_pk_redshift( ] snrfit_array[ikbin, 4:] = standard_dev - elif weight_method == "simple_snr": - # - We keep this for record, we do not recommand to use it - # for forests with snr>snr_limit, - # the weight is fixed to (snr_limit - 1)**2 = 9 - snr_limit = 4 - forest_snr = p1d_table["forest_snr"][select] - # w, = np.where(forest_snr <= 1) - # if len(w)>0: raise RuntimeError('Cannot add weights with SNR<=1.') - if (forest_snr <= 1).sum() > 0: - raise RuntimeError("Cannot add weights with SNR<=1.") - weights = (forest_snr - 1) ** 2 - weights[forest_snr > snr_limit] = (snr_limit - 1) ** 2 - mean = np.average(p1d_table[col][select], weights=weights) - # Need to rescale the weights to find the error: - # weights_true = weights * (num_chunks - 1) / alpha - alpha = np.sum(weights * ((p1d_table[col][select] - mean) ** 2)) - error = np.sqrt(alpha / (np.sum(weights) * (num_chunks - 1))) elif weight_method == "no_weights": if apply_z_weights: From 51e1e392bb3ef0b8a4c295b762ac329619338699 Mon Sep 17 00:00:00 2001 From: corentinravoux Date: Tue, 25 Apr 2023 14:19:12 +0200 Subject: [PATCH 03/15] using output_snrfit onbly for outputing --- py/picca/pk1d/postproc_pk1d.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/py/picca/pk1d/postproc_pk1d.py b/py/picca/pk1d/postproc_pk1d.py index 8d272a3b7..9832040d1 100644 --- a/py/picca/pk1d/postproc_pk1d.py +++ b/py/picca/pk1d/postproc_pk1d.py @@ -170,7 +170,7 @@ def compute_mean_pk1d( Edges of the wavenumber bins we want to use, either in (Angstrom)-1 or s/km weight_method: string - 3 possible options: + 2 possible options: 'fit_snr': Compute mean P1D with weights estimated by fitting dispersion vs SNR 'no_weights': Compute mean P1D without weights @@ -307,7 +307,7 @@ def compute_mean_pk1d( metadata_table["N_chunks"] = n_chunks zbin_centers = np.around((zbin_edges[1:] + zbin_edges[:-1]) / 2, 5) - if output_snrfit is not None: + if weight_method == "fit_snr": snrfit_table = np.zeros( (nbins_z * nbins_k, 13) ) # 13 entries: z k a b + 9 SNR bins used for the fit @@ -324,7 +324,6 @@ def compute_mean_pk1d( p1d_table_cols, weight_method, apply_z_weights, - output_snrfit, nomedians, nbins_z, zbin_centers, @@ -345,7 +344,7 @@ def compute_mean_pk1d( output_mean, mean_p1d_table, p1d_table_cols, - output_snrfit, + weight_method, snrfit_table, nomedians, ) @@ -369,6 +368,7 @@ def compute_mean_pk1d( kbin_edges, k_index, nbins_k, + snrfit_table, ) if number_worker == 1: output_cov = [func(p) for p in params_pool] @@ -423,6 +423,7 @@ def compute_mean_pk1d( kbin_edges, k_index, nbins_k, + snrfit_table, ) if number_worker == 1: output_cov = [func(p) for p in params_pool] @@ -469,7 +470,7 @@ def fill_average_pk( output_mean, mean_p1d_table, p1d_table_cols, - output_snrfit, + weight_method, snrfit_table, nomedians, ): @@ -492,8 +493,10 @@ def fill_average_pk( p1d_table_cols: List of str, Column names in the input table to be averaged. - output_snrfit: bool, - If not None, write the fit to the SNR to a file. + weight_method: string + 2 possible options: + 'fit_snr': Compute mean P1D with weights estimated by fitting dispersion vs SNR + 'no_weights': Compute mean P1D without weights snrfit_table: numpy ndarray, Table containing SNR fit infos @@ -531,7 +534,7 @@ def fill_average_pk( mean_p1d_table["max" + col][i_min:i_max] = max_array[icol] if not nomedians: mean_p1d_table["median" + col][i_min:i_max] = median_array[icol] - if output_snrfit is not None: + if weight_method == "fit_snr": snrfit_table[i_min:i_max, :] = snrfit_array @@ -540,7 +543,6 @@ def compute_average_pk_redshift( p1d_table_cols, weight_method, apply_z_weights, - output_snrfit, nomedians, nbins_z, zbin_centers, @@ -572,9 +574,6 @@ def compute_average_pk_redshift( apply_z_weights: bool, If True, apply redshift weights. - output_snrfit: bool, - If not None, write the fit to the SNR to a file. - nomedians: bool, If True, do not use median values in the fit to the SNR. @@ -622,7 +621,7 @@ def compute_average_pk_redshift( median_array = [] else: median_array = None - if output_snrfit is not None: + if weight_method == "fit_snr": snrfit_array = np.zeros((nbins_k, 13)) else: snrfit_array = None @@ -646,7 +645,7 @@ def compute_average_pk_redshift( max_array[icol][:] = np.nan if not nomedians: median_array[icol][:] = np.nan - if output_snrfit is not None: + if weight_method == "fit_snr": snrfit_array[:] = np.nan return ( @@ -768,7 +767,7 @@ def compute_average_pk_redshift( ) else: error = np.sqrt(1.0 / np.sum(weights)) - if output_snrfit is not None and col == "Pk": + if col == "Pk": snrfit_array[ikbin, 0:4] = [ zbin_centers[izbin], (kbin + kbin_edges[ikbin + 1]) / 2.0, @@ -824,6 +823,7 @@ def compute_cov( kbin_edges, k_index, nbins_k, + snrfit_table, izbin, select_z, sub_forest_ids, From cf145cfd1ea8630ee2173c1242353d6019023181 Mon Sep 17 00:00:00 2001 From: corentinravoux Date: Tue, 25 Apr 2023 14:19:27 +0200 Subject: [PATCH 04/15] fix --- py/picca/pk1d/postproc_pk1d.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/py/picca/pk1d/postproc_pk1d.py b/py/picca/pk1d/postproc_pk1d.py index 9832040d1..185e396cb 100644 --- a/py/picca/pk1d/postproc_pk1d.py +++ b/py/picca/pk1d/postproc_pk1d.py @@ -333,7 +333,7 @@ def compute_mean_pk1d( kbin_edges, ) if number_worker == 1: - output_mean = [func(p) for p in params_pool] + output_mean = [func(p[0]) for p in params_pool] else: with Pool(number_worker) as pool: output_mean = pool.starmap(func, params_pool) @@ -371,7 +371,7 @@ def compute_mean_pk1d( snrfit_table, ) if number_worker == 1: - output_cov = [func(p) for p in params_pool] + output_cov = [func(*p) for p in params_pool] else: with Pool(number_worker) as pool: output_cov = pool.starmap(func, params_pool) From e86662ef5987e79101647fade6040ca441dce965 Mon Sep 17 00:00:00 2001 From: corentinravoux Date: Tue, 25 Apr 2023 16:59:42 +0200 Subject: [PATCH 05/15] fix --- py/picca/pk1d/postproc_pk1d.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py/picca/pk1d/postproc_pk1d.py b/py/picca/pk1d/postproc_pk1d.py index 185e396cb..ab844adcb 100644 --- a/py/picca/pk1d/postproc_pk1d.py +++ b/py/picca/pk1d/postproc_pk1d.py @@ -426,7 +426,7 @@ def compute_mean_pk1d( snrfit_table, ) if number_worker == 1: - output_cov = [func(p) for p in params_pool] + output_cov = [func(*p) for p in params_pool] else: with Pool(number_worker) as pool: output_cov = pool.starmap(func, params_pool) From 485c2c69d5a2fbfc73f40a873937582d3b61c7f2 Mon Sep 17 00:00:00 2001 From: corentinravoux Date: Wed, 26 Apr 2023 11:20:30 +0200 Subject: [PATCH 06/15] Applying the weight method for the covariance matrix calculation --- py/picca/pk1d/postproc_pk1d.py | 87 ++++++++++++++++++++++++---------- 1 file changed, 63 insertions(+), 24 deletions(-) diff --git a/py/picca/pk1d/postproc_pk1d.py b/py/picca/pk1d/postproc_pk1d.py index ab844adcb..df2f3ec69 100644 --- a/py/picca/pk1d/postproc_pk1d.py +++ b/py/picca/pk1d/postproc_pk1d.py @@ -257,14 +257,14 @@ def compute_mean_pk1d( compute_covariance, compute_bootstrap = False, False cov_table = None - elif weight_method != "no_weights": - userprint( - """Covariance calculations are not compatible with SNR weighting method yet. - Skipping calculation""" - ) + # elif weight_method != "no_weights": + # userprint( + # """Covariance calculations are not compatible with SNR weighting method yet. + # Skipping calculation""" + # ) - compute_covariance, compute_bootstrap = False, False - cov_table = None + # compute_covariance, compute_bootstrap = False, False + # cov_table = None elif apply_z_weights: userprint( @@ -368,6 +368,7 @@ def compute_mean_pk1d( kbin_edges, k_index, nbins_k, + weight_method, snrfit_table, ) if number_worker == 1: @@ -423,6 +424,7 @@ def compute_mean_pk1d( kbin_edges, k_index, nbins_k, + weight_method, snrfit_table, ) if number_worker == 1: @@ -768,6 +770,10 @@ def compute_average_pk_redshift( else: error = np.sqrt(1.0 / np.sum(weights)) if col == "Pk": + if len(mask[mask]) != 0: + standard_dev = np.concatenate( + [standard_dev, np.full(len(mask[mask]), np.nan)] + ) snrfit_array[ikbin, 0:4] = [ zbin_centers[izbin], (kbin + kbin_edges[ikbin + 1]) / 2.0, @@ -776,7 +782,6 @@ def compute_average_pk_redshift( ] snrfit_array[ikbin, 4:] = standard_dev - elif weight_method == "no_weights": if apply_z_weights: mean = np.average(p1d_table[col][select], weights=redshift_weights) @@ -823,6 +828,7 @@ def compute_cov( kbin_edges, k_index, nbins_k, + weight_method, snrfit_table, izbin, select_z, @@ -850,6 +856,12 @@ def compute_cov( nbins_k (int): Number of k bins. + weight_method: str, + Method to weight the data. + + snrfit_table: numpy ndarray, + Table containing SNR fit infos + izbin (int): Current redshift bin being considered. @@ -896,26 +908,53 @@ def compute_cov( k1_array, k2_array, ) - for sub_forest_id in sub_forest_ids: # First loop 1) id sub-forest bins select_id = select_z & (p1d_table["sub_forest_id"] == sub_forest_id) selected_pk = p1d_table["Pk"][select_id] selected_ikbin = k_index[select_id] - for ipk, _ in enumerate(selected_pk): # First loop 2) selected pk - ikbin = selected_ikbin[ipk] - if ikbin != -1: - for ipk2 in range(ipk, len(selected_pk)): # First loop 3) selected pk - ikbin2 = selected_ikbin[ipk2] - - if ikbin2 != -1: - # index of the (ikbin,ikbin2) coefficient on the top of the matrix - index = (nbins_k * ikbin) + ikbin2 - covariance_array[index] = ( - covariance_array[index] - + selected_pk[ipk] * selected_pk[ipk2] - ) - n_array[index] = n_array[index] + 1 + if weight_method == "fit_snr": + selected_snr = p1d_table["forest_snr"][select_id] + snrfit_z = snrfit_table[izbin * nbins_k : (izbin + 1) * nbins_k, :] + selected_variance_estimated = fitfunc_variance_pk1d( + selected_snr, snrfit_z[selected_ikbin, 2], snrfit_z[selected_ikbin, 3] + ) + + for ipk, _ in enumerate(selected_pk): # First loop 2) selected pk + ikbin = selected_ikbin[ipk] + if ikbin != -1: + for ipk2 in range( + ipk, len(selected_pk) + ): # First loop 3) selected pk + ikbin2 = selected_ikbin[ipk2] + if ikbin2 != -1: + # index of the (ikbin,ikbin2) coefficient on the top of the matrix + index = (nbins_k * ikbin) + ikbin2 + weight = 1 / ( + selected_variance_estimated[ipk] + * selected_variance_estimated[ipk2] + ) + covariance_array[index] = ( + covariance_array[index] + + selected_pk[ipk] * selected_pk[ipk2] * weight + ) + n_array[index] = n_array[index] + weight + else: + for ipk, _ in enumerate(selected_pk): # First loop 2) selected pk + ikbin = selected_ikbin[ipk] + if ikbin != -1: + for ipk2 in range( + ipk, len(selected_pk) + ): # First loop 3) selected pk + ikbin2 = selected_ikbin[ipk2] + if ikbin2 != -1: + # index of the (ikbin,ikbin2) coefficient on the top of the matrix + index = (nbins_k * ikbin) + ikbin2 + covariance_array[index] = ( + covariance_array[index] + + selected_pk[ipk] * selected_pk[ipk2] + ) + n_array[index] = n_array[index] + 1 for ikbin in range(nbins_k): # Second loop 1) k bins mean_ikbin = mean_p1d_table["meanPk"][(nbins_k * izbin) + ikbin] @@ -927,7 +966,7 @@ def compute_cov( index = (nbins_k * ikbin) + ikbin2 covariance_array[index] = ( (covariance_array[index] / n_array[index]) - mean_ikbin * mean_ikbin2 - ) / n_array[index] + ) / (2 * n_array[index]) zbin_array[index] = zbin_centers[izbin] index_zbin_array[index] = izbin From 6b5a8b214fa6414b18a61957bcf41e51c6fee9f4 Mon Sep 17 00:00:00 2001 From: corentinravoux Date: Thu, 27 Apr 2023 14:18:36 +0200 Subject: [PATCH 07/15] good solution for no_weights calculation -- tested --- py/picca/pk1d/postproc_pk1d.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/py/picca/pk1d/postproc_pk1d.py b/py/picca/pk1d/postproc_pk1d.py index df2f3ec69..4ccbef027 100644 --- a/py/picca/pk1d/postproc_pk1d.py +++ b/py/picca/pk1d/postproc_pk1d.py @@ -965,9 +965,9 @@ def compute_cov( # index of the (ikbin,ikbin2) coefficient on the top of the matrix index = (nbins_k * ikbin) + ikbin2 covariance_array[index] = ( - (covariance_array[index] / n_array[index]) - mean_ikbin * mean_ikbin2 - ) / (2 * n_array[index]) - + covariance_array[index] / n_array[index] + ) - mean_ikbin * mean_ikbin2 + covariance_array[index] = covariance_array[index] / n_array[index] zbin_array[index] = zbin_centers[izbin] index_zbin_array[index] = izbin k1_array[index] = kbin_centers[ikbin] From a5067e883a8f7fff0e8f03d459bd9339002dadeb Mon Sep 17 00:00:00 2001 From: corentinravoux Date: Thu, 27 Apr 2023 14:18:47 +0200 Subject: [PATCH 08/15] aestetic changes --- py/picca/pk1d/postproc_pk1d.py | 51 ++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/py/picca/pk1d/postproc_pk1d.py b/py/picca/pk1d/postproc_pk1d.py index 4ccbef027..bde05dded 100644 --- a/py/picca/pk1d/postproc_pk1d.py +++ b/py/picca/pk1d/postproc_pk1d.py @@ -352,7 +352,8 @@ def compute_mean_pk1d( if compute_covariance: userprint("Computing covariance matrix") params_pool = [] - for izbin in range(nbins_z): # Main loop 1) z bins + # Main loop 1) z bins + for izbin in range(nbins_z): select_z = (p1d_table["forest_z"] < zbin_edges[izbin + 1]) & ( p1d_table["forest_z"] > zbin_edges[izbin] ) @@ -376,8 +377,8 @@ def compute_mean_pk1d( else: with Pool(number_worker) as pool: output_cov = pool.starmap(func, params_pool) - - for izbin in range(nbins_z): # Main loop 1) z bins + # Main loop 1) z bins + for izbin in range(nbins_z): ( zbin_array, index_zbin_array, @@ -399,7 +400,8 @@ def compute_mean_pk1d( userprint("Computing covariance matrix with bootstrap method") params_pool = [] - for izbin in range(nbins_z): # Main loop 1) z bins - can be paralelized + # Main loop 1) z bins + for izbin in range(nbins_z): select_z = (p1d_table["forest_z"] < zbin_edges[izbin + 1]) & ( p1d_table["forest_z"] > zbin_edges[izbin] ) @@ -411,8 +413,9 @@ def compute_mean_pk1d( ).astype(int) else: bootid = np.full(number_bootstrap, None) + + # Main loop 2) number of bootstrap samples for iboot in range(number_bootstrap): - # Main loop 2) number of bootstrap samples - can be paralelized params_pool.append([izbin, select_z, sub_forest_ids[bootid[iboot]]]) func = partial( @@ -432,12 +435,11 @@ def compute_mean_pk1d( else: with Pool(number_worker) as pool: output_cov = pool.starmap(func, params_pool) - - for izbin in range(nbins_z): # Main loop 1) z bins - can be paralelized + # Main loop 1) z bins + for izbin in range(nbins_z): boot_cov = [] - for iboot in range( - number_bootstrap - ): # Main loop 2) number of bootstrap samples - can be paralelized + # Main loop 2) number of bootstrap samples + for iboot in range(number_bootstrap): ( zbin_array, index_zbin_array, @@ -887,7 +889,7 @@ def compute_cov( """ zbin_array = np.zeros(nbins_k * nbins_k) index_zbin_array = np.zeros(nbins_k * nbins_k, dtype=int) - n_array = np.zeros(nbins_k * nbins_k, dtype=int) + n_array = np.zeros(nbins_k * nbins_k) covariance_array = np.zeros(nbins_k * nbins_k) k1_array = np.zeros(nbins_k * nbins_k) k2_array = np.zeros(nbins_k * nbins_k) @@ -908,7 +910,8 @@ def compute_cov( k1_array, k2_array, ) - for sub_forest_id in sub_forest_ids: # First loop 1) id sub-forest bins + # First loop 1) id sub-forest bins + for sub_forest_id in sub_forest_ids: select_id = select_z & (p1d_table["sub_forest_id"] == sub_forest_id) selected_pk = p1d_table["Pk"][select_id] selected_ikbin = k_index[select_id] @@ -919,13 +922,12 @@ def compute_cov( selected_variance_estimated = fitfunc_variance_pk1d( selected_snr, snrfit_z[selected_ikbin, 2], snrfit_z[selected_ikbin, 3] ) - - for ipk, _ in enumerate(selected_pk): # First loop 2) selected pk + # First loop 2) selected pk + for ipk, _ in enumerate(selected_pk): ikbin = selected_ikbin[ipk] if ikbin != -1: - for ipk2 in range( - ipk, len(selected_pk) - ): # First loop 3) selected pk + # First loop 3) selected pk + for ipk2 in range(ipk, len(selected_pk)): ikbin2 = selected_ikbin[ipk2] if ikbin2 != -1: # index of the (ikbin,ikbin2) coefficient on the top of the matrix @@ -940,12 +942,12 @@ def compute_cov( ) n_array[index] = n_array[index] + weight else: - for ipk, _ in enumerate(selected_pk): # First loop 2) selected pk + # First loop 2) selected pk + for ipk, _ in enumerate(selected_pk): ikbin = selected_ikbin[ipk] if ikbin != -1: - for ipk2 in range( - ipk, len(selected_pk) - ): # First loop 3) selected pk + # First loop 3) selected pk + for ipk2 in range(ipk, len(selected_pk)): ikbin2 = selected_ikbin[ipk2] if ikbin2 != -1: # index of the (ikbin,ikbin2) coefficient on the top of the matrix @@ -955,11 +957,12 @@ def compute_cov( + selected_pk[ipk] * selected_pk[ipk2] ) n_array[index] = n_array[index] + 1 - - for ikbin in range(nbins_k): # Second loop 1) k bins + # Second loop 1) k bins + for ikbin in range(nbins_k): mean_ikbin = mean_p1d_table["meanPk"][(nbins_k * izbin) + ikbin] - for ikbin2 in range(ikbin, nbins_k): # Second loop 2) k bins + # Second loop 2) k bins + for ikbin2 in range(ikbin, nbins_k): mean_ikbin2 = mean_p1d_table["meanPk"][(nbins_k * izbin) + ikbin2] # index of the (ikbin,ikbin2) coefficient on the top of the matrix From 793df27c5a61e4e523797aa7cc63bc08a1b4e07b Mon Sep 17 00:00:00 2001 From: corentinravoux Date: Thu, 27 Apr 2023 16:00:27 +0200 Subject: [PATCH 09/15] Improvement of weighting for fit_snr. Covariance gives proper diagonal for no_weights but no fit_snr --- py/picca/pk1d/postproc_pk1d.py | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/py/picca/pk1d/postproc_pk1d.py b/py/picca/pk1d/postproc_pk1d.py index bde05dded..b2097d318 100644 --- a/py/picca/pk1d/postproc_pk1d.py +++ b/py/picca/pk1d/postproc_pk1d.py @@ -893,6 +893,8 @@ def compute_cov( covariance_array = np.zeros(nbins_k * nbins_k) k1_array = np.zeros(nbins_k * nbins_k) k2_array = np.zeros(nbins_k * nbins_k) + if weight_method == "fit_snr": + weight_array = np.zeros(nbins_k * nbins_k) kbin_centers = (kbin_edges[1:] + kbin_edges[:-1]) / 2 if n_chunks[izbin] == 0: # Fill rows with NaNs @@ -932,15 +934,17 @@ def compute_cov( if ikbin2 != -1: # index of the (ikbin,ikbin2) coefficient on the top of the matrix index = (nbins_k * ikbin) + ikbin2 - weight = 1 / ( - selected_variance_estimated[ipk] - * selected_variance_estimated[ipk2] + weight = 1 / selected_variance_estimated[ipk] + weight2 = 1 / selected_variance_estimated[ipk2] + covariance_array[index] = covariance_array[ + index + ] + selected_pk[ipk] * selected_pk[ipk2] * np.sqrt( + weight * weight2 ) - covariance_array[index] = ( - covariance_array[index] - + selected_pk[ipk] * selected_pk[ipk2] * weight + n_array[index] = n_array[index] + 1 + weight_array[index] = weight_array[index] + np.sqrt( + weight * weight2 ) - n_array[index] = n_array[index] + weight else: # First loop 2) selected pk for ipk, _ in enumerate(selected_pk): @@ -967,10 +971,16 @@ def compute_cov( # index of the (ikbin,ikbin2) coefficient on the top of the matrix index = (nbins_k * ikbin) + ikbin2 - covariance_array[index] = ( - covariance_array[index] / n_array[index] - ) - mean_ikbin * mean_ikbin2 - covariance_array[index] = covariance_array[index] / n_array[index] + if weight_method == "fit_snr": + covariance_array[index] = ( + covariance_array[index] / weight_array[index] + ) - mean_ikbin * mean_ikbin2 + covariance_array[index] = covariance_array[index] / weight_array[index] + else: + covariance_array[index] = ( + covariance_array[index] / n_array[index] + ) - mean_ikbin * mean_ikbin2 + covariance_array[index] = covariance_array[index] / n_array[index] zbin_array[index] = zbin_centers[izbin] index_zbin_array[index] = izbin k1_array[index] = kbin_centers[ikbin] From ce28d301353f245713825a52a2d9c3a1f22d29d4 Mon Sep 17 00:00:00 2001 From: corentinravoux Date: Wed, 3 May 2023 09:14:47 +0200 Subject: [PATCH 10/15] Renormalization of the covariance matrix --- py/picca/pk1d/postproc_pk1d.py | 36 ++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/py/picca/pk1d/postproc_pk1d.py b/py/picca/pk1d/postproc_pk1d.py index b2097d318..3eb7cc231 100644 --- a/py/picca/pk1d/postproc_pk1d.py +++ b/py/picca/pk1d/postproc_pk1d.py @@ -997,6 +997,42 @@ def compute_cov( k1_array[index_2] = kbin_centers[ikbin] k2_array[index_2] = kbin_centers[ikbin2] + # Renormalization of the covariance matrix for the fit_snr method. + if weight_method == "fit_snr": + # Second loop 1) k bins + for ikbin in range(nbins_k): + std_ikbin = mean_p1d_table["errorPk"][(nbins_k * izbin) + ikbin] + # Second loop 2) k bins + for ikbin2 in range(ikbin, nbins_k): + std_ikbin2 = mean_p1d_table["errorPk"][(nbins_k * izbin) + ikbin2] + covariance_array[(nbins_k * ikbin) + ikbin2] = ( + std_ikbin + * std_ikbin2 + * covariance_array[(nbins_k * ikbin) + ikbin2] + / np.sqrt( + covariance_array[(nbins_k * ikbin) + ikbin] + * covariance_array[(nbins_k * ikbin2) + ikbin2] + ) + ) + if ikbin2 != ikbin: + covariance_array[(nbins_k * ikbin2) + ikbin] = ( + std_ikbin + * std_ikbin2 + * covariance_array[(nbins_k * ikbin2) + ikbin] + / np.sqrt( + covariance_array[(nbins_k * ikbin) + ikbin] + * covariance_array[(nbins_k * ikbin2) + ikbin2] + ) + ) + + # # Diagonal renormalization of the covariance matrix for the fit_snr method. + # if weight_method == "fit_snr": + # # Second loop 1) k bins + # for ikbin in range(nbins_k): + # std_ikbin = mean_p1d_table["errorPk"][(nbins_k * izbin) + ikbin] + # # Second loop 2) k bins + # covariance_array[(nbins_k * ikbin) + ikbin] = std_ikbin**2 + return zbin_array, index_zbin_array, n_array, covariance_array, k1_array, k2_array From 643542b4c81acd787ceefefca4f963d410b70f08 Mon Sep 17 00:00:00 2001 From: corentinravoux Date: Wed, 3 May 2023 09:23:57 +0200 Subject: [PATCH 11/15] removing unecessary comments --- py/picca/pk1d/postproc_pk1d.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/py/picca/pk1d/postproc_pk1d.py b/py/picca/pk1d/postproc_pk1d.py index 3eb7cc231..c3c442fe3 100644 --- a/py/picca/pk1d/postproc_pk1d.py +++ b/py/picca/pk1d/postproc_pk1d.py @@ -257,15 +257,6 @@ def compute_mean_pk1d( compute_covariance, compute_bootstrap = False, False cov_table = None - # elif weight_method != "no_weights": - # userprint( - # """Covariance calculations are not compatible with SNR weighting method yet. - # Skipping calculation""" - # ) - - # compute_covariance, compute_bootstrap = False, False - # cov_table = None - elif apply_z_weights: userprint( """Covariance calculations are not compatible redshift weighting yes. @@ -1025,14 +1016,6 @@ def compute_cov( ) ) - # # Diagonal renormalization of the covariance matrix for the fit_snr method. - # if weight_method == "fit_snr": - # # Second loop 1) k bins - # for ikbin in range(nbins_k): - # std_ikbin = mean_p1d_table["errorPk"][(nbins_k * izbin) + ikbin] - # # Second loop 2) k bins - # covariance_array[(nbins_k * ikbin) + ikbin] = std_ikbin**2 - return zbin_array, index_zbin_array, n_array, covariance_array, k1_array, k2_array From fbda063af776b07502c52cc569c0f19f8e5aa2e5 Mon Sep 17 00:00:00 2001 From: corentinravoux Date: Wed, 3 May 2023 09:24:07 +0200 Subject: [PATCH 12/15] pylint error --- py/picca/pk1d/postproc_pk1d.py | 56 ++++++++++++++++------------------ 1 file changed, 26 insertions(+), 30 deletions(-) diff --git a/py/picca/pk1d/postproc_pk1d.py b/py/picca/pk1d/postproc_pk1d.py index c3c442fe3..7149dd261 100644 --- a/py/picca/pk1d/postproc_pk1d.py +++ b/py/picca/pk1d/postproc_pk1d.py @@ -918,40 +918,36 @@ def compute_cov( # First loop 2) selected pk for ipk, _ in enumerate(selected_pk): ikbin = selected_ikbin[ipk] - if ikbin != -1: - # First loop 3) selected pk - for ipk2 in range(ipk, len(selected_pk)): - ikbin2 = selected_ikbin[ipk2] - if ikbin2 != -1: - # index of the (ikbin,ikbin2) coefficient on the top of the matrix - index = (nbins_k * ikbin) + ikbin2 - weight = 1 / selected_variance_estimated[ipk] - weight2 = 1 / selected_variance_estimated[ipk2] - covariance_array[index] = covariance_array[ - index - ] + selected_pk[ipk] * selected_pk[ipk2] * np.sqrt( - weight * weight2 - ) - n_array[index] = n_array[index] + 1 - weight_array[index] = weight_array[index] + np.sqrt( - weight * weight2 - ) + # First loop 3) selected pk + for ipk2 in range(ipk, len(selected_pk)): + ikbin2 = selected_ikbin[ipk2] + if (ikbin2 != -1) & (ikbin != -1): + # index of the (ikbin,ikbin2) coefficient on the top of the matrix + index = (nbins_k * ikbin) + ikbin2 + weight = 1 / selected_variance_estimated[ipk] + weight2 = 1 / selected_variance_estimated[ipk2] + covariance_array[index] = covariance_array[index] + selected_pk[ + ipk + ] * selected_pk[ipk2] * np.sqrt(weight * weight2) + n_array[index] = n_array[index] + 1 + weight_array[index] = weight_array[index] + np.sqrt( + weight * weight2 + ) else: # First loop 2) selected pk for ipk, _ in enumerate(selected_pk): ikbin = selected_ikbin[ipk] - if ikbin != -1: - # First loop 3) selected pk - for ipk2 in range(ipk, len(selected_pk)): - ikbin2 = selected_ikbin[ipk2] - if ikbin2 != -1: - # index of the (ikbin,ikbin2) coefficient on the top of the matrix - index = (nbins_k * ikbin) + ikbin2 - covariance_array[index] = ( - covariance_array[index] - + selected_pk[ipk] * selected_pk[ipk2] - ) - n_array[index] = n_array[index] + 1 + # First loop 3) selected pk + for ipk2 in range(ipk, len(selected_pk)): + ikbin2 = selected_ikbin[ipk2] + if (ikbin2 != -1) & (ikbin != -1): + # index of the (ikbin,ikbin2) coefficient on the top of the matrix + index = (nbins_k * ikbin) + ikbin2 + covariance_array[index] = ( + covariance_array[index] + + selected_pk[ipk] * selected_pk[ipk2] + ) + n_array[index] = n_array[index] + 1 # Second loop 1) k bins for ikbin in range(nbins_k): mean_ikbin = mean_p1d_table["meanPk"][(nbins_k * izbin) + ikbin] From 0ee735795f3340ca223a64bdeb017fbe354310f5 Mon Sep 17 00:00:00 2001 From: corentinravoux Date: Thu, 25 May 2023 10:19:00 +0200 Subject: [PATCH 13/15] Latest corrections for fit_snr covariance calculation --- py/picca/pk1d/postproc_pk1d.py | 41 ++++++++++++++-------------------- 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/py/picca/pk1d/postproc_pk1d.py b/py/picca/pk1d/postproc_pk1d.py index 7149dd261..dce506be1 100644 --- a/py/picca/pk1d/postproc_pk1d.py +++ b/py/picca/pk1d/postproc_pk1d.py @@ -910,6 +910,11 @@ def compute_cov( selected_ikbin = k_index[select_id] if weight_method == "fit_snr": + # Definition of weighted unbiased sample covariance taken from + # "George R. Price, Ann. Hum. Genet., Lond, pp485-490, + # Extension of covariance selection mathematics, 1972" + # adapted with weights w_i = np.sqrt(w_ipk * w_ipk2) for covariance + # to obtain the right definition of variance on the diagonal selected_snr = p1d_table["forest_snr"][select_id] snrfit_z = snrfit_table[izbin * nbins_k : (izbin + 1) * nbins_k, :] selected_variance_estimated = fitfunc_variance_pk1d( @@ -962,12 +967,15 @@ def compute_cov( covariance_array[index] = ( covariance_array[index] / weight_array[index] ) - mean_ikbin * mean_ikbin2 - covariance_array[index] = covariance_array[index] / weight_array[index] else: covariance_array[index] = ( covariance_array[index] / n_array[index] ) - mean_ikbin * mean_ikbin2 - covariance_array[index] = covariance_array[index] / n_array[index] + + # Same normalization for fit_snr, equivalent to dividing to + # weight_array[index] if weights are normalized to give + # sum(weight_array[index]) = n_array[index] + covariance_array[index] = covariance_array[index] / n_array[index] zbin_array[index] = zbin_centers[izbin] index_zbin_array[index] = izbin k1_array[index] = kbin_centers[ikbin] @@ -984,33 +992,18 @@ def compute_cov( k1_array[index_2] = kbin_centers[ikbin] k2_array[index_2] = kbin_centers[ikbin2] - # Renormalization of the covariance matrix for the fit_snr method. + + # For fit_snr method, due to the SNR fitting scheme used for weighting, + # the diagonal of the weigthed sample covariance matrix is not equal + # to the error in mean P1D. This is tested on Ohio mocks. + # We choose to renormalize only the diagonal of the covariance matrix. if weight_method == "fit_snr": # Second loop 1) k bins for ikbin in range(nbins_k): std_ikbin = mean_p1d_table["errorPk"][(nbins_k * izbin) + ikbin] # Second loop 2) k bins - for ikbin2 in range(ikbin, nbins_k): - std_ikbin2 = mean_p1d_table["errorPk"][(nbins_k * izbin) + ikbin2] - covariance_array[(nbins_k * ikbin) + ikbin2] = ( - std_ikbin - * std_ikbin2 - * covariance_array[(nbins_k * ikbin) + ikbin2] - / np.sqrt( - covariance_array[(nbins_k * ikbin) + ikbin] - * covariance_array[(nbins_k * ikbin2) + ikbin2] - ) - ) - if ikbin2 != ikbin: - covariance_array[(nbins_k * ikbin2) + ikbin] = ( - std_ikbin - * std_ikbin2 - * covariance_array[(nbins_k * ikbin2) + ikbin] - / np.sqrt( - covariance_array[(nbins_k * ikbin) + ikbin] - * covariance_array[(nbins_k * ikbin2) + ikbin2] - ) - ) + covariance_array[(nbins_k * ikbin) + ikbin] = std_ikbin**2 + return zbin_array, index_zbin_array, n_array, covariance_array, k1_array, k2_array From 16559a491f4ae4b20e5e89cba858c85404e4f4c3 Mon Sep 17 00:00:00 2001 From: corentinravoux Date: Thu, 25 May 2023 10:27:32 +0200 Subject: [PATCH 14/15] pylint error --- py/picca/pk1d/postproc_pk1d.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/py/picca/pk1d/postproc_pk1d.py b/py/picca/pk1d/postproc_pk1d.py index dce506be1..0e56009ad 100644 --- a/py/picca/pk1d/postproc_pk1d.py +++ b/py/picca/pk1d/postproc_pk1d.py @@ -910,10 +910,10 @@ def compute_cov( selected_ikbin = k_index[select_id] if weight_method == "fit_snr": - # Definition of weighted unbiased sample covariance taken from - # "George R. Price, Ann. Hum. Genet., Lond, pp485-490, + # Definition of weighted unbiased sample covariance taken from + # "George R. Price, Ann. Hum. Genet., Lond, pp485-490, # Extension of covariance selection mathematics, 1972" - # adapted with weights w_i = np.sqrt(w_ipk * w_ipk2) for covariance + # adapted with weights w_i = np.sqrt(w_ipk * w_ipk2) for covariance # to obtain the right definition of variance on the diagonal selected_snr = p1d_table["forest_snr"][select_id] snrfit_z = snrfit_table[izbin * nbins_k : (izbin + 1) * nbins_k, :] @@ -971,10 +971,10 @@ def compute_cov( covariance_array[index] = ( covariance_array[index] / n_array[index] ) - mean_ikbin * mean_ikbin2 - - # Same normalization for fit_snr, equivalent to dividing to - # weight_array[index] if weights are normalized to give - # sum(weight_array[index]) = n_array[index] + + # Same normalization for fit_snr, equivalent to dividing to + # weight_array[index] if weights are normalized to give + # sum(weight_array[index]) = n_array[index] covariance_array[index] = covariance_array[index] / n_array[index] zbin_array[index] = zbin_centers[izbin] index_zbin_array[index] = izbin @@ -992,11 +992,10 @@ def compute_cov( k1_array[index_2] = kbin_centers[ikbin] k2_array[index_2] = kbin_centers[ikbin2] - # For fit_snr method, due to the SNR fitting scheme used for weighting, - # the diagonal of the weigthed sample covariance matrix is not equal - # to the error in mean P1D. This is tested on Ohio mocks. - # We choose to renormalize only the diagonal of the covariance matrix. + # the diagonal of the weigthed sample covariance matrix is not equal + # to the error in mean P1D. This is tested on Ohio mocks. + # We choose to renormalize only the diagonal of the covariance matrix. if weight_method == "fit_snr": # Second loop 1) k bins for ikbin in range(nbins_k): @@ -1004,7 +1003,6 @@ def compute_cov( # Second loop 2) k bins covariance_array[(nbins_k * ikbin) + ikbin] = std_ikbin**2 - return zbin_array, index_zbin_array, n_array, covariance_array, k1_array, k2_array From 9c7dd61590dc8b8052adc83634a92ccab79b0e25 Mon Sep 17 00:00:00 2001 From: corentinravoux Date: Thu, 1 Jun 2023 12:27:16 +0200 Subject: [PATCH 15/15] Following comments, renormalization of all the matrix --- py/picca/pk1d/postproc_pk1d.py | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/py/picca/pk1d/postproc_pk1d.py b/py/picca/pk1d/postproc_pk1d.py index 501cda177..840efe3c8 100644 --- a/py/picca/pk1d/postproc_pk1d.py +++ b/py/picca/pk1d/postproc_pk1d.py @@ -727,7 +727,7 @@ def compute_average_pk_redshift( data_values = p1d_table[col][select] data_snr = p1d_table["forest_snr"][select] - p1d_values = p1d_table['Pk'][select] + p1d_values = p1d_table["Pk"][select] mask = np.isnan(data_values) | np.isnan(data_snr) if len(mask[mask]) != 0: data_snr = data_snr[~mask] @@ -998,13 +998,37 @@ def compute_cov( # For fit_snr method, due to the SNR fitting scheme used for weighting, # the diagonal of the weigthed sample covariance matrix is not equal # to the error in mean P1D. This is tested on Ohio mocks. - # We choose to renormalize only the diagonal of the covariance matrix. + # We choose to renormalize the covariance matrix. if weight_method == "fit_snr": - # Second loop 1) k bins + # Third loop 1) k bins for ikbin in range(nbins_k): std_ikbin = mean_p1d_table["errorPk"][(nbins_k * izbin) + ikbin] - # Second loop 2) k bins - covariance_array[(nbins_k * ikbin) + ikbin] = std_ikbin**2 + # Third loop 2) k bins + for ikbin2 in range(ikbin, nbins_k): + std_ikbin2 = mean_p1d_table["errorPk"][(nbins_k * izbin) + ikbin2] + + # index of the (ikbin,ikbin2) coefficient on the top of the matrix + index = (nbins_k * ikbin) + ikbin2 + covariance_array[index] = ( + covariance_array[index] + * (std_ikbin * std_ikbin2) + / np.sqrt( + covariance_array[(nbins_k * ikbin) + ikbin] + * covariance_array[(nbins_k * ikbin2) + ikbin2] + ) + ) + + if ikbin2 != ikbin: + # index of the (ikbin,ikbin2) coefficient on the bottom of the matrix + index_2 = (nbins_k * ikbin2) + ikbin + covariance_array[index_2] = ( + covariance_array[index_2] + * (std_ikbin * std_ikbin2) + / np.sqrt( + covariance_array[(nbins_k * ikbin) + ikbin] + * covariance_array[(nbins_k * ikbin2) + ikbin2] + ) + ) return zbin_array, index_zbin_array, n_array, covariance_array, k1_array, k2_array