-
Notifications
You must be signed in to change notification settings - Fork 0
/
ldsc_interpret.py
112 lines (104 loc) · 4.38 KB
/
ldsc_interpret.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#!/usr/bin/env python3
import sys
import numpy as np
import scipy.stats
import os
import pickle
import matplotlib
matplotlib.use('Agg')
matplotlib.rcParams['agg.path.chunksize'] = 10000
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
STUDY_NAMES = {
"AlzheimersMaternal_Marioni2018": "Alzheimers Maternal",
"AlzheimersPaternal_Marioni2018": "Alzheimers Paternal",
"AlzheimersProxyMetaIGAP_Marioni2018": "Alzheimers Proxy",
"BD_Ruderfer2018": "Bipolar (Ruderfer)",
"BDSCZ_Ruderfer2018": "Bipolar + Schizophrenia",
"CD_deLange2017": "Crohn's",
"DepressedAffect_Nagel2018": "Depressed Affect",
"Depression_Nagel2018": "Depression (Nagel)",
"IBD_deLange2017": "Inflammatory Bowel",
"Intelligence_SavageJansen2018": "Intelligence",
"MDD_Wray2018": "Depression (Wray)",
"Neuroticism_Nagel2018": "Neuroticism",
"PASS_Alzheimers_Jansen2019": "Alzheimers",
"PASS_BIP_Stahl2019": "Bipolar (Stahl)",
"PASS_Schizophrenia_Pardinas2018": "Schizophrenia (Pardinas)",
"ReactionTime_Davies2018": "Reaction Time",
"SCZ_Ruderfer2018": "Schizophrenia (Ruderfer)",
"SCZvsBD_Ruderfer2018": "Bipolar vs. Schizophrenia",
"UC_deLange2017": "Ulcerative Colitis",
"VerbalNumericReasoning_Davies2018": "Verbal & Numeric Reasoning",
"Worry_Nagel2018": "Worry",
}
STUDY_ORDER = [
"PASS_Alzheimers_Jansen2019",
"AlzheimersProxyMetaIGAP_Marioni2018",
"AlzheimersMaternal_Marioni2018",
"AlzheimersPaternal_Marioni2018",
"BD_Ruderfer2018",
"PASS_BIP_Stahl2019",
"BDSCZ_Ruderfer2018",
"SCZvsBD_Ruderfer2018",
"PASS_Schizophrenia_Pardinas2018",
"SCZ_Ruderfer2018",
"MDD_Wray2018",
"Depression_Nagel2018",
"DepressedAffect_Nagel2018",
"Neuroticism_Nagel2018",
"Worry_Nagel2018",
"Intelligence_SavageJansen2018",
"VerbalNumericReasoning_Davies2018",
"ReactionTime_Davies2018",
"IBD_deLange2017",
"UC_deLange2017",
"CD_deLange2017",
]
def plot_heatmap(df, title, result_path, var, fmt=None):
df_plot = df.pivot(index="Study", columns="Cluster", values=var).sort_index()
# print(df_plot) ####
df_plot = df_plot.reindex(STUDY_ORDER)
df_plot.rename(index=STUDY_NAMES, inplace=True)
sns.set(style="whitegrid", font="Roboto")
# print(df_plot) ####
kws_extras = {}
if fmt is not None:
kws_extras["fmt"] = fmt
g = sns.heatmap(df_plot, annot=True, cmap="vlag", center=1, vmin=-1, vmax=5, annot_kws={"size": 10, "weight": "medium"}, **kws_extras)
# g.fig.suptitle(title)
plt.title(title)
# g.savefig(result_path, bbox_inches='tight')
plt.savefig(result_path, bbox_inches='tight')
plt.clf()
plt.close()
def ldsc_interpret(in_dir, name, params, out_dir):
in_path = os.path.join(in_dir, f"{name}.csv")
df = pd.read_csv(in_path)
df["EnrichmentZ"] = df["Enrichment"] / df["EnrichmentStdError"]
df["EnrichmentSig"] = df["Enrichment"].mask(df["EnrichmentP"] <= 0.05)
# print(in_path) ####
# print(df.to_string()) ####
for thresh, window in params:
# print(thresh, window) ####
df_sub = df.loc[np.logical_and(df["Threshold"] == thresh, df["Window"] == window)]
title = f"Top {thresh}, {window} kb window"
result_path = os.path.join(out_dir, name, f"heatmap_t_{thresh}_w_{window}.svg")
plot_heatmap(df_sub, title, result_path, "Enrichment")
result_path = os.path.join(out_dir, name, f"heatmap_p_t_{thresh}_w_{window}.svg")
plot_heatmap(df_sub, title, result_path, "EnrichmentP", fmt=".2e")
result_path = os.path.join(out_dir, name, f"heatmap_z_t_{thresh}_w_{window}.svg")
plot_heatmap(df_sub, title, result_path, "EnrichmentZ")
result_path = os.path.join(out_dir, name, f"heatmap_se_t_{thresh}_w_{window}.svg")
plot_heatmap(df_sub, title, result_path, "EnrichmentStdError")
result_path = os.path.join(out_dir, name, f"heatmap_sg_t_{thresh}_w_{window}.svg")
plot_heatmap(df_sub, title, result_path, "EnrichmentSig")
if __name__ == '__main__':
in_dir = "/agusevlab/awang/sc_kellis/ldsc_res/agg/"
# name = "results_chisq"
name = "scqtl_2020_09_28_ldsc_enrichment"
out_dir = "/agusevlab/awang/ase_finemap_results/sc_results/kellis_429/ldsc"
# params = [(i, j) for i in [0.1, 0.2, 0.5] for j in [0, 10, 50]]
params = [(i, j) for i in [0.1, 0.2, 0.5] for j in [10, 50]]
ldsc_interpret(in_dir, name, params, out_dir)