forked from USGS-R/GLRI_CEC_2016
-
Notifications
You must be signed in to change notification settings - Fork 0
/
22_merge_data_deg_test.yml
113 lines (87 loc) · 3.7 KB
/
22_merge_data_deg_test.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# remake build file for the 22_merge_data_deg_test step.

# Target that is built when no target is named explicitly.
target_default: 22_merge_data_deg_test

# Other remake file pulled into this one.
include:
  - 21_merge_data_dl.yml

# R packages listed for the commands in this file.
packages:
  - dplyr
  - tidyr
  - readr
  - dataRetrieval
  - openxlsx

# R scripts listed as sources for the commands in this file.
sources:
  - 22_merge_data_deg_test/src/merge_data_fix_degs.R
  - 22_merge_data_deg_test/src/deg_sums.R
targets:
  # Every target below must stay nested under `targets:`, with its `command:`
  # or `depends:` nested one level further. Flattening them to column 0 turns
  # them into duplicated top-level keys and detaches commands from targets.
  #
  # NOTE(review): chem_master, chemicalSummary, chemicalSummary_conc and
  # chemicalSummary_bench are used below but not defined in this file;
  # presumably they come from the included 21_merge_data_dl.yml -- confirm.

  # Aggregate target with no command of its own: it only lists the six
  # degradate summaries (EAR, concentration, benchmark x meto/acet) as
  # dependencies.
  22_merge_data_deg_test:
    depends:
      - chemicalSummary_deg_meto
      - chemicalSummary_deg_acet
      - chemicalSummary_conc_deg_meto
      - chemicalSummary_conc_deg_acet
      - chemicalSummary_bench_deg_meto
      - chemicalSummary_bench_deg_acet

  missing_toxcast:
    command: find_missing_tox(chem_conc = chemicalSummary_conc, chem_ear = chemicalSummary, chem_info = chem_master)

  all_parents:
    command: complete_parents(chem_master)

  # When CAS numbers are missing, everything is fine until you go back to
  # match concentrations to toxicity levels: that match is done by CAS, so it
  # breaks down. Fake CAS numbers need to be added to all compounds that are
  # missing one.
  #
  # The multi-line commands below use folded scalars (`>-`). Keep every line
  # of a command at the same indent so the lines fold into a single-line
  # R call.
  chemicalSummary_deg_meto:
    command: >-
      get_chem_sum_deg(
      data_file = "data/pesticides.xlsx",
      missing_chems = missing_toxcast,
      parents = all_parents,
      metolachlor = TRUE,
      chem_master)

  chemicalSummary_deg_acet:
    command: >-
      get_chem_sum_deg(
      data_file = "data/pesticides.xlsx",
      missing_chems = missing_toxcast,
      parents = all_parents,
      metolachlor = FALSE,
      chem_master)

  # Fix the concentration data so that it includes all chemicals.
  chemicalSummary_conc_deg_meto:
    command: merge_deg_parents(chemicalSummary_conc, chem_master, metolachlor = TRUE)

  chemicalSummary_conc_deg_acet:
    command: merge_deg_parents(chemicalSummary_conc, chem_master, metolachlor = FALSE)

  # Parent benchmark values are needed for degradates that have no toxicity
  # information of their own.
  missing_bench:
    command: find_missing_bench(chem_conc = chemicalSummary_conc, chem_bench = chemicalSummary_bench, chem_info = chem_master)

  chemicalSummary_bench_deg_meto:
    command: >-
      get_bench_sum_deg(data_file = "data/pesticides_bench.xlsx",
      missing_chems = missing_bench,
      parents = all_parents,
      metolachlor = TRUE,
      chem_master)

  chemicalSummary_bench_deg_acet:
    command: >-
      get_bench_sum_deg(data_file = "data/pesticides_bench.xlsx",
      missing_chems = missing_bench,
      parents = all_parents,
      metolachlor = FALSE,
      chem_master)

  # NOTE(review): the targets from here down are not listed in the `depends`
  # of 22_merge_data_deg_test above; presumably they are requested explicitly
  # or by a downstream remake file -- confirm.
  # All of them use the metolachlor (`_meto`) variants of the summaries.
  parent_class:
    command: get_parent_class(chemicalSummary_conc_deg_meto, chem_master)

  parent_sums:
    command: >-
      sum_by_parents(
      deg_parent_ear = chemicalSummary_deg_meto,
      deg_parent_conc = chemicalSummary_conc_deg_meto,
      deg_parent_bench = chemicalSummary_bench_deg_meto)

  parent_sums_nozero:
    command: filter_parent_sums(parent_sums)

  chnm_to_cas:
    command: crosswalk_names(conc_dat = chemicalSummary_conc_deg_meto)

  parent_summaries:
    command: summarize_parents(parent_sums, classes = parent_class, zeros = FALSE)

  parent_summaries_zeros:
    command: summarize_parents(parent_sums, classes = parent_class, zeros = TRUE)

  # For the degradates that could belong to either acetochlor or metolachlor,
  # look at the relationship between the degradate and each parent. If it is
  # related more to one parent, choose that parent.
  # NOTE(review): chem_crosswalk is not defined in this file -- confirm it is
  # provided by an included file.
  unclassified_degs:
    command: get_unclassified(data_file = "data/pesticides.xlsx", parents = chem_crosswalk)

  # File target: the key is the output path, handed to the command through
  # `target_name`.
  22_merge_data_deg_test/out/unclassified_degradates.png:
    command: plot_unclassified_degs(conc_dat = chemicalSummary_conc, unclassified_degs, out_file = target_name)

  # Create the "boxplot" figure, but using the summed parent + degradates.
  # NOTE(review): top_parents is not defined in this file -- confirm it is
  # provided by an included file.
  figures/ms_figures/parent_deg_sums_dotplot.png:
    command: plot_deg_sums(all_dat = parent_summaries, top_parents, target_name)