-
Notifications
You must be signed in to change notification settings - Fork 11
/
GetThresholdedIntensities.R
175 lines (146 loc) · 6.2 KB
/
GetThresholdedIntensities.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
##' Extract Thresholded Intensities from a GatingSet
##'
##' This function extracts thresholded intensities for children of a
##' node \code{node}, as specified through the \code{map} argument.
##'
##' \code{map} should be an \R \code{list}, mapping node names (as
##' specified in the gating hierarchy of the gating set) to channel
##' names (as specified in either the \code{desc} or \code{name}
##' columns of the parameters of the associated \code{flowFrame}s
##' in the \code{GatingSet}).
##'
##' @param gs A \code{GatingSet} or \code{GatingSetList}.
##' @param node The name, or path, of a single node in a
##' \code{GatingSet} / \code{GatingSetList}.
##' @param map A \code{list}, mapping node names to markers.
##' @example examples/GetThresholdedIntensities.R
##' @return A \code{list} with two components:
##'
##' \item{data}{A \code{list} of thresholded intensity measures.}
##' \item{counts}{A named vector of total cell counts at the node \code{node}.}
##'
##' @export
GetThresholdedIntensities <- function(gs, node, map) {
if (requireNamespace("flowWorkspace",quietly = TRUE)) {
node_names <- names(map)
channel_names <- unlist( unname(map) )
if (!(inherits(gs, "GatingSet") || inherits(gs, "GatingSetList"))) {
stop("'gs' must be either a 'GatingSet' or a 'GatingSetList'")
}
## Turn the gs into a list of gs's
if (inherits(gs, "GatingSet")) {
gslist <- vector("list", length(gs))
for (i in 1:length(gs)) {
gslist[[i]] <- gs[[i]]
}
names(gslist) <- flowWorkspace::sampleNames(gs)
} else {
stop("Internal Error: not implemented for GatingSetLists yet.", call.=FALSE)
}
## Make 'node' act more like a regular expression if it isn't one already
## But only do this if it's not '/' or 'root'
if (!(node %in% c("root", "/"))) {
n <- nchar(node)
if (!substring(node, 1, 1) == "/") node <- paste0("/", node)
if (!substring(node, n, n) == "$") node <- paste0(node, "$")
node <- gsub("(?<!\\\\)\\+", "\\\\+", node, perl=TRUE)
}
paths <- flowWorkspace::gh_get_pop_paths(gslist[[1]])
path <- paths[ grepl(node, paths, fixed = FALSE) ]
if (length(path) > 1) {
stop(gettextf("The node expression %s is not unique.", node))
}
if (length(path) == 0) {
stop(gettextf("The node expression %s doesn't identify any nodes.",
node))
}
# Extract the parent node name from the full population name
node_name <- gsub(".*/", "", path)
message(gettextf("Fetching data from children of '%s'.", path))
# extract all the counts
message("Extracting cell counts")
counts <- unlist(lapply(gslist, function(x) {
flowWorkspace::gh_pop_get_count(x, path)
}))
# Get the children of that parent and filter out boolean gates Test if
# children exist, and test if non-empty set returned.
message("Looking for expected children nodes")
child.nodes <- flowWorkspace::gh_pop_get_children(gs[[1]], node_name)
child.nodes <- basename(child.nodes)
if (length(child.nodes) == 0) {
stop(gettextf("Population %s has no children! Choose a different parent population.",
node_name))
}
child.nodes <- child.nodes[ !sapply(child.nodes, function(x)
gh_pop_is_bool_gate(gs[[1]], x))]
if (length(child.nodes) == 0) {
stop(gettextf("All the children of %s are boolean gates. Choose a population with non-boolean child gates.",
node_name))
}
message("We will extract data from the following nodes:\n\n",
paste( strwrap( paste(child.nodes, collapse=", "), 60), collapse="\n" ))
## This next block of code tries to figure out which channel names we will be using
## Try to guess whether we should be pulling names from the 'desc'
## column or the 'name' column of the flowSets
ff <- flowWorkspace::gh_pop_get_data(gs[[1]], use.exprs=FALSE)
params <- flowCore::parameters(ff)@data
## First, check for a perfect match using a basic regex
.check_match <- function(x, vec) {
all( sapply(channel_names, function(x) {
any( x == na.omit(vec) )
}) )
}
matched_flag <- FALSE ## did we match everything in 'map' to the channel names?
column_to_use <- NULL ## are we going to use 'desc' or 'name'?
indices <- NULL ## what indices are we using to pull from the flowFrame?
if (!all(params$name == colnames( flowCore::exprs(ff) ))) {
stop("Internal Error: expected 'params$desc' and 'colnames( exprs(ff) )' to ",
"be identical but they are not!", call.=FALSE)
}
if (.check_match(channel_names, params$desc)) {
message("Channel names were matched to the 'desc' column.")
matched_flag <- TRUE
column_to_use <- "desc"
indices <- match(channel_names, params$desc)
}
if (!matched_flag && .check_match(channel_names, params$name)) {
message("Channel names were matched to the 'name' column.")
matched_flag <- TRUE
column_to_use <- "name"
indices <- match(channel_names, params$name)
}
if (!matched_flag) {
stop("Unable to match values in 'names(map)' to channel names in the data.")
}
expr_nms <- unname(as.character(params[[column_to_use]])[indices])
## Extract the intensities
message("Extracting cell intensities and thresholding...")
intensities <- lapply(gslist, function(x) {
exprs <- flowCore::exprs( flowWorkspace::gh_pop_get_data(x, path) )[, expr_nms, drop=FALSE]
for (i in seq_along(node_names)) {
cNode <- node_names[i]
cChannel <- channel_names[i]
## Need to handle 'root' specially
if (identical(path, "root")) {
node_path <- paste0("/", cNode)
} else {
node_path <- file.path(path, cNode)
}
## find out what cells didn't fall into the gate
ind <- !flowWorkspace::gh_pop_get_indices(x, node_path)
exprs[ind, expr_nms[i]] <- 0
}
exprs <- exprs[ rowSums(exprs) > 0, , drop=FALSE]
return(exprs)
})
if (!all(names(counts) == names(intensities))) {
stop("Internal Error: expected `!all(names(counts) == names(intensities))` to be TRUE.", call.=FALSE)
}
message("Done!")
return( list(
data=intensities,
counts=counts
) )
}
.needsFlowWorkspace()
}