... | ... |
@@ -170,6 +170,8 @@ importFrom(S4Vectors,do.call) |
170 | 170 |
importFrom(S4Vectors,mcols) |
171 | 171 |
importFrom(dplyr,mutate) |
172 | 172 |
importFrom(dplyr,n) |
173 |
+importFrom(genefilter,rowFtests) |
|
174 |
+importFrom(genefilter,rowttests) |
|
173 | 175 |
importFrom(randomForest,importance) |
174 | 176 |
importFrom(randomForest,varUsed) |
175 | 177 |
importFrom(rlang,sym) |
... | ... |
@@ -57,6 +57,7 @@ |
57 | 57 |
#' there are two classes.} |
58 | 58 |
#' \item{\code{"AUC"}: Area Under the Curve. An area ranging from 0 to 1, under the ROC.} |
59 | 59 |
#' \item{\code{"C-index"}: For survival data, the concordance index, for models which produce risk scores. Ranges from 0 to 1.} |
60 |
+#' \item{\code{"Sample C-index"}: Per-individual C-index.} |
|
60 | 61 |
#' } |
61 | 62 |
#' |
62 | 63 |
#' @param actualOutcomes A factor vector or survival information specifying each sample's known outcome. |
... | ... |
@@ -128,7 +129,7 @@ setMethod("calcCVperformance", "ClassifyResult", |
128 | 129 |
"Micro Precision", "Micro Recall", |
129 | 130 |
"Micro F1", "Macro Precision", |
130 | 131 |
"Macro Recall", "Macro F1", "Matthews Correlation Coefficient", "AUC", |
131 |
- "C-index")) |
|
132 |
+ "C-index", "Sample C-index")) |
|
132 | 133 |
{ |
133 | 134 |
performanceType <- match.arg(performanceType) |
134 | 135 |
actualOutcomes <- actualOutcomes(result) # Extract the known outcomes of all samples. |
... | ... |
@@ -143,7 +144,7 @@ setMethod("calcCVperformance", "ClassifyResult", |
143 | 144 |
} |
144 | 145 |
|
145 | 146 |
### Performance for survival data |
146 |
- if(performanceType == "C-index") { |
|
147 |
+ if(performanceType %in% c("C-index", "Sample C-index")) { |
|
147 | 148 |
samples <- factor(result@predictions[, "sample"], levels = sampleNames(result)) |
148 | 149 |
performance <- .calcPerformance(actualOutcomes = actualOutcomes[match(result@predictions[, "sample"], sampleNames(result))], |
149 | 150 |
predictedOutcomes = result@predictions[, "risk"], |
... | ... |
@@ -180,30 +181,63 @@ setMethod("calcCVperformance", "ClassifyResult", |
180 | 181 |
{ |
181 | 182 |
if(performanceType %in% c("Sample Error", "Sample Accuracy")) |
182 | 183 |
{ |
183 |
- resultTable <- data.frame(sample = samples, |
|
184 |
- actual = actualOutcomes, |
|
185 |
- predicted = predictedOutcomes) |
|
186 |
- allIDs <- levels(resultTable[, "sample"]) |
|
187 | 184 |
|
188 |
- sampleMetricValues <- sapply(allIDs, function(sampleID) |
|
185 |
+ sampleMetricValues <- sapply(levels(samples), function(sampleID) |
|
189 | 186 |
{ |
190 |
- sampleResult <- subset(resultTable, sample == sampleID) |
|
191 |
- if(nrow(sampleResult) == 0) |
|
192 |
- return(NA) |
|
187 |
+ consider <- which(samples == sampleID) |
|
193 | 188 |
if(performanceType == "Sample Error") |
194 |
- sum(as.character(sampleResult[, "predicted"]) != as.character(sampleResult[, "actual"])) |
|
189 |
+ sum(predictedOutcomes[consider] != as.character(actualOutcomes[consider])) |
|
195 | 190 |
else |
196 |
- sum(as.character(sampleResult[, "predicted"]) == as.character(sampleResult[, "actual"])) |
|
191 |
+ sum(predictedOutcomes[consider] == as.character(actualOutcomes[consider])) |
|
197 | 192 |
}) |
198 |
- performanceValues <- as.numeric(sampleMetricValues / table(factor(resultTable[, "sample"], levels = allIDs))) |
|
199 |
- names(performanceValues) <- allIDs |
|
193 |
+ performanceValues <- as.numeric(sampleMetricValues / table(samples)) |
|
194 |
+ names(performanceValues) <- levels(samples) |
|
200 | 195 |
return(list(name = performanceType, values = performanceValues)) |
201 | 196 |
} |
202 |
- |
|
197 |
+ |
|
203 | 198 |
if(!is.null(grouping)) |
204 | 199 |
{ |
205 | 200 |
actualOutcomes <- split(actualOutcomes, grouping) |
206 | 201 |
predictedOutcomes <- split(predictedOutcomes, grouping) |
202 |
+ allSamples <- levels(samples) |
|
203 |
+ samples <- split(samples, grouping) |
|
204 |
+ } |
|
205 |
+ |
|
206 |
+ if(performanceType == "Sample C-index") |
|
207 |
+ { |
|
208 |
+ performanceValues <- do.call(rbind, mapply(function(iterationSurv, iterationPredictions, iterationSamples) |
|
209 |
+ { |
|
210 |
+ do.call(rbind, lapply(allSamples, function(sampleID) |
|
211 |
+ { |
|
212 |
+ sampleIndex <- which(iterationSamples == sampleID) |
|
213 |
+ otherIndices <- setdiff(seq_along(allSamples), sampleIndex) |
|
214 |
+ concordants <- discordants <- 0 |
|
215 |
+ iterationSurv <- as.matrix(iterationSurv) |
|
216 |
+ for(compareIndex in otherIndices) |
|
217 |
+ { |
|
218 |
+ if(iterationSurv[sampleIndex, "time"] < iterationSurv[compareIndex, "time"] && iterationPredictions[sampleIndex] > iterationPredictions[compareIndex] && iterationSurv[sampleIndex, "status"] == 1) |
|
219 |
+ { # Reference sample has shorter time, it is not censored, greater risk. Concordant. |
|
220 |
+ concordants <- concordants + 1 |
|
221 |
+ } else if(iterationSurv[sampleIndex, "time"] > iterationSurv[compareIndex, "time"] && iterationPredictions[sampleIndex] < iterationPredictions[compareIndex] && iterationSurv[compareIndex, "status"] == 1) |
|
222 |
+ { # Reference sample has longer time, the comparison sample is not censored, lower risk. Concordant. |
|
223 |
+ concordants <- concordants + 1 |
|
224 |
+ } else if(iterationSurv[sampleIndex, "time"] < iterationSurv[compareIndex, "time"] && iterationPredictions[sampleIndex] < iterationPredictions[compareIndex] && iterationSurv[sampleIndex, "status"] == 1) |
|
225 |
+ { # Reference sample has shorter time, it is not censored, but lower risk than comparison sample. Discordant. |
|
226 |
+ discordants <- discordants + 1 |
|
227 |
+ } else if(iterationSurv[sampleIndex, "time"] > iterationSurv[compareIndex, "time"] && iterationPredictions[sampleIndex] > iterationPredictions[compareIndex] && iterationSurv[compareIndex, "status"] == 1) |
|
228 |
+ { # Reference sample has longer time, the comparison sample is not censored, but higher risk than comparison sample. Discordant. |
|
229 |
+ discordants <- discordants + 1 |
|
230 |
+ } |
|
231 |
+ } |
|
232 |
+ data.frame(sample = sampleID, concordant = concordants, discordant = discordants) |
|
233 |
+ })) |
|
234 |
+ }, actualOutcomes, predictedOutcomes, samples, SIMPLIFY = FALSE)) |
|
235 |
+ |
|
236 |
+ sampleValues <- by(performanceValues[, c("concordant", "discordant")], performanceValues[, "sample"], colSums) |
|
237 |
+ Cindex <- round(sapply(sampleValues, '[', 1) / (sapply(sampleValues, '[', 1) + sapply(sampleValues, '[', 2)), 2) |
|
238 |
+ names(Cindex) <- names(sampleValues) |
|
239 |
+ Cindex <- Cindex[!is.nan(Cindex)] # The individual with the smallest censored time will always have no useful inequalities. |
|
240 |
+ return(list(name = performanceType, values = Cindex)) |
|
207 | 241 |
} |
208 | 242 |
|
209 | 243 |
if(!is(actualOutcomes, "list")) actualOutcomes <- list(actualOutcomes) |
... | ... |
@@ -40,25 +40,19 @@ |
40 | 40 |
outcomes <- factor(outcomes) |
41 | 41 |
|
42 | 42 |
# Outcomes has columns, so it is tabular. It is inferred to represent survival data. |
43 |
- if(!is.null(ncol(outcomes))) |
|
43 |
+ if(!is.null(ncol(outcomes)) && ncol(outcomes) %in% 2:3) |
|
44 | 44 |
{ |
45 | 45 |
# Assume that event status is in the last column (second for two columns, third for three columns) |
46 |
- if(!is.null(dim(outcomes)) && length(dim(data)) == 2 && ncol(outcomes) %in% 2:3) |
|
47 |
- { |
|
48 |
- numberEventTypes <- length(unique(outcomes[, ncol(outcomes)])) |
|
49 |
- # Could be one or two kinds of events. All events might be uncensored or censored |
|
50 |
- # in a rare but not impossible scenario. |
|
51 |
- if(numberEventTypes > 2) |
|
52 |
- stop("Number of distinct event types in the last column exceeds 2 but must be 1 or 2.") |
|
53 |
- |
|
46 |
+ numberEventTypes <- length(unique(outcomes[, ncol(outcomes)])) |
|
47 |
+ # Could be one or two kinds of events. All events might be uncensored or censored |
|
48 |
+ # in a rare but not impossible scenario. |
|
49 |
+ if(numberEventTypes > 2) |
|
50 |
+ stop("Number of distinct event types in the last column exceeds 2 but must be 1 or 2.") |
|
54 | 51 |
|
55 |
- if(ncol(outcomes) == 2) # Typical, right-censored survival data. |
|
56 |
- { |
|
57 |
- outcomes <- survival::Surv(outcomes[, 1], outcomes[, 2]) |
|
58 |
- } else { # Three columns. Therefore, counting process data. |
|
59 |
- outcomes <- survival::Surv(outcomes[, 1], outcomes[, 2], outcomes[, 3]) |
|
60 |
- } |
|
61 |
- } |
|
52 |
+ if(ncol(outcomes) == 2) # Typical, right-censored survival data. |
|
53 |
+ outcomes <- survival::Surv(outcomes[, 1], outcomes[, 2]) |
|
54 |
+ else # Three columns. Therefore, counting process data. |
|
55 |
+ outcomes <- survival::Surv(outcomes[, 1], outcomes[, 2], outcomes[, 3]) |
|
62 | 56 |
} |
63 | 57 |
|
64 | 58 |
if(!is.null(restrict)) |
... | ... |
@@ -29,7 +29,7 @@ Pair of Factor Vectors} |
29 | 29 |
performanceType = c("Balanced Error", "Balanced Accuracy", "Error", "Accuracy", |
30 | 30 |
"Sample Error", "Sample Accuracy", "Micro Precision", "Micro Recall", "Micro F1", |
31 | 31 |
"Macro Precision", "Macro Recall", "Macro F1", "Matthews Correlation Coefficient", |
32 |
- "AUC", "C-index") |
|
32 |
+ "AUC", "C-index", "Sample C-index") |
|
33 | 33 |
) |
34 | 34 |
} |
35 | 35 |
\arguments{ |
... | ... |
@@ -64,6 +64,7 @@ between -1 and 1 indicating how concordant the predicted classes are to the actu |
64 | 64 |
there are two classes.} |
65 | 65 |
\item{\code{"AUC"}: Area Under the Curve. An area ranging from 0 to 1, under the ROC.} |
66 | 66 |
\item{\code{"C-index"}: For survival data, the concordance index, for models which produce risk scores. Ranges from 0 to 1.} |
67 |
+\item{\code{"Sample C-index"}: Per-individual C-index.} |
|
67 | 68 |
}} |
68 | 69 |
|
69 | 70 |
\item{result}{An object of class \code{\link{ClassifyResult}}.} |